{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999605610843073, "eval_steps": 500, "global_step": 9508, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00010517044184731881, "grad_norm": 59.92323260669162, "learning_rate": 5.0000000000000004e-08, "loss": 2.6475, "step": 1 }, { "epoch": 0.00021034088369463762, "grad_norm": 63.10940340752038, "learning_rate": 1.0000000000000001e-07, "loss": 2.7602, "step": 2 }, { "epoch": 0.00031551132554195644, "grad_norm": 57.05123636971969, "learning_rate": 1.5000000000000002e-07, "loss": 2.6286, "step": 3 }, { "epoch": 0.00042068176738927523, "grad_norm": 58.99194594170873, "learning_rate": 2.0000000000000002e-07, "loss": 2.6448, "step": 4 }, { "epoch": 0.000525852209236594, "grad_norm": 60.12716290697537, "learning_rate": 2.5000000000000004e-07, "loss": 2.7222, "step": 5 }, { "epoch": 0.0006310226510839129, "grad_norm": 65.69643117171603, "learning_rate": 3.0000000000000004e-07, "loss": 2.7782, "step": 6 }, { "epoch": 0.0007361930929312317, "grad_norm": 54.90002763855268, "learning_rate": 3.5000000000000004e-07, "loss": 2.5453, "step": 7 }, { "epoch": 0.0008413635347785505, "grad_norm": 61.987907330986985, "learning_rate": 4.0000000000000003e-07, "loss": 2.6941, "step": 8 }, { "epoch": 0.0009465339766258693, "grad_norm": 56.061715666844634, "learning_rate": 4.5000000000000003e-07, "loss": 2.6707, "step": 9 }, { "epoch": 0.001051704418473188, "grad_norm": 63.95430356271557, "learning_rate": 5.000000000000001e-07, "loss": 2.6813, "step": 10 }, { "epoch": 0.001156874860320507, "grad_norm": 57.584908322695675, "learning_rate": 5.5e-07, "loss": 2.4722, "step": 11 }, { "epoch": 0.0012620453021678258, "grad_norm": 54.9509876269393, "learning_rate": 6.000000000000001e-07, "loss": 2.4741, "step": 12 }, { "epoch": 0.0013672157440151446, "grad_norm": 56.36235985124101, "learning_rate": 6.5e-07, "loss": 2.638, "step": 13 }, { "epoch": 0.0014723861858624634, "grad_norm": 46.019883313287565, "learning_rate": 7.000000000000001e-07, "loss": 2.2947, "step": 14 }, { "epoch": 0.001577556627709782, "grad_norm": 39.36043169435088, "learning_rate": 7.5e-07, "loss": 1.9651, "step": 15 }, { "epoch": 0.001682727069557101, "grad_norm": 44.53064805034052, "learning_rate": 8.000000000000001e-07, "loss": 2.1088, "step": 16 }, { "epoch": 0.0017878975114044198, "grad_norm": 40.77122432291506, "learning_rate": 8.500000000000001e-07, "loss": 1.9506, "step": 17 }, { "epoch": 0.0018930679532517386, "grad_norm": 33.413887606907345, "learning_rate": 9.000000000000001e-07, "loss": 1.8277, "step": 18 }, { "epoch": 0.0019982383950990575, "grad_norm": 35.333835421159975, "learning_rate": 9.500000000000001e-07, "loss": 1.7883, "step": 19 }, { "epoch": 0.002103408836946376, "grad_norm": 19.922751692123384, "learning_rate": 1.0000000000000002e-06, "loss": 1.4946, "step": 20 }, { "epoch": 0.002208579278793695, "grad_norm": 18.4584352012479, "learning_rate": 1.0500000000000001e-06, "loss": 1.4215, "step": 21 }, { "epoch": 0.002313749720641014, "grad_norm": 19.920485594493087, "learning_rate": 1.1e-06, "loss": 1.4416, "step": 22 }, { "epoch": 0.002418920162488333, "grad_norm": 16.206117061616325, "learning_rate": 1.1500000000000002e-06, "loss": 1.408, "step": 23 }, { "epoch": 0.0025240906043356515, "grad_norm": 14.345274194736033, "learning_rate": 1.2000000000000002e-06, "loss": 1.3782, "step": 24 }, { "epoch": 0.00262926104618297, "grad_norm": 14.507180671796032, "learning_rate": 1.25e-06, "loss": 1.3618, "step": 25 }, { "epoch": 0.002734431488030289, "grad_norm": 9.60203081004738, "learning_rate": 1.3e-06, "loss": 1.2395, "step": 26 }, { "epoch": 0.002839601929877608, "grad_norm": 8.95961863190965, "learning_rate": 1.3500000000000002e-06, "loss": 1.1838, "step": 27 }, { "epoch": 0.002944772371724927, "grad_norm": 7.743322908623659, "learning_rate": 1.4000000000000001e-06, "loss": 1.149, "step": 28 }, { "epoch": 0.0030499428135722455, "grad_norm": 8.000577585626917, "learning_rate": 1.45e-06, "loss": 1.1665, "step": 29 }, { "epoch": 0.003155113255419564, "grad_norm": 7.385939102175253, "learning_rate": 1.5e-06, "loss": 1.1347, "step": 30 }, { "epoch": 0.0032602836972668832, "grad_norm": 7.716119057096808, "learning_rate": 1.5500000000000002e-06, "loss": 1.1274, "step": 31 }, { "epoch": 0.003365454139114202, "grad_norm": 7.434862596192311, "learning_rate": 1.6000000000000001e-06, "loss": 1.1286, "step": 32 }, { "epoch": 0.003470624580961521, "grad_norm": 7.734788123462178, "learning_rate": 1.6500000000000003e-06, "loss": 1.1539, "step": 33 }, { "epoch": 0.0035757950228088396, "grad_norm": 5.966202361098791, "learning_rate": 1.7000000000000002e-06, "loss": 1.1445, "step": 34 }, { "epoch": 0.003680965464656158, "grad_norm": 5.487171731422804, "learning_rate": 1.75e-06, "loss": 1.1067, "step": 35 }, { "epoch": 0.0037861359065034773, "grad_norm": 4.431591345917228, "learning_rate": 1.8000000000000001e-06, "loss": 1.0843, "step": 36 }, { "epoch": 0.003891306348350796, "grad_norm": 6.362482878285757, "learning_rate": 1.85e-06, "loss": 1.1035, "step": 37 }, { "epoch": 0.003996476790198115, "grad_norm": 5.97952819240149, "learning_rate": 1.9000000000000002e-06, "loss": 1.1151, "step": 38 }, { "epoch": 0.004101647232045434, "grad_norm": 7.236076440399765, "learning_rate": 1.9500000000000004e-06, "loss": 1.111, "step": 39 }, { "epoch": 0.004206817673892752, "grad_norm": 7.231000749346946, "learning_rate": 2.0000000000000003e-06, "loss": 1.0934, "step": 40 }, { "epoch": 0.004311988115740071, "grad_norm": 7.458173023837684, "learning_rate": 2.05e-06, "loss": 1.1124, "step": 41 }, { "epoch": 0.00441715855758739, "grad_norm": 7.048786741399918, "learning_rate": 2.1000000000000002e-06, "loss": 1.0972, "step": 42 }, { "epoch": 0.0045223289994347085, "grad_norm": 5.904771802109113, "learning_rate": 2.15e-06, "loss": 1.1162, "step": 43 }, { "epoch": 0.004627499441282028, "grad_norm": 8.892328258421735, "learning_rate": 2.2e-06, "loss": 1.1236, "step": 44 }, { "epoch": 0.004732669883129347, "grad_norm": 5.984008442771571, "learning_rate": 2.25e-06, "loss": 1.1132, "step": 45 }, { "epoch": 0.004837840324976666, "grad_norm": 4.433024003668077, "learning_rate": 2.3000000000000004e-06, "loss": 1.0935, "step": 46 }, { "epoch": 0.004943010766823984, "grad_norm": 6.204751514592563, "learning_rate": 2.35e-06, "loss": 1.0928, "step": 47 }, { "epoch": 0.005048181208671303, "grad_norm": 5.759130507078808, "learning_rate": 2.4000000000000003e-06, "loss": 1.129, "step": 48 }, { "epoch": 0.005153351650518622, "grad_norm": 7.090689618351067, "learning_rate": 2.4500000000000003e-06, "loss": 1.1351, "step": 49 }, { "epoch": 0.00525852209236594, "grad_norm": 5.9876797839663745, "learning_rate": 2.5e-06, "loss": 1.0683, "step": 50 }, { "epoch": 0.005363692534213259, "grad_norm": 7.595469488307658, "learning_rate": 2.55e-06, "loss": 1.0764, "step": 51 }, { "epoch": 0.005468862976060578, "grad_norm": 5.978805506171299, "learning_rate": 2.6e-06, "loss": 1.1101, "step": 52 }, { "epoch": 0.005574033417907897, "grad_norm": 5.389419934729486, "learning_rate": 2.6500000000000005e-06, "loss": 1.1183, "step": 53 }, { "epoch": 0.005679203859755216, "grad_norm": 4.867913228404486, "learning_rate": 2.7000000000000004e-06, "loss": 1.1145, "step": 54 }, { "epoch": 0.005784374301602535, "grad_norm": 8.051057067872096, "learning_rate": 2.7500000000000004e-06, "loss": 1.0788, "step": 55 }, { "epoch": 0.005889544743449854, "grad_norm": 3.529571123644078, "learning_rate": 2.8000000000000003e-06, "loss": 1.0933, "step": 56 }, { "epoch": 0.005994715185297172, "grad_norm": 5.92833470887133, "learning_rate": 2.85e-06, "loss": 1.0814, "step": 57 }, { "epoch": 0.006099885627144491, "grad_norm": 4.04957198212851, "learning_rate": 2.9e-06, "loss": 1.0894, "step": 58 }, { "epoch": 0.00620505606899181, "grad_norm": 5.75800280839568, "learning_rate": 2.95e-06, "loss": 1.1081, "step": 59 }, { "epoch": 0.006310226510839128, "grad_norm": 7.099778116155824, "learning_rate": 3e-06, "loss": 1.0787, "step": 60 }, { "epoch": 0.006415396952686447, "grad_norm": 6.963451888461215, "learning_rate": 3.05e-06, "loss": 1.0834, "step": 61 }, { "epoch": 0.0065205673945337665, "grad_norm": 6.132860287326327, "learning_rate": 3.1000000000000004e-06, "loss": 1.1122, "step": 62 }, { "epoch": 0.0066257378363810855, "grad_norm": 5.193732285933575, "learning_rate": 3.1500000000000003e-06, "loss": 1.0615, "step": 63 }, { "epoch": 0.006730908278228404, "grad_norm": 8.634476668713994, "learning_rate": 3.2000000000000003e-06, "loss": 1.1458, "step": 64 }, { "epoch": 0.006836078720075723, "grad_norm": 6.06480211066226, "learning_rate": 3.2500000000000002e-06, "loss": 1.0445, "step": 65 }, { "epoch": 0.006941249161923042, "grad_norm": 6.9138684741530305, "learning_rate": 3.3000000000000006e-06, "loss": 1.1032, "step": 66 }, { "epoch": 0.00704641960377036, "grad_norm": 5.982376980555477, "learning_rate": 3.3500000000000005e-06, "loss": 1.0606, "step": 67 }, { "epoch": 0.007151590045617679, "grad_norm": 5.730675646561972, "learning_rate": 3.4000000000000005e-06, "loss": 1.0767, "step": 68 }, { "epoch": 0.007256760487464998, "grad_norm": 4.46760589551449, "learning_rate": 3.45e-06, "loss": 1.054, "step": 69 }, { "epoch": 0.007361930929312316, "grad_norm": 6.883313991446145, "learning_rate": 3.5e-06, "loss": 1.06, "step": 70 }, { "epoch": 0.0074671013711596354, "grad_norm": 6.893517348928994, "learning_rate": 3.5500000000000003e-06, "loss": 1.076, "step": 71 }, { "epoch": 0.0075722718130069545, "grad_norm": 6.622296647960904, "learning_rate": 3.6000000000000003e-06, "loss": 1.0866, "step": 72 }, { "epoch": 0.007677442254854274, "grad_norm": 7.375737365019887, "learning_rate": 3.65e-06, "loss": 1.1335, "step": 73 }, { "epoch": 0.007782612696701592, "grad_norm": 6.842915774489913, "learning_rate": 3.7e-06, "loss": 1.0819, "step": 74 }, { "epoch": 0.00788778313854891, "grad_norm": 4.643560225418101, "learning_rate": 3.7500000000000005e-06, "loss": 1.0627, "step": 75 }, { "epoch": 0.00799295358039623, "grad_norm": 6.239260554512991, "learning_rate": 3.8000000000000005e-06, "loss": 1.0939, "step": 76 }, { "epoch": 0.008098124022243549, "grad_norm": 6.739902596879763, "learning_rate": 3.85e-06, "loss": 1.0527, "step": 77 }, { "epoch": 0.008203294464090868, "grad_norm": 4.805461911549342, "learning_rate": 3.900000000000001e-06, "loss": 1.0995, "step": 78 }, { "epoch": 0.008308464905938185, "grad_norm": 4.600713739789344, "learning_rate": 3.95e-06, "loss": 1.0967, "step": 79 }, { "epoch": 0.008413635347785504, "grad_norm": 5.785549449267131, "learning_rate": 4.000000000000001e-06, "loss": 1.079, "step": 80 }, { "epoch": 0.008518805789632823, "grad_norm": 5.974468879016202, "learning_rate": 4.05e-06, "loss": 1.0991, "step": 81 }, { "epoch": 0.008623976231480143, "grad_norm": 6.971544736299156, "learning_rate": 4.1e-06, "loss": 1.0889, "step": 82 }, { "epoch": 0.008729146673327462, "grad_norm": 5.443939888698424, "learning_rate": 4.15e-06, "loss": 1.0758, "step": 83 }, { "epoch": 0.00883431711517478, "grad_norm": 4.987130690796422, "learning_rate": 4.2000000000000004e-06, "loss": 1.1191, "step": 84 }, { "epoch": 0.0089394875570221, "grad_norm": 4.91454726971417, "learning_rate": 4.25e-06, "loss": 1.0437, "step": 85 }, { "epoch": 0.009044657998869417, "grad_norm": 4.1569197342316375, "learning_rate": 4.3e-06, "loss": 1.0681, "step": 86 }, { "epoch": 0.009149828440716736, "grad_norm": 6.3276090379354795, "learning_rate": 4.350000000000001e-06, "loss": 1.0924, "step": 87 }, { "epoch": 0.009254998882564055, "grad_norm": 7.136019535146, "learning_rate": 4.4e-06, "loss": 1.062, "step": 88 }, { "epoch": 0.009360169324411374, "grad_norm": 7.000501847709018, "learning_rate": 4.450000000000001e-06, "loss": 1.0825, "step": 89 }, { "epoch": 0.009465339766258693, "grad_norm": 8.348084064157154, "learning_rate": 4.5e-06, "loss": 1.098, "step": 90 }, { "epoch": 0.009570510208106012, "grad_norm": 6.3365124926671585, "learning_rate": 4.5500000000000005e-06, "loss": 1.0722, "step": 91 }, { "epoch": 0.009675680649953331, "grad_norm": 6.850521475132954, "learning_rate": 4.600000000000001e-06, "loss": 1.0531, "step": 92 }, { "epoch": 0.009780851091800649, "grad_norm": 3.534183914997874, "learning_rate": 4.65e-06, "loss": 1.0619, "step": 93 }, { "epoch": 0.009886021533647968, "grad_norm": 5.513355921851675, "learning_rate": 4.7e-06, "loss": 1.0853, "step": 94 }, { "epoch": 0.009991191975495287, "grad_norm": 4.3299555896256745, "learning_rate": 4.75e-06, "loss": 1.0755, "step": 95 }, { "epoch": 0.010096362417342606, "grad_norm": 6.325453913242973, "learning_rate": 4.800000000000001e-06, "loss": 1.0889, "step": 96 }, { "epoch": 0.010201532859189925, "grad_norm": 4.789718329298915, "learning_rate": 4.85e-06, "loss": 1.0862, "step": 97 }, { "epoch": 0.010306703301037244, "grad_norm": 3.8761128602402297, "learning_rate": 4.9000000000000005e-06, "loss": 1.0612, "step": 98 }, { "epoch": 0.010411873742884561, "grad_norm": 5.009647930586521, "learning_rate": 4.95e-06, "loss": 1.0595, "step": 99 }, { "epoch": 0.01051704418473188, "grad_norm": 4.418331270407165, "learning_rate": 5e-06, "loss": 1.0824, "step": 100 }, { "epoch": 0.0106222146265792, "grad_norm": 3.9028162186690802, "learning_rate": 4.99999986061529e-06, "loss": 1.0651, "step": 101 }, { "epoch": 0.010727385068426519, "grad_norm": 6.8929955441897075, "learning_rate": 4.999999442461175e-06, "loss": 1.0997, "step": 102 }, { "epoch": 0.010832555510273838, "grad_norm": 4.7248893287683185, "learning_rate": 4.999998745537701e-06, "loss": 1.0686, "step": 103 }, { "epoch": 0.010937725952121157, "grad_norm": 5.812256570251753, "learning_rate": 4.999997769844946e-06, "loss": 1.0618, "step": 104 }, { "epoch": 0.011042896393968476, "grad_norm": 2.708772175167679, "learning_rate": 4.999996515383019e-06, "loss": 1.0804, "step": 105 }, { "epoch": 0.011148066835815793, "grad_norm": 6.2527627142277815, "learning_rate": 4.9999949821520596e-06, "loss": 1.1207, "step": 106 }, { "epoch": 0.011253237277663112, "grad_norm": 5.5457481955779455, "learning_rate": 4.99999317015224e-06, "loss": 1.091, "step": 107 }, { "epoch": 0.011358407719510431, "grad_norm": 5.761317582160581, "learning_rate": 4.999991079383761e-06, "loss": 1.095, "step": 108 }, { "epoch": 0.01146357816135775, "grad_norm": 4.220512736500614, "learning_rate": 4.999988709846856e-06, "loss": 1.053, "step": 109 }, { "epoch": 0.01156874860320507, "grad_norm": 6.66166762678115, "learning_rate": 4.999986061541789e-06, "loss": 1.0754, "step": 110 }, { "epoch": 0.011673919045052389, "grad_norm": 3.35655019228154, "learning_rate": 4.999983134468855e-06, "loss": 1.0594, "step": 111 }, { "epoch": 0.011779089486899708, "grad_norm": 4.939800790239611, "learning_rate": 4.999979928628382e-06, "loss": 1.0732, "step": 112 }, { "epoch": 0.011884259928747025, "grad_norm": 5.566242564632212, "learning_rate": 4.999976444020727e-06, "loss": 1.1101, "step": 113 }, { "epoch": 0.011989430370594344, "grad_norm": 4.129321853432761, "learning_rate": 4.999972680646276e-06, "loss": 1.0468, "step": 114 }, { "epoch": 0.012094600812441663, "grad_norm": 5.459895093767263, "learning_rate": 4.999968638505452e-06, "loss": 1.1098, "step": 115 }, { "epoch": 0.012199771254288982, "grad_norm": 4.268158570515893, "learning_rate": 4.9999643175987045e-06, "loss": 1.0813, "step": 116 }, { "epoch": 0.012304941696136301, "grad_norm": 5.11466661467585, "learning_rate": 4.999959717926515e-06, "loss": 1.0552, "step": 117 }, { "epoch": 0.01241011213798362, "grad_norm": 3.4664159857194274, "learning_rate": 4.999954839489396e-06, "loss": 1.0541, "step": 118 }, { "epoch": 0.01251528257983094, "grad_norm": 3.9511165586614445, "learning_rate": 4.999949682287893e-06, "loss": 1.058, "step": 119 }, { "epoch": 0.012620453021678257, "grad_norm": 3.614330210857767, "learning_rate": 4.999944246322579e-06, "loss": 1.0401, "step": 120 }, { "epoch": 0.012725623463525576, "grad_norm": 5.105632604786978, "learning_rate": 4.999938531594062e-06, "loss": 1.0793, "step": 121 }, { "epoch": 0.012830793905372895, "grad_norm": 4.957380407595618, "learning_rate": 4.999932538102978e-06, "loss": 1.0787, "step": 122 }, { "epoch": 0.012935964347220214, "grad_norm": 4.643940273061057, "learning_rate": 4.999926265849997e-06, "loss": 1.0568, "step": 123 }, { "epoch": 0.013041134789067533, "grad_norm": 4.627911589515215, "learning_rate": 4.999919714835816e-06, "loss": 1.1011, "step": 124 }, { "epoch": 0.013146305230914852, "grad_norm": 3.102924045859167, "learning_rate": 4.999912885061167e-06, "loss": 1.0854, "step": 125 }, { "epoch": 0.013251475672762171, "grad_norm": 4.92620716306597, "learning_rate": 4.999905776526811e-06, "loss": 1.106, "step": 126 }, { "epoch": 0.013356646114609488, "grad_norm": 3.527926170990561, "learning_rate": 4.9998983892335416e-06, "loss": 1.0882, "step": 127 }, { "epoch": 0.013461816556456807, "grad_norm": 4.005511364388478, "learning_rate": 4.999890723182183e-06, "loss": 1.0608, "step": 128 }, { "epoch": 0.013566986998304126, "grad_norm": 3.6947799424472887, "learning_rate": 4.9998827783735875e-06, "loss": 1.082, "step": 129 }, { "epoch": 0.013672157440151446, "grad_norm": 4.8537336025866225, "learning_rate": 4.999874554808643e-06, "loss": 1.0154, "step": 130 }, { "epoch": 0.013777327881998765, "grad_norm": 5.224073594225298, "learning_rate": 4.999866052488265e-06, "loss": 1.0672, "step": 131 }, { "epoch": 0.013882498323846084, "grad_norm": 6.699186460471001, "learning_rate": 4.999857271413403e-06, "loss": 1.1189, "step": 132 }, { "epoch": 0.013987668765693401, "grad_norm": 3.974289494553639, "learning_rate": 4.999848211585035e-06, "loss": 1.0751, "step": 133 }, { "epoch": 0.01409283920754072, "grad_norm": 4.528291252084844, "learning_rate": 4.999838873004173e-06, "loss": 1.0478, "step": 134 }, { "epoch": 0.01419800964938804, "grad_norm": 4.971558341946056, "learning_rate": 4.999829255671857e-06, "loss": 1.0358, "step": 135 }, { "epoch": 0.014303180091235358, "grad_norm": 4.085942947769176, "learning_rate": 4.99981935958916e-06, "loss": 1.0944, "step": 136 }, { "epoch": 0.014408350533082677, "grad_norm": 6.664417732538393, "learning_rate": 4.999809184757185e-06, "loss": 1.0917, "step": 137 }, { "epoch": 0.014513520974929996, "grad_norm": 4.806426966250999, "learning_rate": 4.999798731177066e-06, "loss": 1.1051, "step": 138 }, { "epoch": 0.014618691416777315, "grad_norm": 4.561326503607282, "learning_rate": 4.9997879988499695e-06, "loss": 1.0322, "step": 139 }, { "epoch": 0.014723861858624633, "grad_norm": 4.264177348921955, "learning_rate": 4.999776987777093e-06, "loss": 1.0489, "step": 140 }, { "epoch": 0.014829032300471952, "grad_norm": 4.72227071149687, "learning_rate": 4.9997656979596624e-06, "loss": 1.0356, "step": 141 }, { "epoch": 0.014934202742319271, "grad_norm": 5.240830574426804, "learning_rate": 4.999754129398938e-06, "loss": 1.0517, "step": 142 }, { "epoch": 0.01503937318416659, "grad_norm": 3.7824663189008674, "learning_rate": 4.999742282096209e-06, "loss": 1.0765, "step": 143 }, { "epoch": 0.015144543626013909, "grad_norm": 4.031279478713754, "learning_rate": 4.9997301560527976e-06, "loss": 1.043, "step": 144 }, { "epoch": 0.015249714067861228, "grad_norm": 3.4761815646490244, "learning_rate": 4.999717751270055e-06, "loss": 1.0808, "step": 145 }, { "epoch": 0.015354884509708547, "grad_norm": 3.5505878417744525, "learning_rate": 4.9997050677493646e-06, "loss": 1.0576, "step": 146 }, { "epoch": 0.015460054951555864, "grad_norm": 5.655354096949263, "learning_rate": 4.999692105492141e-06, "loss": 1.1257, "step": 147 }, { "epoch": 0.015565225393403184, "grad_norm": 4.080899903125995, "learning_rate": 4.999678864499828e-06, "loss": 1.1199, "step": 148 }, { "epoch": 0.015670395835250504, "grad_norm": 4.634309433383539, "learning_rate": 4.9996653447739054e-06, "loss": 1.105, "step": 149 }, { "epoch": 0.01577556627709782, "grad_norm": 4.261945588917611, "learning_rate": 4.999651546315877e-06, "loss": 1.0543, "step": 150 }, { "epoch": 0.01588073671894514, "grad_norm": 4.213617826403962, "learning_rate": 4.999637469127284e-06, "loss": 1.0604, "step": 151 }, { "epoch": 0.01598590716079246, "grad_norm": 5.107781163495558, "learning_rate": 4.9996231132096955e-06, "loss": 1.075, "step": 152 }, { "epoch": 0.016091077602639777, "grad_norm": 4.210150800964649, "learning_rate": 4.999608478564713e-06, "loss": 1.0672, "step": 153 }, { "epoch": 0.016196248044487098, "grad_norm": 3.1431426378379084, "learning_rate": 4.999593565193965e-06, "loss": 1.0347, "step": 154 }, { "epoch": 0.016301418486334415, "grad_norm": 4.579222492136831, "learning_rate": 4.999578373099119e-06, "loss": 1.0858, "step": 155 }, { "epoch": 0.016406588928181736, "grad_norm": 5.066019223738902, "learning_rate": 4.999562902281866e-06, "loss": 1.0952, "step": 156 }, { "epoch": 0.016511759370029053, "grad_norm": 2.6553215468038216, "learning_rate": 4.999547152743933e-06, "loss": 1.062, "step": 157 }, { "epoch": 0.01661692981187637, "grad_norm": 2.465224547489308, "learning_rate": 4.999531124487074e-06, "loss": 1.0518, "step": 158 }, { "epoch": 0.01672210025372369, "grad_norm": 3.446424772289735, "learning_rate": 4.999514817513079e-06, "loss": 1.0765, "step": 159 }, { "epoch": 0.01682727069557101, "grad_norm": 5.154039712884113, "learning_rate": 4.999498231823765e-06, "loss": 1.0515, "step": 160 }, { "epoch": 0.01693244113741833, "grad_norm": 3.327200405368395, "learning_rate": 4.9994813674209805e-06, "loss": 1.0734, "step": 161 }, { "epoch": 0.017037611579265647, "grad_norm": 6.642307412221283, "learning_rate": 4.999464224306607e-06, "loss": 1.1043, "step": 162 }, { "epoch": 0.017142782021112968, "grad_norm": 3.530091197441022, "learning_rate": 4.999446802482556e-06, "loss": 1.0833, "step": 163 }, { "epoch": 0.017247952462960285, "grad_norm": 5.657095039085531, "learning_rate": 4.99942910195077e-06, "loss": 1.0632, "step": 164 }, { "epoch": 0.017353122904807602, "grad_norm": 5.574109052251118, "learning_rate": 4.999411122713223e-06, "loss": 1.095, "step": 165 }, { "epoch": 0.017458293346654923, "grad_norm": 5.919741121398457, "learning_rate": 4.99939286477192e-06, "loss": 1.0714, "step": 166 }, { "epoch": 0.01756346378850224, "grad_norm": 5.72438831771799, "learning_rate": 4.999374328128896e-06, "loss": 1.1101, "step": 167 }, { "epoch": 0.01766863423034956, "grad_norm": 3.6834749885484404, "learning_rate": 4.99935551278622e-06, "loss": 1.0671, "step": 168 }, { "epoch": 0.01777380467219688, "grad_norm": 3.573832404245989, "learning_rate": 4.999336418745987e-06, "loss": 1.0852, "step": 169 }, { "epoch": 0.0178789751140442, "grad_norm": 4.735678508134887, "learning_rate": 4.9993170460103295e-06, "loss": 1.0536, "step": 170 }, { "epoch": 0.017984145555891517, "grad_norm": 4.056452634779431, "learning_rate": 4.9992973945814045e-06, "loss": 1.0438, "step": 171 }, { "epoch": 0.018089315997738834, "grad_norm": 4.957195740828829, "learning_rate": 4.999277464461405e-06, "loss": 1.0839, "step": 172 }, { "epoch": 0.018194486439586155, "grad_norm": 3.926000211879302, "learning_rate": 4.9992572556525535e-06, "loss": 1.0788, "step": 173 }, { "epoch": 0.018299656881433472, "grad_norm": 4.037823469074097, "learning_rate": 4.999236768157103e-06, "loss": 1.0648, "step": 174 }, { "epoch": 0.018404827323280793, "grad_norm": 3.5763594650594706, "learning_rate": 4.999216001977338e-06, "loss": 1.0776, "step": 175 }, { "epoch": 0.01850999776512811, "grad_norm": 4.216315800530571, "learning_rate": 4.999194957115574e-06, "loss": 1.0727, "step": 176 }, { "epoch": 0.01861516820697543, "grad_norm": 3.7534868688617897, "learning_rate": 4.999173633574158e-06, "loss": 1.0114, "step": 177 }, { "epoch": 0.01872033864882275, "grad_norm": 3.2854824452012257, "learning_rate": 4.999152031355468e-06, "loss": 1.0757, "step": 178 }, { "epoch": 0.018825509090670066, "grad_norm": 5.452335174224296, "learning_rate": 4.999130150461912e-06, "loss": 1.1222, "step": 179 }, { "epoch": 0.018930679532517387, "grad_norm": 4.533901928234252, "learning_rate": 4.999107990895931e-06, "loss": 1.0774, "step": 180 }, { "epoch": 0.019035849974364704, "grad_norm": 3.9049991918717053, "learning_rate": 4.999085552659995e-06, "loss": 1.0649, "step": 181 }, { "epoch": 0.019141020416212025, "grad_norm": 3.813729543734002, "learning_rate": 4.9990628357566055e-06, "loss": 1.0448, "step": 182 }, { "epoch": 0.019246190858059342, "grad_norm": 3.5500653660890524, "learning_rate": 4.999039840188297e-06, "loss": 1.0776, "step": 183 }, { "epoch": 0.019351361299906663, "grad_norm": 4.714466443116458, "learning_rate": 4.999016565957633e-06, "loss": 1.1458, "step": 184 }, { "epoch": 0.01945653174175398, "grad_norm": 5.858915587801188, "learning_rate": 4.9989930130672085e-06, "loss": 1.067, "step": 185 }, { "epoch": 0.019561702183601298, "grad_norm": 6.034299083940081, "learning_rate": 4.99896918151965e-06, "loss": 1.0984, "step": 186 }, { "epoch": 0.01966687262544862, "grad_norm": 4.074883949501879, "learning_rate": 4.9989450713176156e-06, "loss": 1.0182, "step": 187 }, { "epoch": 0.019772043067295936, "grad_norm": 3.8546892311347984, "learning_rate": 4.998920682463794e-06, "loss": 1.1006, "step": 188 }, { "epoch": 0.019877213509143257, "grad_norm": 2.8323107910480316, "learning_rate": 4.998896014960904e-06, "loss": 1.1025, "step": 189 }, { "epoch": 0.019982383950990574, "grad_norm": 3.2647909530548818, "learning_rate": 4.998871068811695e-06, "loss": 1.0553, "step": 190 }, { "epoch": 0.02008755439283789, "grad_norm": 4.8772055016374924, "learning_rate": 4.99884584401895e-06, "loss": 1.0435, "step": 191 }, { "epoch": 0.020192724834685212, "grad_norm": 4.446998405870374, "learning_rate": 4.998820340585482e-06, "loss": 1.0841, "step": 192 }, { "epoch": 0.02029789527653253, "grad_norm": 6.789642965482605, "learning_rate": 4.998794558514135e-06, "loss": 1.0501, "step": 193 }, { "epoch": 0.02040306571837985, "grad_norm": 5.368972583293381, "learning_rate": 4.998768497807783e-06, "loss": 1.0759, "step": 194 }, { "epoch": 0.020508236160227167, "grad_norm": 4.6463740756090175, "learning_rate": 4.9987421584693316e-06, "loss": 1.0359, "step": 195 }, { "epoch": 0.02061340660207449, "grad_norm": 3.638853744840631, "learning_rate": 4.99871554050172e-06, "loss": 1.043, "step": 196 }, { "epoch": 0.020718577043921806, "grad_norm": 5.963532277665872, "learning_rate": 4.998688643907914e-06, "loss": 1.0821, "step": 197 }, { "epoch": 0.020823747485769123, "grad_norm": 4.211503968938633, "learning_rate": 4.998661468690914e-06, "loss": 1.0341, "step": 198 }, { "epoch": 0.020928917927616444, "grad_norm": 5.9295459087612095, "learning_rate": 4.9986340148537506e-06, "loss": 1.0743, "step": 199 }, { "epoch": 0.02103408836946376, "grad_norm": 3.4504029111986774, "learning_rate": 4.998606282399484e-06, "loss": 1.0975, "step": 200 }, { "epoch": 0.021139258811311082, "grad_norm": 2.7556837965327654, "learning_rate": 4.998578271331207e-06, "loss": 1.1087, "step": 201 }, { "epoch": 0.0212444292531584, "grad_norm": 4.467263377291867, "learning_rate": 4.998549981652043e-06, "loss": 1.0805, "step": 202 }, { "epoch": 0.02134959969500572, "grad_norm": 3.12366809969675, "learning_rate": 4.998521413365147e-06, "loss": 1.1032, "step": 203 }, { "epoch": 0.021454770136853037, "grad_norm": 3.678013910323886, "learning_rate": 4.998492566473704e-06, "loss": 1.0831, "step": 204 }, { "epoch": 0.021559940578700355, "grad_norm": 4.534344934215397, "learning_rate": 4.998463440980931e-06, "loss": 1.0911, "step": 205 }, { "epoch": 0.021665111020547675, "grad_norm": 5.977433768607247, "learning_rate": 4.998434036890075e-06, "loss": 1.0532, "step": 206 }, { "epoch": 0.021770281462394993, "grad_norm": 3.097171042849419, "learning_rate": 4.998404354204416e-06, "loss": 1.0624, "step": 207 }, { "epoch": 0.021875451904242314, "grad_norm": 4.741569481149735, "learning_rate": 4.998374392927262e-06, "loss": 1.0814, "step": 208 }, { "epoch": 0.02198062234608963, "grad_norm": 5.387514870731206, "learning_rate": 4.998344153061957e-06, "loss": 1.0667, "step": 209 }, { "epoch": 0.02208579278793695, "grad_norm": 3.107349100997203, "learning_rate": 4.99831363461187e-06, "loss": 1.0283, "step": 210 }, { "epoch": 0.02219096322978427, "grad_norm": 4.446119580642245, "learning_rate": 4.998282837580405e-06, "loss": 1.0642, "step": 211 }, { "epoch": 0.022296133671631586, "grad_norm": 3.6643593798924377, "learning_rate": 4.998251761970997e-06, "loss": 1.0556, "step": 212 }, { "epoch": 0.022401304113478907, "grad_norm": 3.2516463208152917, "learning_rate": 4.99822040778711e-06, "loss": 1.0386, "step": 213 }, { "epoch": 0.022506474555326225, "grad_norm": 5.733316290126696, "learning_rate": 4.998188775032241e-06, "loss": 1.0906, "step": 214 }, { "epoch": 0.022611644997173545, "grad_norm": 4.91709171945248, "learning_rate": 4.998156863709917e-06, "loss": 1.0903, "step": 215 }, { "epoch": 0.022716815439020863, "grad_norm": 4.657782710443831, "learning_rate": 4.998124673823695e-06, "loss": 1.1172, "step": 216 }, { "epoch": 0.022821985880868183, "grad_norm": 6.169459224100382, "learning_rate": 4.998092205377168e-06, "loss": 1.0814, "step": 217 }, { "epoch": 0.0229271563227155, "grad_norm": 4.948076959697061, "learning_rate": 4.998059458373952e-06, "loss": 1.1041, "step": 218 }, { "epoch": 0.023032326764562818, "grad_norm": 5.515487044434082, "learning_rate": 4.998026432817702e-06, "loss": 1.0771, "step": 219 }, { "epoch": 0.02313749720641014, "grad_norm": 3.6929446615249364, "learning_rate": 4.9979931287121e-06, "loss": 1.0612, "step": 220 }, { "epoch": 0.023242667648257456, "grad_norm": 3.8867790075290976, "learning_rate": 4.9979595460608575e-06, "loss": 1.0381, "step": 221 }, { "epoch": 0.023347838090104777, "grad_norm": 4.9882563091630825, "learning_rate": 4.997925684867721e-06, "loss": 1.096, "step": 222 }, { "epoch": 0.023453008531952094, "grad_norm": 3.8165955011698727, "learning_rate": 4.997891545136467e-06, "loss": 1.04, "step": 223 }, { "epoch": 0.023558178973799415, "grad_norm": 4.840235092033454, "learning_rate": 4.997857126870902e-06, "loss": 1.0896, "step": 224 }, { "epoch": 0.023663349415646732, "grad_norm": 5.334008497621717, "learning_rate": 4.9978224300748625e-06, "loss": 1.1243, "step": 225 }, { "epoch": 0.02376851985749405, "grad_norm": 3.6424315914693337, "learning_rate": 4.997787454752217e-06, "loss": 1.0253, "step": 226 }, { "epoch": 0.02387369029934137, "grad_norm": 3.7986132911207617, "learning_rate": 4.997752200906868e-06, "loss": 1.0196, "step": 227 }, { "epoch": 0.023978860741188688, "grad_norm": 3.5858200402131777, "learning_rate": 4.997716668542746e-06, "loss": 1.0516, "step": 228 }, { "epoch": 0.02408403118303601, "grad_norm": 4.090414058227785, "learning_rate": 4.997680857663812e-06, "loss": 1.0889, "step": 229 }, { "epoch": 0.024189201624883326, "grad_norm": 6.149411432586733, "learning_rate": 4.997644768274059e-06, "loss": 1.0915, "step": 230 }, { "epoch": 0.024294372066730647, "grad_norm": 4.990447662747164, "learning_rate": 4.997608400377513e-06, "loss": 1.1035, "step": 231 }, { "epoch": 0.024399542508577964, "grad_norm": 3.6379678181805626, "learning_rate": 4.997571753978228e-06, "loss": 1.0523, "step": 232 }, { "epoch": 0.02450471295042528, "grad_norm": 4.063194976272717, "learning_rate": 4.99753482908029e-06, "loss": 1.0584, "step": 233 }, { "epoch": 0.024609883392272602, "grad_norm": 4.805240814647609, "learning_rate": 4.997497625687818e-06, "loss": 1.1217, "step": 234 }, { "epoch": 0.02471505383411992, "grad_norm": 4.213633680202407, "learning_rate": 4.997460143804958e-06, "loss": 1.0653, "step": 235 }, { "epoch": 0.02482022427596724, "grad_norm": 4.605436966627413, "learning_rate": 4.997422383435893e-06, "loss": 1.0525, "step": 236 }, { "epoch": 0.024925394717814558, "grad_norm": 3.403523309179731, "learning_rate": 4.99738434458483e-06, "loss": 1.0587, "step": 237 }, { "epoch": 0.02503056515966188, "grad_norm": 3.2122262832368973, "learning_rate": 4.997346027256013e-06, "loss": 1.0795, "step": 238 }, { "epoch": 0.025135735601509196, "grad_norm": 4.571309867532744, "learning_rate": 4.997307431453713e-06, "loss": 1.0673, "step": 239 }, { "epoch": 0.025240906043356513, "grad_norm": 3.5447243507334396, "learning_rate": 4.9972685571822355e-06, "loss": 1.0261, "step": 240 }, { "epoch": 0.025346076485203834, "grad_norm": 3.2377036811282336, "learning_rate": 4.997229404445914e-06, "loss": 1.0424, "step": 241 }, { "epoch": 0.02545124692705115, "grad_norm": 3.6188960442806017, "learning_rate": 4.997189973249115e-06, "loss": 1.0553, "step": 242 }, { "epoch": 0.025556417368898472, "grad_norm": 3.3956125214606336, "learning_rate": 4.997150263596236e-06, "loss": 1.0437, "step": 243 }, { "epoch": 0.02566158781074579, "grad_norm": 4.3265152622652, "learning_rate": 4.997110275491702e-06, "loss": 1.0729, "step": 244 }, { "epoch": 0.02576675825259311, "grad_norm": 4.350664171417144, "learning_rate": 4.997070008939976e-06, "loss": 1.0522, "step": 245 }, { "epoch": 0.025871928694440428, "grad_norm": 3.8316671691883872, "learning_rate": 4.997029463945545e-06, "loss": 1.0685, "step": 246 }, { "epoch": 0.025977099136287745, "grad_norm": 3.5671479642145076, "learning_rate": 4.996988640512931e-06, "loss": 1.066, "step": 247 }, { "epoch": 0.026082269578135066, "grad_norm": 3.947898665647903, "learning_rate": 4.996947538646687e-06, "loss": 1.0524, "step": 248 }, { "epoch": 0.026187440019982383, "grad_norm": 3.499962475992957, "learning_rate": 4.996906158351396e-06, "loss": 1.0395, "step": 249 }, { "epoch": 0.026292610461829704, "grad_norm": 3.653224022870733, "learning_rate": 4.99686449963167e-06, "loss": 1.0245, "step": 250 }, { "epoch": 0.02639778090367702, "grad_norm": 3.2905872706720634, "learning_rate": 4.996822562492157e-06, "loss": 1.1054, "step": 251 }, { "epoch": 0.026502951345524342, "grad_norm": 4.813210566349707, "learning_rate": 4.996780346937532e-06, "loss": 1.1126, "step": 252 }, { "epoch": 0.02660812178737166, "grad_norm": 3.8457085777908926, "learning_rate": 4.996737852972503e-06, "loss": 1.0783, "step": 253 }, { "epoch": 0.026713292229218977, "grad_norm": 3.7993606994618316, "learning_rate": 4.9966950806018075e-06, "loss": 1.0461, "step": 254 }, { "epoch": 0.026818462671066298, "grad_norm": 4.8200166677976375, "learning_rate": 4.996652029830216e-06, "loss": 1.0875, "step": 255 }, { "epoch": 0.026923633112913615, "grad_norm": 4.328119154393376, "learning_rate": 4.996608700662528e-06, "loss": 1.0739, "step": 256 }, { "epoch": 0.027028803554760936, "grad_norm": 3.1342866705412686, "learning_rate": 4.996565093103576e-06, "loss": 1.0948, "step": 257 }, { "epoch": 0.027133973996608253, "grad_norm": 4.961920624574888, "learning_rate": 4.996521207158222e-06, "loss": 1.0635, "step": 258 }, { "epoch": 0.027239144438455574, "grad_norm": 3.360160077037182, "learning_rate": 4.99647704283136e-06, "loss": 1.0652, "step": 259 }, { "epoch": 0.02734431488030289, "grad_norm": 3.5370384271061077, "learning_rate": 4.9964326001279145e-06, "loss": 1.0602, "step": 260 }, { "epoch": 0.02744948532215021, "grad_norm": 3.991312523764047, "learning_rate": 4.996387879052841e-06, "loss": 1.0563, "step": 261 }, { "epoch": 0.02755465576399753, "grad_norm": 3.80460958358854, "learning_rate": 4.996342879611126e-06, "loss": 1.0713, "step": 262 }, { "epoch": 0.027659826205844847, "grad_norm": 4.454127422134502, "learning_rate": 4.996297601807788e-06, "loss": 1.0725, "step": 263 }, { "epoch": 0.027764996647692167, "grad_norm": 4.743749897023404, "learning_rate": 4.996252045647875e-06, "loss": 1.0079, "step": 264 }, { "epoch": 0.027870167089539485, "grad_norm": 4.979818582007352, "learning_rate": 4.9962062111364675e-06, "loss": 1.0908, "step": 265 }, { "epoch": 0.027975337531386802, "grad_norm": 3.8751951820571597, "learning_rate": 4.996160098278676e-06, "loss": 1.0515, "step": 266 }, { "epoch": 0.028080507973234123, "grad_norm": 3.5344802127222783, "learning_rate": 4.996113707079644e-06, "loss": 1.0124, "step": 267 }, { "epoch": 0.02818567841508144, "grad_norm": 4.089090640577849, "learning_rate": 4.996067037544542e-06, "loss": 1.0692, "step": 268 }, { "epoch": 0.02829084885692876, "grad_norm": 3.2448736578587423, "learning_rate": 4.996020089678575e-06, "loss": 1.039, "step": 269 }, { "epoch": 0.02839601929877608, "grad_norm": 4.037941021774882, "learning_rate": 4.995972863486978e-06, "loss": 1.08, "step": 270 }, { "epoch": 0.0285011897406234, "grad_norm": 4.186559650612413, "learning_rate": 4.9959253589750185e-06, "loss": 1.0745, "step": 271 }, { "epoch": 0.028606360182470716, "grad_norm": 2.7388820923632777, "learning_rate": 4.995877576147993e-06, "loss": 1.0651, "step": 272 }, { "epoch": 0.028711530624318034, "grad_norm": 3.5856157401608666, "learning_rate": 4.9958295150112275e-06, "loss": 1.0874, "step": 273 }, { "epoch": 0.028816701066165355, "grad_norm": 3.380461362281521, "learning_rate": 4.995781175570083e-06, "loss": 1.0709, "step": 274 }, { "epoch": 0.028921871508012672, "grad_norm": 3.5335871406927724, "learning_rate": 4.99573255782995e-06, "loss": 1.0515, "step": 275 }, { "epoch": 0.029027041949859993, "grad_norm": 4.463613237000542, "learning_rate": 4.995683661796249e-06, "loss": 1.0656, "step": 276 }, { "epoch": 0.02913221239170731, "grad_norm": 3.4536408983322886, "learning_rate": 4.995634487474433e-06, "loss": 1.0725, "step": 277 }, { "epoch": 0.02923738283355463, "grad_norm": 4.333343256417978, "learning_rate": 4.995585034869984e-06, "loss": 1.0408, "step": 278 }, { "epoch": 0.029342553275401948, "grad_norm": 3.9790917572612847, "learning_rate": 4.995535303988418e-06, "loss": 1.0569, "step": 279 }, { "epoch": 0.029447723717249265, "grad_norm": 3.5455400588602717, "learning_rate": 4.99548529483528e-06, "loss": 1.049, "step": 280 }, { "epoch": 0.029552894159096586, "grad_norm": 2.336572942885481, "learning_rate": 4.995435007416145e-06, "loss": 1.0106, "step": 281 }, { "epoch": 0.029658064600943904, "grad_norm": 4.019746471320661, "learning_rate": 4.995384441736622e-06, "loss": 1.0501, "step": 282 }, { "epoch": 0.029763235042791224, "grad_norm": 5.0468126375485065, "learning_rate": 4.995333597802349e-06, "loss": 1.1031, "step": 283 }, { "epoch": 0.029868405484638542, "grad_norm": 4.142494686730891, "learning_rate": 4.995282475618995e-06, "loss": 1.0887, "step": 284 }, { "epoch": 0.029973575926485863, "grad_norm": 5.271308613066555, "learning_rate": 4.9952310751922615e-06, "loss": 1.0625, "step": 285 }, { "epoch": 0.03007874636833318, "grad_norm": 4.001834907924138, "learning_rate": 4.995179396527878e-06, "loss": 1.0622, "step": 286 }, { "epoch": 0.030183916810180497, "grad_norm": 3.9801706203568856, "learning_rate": 4.99512743963161e-06, "loss": 1.0791, "step": 287 }, { "epoch": 0.030289087252027818, "grad_norm": 5.1051985033669425, "learning_rate": 4.99507520450925e-06, "loss": 1.0684, "step": 288 }, { "epoch": 0.030394257693875135, "grad_norm": 3.936963708694301, "learning_rate": 4.995022691166621e-06, "loss": 1.047, "step": 289 }, { "epoch": 0.030499428135722456, "grad_norm": 3.714404487265949, "learning_rate": 4.994969899609581e-06, "loss": 1.0303, "step": 290 }, { "epoch": 0.030604598577569773, "grad_norm": 4.912241616953762, "learning_rate": 4.994916829844015e-06, "loss": 1.0836, "step": 291 }, { "epoch": 0.030709769019417094, "grad_norm": 4.067161813822889, "learning_rate": 4.994863481875842e-06, "loss": 1.0335, "step": 292 }, { "epoch": 0.03081493946126441, "grad_norm": 4.6662969482103644, "learning_rate": 4.994809855711009e-06, "loss": 1.0666, "step": 293 }, { "epoch": 0.03092010990311173, "grad_norm": 4.517226678040155, "learning_rate": 4.994755951355496e-06, "loss": 1.0358, "step": 294 }, { "epoch": 0.03102528034495905, "grad_norm": 3.7309332333912724, "learning_rate": 4.994701768815317e-06, "loss": 1.0611, "step": 295 }, { "epoch": 0.031130450786806367, "grad_norm": 4.014104758735321, "learning_rate": 4.994647308096509e-06, "loss": 1.0727, "step": 296 }, { "epoch": 0.031235621228653688, "grad_norm": 3.29886953781129, "learning_rate": 4.994592569205148e-06, "loss": 1.0919, "step": 297 }, { "epoch": 0.03134079167050101, "grad_norm": 2.7078199003469257, "learning_rate": 4.994537552147337e-06, "loss": 1.0173, "step": 298 }, { "epoch": 0.031445962112348326, "grad_norm": 2.8266073729377332, "learning_rate": 4.994482256929211e-06, "loss": 1.0709, "step": 299 }, { "epoch": 0.03155113255419564, "grad_norm": 3.268397966576739, "learning_rate": 4.994426683556935e-06, "loss": 1.0217, "step": 300 }, { "epoch": 0.03165630299604296, "grad_norm": 2.6632983909674994, "learning_rate": 4.994370832036705e-06, "loss": 1.0279, "step": 301 }, { "epoch": 0.03176147343789028, "grad_norm": 4.0187186523919705, "learning_rate": 4.994314702374753e-06, "loss": 1.0646, "step": 302 }, { "epoch": 0.0318666438797376, "grad_norm": 4.204278869511361, "learning_rate": 4.994258294577333e-06, "loss": 1.0762, "step": 303 }, { "epoch": 0.03197181432158492, "grad_norm": 4.383109964538248, "learning_rate": 4.9942016086507384e-06, "loss": 1.0602, "step": 304 }, { "epoch": 0.03207698476343224, "grad_norm": 5.459005697486776, "learning_rate": 4.9941446446012874e-06, "loss": 1.0854, "step": 305 }, { "epoch": 0.032182155205279554, "grad_norm": 3.251384242833329, "learning_rate": 4.994087402435335e-06, "loss": 1.0001, "step": 306 }, { "epoch": 0.03228732564712687, "grad_norm": 4.629466051075931, "learning_rate": 4.994029882159261e-06, "loss": 1.0552, "step": 307 }, { "epoch": 0.032392496088974196, "grad_norm": 2.716529865958913, "learning_rate": 4.993972083779482e-06, "loss": 1.0108, "step": 308 }, { "epoch": 0.03249766653082151, "grad_norm": 3.575967885671494, "learning_rate": 4.993914007302441e-06, "loss": 1.0709, "step": 309 }, { "epoch": 0.03260283697266883, "grad_norm": 3.5669292643875186, "learning_rate": 4.993855652734616e-06, "loss": 1.0912, "step": 310 }, { "epoch": 0.03270800741451615, "grad_norm": 4.010833762984559, "learning_rate": 4.993797020082511e-06, "loss": 1.0583, "step": 311 }, { "epoch": 0.03281317785636347, "grad_norm": 4.104510377305269, "learning_rate": 4.9937381093526675e-06, "loss": 1.0087, "step": 312 }, { "epoch": 0.03291834829821079, "grad_norm": 4.784979263043128, "learning_rate": 4.993678920551651e-06, "loss": 1.0615, "step": 313 }, { "epoch": 0.03302351874005811, "grad_norm": 4.164191063473395, "learning_rate": 4.993619453686065e-06, "loss": 1.0409, "step": 314 }, { "epoch": 0.033128689181905424, "grad_norm": 3.1649556775301733, "learning_rate": 4.993559708762538e-06, "loss": 1.0811, "step": 315 }, { "epoch": 0.03323385962375274, "grad_norm": 3.6637842760946917, "learning_rate": 4.993499685787732e-06, "loss": 1.0539, "step": 316 }, { "epoch": 0.033339030065600066, "grad_norm": 3.1821001996556157, "learning_rate": 4.993439384768342e-06, "loss": 1.0799, "step": 317 }, { "epoch": 0.03344420050744738, "grad_norm": 3.3225742030256535, "learning_rate": 4.993378805711091e-06, "loss": 1.079, "step": 318 }, { "epoch": 0.0335493709492947, "grad_norm": 3.7196848657078045, "learning_rate": 4.993317948622733e-06, "loss": 1.0537, "step": 319 }, { "epoch": 0.03365454139114202, "grad_norm": 3.0347036015285385, "learning_rate": 4.993256813510055e-06, "loss": 1.0425, "step": 320 }, { "epoch": 0.033759711832989335, "grad_norm": 3.4466877790068056, "learning_rate": 4.993195400379875e-06, "loss": 1.0275, "step": 321 }, { "epoch": 0.03386488227483666, "grad_norm": 3.4937996524768393, "learning_rate": 4.993133709239039e-06, "loss": 1.0074, "step": 322 }, { "epoch": 0.03397005271668398, "grad_norm": 3.961714485473967, "learning_rate": 4.993071740094428e-06, "loss": 1.0152, "step": 323 }, { "epoch": 0.034075223158531294, "grad_norm": 3.882633562046403, "learning_rate": 4.993009492952951e-06, "loss": 1.0408, "step": 324 }, { "epoch": 0.03418039360037861, "grad_norm": 4.777184945580783, "learning_rate": 4.992946967821549e-06, "loss": 1.095, "step": 325 }, { "epoch": 0.034285564042225936, "grad_norm": 4.8588365098982775, "learning_rate": 4.9928841647071936e-06, "loss": 1.0568, "step": 326 }, { "epoch": 0.03439073448407325, "grad_norm": 3.6839425202673297, "learning_rate": 4.992821083616889e-06, "loss": 1.0496, "step": 327 }, { "epoch": 0.03449590492592057, "grad_norm": 3.66693362260413, "learning_rate": 4.992757724557669e-06, "loss": 1.0753, "step": 328 }, { "epoch": 0.03460107536776789, "grad_norm": 4.978727167916742, "learning_rate": 4.992694087536597e-06, "loss": 1.0729, "step": 329 }, { "epoch": 0.034706245809615205, "grad_norm": 5.114707679163974, "learning_rate": 4.992630172560771e-06, "loss": 1.0943, "step": 330 }, { "epoch": 0.03481141625146253, "grad_norm": 3.163523736651089, "learning_rate": 4.992565979637318e-06, "loss": 1.0419, "step": 331 }, { "epoch": 0.034916586693309846, "grad_norm": 3.636944273388002, "learning_rate": 4.9925015087733945e-06, "loss": 1.047, "step": 332 }, { "epoch": 0.035021757135157164, "grad_norm": 4.259395764869889, "learning_rate": 4.992436759976191e-06, "loss": 1.0776, "step": 333 }, { "epoch": 0.03512692757700448, "grad_norm": 3.920573922209101, "learning_rate": 4.992371733252926e-06, "loss": 1.0535, "step": 334 }, { "epoch": 0.0352320980188518, "grad_norm": 4.293490908646825, "learning_rate": 4.992306428610853e-06, "loss": 1.0483, "step": 335 }, { "epoch": 0.03533726846069912, "grad_norm": 3.6789152142394914, "learning_rate": 4.9922408460572505e-06, "loss": 1.0418, "step": 336 }, { "epoch": 0.03544243890254644, "grad_norm": 3.3817970301847238, "learning_rate": 4.992174985599434e-06, "loss": 1.0514, "step": 337 }, { "epoch": 0.03554760934439376, "grad_norm": 3.3702499323179596, "learning_rate": 4.992108847244746e-06, "loss": 1.0259, "step": 338 }, { "epoch": 0.035652779786241075, "grad_norm": 4.5111317152835815, "learning_rate": 4.9920424310005625e-06, "loss": 1.0555, "step": 339 }, { "epoch": 0.0357579502280884, "grad_norm": 4.693632339648465, "learning_rate": 4.9919757368742895e-06, "loss": 1.0464, "step": 340 }, { "epoch": 0.035863120669935716, "grad_norm": 3.7052320429842736, "learning_rate": 4.9919087648733634e-06, "loss": 1.0429, "step": 341 }, { "epoch": 0.035968291111783034, "grad_norm": 5.043659801808216, "learning_rate": 4.991841515005253e-06, "loss": 1.0648, "step": 342 }, { "epoch": 0.03607346155363035, "grad_norm": 4.146978032877157, "learning_rate": 4.991773987277455e-06, "loss": 1.1074, "step": 343 }, { "epoch": 0.03617863199547767, "grad_norm": 6.009958045778625, "learning_rate": 4.991706181697501e-06, "loss": 1.0836, "step": 344 }, { "epoch": 0.03628380243732499, "grad_norm": 4.730758307272077, "learning_rate": 4.991638098272951e-06, "loss": 1.0436, "step": 345 }, { "epoch": 0.03638897287917231, "grad_norm": 4.19923954684141, "learning_rate": 4.991569737011398e-06, "loss": 1.0276, "step": 346 }, { "epoch": 0.03649414332101963, "grad_norm": 3.5491173146821247, "learning_rate": 4.991501097920464e-06, "loss": 1.0366, "step": 347 }, { "epoch": 0.036599313762866945, "grad_norm": 3.4759309555067213, "learning_rate": 4.991432181007803e-06, "loss": 1.0173, "step": 348 }, { "epoch": 0.03670448420471426, "grad_norm": 3.817356633667997, "learning_rate": 4.9913629862811e-06, "loss": 1.0628, "step": 349 }, { "epoch": 0.036809654646561586, "grad_norm": 3.537846598304811, "learning_rate": 4.99129351374807e-06, "loss": 1.057, "step": 350 }, { "epoch": 0.036914825088408904, "grad_norm": 5.421441758466292, "learning_rate": 4.99122376341646e-06, "loss": 1.0986, "step": 351 }, { "epoch": 0.03701999553025622, "grad_norm": 4.12752414216299, "learning_rate": 4.9911537352940485e-06, "loss": 1.0398, "step": 352 }, { "epoch": 0.03712516597210354, "grad_norm": 4.437838613524334, "learning_rate": 4.991083429388643e-06, "loss": 1.0359, "step": 353 }, { "epoch": 0.03723033641395086, "grad_norm": 3.22408613692183, "learning_rate": 4.991012845708084e-06, "loss": 1.0653, "step": 354 }, { "epoch": 0.03733550685579818, "grad_norm": 3.3399036692465884, "learning_rate": 4.990941984260241e-06, "loss": 1.0765, "step": 355 }, { "epoch": 0.0374406772976455, "grad_norm": 3.936416890688034, "learning_rate": 4.9908708450530174e-06, "loss": 1.051, "step": 356 }, { "epoch": 0.037545847739492814, "grad_norm": 3.990025349268498, "learning_rate": 4.990799428094345e-06, "loss": 1.0507, "step": 357 }, { "epoch": 0.03765101818134013, "grad_norm": 4.239682909525811, "learning_rate": 4.9907277333921865e-06, "loss": 1.0568, "step": 358 }, { "epoch": 0.037756188623187456, "grad_norm": 3.7354470248901133, "learning_rate": 4.9906557609545375e-06, "loss": 1.0847, "step": 359 }, { "epoch": 0.03786135906503477, "grad_norm": 5.431410350419551, "learning_rate": 4.990583510789423e-06, "loss": 1.088, "step": 360 }, { "epoch": 0.03796652950688209, "grad_norm": 4.049632017073229, "learning_rate": 4.990510982904899e-06, "loss": 1.0853, "step": 361 }, { "epoch": 0.03807169994872941, "grad_norm": 3.8736875944016775, "learning_rate": 4.990438177309054e-06, "loss": 1.0628, "step": 362 }, { "epoch": 0.038176870390576725, "grad_norm": 2.99774541371039, "learning_rate": 4.9903650940100055e-06, "loss": 1.0509, "step": 363 }, { "epoch": 0.03828204083242405, "grad_norm": 5.319456457308596, "learning_rate": 4.990291733015904e-06, "loss": 1.037, "step": 364 }, { "epoch": 0.03838721127427137, "grad_norm": 4.702856139460145, "learning_rate": 4.9902180943349286e-06, "loss": 1.0805, "step": 365 }, { "epoch": 0.038492381716118684, "grad_norm": 3.8095265476696665, "learning_rate": 4.99014417797529e-06, "loss": 1.0669, "step": 366 }, { "epoch": 0.038597552157966, "grad_norm": 4.961192575518297, "learning_rate": 4.990069983945233e-06, "loss": 1.0844, "step": 367 }, { "epoch": 0.038702722599813326, "grad_norm": 4.891981596003543, "learning_rate": 4.989995512253028e-06, "loss": 1.0502, "step": 368 }, { "epoch": 0.03880789304166064, "grad_norm": 3.6566189119364667, "learning_rate": 4.989920762906981e-06, "loss": 1.06, "step": 369 }, { "epoch": 0.03891306348350796, "grad_norm": 5.900942628114732, "learning_rate": 4.989845735915426e-06, "loss": 1.068, "step": 370 }, { "epoch": 0.03901823392535528, "grad_norm": 3.8108566487033477, "learning_rate": 4.98977043128673e-06, "loss": 1.0531, "step": 371 }, { "epoch": 0.039123404367202595, "grad_norm": 3.9068966220872006, "learning_rate": 4.9896948490292905e-06, "loss": 1.064, "step": 372 }, { "epoch": 0.03922857480904992, "grad_norm": 4.376179595496623, "learning_rate": 4.989618989151533e-06, "loss": 1.0025, "step": 373 }, { "epoch": 0.03933374525089724, "grad_norm": 3.6380450371166155, "learning_rate": 4.989542851661919e-06, "loss": 1.0173, "step": 374 }, { "epoch": 0.039438915692744554, "grad_norm": 4.554841578105061, "learning_rate": 4.9894664365689385e-06, "loss": 1.0903, "step": 375 }, { "epoch": 0.03954408613459187, "grad_norm": 2.853296420735663, "learning_rate": 4.9893897438811106e-06, "loss": 1.0246, "step": 376 }, { "epoch": 0.03964925657643919, "grad_norm": 4.257333189026485, "learning_rate": 4.989312773606988e-06, "loss": 1.097, "step": 377 }, { "epoch": 0.03975442701828651, "grad_norm": 3.678981492068401, "learning_rate": 4.989235525755154e-06, "loss": 1.0611, "step": 378 }, { "epoch": 0.03985959746013383, "grad_norm": 5.195313256569016, "learning_rate": 4.989158000334221e-06, "loss": 1.0454, "step": 379 }, { "epoch": 0.03996476790198115, "grad_norm": 1.8104051625470303, "learning_rate": 4.989080197352834e-06, "loss": 1.0422, "step": 380 }, { "epoch": 0.040069938343828465, "grad_norm": 3.538275104798571, "learning_rate": 4.989002116819671e-06, "loss": 1.0597, "step": 381 }, { "epoch": 0.04017510878567578, "grad_norm": 2.8012444815542716, "learning_rate": 4.9889237587434355e-06, "loss": 1.0183, "step": 382 }, { "epoch": 0.04028027922752311, "grad_norm": 3.367459724491466, "learning_rate": 4.988845123132867e-06, "loss": 1.0388, "step": 383 }, { "epoch": 0.040385449669370424, "grad_norm": 4.145106659583553, "learning_rate": 4.988766209996733e-06, "loss": 1.0344, "step": 384 }, { "epoch": 0.04049062011121774, "grad_norm": 3.40206472561461, "learning_rate": 4.9886870193438325e-06, "loss": 1.052, "step": 385 }, { "epoch": 0.04059579055306506, "grad_norm": 3.725684215375892, "learning_rate": 4.988607551182997e-06, "loss": 1.0809, "step": 386 }, { "epoch": 0.04070096099491238, "grad_norm": 3.731707512512842, "learning_rate": 4.9885278055230875e-06, "loss": 1.0266, "step": 387 }, { "epoch": 0.0408061314367597, "grad_norm": 3.928396974689701, "learning_rate": 4.988447782372996e-06, "loss": 1.063, "step": 388 }, { "epoch": 0.04091130187860702, "grad_norm": 4.31218060090711, "learning_rate": 4.9883674817416464e-06, "loss": 1.0864, "step": 389 }, { "epoch": 0.041016472320454335, "grad_norm": 3.0718531395576583, "learning_rate": 4.988286903637991e-06, "loss": 1.0459, "step": 390 }, { "epoch": 0.04112164276230165, "grad_norm": 2.9157630700754886, "learning_rate": 4.988206048071017e-06, "loss": 1.0581, "step": 391 }, { "epoch": 0.04122681320414898, "grad_norm": 3.8247937216792, "learning_rate": 4.988124915049739e-06, "loss": 1.0286, "step": 392 }, { "epoch": 0.041331983645996294, "grad_norm": 2.6874655896655097, "learning_rate": 4.988043504583206e-06, "loss": 1.0387, "step": 393 }, { "epoch": 0.04143715408784361, "grad_norm": 2.9913671043398677, "learning_rate": 4.987961816680493e-06, "loss": 1.0536, "step": 394 }, { "epoch": 0.04154232452969093, "grad_norm": 2.8088862124300404, "learning_rate": 4.987879851350711e-06, "loss": 1.078, "step": 395 }, { "epoch": 0.041647494971538246, "grad_norm": 3.755585245451105, "learning_rate": 4.987797608602997e-06, "loss": 1.0792, "step": 396 }, { "epoch": 0.04175266541338557, "grad_norm": 2.8770765754045846, "learning_rate": 4.987715088446526e-06, "loss": 1.0757, "step": 397 }, { "epoch": 0.04185783585523289, "grad_norm": 3.273137502812656, "learning_rate": 4.987632290890497e-06, "loss": 1.0471, "step": 398 }, { "epoch": 0.041963006297080205, "grad_norm": 3.636240730716032, "learning_rate": 4.987549215944143e-06, "loss": 1.0532, "step": 399 }, { "epoch": 0.04206817673892752, "grad_norm": 4.208273895144222, "learning_rate": 4.987465863616727e-06, "loss": 1.0681, "step": 400 }, { "epoch": 0.042173347180774846, "grad_norm": 3.5106533475647748, "learning_rate": 4.987382233917545e-06, "loss": 1.0538, "step": 401 }, { "epoch": 0.042278517622622164, "grad_norm": 4.886913187088557, "learning_rate": 4.98729832685592e-06, "loss": 1.0417, "step": 402 }, { "epoch": 0.04238368806446948, "grad_norm": 5.560665522659327, "learning_rate": 4.98721414244121e-06, "loss": 1.0699, "step": 403 }, { "epoch": 0.0424888585063168, "grad_norm": 4.094323716122683, "learning_rate": 4.987129680682802e-06, "loss": 1.0764, "step": 404 }, { "epoch": 0.042594028948164116, "grad_norm": 2.6629248986818075, "learning_rate": 4.987044941590114e-06, "loss": 1.0667, "step": 405 }, { "epoch": 0.04269919939001144, "grad_norm": 3.4109848570390193, "learning_rate": 4.986959925172594e-06, "loss": 1.037, "step": 406 }, { "epoch": 0.04280436983185876, "grad_norm": 3.0342413572937317, "learning_rate": 4.986874631439724e-06, "loss": 1.0709, "step": 407 }, { "epoch": 0.042909540273706075, "grad_norm": 2.8390095815269465, "learning_rate": 4.9867890604010125e-06, "loss": 1.0196, "step": 408 }, { "epoch": 0.04301471071555339, "grad_norm": 3.0707226746539464, "learning_rate": 4.9867032120660045e-06, "loss": 1.0393, "step": 409 }, { "epoch": 0.04311988115740071, "grad_norm": 3.8038872687449765, "learning_rate": 4.98661708644427e-06, "loss": 1.032, "step": 410 }, { "epoch": 0.043225051599248034, "grad_norm": 3.6068516146454193, "learning_rate": 4.986530683545414e-06, "loss": 1.0328, "step": 411 }, { "epoch": 0.04333022204109535, "grad_norm": 3.8245339487596546, "learning_rate": 4.98644400337907e-06, "loss": 1.103, "step": 412 }, { "epoch": 0.04343539248294267, "grad_norm": 3.4368283671133746, "learning_rate": 4.986357045954905e-06, "loss": 1.0507, "step": 413 }, { "epoch": 0.043540562924789986, "grad_norm": 2.74188819425057, "learning_rate": 4.986269811282615e-06, "loss": 1.0316, "step": 414 }, { "epoch": 0.04364573336663731, "grad_norm": 3.679825151599156, "learning_rate": 4.9861822993719255e-06, "loss": 1.0097, "step": 415 }, { "epoch": 0.04375090380848463, "grad_norm": 3.297855659802533, "learning_rate": 4.986094510232597e-06, "loss": 1.0219, "step": 416 }, { "epoch": 0.043856074250331945, "grad_norm": 3.4058245947376173, "learning_rate": 4.986006443874419e-06, "loss": 1.0377, "step": 417 }, { "epoch": 0.04396124469217926, "grad_norm": 4.363555255178052, "learning_rate": 4.9859181003072095e-06, "loss": 1.0637, "step": 418 }, { "epoch": 0.04406641513402658, "grad_norm": 3.9928133996636395, "learning_rate": 4.985829479540821e-06, "loss": 1.0591, "step": 419 }, { "epoch": 0.0441715855758739, "grad_norm": 4.073405278783087, "learning_rate": 4.985740581585135e-06, "loss": 1.059, "step": 420 }, { "epoch": 0.04427675601772122, "grad_norm": 4.084709791211826, "learning_rate": 4.985651406450063e-06, "loss": 1.0313, "step": 421 }, { "epoch": 0.04438192645956854, "grad_norm": 3.715460139932838, "learning_rate": 4.985561954145552e-06, "loss": 1.0913, "step": 422 }, { "epoch": 0.044487096901415855, "grad_norm": 3.7233678758471744, "learning_rate": 4.985472224681573e-06, "loss": 1.0577, "step": 423 }, { "epoch": 0.04459226734326317, "grad_norm": 5.652763390578416, "learning_rate": 4.985382218068133e-06, "loss": 1.0382, "step": 424 }, { "epoch": 0.0446974377851105, "grad_norm": 3.468271207036182, "learning_rate": 4.985291934315269e-06, "loss": 1.0498, "step": 425 }, { "epoch": 0.044802608226957814, "grad_norm": 3.1935939850144233, "learning_rate": 4.985201373433048e-06, "loss": 1.0395, "step": 426 }, { "epoch": 0.04490777866880513, "grad_norm": 3.714801883999893, "learning_rate": 4.985110535431569e-06, "loss": 1.063, "step": 427 }, { "epoch": 0.04501294911065245, "grad_norm": 3.441136685461427, "learning_rate": 4.985019420320959e-06, "loss": 1.0503, "step": 428 }, { "epoch": 0.04511811955249977, "grad_norm": 2.8419115986373042, "learning_rate": 4.98492802811138e-06, "loss": 1.0479, "step": 429 }, { "epoch": 0.04522328999434709, "grad_norm": 3.039904416144775, "learning_rate": 4.984836358813022e-06, "loss": 1.0469, "step": 430 }, { "epoch": 0.04532846043619441, "grad_norm": 4.691661020519824, "learning_rate": 4.984744412436107e-06, "loss": 1.0572, "step": 431 }, { "epoch": 0.045433630878041725, "grad_norm": 4.540987997363801, "learning_rate": 4.9846521889908885e-06, "loss": 1.0781, "step": 432 }, { "epoch": 0.04553880131988904, "grad_norm": 4.296517950327922, "learning_rate": 4.984559688487649e-06, "loss": 1.0635, "step": 433 }, { "epoch": 0.04564397176173637, "grad_norm": 3.2572651182872585, "learning_rate": 4.984466910936703e-06, "loss": 1.0681, "step": 434 }, { "epoch": 0.045749142203583684, "grad_norm": 2.9557462605367415, "learning_rate": 4.9843738563483975e-06, "loss": 1.0567, "step": 435 }, { "epoch": 0.045854312645431, "grad_norm": 3.9469604897643022, "learning_rate": 4.984280524733107e-06, "loss": 1.0509, "step": 436 }, { "epoch": 0.04595948308727832, "grad_norm": 2.7541457544733188, "learning_rate": 4.984186916101239e-06, "loss": 1.0414, "step": 437 }, { "epoch": 0.046064653529125636, "grad_norm": 3.622690550239156, "learning_rate": 4.984093030463233e-06, "loss": 1.0864, "step": 438 }, { "epoch": 0.04616982397097296, "grad_norm": 4.32000715987469, "learning_rate": 4.983998867829557e-06, "loss": 1.0398, "step": 439 }, { "epoch": 0.04627499441282028, "grad_norm": 3.7777411421192792, "learning_rate": 4.98390442821071e-06, "loss": 1.0565, "step": 440 }, { "epoch": 0.046380164854667595, "grad_norm": 3.47497239447561, "learning_rate": 4.983809711617224e-06, "loss": 1.0516, "step": 441 }, { "epoch": 0.04648533529651491, "grad_norm": 3.8957477430994687, "learning_rate": 4.98371471805966e-06, "loss": 1.0513, "step": 442 }, { "epoch": 0.04659050573836224, "grad_norm": 2.4570852022191603, "learning_rate": 4.983619447548611e-06, "loss": 1.0278, "step": 443 }, { "epoch": 0.046695676180209554, "grad_norm": 4.079822189897958, "learning_rate": 4.9835239000947005e-06, "loss": 1.0827, "step": 444 }, { "epoch": 0.04680084662205687, "grad_norm": 3.547742042978678, "learning_rate": 4.983428075708582e-06, "loss": 1.1007, "step": 445 }, { "epoch": 0.04690601706390419, "grad_norm": 4.0885742408881125, "learning_rate": 4.983331974400941e-06, "loss": 1.0316, "step": 446 }, { "epoch": 0.047011187505751506, "grad_norm": 3.103253885780263, "learning_rate": 4.983235596182494e-06, "loss": 1.0792, "step": 447 }, { "epoch": 0.04711635794759883, "grad_norm": 2.6930299837503164, "learning_rate": 4.983138941063988e-06, "loss": 1.0769, "step": 448 }, { "epoch": 0.04722152838944615, "grad_norm": 2.828513719063435, "learning_rate": 4.9830420090562e-06, "loss": 1.0662, "step": 449 }, { "epoch": 0.047326698831293465, "grad_norm": 3.6605584013370245, "learning_rate": 4.982944800169939e-06, "loss": 1.0513, "step": 450 }, { "epoch": 0.04743186927314078, "grad_norm": 2.902862443517808, "learning_rate": 4.982847314416044e-06, "loss": 1.0157, "step": 451 }, { "epoch": 0.0475370397149881, "grad_norm": 2.8680844974252686, "learning_rate": 4.982749551805387e-06, "loss": 1.0493, "step": 452 }, { "epoch": 0.047642210156835424, "grad_norm": 2.8491457922180197, "learning_rate": 4.9826515123488675e-06, "loss": 1.0603, "step": 453 }, { "epoch": 0.04774738059868274, "grad_norm": 3.0486623847828334, "learning_rate": 4.982553196057419e-06, "loss": 1.0604, "step": 454 }, { "epoch": 0.04785255104053006, "grad_norm": 2.2871258606215874, "learning_rate": 4.982454602942004e-06, "loss": 1.0367, "step": 455 }, { "epoch": 0.047957721482377376, "grad_norm": 3.822855535156918, "learning_rate": 4.982355733013616e-06, "loss": 1.0289, "step": 456 }, { "epoch": 0.04806289192422469, "grad_norm": 3.4057137793165486, "learning_rate": 4.98225658628328e-06, "loss": 1.0331, "step": 457 }, { "epoch": 0.04816806236607202, "grad_norm": 4.014699719424169, "learning_rate": 4.982157162762052e-06, "loss": 1.0255, "step": 458 }, { "epoch": 0.048273232807919335, "grad_norm": 4.028054004330074, "learning_rate": 4.982057462461018e-06, "loss": 1.0444, "step": 459 }, { "epoch": 0.04837840324976665, "grad_norm": 5.391630404479916, "learning_rate": 4.981957485391297e-06, "loss": 1.0536, "step": 460 }, { "epoch": 0.04848357369161397, "grad_norm": 2.3994992334050105, "learning_rate": 4.981857231564034e-06, "loss": 1.0103, "step": 461 }, { "epoch": 0.048588744133461294, "grad_norm": 3.798097639454563, "learning_rate": 4.9817567009904114e-06, "loss": 1.0238, "step": 462 }, { "epoch": 0.04869391457530861, "grad_norm": 4.714715898953462, "learning_rate": 4.981655893681637e-06, "loss": 1.0237, "step": 463 }, { "epoch": 0.04879908501715593, "grad_norm": 4.388735872403405, "learning_rate": 4.981554809648952e-06, "loss": 1.076, "step": 464 }, { "epoch": 0.048904255459003246, "grad_norm": 4.403273076780409, "learning_rate": 4.981453448903629e-06, "loss": 1.0737, "step": 465 }, { "epoch": 0.04900942590085056, "grad_norm": 2.5521629736975404, "learning_rate": 4.98135181145697e-06, "loss": 1.0514, "step": 466 }, { "epoch": 0.04911459634269789, "grad_norm": 3.8606552004883534, "learning_rate": 4.981249897320307e-06, "loss": 1.0413, "step": 467 }, { "epoch": 0.049219766784545205, "grad_norm": 2.8692792806121536, "learning_rate": 4.981147706505007e-06, "loss": 1.0463, "step": 468 }, { "epoch": 0.04932493722639252, "grad_norm": 2.7409626254110315, "learning_rate": 4.981045239022462e-06, "loss": 1.0609, "step": 469 }, { "epoch": 0.04943010766823984, "grad_norm": 2.9586823141347707, "learning_rate": 4.9809424948841e-06, "loss": 1.0576, "step": 470 }, { "epoch": 0.04953527811008716, "grad_norm": 2.6686230862505886, "learning_rate": 4.980839474101378e-06, "loss": 1.0157, "step": 471 }, { "epoch": 0.04964044855193448, "grad_norm": 3.139097480801446, "learning_rate": 4.980736176685782e-06, "loss": 1.0454, "step": 472 }, { "epoch": 0.0497456189937818, "grad_norm": 3.100054655774797, "learning_rate": 4.980632602648831e-06, "loss": 1.0296, "step": 473 }, { "epoch": 0.049850789435629116, "grad_norm": 4.665650390709168, "learning_rate": 4.980528752002075e-06, "loss": 1.0421, "step": 474 }, { "epoch": 0.04995595987747643, "grad_norm": 3.2001730382677067, "learning_rate": 4.9804246247570935e-06, "loss": 1.0599, "step": 475 }, { "epoch": 0.05006113031932376, "grad_norm": 3.5026752591463493, "learning_rate": 4.980320220925498e-06, "loss": 1.0683, "step": 476 }, { "epoch": 0.050166300761171075, "grad_norm": 4.538529420292572, "learning_rate": 4.98021554051893e-06, "loss": 1.0342, "step": 477 }, { "epoch": 0.05027147120301839, "grad_norm": 2.9057622514006374, "learning_rate": 4.980110583549062e-06, "loss": 1.0419, "step": 478 }, { "epoch": 0.05037664164486571, "grad_norm": 4.168498583654285, "learning_rate": 4.980005350027598e-06, "loss": 1.0235, "step": 479 }, { "epoch": 0.05048181208671303, "grad_norm": 3.465721454429658, "learning_rate": 4.979899839966273e-06, "loss": 1.0825, "step": 480 }, { "epoch": 0.05058698252856035, "grad_norm": 3.9874964500146994, "learning_rate": 4.97979405337685e-06, "loss": 1.0683, "step": 481 }, { "epoch": 0.05069215297040767, "grad_norm": 3.256605985477986, "learning_rate": 4.979687990271126e-06, "loss": 1.0309, "step": 482 }, { "epoch": 0.050797323412254985, "grad_norm": 2.822984342445266, "learning_rate": 4.979581650660929e-06, "loss": 1.0334, "step": 483 }, { "epoch": 0.0509024938541023, "grad_norm": 4.047140521535769, "learning_rate": 4.979475034558115e-06, "loss": 1.0336, "step": 484 }, { "epoch": 0.05100766429594962, "grad_norm": 3.121659309179012, "learning_rate": 4.979368141974575e-06, "loss": 1.0975, "step": 485 }, { "epoch": 0.051112834737796944, "grad_norm": 4.436709845774703, "learning_rate": 4.979260972922226e-06, "loss": 1.0085, "step": 486 }, { "epoch": 0.05121800517964426, "grad_norm": 5.131430337585738, "learning_rate": 4.9791535274130185e-06, "loss": 1.0919, "step": 487 }, { "epoch": 0.05132317562149158, "grad_norm": 4.09555529702269, "learning_rate": 4.9790458054589344e-06, "loss": 1.0877, "step": 488 }, { "epoch": 0.051428346063338896, "grad_norm": 3.287984548144607, "learning_rate": 4.9789378070719854e-06, "loss": 1.0707, "step": 489 }, { "epoch": 0.05153351650518622, "grad_norm": 3.3946741219834977, "learning_rate": 4.978829532264213e-06, "loss": 1.055, "step": 490 }, { "epoch": 0.05163868694703354, "grad_norm": 3.058713885707903, "learning_rate": 4.978720981047692e-06, "loss": 1.0804, "step": 491 }, { "epoch": 0.051743857388880855, "grad_norm": 3.6709984313290933, "learning_rate": 4.978612153434527e-06, "loss": 1.0564, "step": 492 }, { "epoch": 0.05184902783072817, "grad_norm": 5.006510707239013, "learning_rate": 4.9785030494368515e-06, "loss": 1.0852, "step": 493 }, { "epoch": 0.05195419827257549, "grad_norm": 3.653015612885224, "learning_rate": 4.978393669066832e-06, "loss": 1.0751, "step": 494 }, { "epoch": 0.052059368714422814, "grad_norm": 3.339203412325409, "learning_rate": 4.978284012336666e-06, "loss": 1.0509, "step": 495 }, { "epoch": 0.05216453915627013, "grad_norm": 4.106088996601506, "learning_rate": 4.978174079258581e-06, "loss": 1.0331, "step": 496 }, { "epoch": 0.05226970959811745, "grad_norm": 4.537839424849167, "learning_rate": 4.978063869844834e-06, "loss": 1.0399, "step": 497 }, { "epoch": 0.052374880039964766, "grad_norm": 3.671123861891514, "learning_rate": 4.977953384107716e-06, "loss": 1.0404, "step": 498 }, { "epoch": 0.052480050481812084, "grad_norm": 3.835556456175973, "learning_rate": 4.977842622059546e-06, "loss": 1.042, "step": 499 }, { "epoch": 0.05258522092365941, "grad_norm": 3.810143572024863, "learning_rate": 4.977731583712675e-06, "loss": 1.0619, "step": 500 }, { "epoch": 0.052690391365506725, "grad_norm": 2.977023375505081, "learning_rate": 4.977620269079485e-06, "loss": 1.0557, "step": 501 }, { "epoch": 0.05279556180735404, "grad_norm": 2.785680113793684, "learning_rate": 4.977508678172388e-06, "loss": 1.0228, "step": 502 }, { "epoch": 0.05290073224920136, "grad_norm": 3.1735915547497573, "learning_rate": 4.977396811003828e-06, "loss": 1.0574, "step": 503 }, { "epoch": 0.053005902691048684, "grad_norm": 2.4497703751127227, "learning_rate": 4.977284667586278e-06, "loss": 1.0517, "step": 504 }, { "epoch": 0.053111073132896, "grad_norm": 5.411811630465418, "learning_rate": 4.977172247932243e-06, "loss": 1.087, "step": 505 }, { "epoch": 0.05321624357474332, "grad_norm": 3.763750928236047, "learning_rate": 4.97705955205426e-06, "loss": 1.0788, "step": 506 }, { "epoch": 0.053321414016590636, "grad_norm": 3.322087579873048, "learning_rate": 4.976946579964893e-06, "loss": 1.0785, "step": 507 }, { "epoch": 0.05342658445843795, "grad_norm": 4.615164294462559, "learning_rate": 4.976833331676742e-06, "loss": 1.0071, "step": 508 }, { "epoch": 0.05353175490028528, "grad_norm": 4.4250976293850295, "learning_rate": 4.976719807202434e-06, "loss": 1.0403, "step": 509 }, { "epoch": 0.053636925342132595, "grad_norm": 3.8283999846040877, "learning_rate": 4.976606006554626e-06, "loss": 1.0061, "step": 510 }, { "epoch": 0.05374209578397991, "grad_norm": 3.441735254441529, "learning_rate": 4.976491929746011e-06, "loss": 1.0699, "step": 511 }, { "epoch": 0.05384726622582723, "grad_norm": 3.3474483003361835, "learning_rate": 4.976377576789307e-06, "loss": 1.0513, "step": 512 }, { "epoch": 0.05395243666767455, "grad_norm": 3.2087836619502603, "learning_rate": 4.976262947697265e-06, "loss": 1.015, "step": 513 }, { "epoch": 0.05405760710952187, "grad_norm": 4.79038201957431, "learning_rate": 4.97614804248267e-06, "loss": 1.0619, "step": 514 }, { "epoch": 0.05416277755136919, "grad_norm": 5.0334628294143595, "learning_rate": 4.976032861158332e-06, "loss": 1.0501, "step": 515 }, { "epoch": 0.054267947993216506, "grad_norm": 5.716661298960797, "learning_rate": 4.9759174037370955e-06, "loss": 1.051, "step": 516 }, { "epoch": 0.05437311843506382, "grad_norm": 3.219972550593159, "learning_rate": 4.975801670231835e-06, "loss": 1.0549, "step": 517 }, { "epoch": 0.05447828887691115, "grad_norm": 4.042629628393561, "learning_rate": 4.975685660655456e-06, "loss": 1.0104, "step": 518 }, { "epoch": 0.054583459318758465, "grad_norm": 4.588832225167543, "learning_rate": 4.975569375020894e-06, "loss": 1.0681, "step": 519 }, { "epoch": 0.05468862976060578, "grad_norm": 3.6574107244998344, "learning_rate": 4.9754528133411144e-06, "loss": 1.0306, "step": 520 }, { "epoch": 0.0547938002024531, "grad_norm": 3.064757154713279, "learning_rate": 4.975335975629117e-06, "loss": 0.9752, "step": 521 }, { "epoch": 0.05489897064430042, "grad_norm": 3.7277635260897997, "learning_rate": 4.975218861897929e-06, "loss": 1.0457, "step": 522 }, { "epoch": 0.05500414108614774, "grad_norm": 3.2935157357520124, "learning_rate": 4.9751014721606104e-06, "loss": 1.0834, "step": 523 }, { "epoch": 0.05510931152799506, "grad_norm": 2.341096648916625, "learning_rate": 4.97498380643025e-06, "loss": 1.0159, "step": 524 }, { "epoch": 0.055214481969842376, "grad_norm": 2.9750956985730785, "learning_rate": 4.974865864719969e-06, "loss": 1.0646, "step": 525 }, { "epoch": 0.05531965241168969, "grad_norm": 2.712385516803827, "learning_rate": 4.974747647042918e-06, "loss": 1.0122, "step": 526 }, { "epoch": 0.05542482285353701, "grad_norm": 3.225400308884659, "learning_rate": 4.9746291534122805e-06, "loss": 1.0523, "step": 527 }, { "epoch": 0.055529993295384335, "grad_norm": 4.290571209392225, "learning_rate": 4.974510383841269e-06, "loss": 1.0738, "step": 528 }, { "epoch": 0.05563516373723165, "grad_norm": 4.014053600914674, "learning_rate": 4.974391338343126e-06, "loss": 1.0592, "step": 529 }, { "epoch": 0.05574033417907897, "grad_norm": 3.3268020956167166, "learning_rate": 4.974272016931127e-06, "loss": 1.043, "step": 530 }, { "epoch": 0.05584550462092629, "grad_norm": 4.333148338760938, "learning_rate": 4.974152419618579e-06, "loss": 1.0473, "step": 531 }, { "epoch": 0.055950675062773604, "grad_norm": 2.8778208791693616, "learning_rate": 4.974032546418816e-06, "loss": 1.0629, "step": 532 }, { "epoch": 0.05605584550462093, "grad_norm": 3.2582876551113142, "learning_rate": 4.973912397345204e-06, "loss": 1.0486, "step": 533 }, { "epoch": 0.056161015946468246, "grad_norm": 3.287154664003592, "learning_rate": 4.973791972411142e-06, "loss": 1.0428, "step": 534 }, { "epoch": 0.05626618638831556, "grad_norm": 3.331118043139556, "learning_rate": 4.973671271630057e-06, "loss": 1.0282, "step": 535 }, { "epoch": 0.05637135683016288, "grad_norm": 3.3363705578676464, "learning_rate": 4.973550295015411e-06, "loss": 1.0733, "step": 536 }, { "epoch": 0.056476527272010205, "grad_norm": 3.191076118323749, "learning_rate": 4.973429042580691e-06, "loss": 1.0458, "step": 537 }, { "epoch": 0.05658169771385752, "grad_norm": 3.2347419761171716, "learning_rate": 4.973307514339418e-06, "loss": 1.053, "step": 538 }, { "epoch": 0.05668686815570484, "grad_norm": 3.4907812740771575, "learning_rate": 4.973185710305145e-06, "loss": 1.0548, "step": 539 }, { "epoch": 0.05679203859755216, "grad_norm": 4.184646901965426, "learning_rate": 4.973063630491451e-06, "loss": 1.0761, "step": 540 }, { "epoch": 0.056897209039399474, "grad_norm": 3.0281792628868645, "learning_rate": 4.972941274911953e-06, "loss": 1.0389, "step": 541 }, { "epoch": 0.0570023794812468, "grad_norm": 3.6955546648249378, "learning_rate": 4.972818643580291e-06, "loss": 1.0145, "step": 542 }, { "epoch": 0.057107549923094116, "grad_norm": 3.1190860521523813, "learning_rate": 4.972695736510141e-06, "loss": 0.9828, "step": 543 }, { "epoch": 0.05721272036494143, "grad_norm": 3.100972513801203, "learning_rate": 4.972572553715208e-06, "loss": 1.035, "step": 544 }, { "epoch": 0.05731789080678875, "grad_norm": 3.0257460963065297, "learning_rate": 4.972449095209227e-06, "loss": 1.0376, "step": 545 }, { "epoch": 0.05742306124863607, "grad_norm": 3.932692512358207, "learning_rate": 4.972325361005966e-06, "loss": 1.0455, "step": 546 }, { "epoch": 0.05752823169048339, "grad_norm": 3.196511181526071, "learning_rate": 4.9722013511192215e-06, "loss": 0.9909, "step": 547 }, { "epoch": 0.05763340213233071, "grad_norm": 3.467930305235877, "learning_rate": 4.9720770655628216e-06, "loss": 1.044, "step": 548 }, { "epoch": 0.057738572574178026, "grad_norm": 4.5794753754186495, "learning_rate": 4.971952504350625e-06, "loss": 1.0603, "step": 549 }, { "epoch": 0.057843743016025344, "grad_norm": 3.2755087160087073, "learning_rate": 4.9718276674965215e-06, "loss": 1.066, "step": 550 }, { "epoch": 0.05794891345787267, "grad_norm": 3.9349343797276197, "learning_rate": 4.971702555014431e-06, "loss": 1.0681, "step": 551 }, { "epoch": 0.058054083899719985, "grad_norm": 4.361018090944349, "learning_rate": 4.971577166918305e-06, "loss": 1.0697, "step": 552 }, { "epoch": 0.0581592543415673, "grad_norm": 4.570084487039526, "learning_rate": 4.971451503222125e-06, "loss": 1.0249, "step": 553 }, { "epoch": 0.05826442478341462, "grad_norm": 4.362303063650197, "learning_rate": 4.971325563939903e-06, "loss": 1.0465, "step": 554 }, { "epoch": 0.05836959522526194, "grad_norm": 2.2989031239751285, "learning_rate": 4.971199349085683e-06, "loss": 1.0107, "step": 555 }, { "epoch": 0.05847476566710926, "grad_norm": 3.672401717225256, "learning_rate": 4.971072858673539e-06, "loss": 1.0765, "step": 556 }, { "epoch": 0.05857993610895658, "grad_norm": 3.633753822132858, "learning_rate": 4.970946092717574e-06, "loss": 1.0491, "step": 557 }, { "epoch": 0.058685106550803896, "grad_norm": 4.381583646223104, "learning_rate": 4.970819051231927e-06, "loss": 1.0819, "step": 558 }, { "epoch": 0.058790276992651214, "grad_norm": 3.3145358767742987, "learning_rate": 4.970691734230759e-06, "loss": 1.0243, "step": 559 }, { "epoch": 0.05889544743449853, "grad_norm": 2.16306091430307, "learning_rate": 4.970564141728271e-06, "loss": 1.0251, "step": 560 }, { "epoch": 0.059000617876345855, "grad_norm": 4.923343581263921, "learning_rate": 4.9704362737386894e-06, "loss": 1.0501, "step": 561 }, { "epoch": 0.05910578831819317, "grad_norm": 3.808471042980745, "learning_rate": 4.970308130276273e-06, "loss": 1.0398, "step": 562 }, { "epoch": 0.05921095876004049, "grad_norm": 3.13104997246359, "learning_rate": 4.970179711355308e-06, "loss": 1.0325, "step": 563 }, { "epoch": 0.05931612920188781, "grad_norm": 3.0500679981758436, "learning_rate": 4.970051016990118e-06, "loss": 1.0579, "step": 564 }, { "epoch": 0.05942129964373513, "grad_norm": 2.9497172428170204, "learning_rate": 4.96992204719505e-06, "loss": 1.049, "step": 565 }, { "epoch": 0.05952647008558245, "grad_norm": 1.9764188767604873, "learning_rate": 4.9697928019844885e-06, "loss": 1.0358, "step": 566 }, { "epoch": 0.059631640527429766, "grad_norm": 3.9746447469142185, "learning_rate": 4.969663281372842e-06, "loss": 1.0577, "step": 567 }, { "epoch": 0.059736810969277084, "grad_norm": 3.788137117647662, "learning_rate": 4.969533485374556e-06, "loss": 0.9947, "step": 568 }, { "epoch": 0.0598419814111244, "grad_norm": 4.405021349521944, "learning_rate": 4.9694034140041015e-06, "loss": 1.0581, "step": 569 }, { "epoch": 0.059947151852971725, "grad_norm": 2.0847517500095334, "learning_rate": 4.9692730672759835e-06, "loss": 1.0142, "step": 570 }, { "epoch": 0.06005232229481904, "grad_norm": 5.811807304246141, "learning_rate": 4.969142445204736e-06, "loss": 1.0579, "step": 571 }, { "epoch": 0.06015749273666636, "grad_norm": 3.222186946748591, "learning_rate": 4.969011547804925e-06, "loss": 1.0306, "step": 572 }, { "epoch": 0.06026266317851368, "grad_norm": 3.7831417030237837, "learning_rate": 4.968880375091147e-06, "loss": 1.0391, "step": 573 }, { "epoch": 0.060367833620360994, "grad_norm": 3.9362867297970143, "learning_rate": 4.968748927078028e-06, "loss": 1.0684, "step": 574 }, { "epoch": 0.06047300406220832, "grad_norm": 3.329344719091068, "learning_rate": 4.968617203780226e-06, "loss": 1.0679, "step": 575 }, { "epoch": 0.060578174504055636, "grad_norm": 2.693926328348047, "learning_rate": 4.968485205212429e-06, "loss": 1.0686, "step": 576 }, { "epoch": 0.06068334494590295, "grad_norm": 4.752717425247337, "learning_rate": 4.968352931389355e-06, "loss": 1.0477, "step": 577 }, { "epoch": 0.06078851538775027, "grad_norm": 5.382651519152381, "learning_rate": 4.968220382325755e-06, "loss": 1.0522, "step": 578 }, { "epoch": 0.060893685829597595, "grad_norm": 5.095229123548204, "learning_rate": 4.968087558036408e-06, "loss": 1.0342, "step": 579 }, { "epoch": 0.06099885627144491, "grad_norm": 2.9622063334614506, "learning_rate": 4.967954458536126e-06, "loss": 1.0545, "step": 580 }, { "epoch": 0.06110402671329223, "grad_norm": 4.010704877984255, "learning_rate": 4.96782108383975e-06, "loss": 1.0396, "step": 581 }, { "epoch": 0.06120919715513955, "grad_norm": 3.679353200958754, "learning_rate": 4.967687433962152e-06, "loss": 1.0354, "step": 582 }, { "epoch": 0.061314367596986864, "grad_norm": 4.1840739624189345, "learning_rate": 4.967553508918236e-06, "loss": 1.0032, "step": 583 }, { "epoch": 0.06141953803883419, "grad_norm": 3.8257222018053394, "learning_rate": 4.967419308722935e-06, "loss": 1.0402, "step": 584 }, { "epoch": 0.061524708480681506, "grad_norm": 4.453654621625787, "learning_rate": 4.967284833391213e-06, "loss": 1.0446, "step": 585 }, { "epoch": 0.06162987892252882, "grad_norm": 3.37619320579649, "learning_rate": 4.967150082938066e-06, "loss": 1.0381, "step": 586 }, { "epoch": 0.06173504936437614, "grad_norm": 4.183795371545729, "learning_rate": 4.9670150573785195e-06, "loss": 1.0677, "step": 587 }, { "epoch": 0.06184021980622346, "grad_norm": 2.901597121091973, "learning_rate": 4.96687975672763e-06, "loss": 1.0499, "step": 588 }, { "epoch": 0.06194539024807078, "grad_norm": 3.838808993150415, "learning_rate": 4.966744181000483e-06, "loss": 1.0541, "step": 589 }, { "epoch": 0.0620505606899181, "grad_norm": 3.750578964588987, "learning_rate": 4.966608330212198e-06, "loss": 1.0377, "step": 590 }, { "epoch": 0.06215573113176542, "grad_norm": 3.6029951214777403, "learning_rate": 4.9664722043779226e-06, "loss": 1.0386, "step": 591 }, { "epoch": 0.062260901573612734, "grad_norm": 3.430327053415612, "learning_rate": 4.966335803512837e-06, "loss": 1.0532, "step": 592 }, { "epoch": 0.06236607201546006, "grad_norm": 2.7139325949372775, "learning_rate": 4.96619912763215e-06, "loss": 1.0649, "step": 593 }, { "epoch": 0.062471242457307376, "grad_norm": 3.1492912949831884, "learning_rate": 4.966062176751101e-06, "loss": 1.0493, "step": 594 }, { "epoch": 0.0625764128991547, "grad_norm": 4.125956595172075, "learning_rate": 4.965924950884964e-06, "loss": 1.0206, "step": 595 }, { "epoch": 0.06268158334100202, "grad_norm": 3.7929128777641425, "learning_rate": 4.965787450049038e-06, "loss": 1.0299, "step": 596 }, { "epoch": 0.06278675378284933, "grad_norm": 4.0810239450371215, "learning_rate": 4.965649674258657e-06, "loss": 1.1232, "step": 597 }, { "epoch": 0.06289192422469665, "grad_norm": 4.6131544836052, "learning_rate": 4.965511623529183e-06, "loss": 1.0422, "step": 598 }, { "epoch": 0.06299709466654396, "grad_norm": 4.798402879282435, "learning_rate": 4.965373297876009e-06, "loss": 1.0787, "step": 599 }, { "epoch": 0.06310226510839129, "grad_norm": 4.335100341025516, "learning_rate": 4.965234697314563e-06, "loss": 0.9823, "step": 600 }, { "epoch": 0.06320743555023861, "grad_norm": 3.842720236255419, "learning_rate": 4.965095821860296e-06, "loss": 1.0744, "step": 601 }, { "epoch": 0.06331260599208592, "grad_norm": 5.2410971364749335, "learning_rate": 4.964956671528696e-06, "loss": 1.0372, "step": 602 }, { "epoch": 0.06341777643393325, "grad_norm": 3.974109563837657, "learning_rate": 4.964817246335278e-06, "loss": 1.0889, "step": 603 }, { "epoch": 0.06352294687578056, "grad_norm": 3.8457386538934326, "learning_rate": 4.96467754629559e-06, "loss": 1.0567, "step": 604 }, { "epoch": 0.06362811731762788, "grad_norm": 4.314777488524298, "learning_rate": 4.964537571425209e-06, "loss": 1.0197, "step": 605 }, { "epoch": 0.0637332877594752, "grad_norm": 3.1075473114740966, "learning_rate": 4.9643973217397446e-06, "loss": 1.042, "step": 606 }, { "epoch": 0.06383845820132251, "grad_norm": 2.8685624789514637, "learning_rate": 4.964256797254834e-06, "loss": 1.0444, "step": 607 }, { "epoch": 0.06394362864316984, "grad_norm": 3.0667602949659627, "learning_rate": 4.964115997986148e-06, "loss": 1.0434, "step": 608 }, { "epoch": 0.06404879908501715, "grad_norm": 2.3511531120455955, "learning_rate": 4.963974923949386e-06, "loss": 1.0321, "step": 609 }, { "epoch": 0.06415396952686447, "grad_norm": 2.7978514748891055, "learning_rate": 4.963833575160278e-06, "loss": 1.0402, "step": 610 }, { "epoch": 0.0642591399687118, "grad_norm": 4.049018255338047, "learning_rate": 4.963691951634588e-06, "loss": 1.0826, "step": 611 }, { "epoch": 0.06436431041055911, "grad_norm": 3.3678790577984072, "learning_rate": 4.9635500533881065e-06, "loss": 1.0306, "step": 612 }, { "epoch": 0.06446948085240643, "grad_norm": 3.0634850122257786, "learning_rate": 4.963407880436657e-06, "loss": 1.0558, "step": 613 }, { "epoch": 0.06457465129425374, "grad_norm": 3.0330037801287957, "learning_rate": 4.963265432796092e-06, "loss": 1.0743, "step": 614 }, { "epoch": 0.06467982173610107, "grad_norm": 3.071000708505292, "learning_rate": 4.963122710482295e-06, "loss": 1.0543, "step": 615 }, { "epoch": 0.06478499217794839, "grad_norm": 2.8610188182071385, "learning_rate": 4.962979713511183e-06, "loss": 1.0579, "step": 616 }, { "epoch": 0.0648901626197957, "grad_norm": 4.206292540959885, "learning_rate": 4.962836441898699e-06, "loss": 1.0701, "step": 617 }, { "epoch": 0.06499533306164303, "grad_norm": 4.356324104061252, "learning_rate": 4.96269289566082e-06, "loss": 1.0955, "step": 618 }, { "epoch": 0.06510050350349035, "grad_norm": 3.319002510122104, "learning_rate": 4.9625490748135525e-06, "loss": 1.0207, "step": 619 }, { "epoch": 0.06520567394533766, "grad_norm": 3.7127033614212026, "learning_rate": 4.962404979372933e-06, "loss": 1.0698, "step": 620 }, { "epoch": 0.06531084438718499, "grad_norm": 4.18629805171437, "learning_rate": 4.962260609355029e-06, "loss": 1.0392, "step": 621 }, { "epoch": 0.0654160148290323, "grad_norm": 4.993374261310267, "learning_rate": 4.962115964775941e-06, "loss": 1.008, "step": 622 }, { "epoch": 0.06552118527087962, "grad_norm": 3.017269799303926, "learning_rate": 4.961971045651796e-06, "loss": 1.0126, "step": 623 }, { "epoch": 0.06562635571272694, "grad_norm": 2.700741825605453, "learning_rate": 4.961825851998754e-06, "loss": 1.0446, "step": 624 }, { "epoch": 0.06573152615457425, "grad_norm": 3.3134417945987567, "learning_rate": 4.961680383833005e-06, "loss": 1.0507, "step": 625 }, { "epoch": 0.06583669659642158, "grad_norm": 4.294665174497018, "learning_rate": 4.961534641170771e-06, "loss": 1.0448, "step": 626 }, { "epoch": 0.06594186703826889, "grad_norm": 3.659408792404163, "learning_rate": 4.961388624028303e-06, "loss": 1.0181, "step": 627 }, { "epoch": 0.06604703748011621, "grad_norm": 3.640846682505268, "learning_rate": 4.9612423324218816e-06, "loss": 1.1181, "step": 628 }, { "epoch": 0.06615220792196354, "grad_norm": 3.086905697715193, "learning_rate": 4.961095766367821e-06, "loss": 1.0573, "step": 629 }, { "epoch": 0.06625737836381085, "grad_norm": 3.9597870998951468, "learning_rate": 4.960948925882464e-06, "loss": 1.0285, "step": 630 }, { "epoch": 0.06636254880565817, "grad_norm": 4.292598646500938, "learning_rate": 4.960801810982184e-06, "loss": 1.0601, "step": 631 }, { "epoch": 0.06646771924750548, "grad_norm": 3.600297435919102, "learning_rate": 4.9606544216833865e-06, "loss": 1.0509, "step": 632 }, { "epoch": 0.06657288968935281, "grad_norm": 3.7580399038893755, "learning_rate": 4.960506758002506e-06, "loss": 1.0315, "step": 633 }, { "epoch": 0.06667806013120013, "grad_norm": 3.2034136066067047, "learning_rate": 4.960358819956007e-06, "loss": 1.0486, "step": 634 }, { "epoch": 0.06678323057304744, "grad_norm": 2.650453131312526, "learning_rate": 4.960210607560387e-06, "loss": 1.004, "step": 635 }, { "epoch": 0.06688840101489477, "grad_norm": 3.3291779041483793, "learning_rate": 4.960062120832174e-06, "loss": 1.0592, "step": 636 }, { "epoch": 0.06699357145674208, "grad_norm": 4.162217220561789, "learning_rate": 4.959913359787923e-06, "loss": 1.0178, "step": 637 }, { "epoch": 0.0670987418985894, "grad_norm": 2.909767419364276, "learning_rate": 4.959764324444224e-06, "loss": 1.0572, "step": 638 }, { "epoch": 0.06720391234043673, "grad_norm": 3.4590810268323127, "learning_rate": 4.959615014817694e-06, "loss": 0.9815, "step": 639 }, { "epoch": 0.06730908278228404, "grad_norm": 5.476510291071485, "learning_rate": 4.959465430924983e-06, "loss": 1.0637, "step": 640 }, { "epoch": 0.06741425322413136, "grad_norm": 3.499714211214465, "learning_rate": 4.9593155727827705e-06, "loss": 1.0513, "step": 641 }, { "epoch": 0.06751942366597867, "grad_norm": 2.055004505673057, "learning_rate": 4.9591654404077675e-06, "loss": 1.043, "step": 642 }, { "epoch": 0.067624594107826, "grad_norm": 3.4790021910325915, "learning_rate": 4.959015033816714e-06, "loss": 1.0246, "step": 643 }, { "epoch": 0.06772976454967332, "grad_norm": 3.278908306612351, "learning_rate": 4.958864353026382e-06, "loss": 1.0671, "step": 644 }, { "epoch": 0.06783493499152063, "grad_norm": 3.2634424157131585, "learning_rate": 4.958713398053574e-06, "loss": 1.052, "step": 645 }, { "epoch": 0.06794010543336795, "grad_norm": 3.123492650449054, "learning_rate": 4.958562168915121e-06, "loss": 1.0305, "step": 646 }, { "epoch": 0.06804527587521528, "grad_norm": 3.237487642730812, "learning_rate": 4.9584106656278884e-06, "loss": 1.0463, "step": 647 }, { "epoch": 0.06815044631706259, "grad_norm": 4.187978143084541, "learning_rate": 4.958258888208769e-06, "loss": 1.0597, "step": 648 }, { "epoch": 0.06825561675890991, "grad_norm": 2.9512732989254475, "learning_rate": 4.958106836674686e-06, "loss": 1.0864, "step": 649 }, { "epoch": 0.06836078720075722, "grad_norm": 2.922076078908869, "learning_rate": 4.957954511042598e-06, "loss": 1.0638, "step": 650 }, { "epoch": 0.06846595764260455, "grad_norm": 4.07040889167094, "learning_rate": 4.957801911329485e-06, "loss": 1.0417, "step": 651 }, { "epoch": 0.06857112808445187, "grad_norm": 2.8281332452599495, "learning_rate": 4.957649037552368e-06, "loss": 1.0614, "step": 652 }, { "epoch": 0.06867629852629918, "grad_norm": 2.6425711970959544, "learning_rate": 4.957495889728291e-06, "loss": 1.0922, "step": 653 }, { "epoch": 0.0687814689681465, "grad_norm": 2.4951631686592157, "learning_rate": 4.957342467874331e-06, "loss": 1.017, "step": 654 }, { "epoch": 0.06888663940999382, "grad_norm": 2.738893400348362, "learning_rate": 4.957188772007597e-06, "loss": 1.0489, "step": 655 }, { "epoch": 0.06899180985184114, "grad_norm": 4.8906186641004785, "learning_rate": 4.957034802145227e-06, "loss": 1.0226, "step": 656 }, { "epoch": 0.06909698029368846, "grad_norm": 3.9715716237558287, "learning_rate": 4.9568805583043905e-06, "loss": 1.0836, "step": 657 }, { "epoch": 0.06920215073553578, "grad_norm": 2.7599656326533384, "learning_rate": 4.956726040502285e-06, "loss": 1.0429, "step": 658 }, { "epoch": 0.0693073211773831, "grad_norm": 3.4427680533746687, "learning_rate": 4.956571248756142e-06, "loss": 1.03, "step": 659 }, { "epoch": 0.06941249161923041, "grad_norm": 3.5854381263268666, "learning_rate": 4.9564161830832214e-06, "loss": 1.0513, "step": 660 }, { "epoch": 0.06951766206107773, "grad_norm": 3.7704419908620355, "learning_rate": 4.956260843500814e-06, "loss": 1.0662, "step": 661 }, { "epoch": 0.06962283250292506, "grad_norm": 4.3580242444386545, "learning_rate": 4.9561052300262415e-06, "loss": 1.0661, "step": 662 }, { "epoch": 0.06972800294477237, "grad_norm": 3.720266896736387, "learning_rate": 4.955949342676856e-06, "loss": 1.0175, "step": 663 }, { "epoch": 0.06983317338661969, "grad_norm": 4.7410376644337475, "learning_rate": 4.955793181470041e-06, "loss": 1.0149, "step": 664 }, { "epoch": 0.069938343828467, "grad_norm": 3.32459207886422, "learning_rate": 4.955636746423208e-06, "loss": 1.0271, "step": 665 }, { "epoch": 0.07004351427031433, "grad_norm": 4.773668981576176, "learning_rate": 4.9554800375538026e-06, "loss": 1.1019, "step": 666 }, { "epoch": 0.07014868471216165, "grad_norm": 4.563403967070941, "learning_rate": 4.955323054879298e-06, "loss": 1.0346, "step": 667 }, { "epoch": 0.07025385515400896, "grad_norm": 3.2801934792913885, "learning_rate": 4.9551657984171995e-06, "loss": 1.0036, "step": 668 }, { "epoch": 0.07035902559585629, "grad_norm": 2.815254256813279, "learning_rate": 4.955008268185041e-06, "loss": 1.0693, "step": 669 }, { "epoch": 0.0704641960377036, "grad_norm": 3.9847665137382737, "learning_rate": 4.954850464200391e-06, "loss": 1.0579, "step": 670 }, { "epoch": 0.07056936647955092, "grad_norm": 3.8295707952048708, "learning_rate": 4.954692386480843e-06, "loss": 1.0618, "step": 671 }, { "epoch": 0.07067453692139825, "grad_norm": 4.523650449147604, "learning_rate": 4.954534035044025e-06, "loss": 1.0736, "step": 672 }, { "epoch": 0.07077970736324556, "grad_norm": 3.6692146168192235, "learning_rate": 4.954375409907595e-06, "loss": 1.0575, "step": 673 }, { "epoch": 0.07088487780509288, "grad_norm": 3.325725179037857, "learning_rate": 4.954216511089242e-06, "loss": 1.0396, "step": 674 }, { "epoch": 0.07099004824694019, "grad_norm": 3.3396872270434512, "learning_rate": 4.954057338606681e-06, "loss": 1.0595, "step": 675 }, { "epoch": 0.07109521868878751, "grad_norm": 2.472910469422091, "learning_rate": 4.953897892477664e-06, "loss": 1.0438, "step": 676 }, { "epoch": 0.07120038913063484, "grad_norm": 2.9528197412204227, "learning_rate": 4.953738172719969e-06, "loss": 1.0478, "step": 677 }, { "epoch": 0.07130555957248215, "grad_norm": 3.1922126178334853, "learning_rate": 4.953578179351407e-06, "loss": 1.0706, "step": 678 }, { "epoch": 0.07141073001432947, "grad_norm": 2.9060870572179778, "learning_rate": 4.953417912389817e-06, "loss": 1.0375, "step": 679 }, { "epoch": 0.0715159004561768, "grad_norm": 2.2896504072768393, "learning_rate": 4.9532573718530716e-06, "loss": 1.0023, "step": 680 }, { "epoch": 0.07162107089802411, "grad_norm": 3.4112113846941288, "learning_rate": 4.953096557759072e-06, "loss": 1.0565, "step": 681 }, { "epoch": 0.07172624133987143, "grad_norm": 3.7035390337251837, "learning_rate": 4.9529354701257486e-06, "loss": 1.0406, "step": 682 }, { "epoch": 0.07183141178171874, "grad_norm": 2.8453052460942705, "learning_rate": 4.9527741089710664e-06, "loss": 1.0652, "step": 683 }, { "epoch": 0.07193658222356607, "grad_norm": 3.7620645383784233, "learning_rate": 4.952612474313017e-06, "loss": 1.047, "step": 684 }, { "epoch": 0.07204175266541339, "grad_norm": 4.070346821177919, "learning_rate": 4.9524505661696235e-06, "loss": 1.0757, "step": 685 }, { "epoch": 0.0721469231072607, "grad_norm": 3.5783791981742135, "learning_rate": 4.952288384558941e-06, "loss": 1.0512, "step": 686 }, { "epoch": 0.07225209354910803, "grad_norm": 3.0673840522581046, "learning_rate": 4.952125929499054e-06, "loss": 1.0278, "step": 687 }, { "epoch": 0.07235726399095534, "grad_norm": 3.2229108300056653, "learning_rate": 4.9519632010080765e-06, "loss": 1.0657, "step": 688 }, { "epoch": 0.07246243443280266, "grad_norm": 2.652129171991843, "learning_rate": 4.951800199104155e-06, "loss": 1.0299, "step": 689 }, { "epoch": 0.07256760487464999, "grad_norm": 3.364270267477337, "learning_rate": 4.951636923805464e-06, "loss": 1.0527, "step": 690 }, { "epoch": 0.0726727753164973, "grad_norm": 3.4812038018158136, "learning_rate": 4.951473375130212e-06, "loss": 1.0331, "step": 691 }, { "epoch": 0.07277794575834462, "grad_norm": 3.7937124089162704, "learning_rate": 4.951309553096635e-06, "loss": 1.0407, "step": 692 }, { "epoch": 0.07288311620019193, "grad_norm": 3.1503406868653787, "learning_rate": 4.951145457723e-06, "loss": 1.0531, "step": 693 }, { "epoch": 0.07298828664203925, "grad_norm": 2.8144705447628526, "learning_rate": 4.9509810890276065e-06, "loss": 1.1033, "step": 694 }, { "epoch": 0.07309345708388658, "grad_norm": 3.3436807422524053, "learning_rate": 4.95081644702878e-06, "loss": 1.0837, "step": 695 }, { "epoch": 0.07319862752573389, "grad_norm": 4.010147599775435, "learning_rate": 4.950651531744882e-06, "loss": 1.0769, "step": 696 }, { "epoch": 0.07330379796758121, "grad_norm": 3.1326979849262195, "learning_rate": 4.9504863431943004e-06, "loss": 1.0019, "step": 697 }, { "epoch": 0.07340896840942852, "grad_norm": 3.1431506128416995, "learning_rate": 4.9503208813954565e-06, "loss": 1.0326, "step": 698 }, { "epoch": 0.07351413885127585, "grad_norm": 3.563862494912588, "learning_rate": 4.9501551463667985e-06, "loss": 1.0365, "step": 699 }, { "epoch": 0.07361930929312317, "grad_norm": 2.9908810962971812, "learning_rate": 4.949989138126809e-06, "loss": 1.0655, "step": 700 }, { "epoch": 0.07372447973497048, "grad_norm": 2.169128476617307, "learning_rate": 4.949822856693997e-06, "loss": 1.0292, "step": 701 }, { "epoch": 0.07382965017681781, "grad_norm": 4.359267398693771, "learning_rate": 4.949656302086907e-06, "loss": 1.0228, "step": 702 }, { "epoch": 0.07393482061866512, "grad_norm": 3.787799254738041, "learning_rate": 4.949489474324108e-06, "loss": 1.0041, "step": 703 }, { "epoch": 0.07403999106051244, "grad_norm": 2.667883375421812, "learning_rate": 4.949322373424206e-06, "loss": 1.0423, "step": 704 }, { "epoch": 0.07414516150235977, "grad_norm": 2.9302612190851787, "learning_rate": 4.949154999405832e-06, "loss": 1.0259, "step": 705 }, { "epoch": 0.07425033194420708, "grad_norm": 4.283595912438445, "learning_rate": 4.94898735228765e-06, "loss": 1.0675, "step": 706 }, { "epoch": 0.0743555023860544, "grad_norm": 3.2175291514292996, "learning_rate": 4.948819432088353e-06, "loss": 1.0215, "step": 707 }, { "epoch": 0.07446067282790172, "grad_norm": 3.7422947687070645, "learning_rate": 4.948651238826667e-06, "loss": 1.0539, "step": 708 }, { "epoch": 0.07456584326974904, "grad_norm": 3.6570635294285383, "learning_rate": 4.948482772521346e-06, "loss": 1.0375, "step": 709 }, { "epoch": 0.07467101371159636, "grad_norm": 4.267142662139774, "learning_rate": 4.948314033191175e-06, "loss": 1.0495, "step": 710 }, { "epoch": 0.07477618415344367, "grad_norm": 3.6286208637111277, "learning_rate": 4.948145020854971e-06, "loss": 1.0083, "step": 711 }, { "epoch": 0.074881354595291, "grad_norm": 3.942094346556032, "learning_rate": 4.947975735531578e-06, "loss": 1.0355, "step": 712 }, { "epoch": 0.07498652503713832, "grad_norm": 2.0042428048097256, "learning_rate": 4.947806177239875e-06, "loss": 1.066, "step": 713 }, { "epoch": 0.07509169547898563, "grad_norm": 3.4172310190837143, "learning_rate": 4.947636345998768e-06, "loss": 1.0153, "step": 714 }, { "epoch": 0.07519686592083295, "grad_norm": 3.2523213929196273, "learning_rate": 4.947466241827194e-06, "loss": 1.0243, "step": 715 }, { "epoch": 0.07530203636268026, "grad_norm": 4.081933448185989, "learning_rate": 4.947295864744121e-06, "loss": 0.9925, "step": 716 }, { "epoch": 0.07540720680452759, "grad_norm": 3.0031168886878583, "learning_rate": 4.947125214768549e-06, "loss": 1.0452, "step": 717 }, { "epoch": 0.07551237724637491, "grad_norm": 2.8581225129342287, "learning_rate": 4.946954291919505e-06, "loss": 1.0324, "step": 718 }, { "epoch": 0.07561754768822222, "grad_norm": 2.663605746652252, "learning_rate": 4.946783096216049e-06, "loss": 1.0029, "step": 719 }, { "epoch": 0.07572271813006955, "grad_norm": 3.2647089266169984, "learning_rate": 4.94661162767727e-06, "loss": 1.0534, "step": 720 }, { "epoch": 0.07582788857191686, "grad_norm": 3.6156070223917216, "learning_rate": 4.94643988632229e-06, "loss": 0.9935, "step": 721 }, { "epoch": 0.07593305901376418, "grad_norm": 3.758495500937088, "learning_rate": 4.946267872170256e-06, "loss": 0.9976, "step": 722 }, { "epoch": 0.0760382294556115, "grad_norm": 2.8459717369309736, "learning_rate": 4.946095585240353e-06, "loss": 1.0202, "step": 723 }, { "epoch": 0.07614339989745882, "grad_norm": 4.228677921247087, "learning_rate": 4.945923025551789e-06, "loss": 1.0273, "step": 724 }, { "epoch": 0.07624857033930614, "grad_norm": 2.354998022999138, "learning_rate": 4.945750193123808e-06, "loss": 1.0082, "step": 725 }, { "epoch": 0.07635374078115345, "grad_norm": 3.1974238173146965, "learning_rate": 4.94557708797568e-06, "loss": 1.0845, "step": 726 }, { "epoch": 0.07645891122300078, "grad_norm": 3.2991867123662186, "learning_rate": 4.94540371012671e-06, "loss": 1.0363, "step": 727 }, { "epoch": 0.0765640816648481, "grad_norm": 4.790231421271602, "learning_rate": 4.945230059596229e-06, "loss": 1.0522, "step": 728 }, { "epoch": 0.07666925210669541, "grad_norm": 2.985913413424653, "learning_rate": 4.945056136403601e-06, "loss": 1.0011, "step": 729 }, { "epoch": 0.07677442254854273, "grad_norm": 3.4749105660247452, "learning_rate": 4.944881940568219e-06, "loss": 1.0506, "step": 730 }, { "epoch": 0.07687959299039004, "grad_norm": 2.331821909393682, "learning_rate": 4.94470747210951e-06, "loss": 0.9939, "step": 731 }, { "epoch": 0.07698476343223737, "grad_norm": 3.2284649198570965, "learning_rate": 4.944532731046926e-06, "loss": 1.0831, "step": 732 }, { "epoch": 0.07708993387408469, "grad_norm": 3.902120982308078, "learning_rate": 4.944357717399952e-06, "loss": 1.1132, "step": 733 }, { "epoch": 0.077195104315932, "grad_norm": 3.4172360346109785, "learning_rate": 4.944182431188106e-06, "loss": 1.073, "step": 734 }, { "epoch": 0.07730027475777933, "grad_norm": 3.6093384619673254, "learning_rate": 4.94400687243093e-06, "loss": 1.0422, "step": 735 }, { "epoch": 0.07740544519962665, "grad_norm": 2.938298485477721, "learning_rate": 4.943831041148003e-06, "loss": 0.991, "step": 736 }, { "epoch": 0.07751061564147396, "grad_norm": 3.5768669215903386, "learning_rate": 4.94365493735893e-06, "loss": 1.0159, "step": 737 }, { "epoch": 0.07761578608332129, "grad_norm": 3.236703601168475, "learning_rate": 4.943478561083349e-06, "loss": 1.0307, "step": 738 }, { "epoch": 0.0777209565251686, "grad_norm": 4.136119049876218, "learning_rate": 4.9433019123409264e-06, "loss": 1.0184, "step": 739 }, { "epoch": 0.07782612696701592, "grad_norm": 3.485691899686312, "learning_rate": 4.94312499115136e-06, "loss": 1.0624, "step": 740 }, { "epoch": 0.07793129740886325, "grad_norm": 3.294064033644912, "learning_rate": 4.942947797534379e-06, "loss": 1.0091, "step": 741 }, { "epoch": 0.07803646785071056, "grad_norm": 3.388312825573561, "learning_rate": 4.942770331509741e-06, "loss": 1.048, "step": 742 }, { "epoch": 0.07814163829255788, "grad_norm": 3.8425212422184707, "learning_rate": 4.942592593097235e-06, "loss": 1.0264, "step": 743 }, { "epoch": 0.07824680873440519, "grad_norm": 4.216912890195311, "learning_rate": 4.942414582316679e-06, "loss": 1.0031, "step": 744 }, { "epoch": 0.07835197917625251, "grad_norm": 2.943110966674974, "learning_rate": 4.942236299187926e-06, "loss": 1.0654, "step": 745 }, { "epoch": 0.07845714961809984, "grad_norm": 2.7561756500798076, "learning_rate": 4.942057743730852e-06, "loss": 1.0762, "step": 746 }, { "epoch": 0.07856232005994715, "grad_norm": 2.5060436964593458, "learning_rate": 4.941878915965369e-06, "loss": 1.0292, "step": 747 }, { "epoch": 0.07866749050179447, "grad_norm": 4.677558119647075, "learning_rate": 4.941699815911418e-06, "loss": 1.0099, "step": 748 }, { "epoch": 0.07877266094364178, "grad_norm": 2.6303885402682554, "learning_rate": 4.94152044358897e-06, "loss": 1.0585, "step": 749 }, { "epoch": 0.07887783138548911, "grad_norm": 2.9906552945797777, "learning_rate": 4.941340799018026e-06, "loss": 1.0716, "step": 750 }, { "epoch": 0.07898300182733643, "grad_norm": 3.1557961688407, "learning_rate": 4.941160882218617e-06, "loss": 1.0114, "step": 751 }, { "epoch": 0.07908817226918374, "grad_norm": 2.5082121223741614, "learning_rate": 4.940980693210807e-06, "loss": 1.0839, "step": 752 }, { "epoch": 0.07919334271103107, "grad_norm": 4.115073295467908, "learning_rate": 4.940800232014688e-06, "loss": 1.0528, "step": 753 }, { "epoch": 0.07929851315287838, "grad_norm": 4.221836693988338, "learning_rate": 4.940619498650381e-06, "loss": 1.0458, "step": 754 }, { "epoch": 0.0794036835947257, "grad_norm": 3.43936918626118, "learning_rate": 4.940438493138041e-06, "loss": 1.1102, "step": 755 }, { "epoch": 0.07950885403657303, "grad_norm": 2.534335147364592, "learning_rate": 4.9402572154978515e-06, "loss": 1.03, "step": 756 }, { "epoch": 0.07961402447842034, "grad_norm": 3.6985951390943397, "learning_rate": 4.940075665750026e-06, "loss": 1.0355, "step": 757 }, { "epoch": 0.07971919492026766, "grad_norm": 3.654741106636443, "learning_rate": 4.939893843914808e-06, "loss": 1.0647, "step": 758 }, { "epoch": 0.07982436536211497, "grad_norm": 3.686292494310798, "learning_rate": 4.9397117500124725e-06, "loss": 1.0508, "step": 759 }, { "epoch": 0.0799295358039623, "grad_norm": 4.219962087918095, "learning_rate": 4.939529384063325e-06, "loss": 1.0415, "step": 760 }, { "epoch": 0.08003470624580962, "grad_norm": 3.5870732019483387, "learning_rate": 4.9393467460876995e-06, "loss": 1.0291, "step": 761 }, { "epoch": 0.08013987668765693, "grad_norm": 3.5610335680344902, "learning_rate": 4.939163836105964e-06, "loss": 1.0283, "step": 762 }, { "epoch": 0.08024504712950425, "grad_norm": 2.8440793536671802, "learning_rate": 4.938980654138511e-06, "loss": 1.0676, "step": 763 }, { "epoch": 0.08035021757135156, "grad_norm": 3.5144508685059512, "learning_rate": 4.93879720020577e-06, "loss": 0.9927, "step": 764 }, { "epoch": 0.08045538801319889, "grad_norm": 3.6070080778872353, "learning_rate": 4.938613474328195e-06, "loss": 1.0694, "step": 765 }, { "epoch": 0.08056055845504621, "grad_norm": 4.242572556515164, "learning_rate": 4.938429476526273e-06, "loss": 1.027, "step": 766 }, { "epoch": 0.08066572889689352, "grad_norm": 2.4803307039348605, "learning_rate": 4.938245206820522e-06, "loss": 1.0119, "step": 767 }, { "epoch": 0.08077089933874085, "grad_norm": 2.9387959106412804, "learning_rate": 4.938060665231491e-06, "loss": 1.0404, "step": 768 }, { "epoch": 0.08087606978058817, "grad_norm": 4.172139806499708, "learning_rate": 4.937875851779755e-06, "loss": 1.0385, "step": 769 }, { "epoch": 0.08098124022243548, "grad_norm": 3.0840295037017786, "learning_rate": 4.937690766485924e-06, "loss": 1.0129, "step": 770 }, { "epoch": 0.08108641066428281, "grad_norm": 3.070735572354231, "learning_rate": 4.9375054093706356e-06, "loss": 1.0741, "step": 771 }, { "epoch": 0.08119158110613012, "grad_norm": 3.1878677650163247, "learning_rate": 4.937319780454559e-06, "loss": 1.0794, "step": 772 }, { "epoch": 0.08129675154797744, "grad_norm": 2.863706515463834, "learning_rate": 4.937133879758394e-06, "loss": 1.0336, "step": 773 }, { "epoch": 0.08140192198982477, "grad_norm": 3.0701280575553334, "learning_rate": 4.936947707302868e-06, "loss": 1.0586, "step": 774 }, { "epoch": 0.08150709243167208, "grad_norm": 3.7990360279352218, "learning_rate": 4.936761263108742e-06, "loss": 1.013, "step": 775 }, { "epoch": 0.0816122628735194, "grad_norm": 3.2463147605003493, "learning_rate": 4.936574547196806e-06, "loss": 1.0674, "step": 776 }, { "epoch": 0.08171743331536671, "grad_norm": 3.346659843217337, "learning_rate": 4.93638755958788e-06, "loss": 1.0484, "step": 777 }, { "epoch": 0.08182260375721404, "grad_norm": 3.421009993046732, "learning_rate": 4.9362003003028135e-06, "loss": 0.9867, "step": 778 }, { "epoch": 0.08192777419906136, "grad_norm": 3.5732995197430086, "learning_rate": 4.93601276936249e-06, "loss": 1.0472, "step": 779 }, { "epoch": 0.08203294464090867, "grad_norm": 2.5915581383584554, "learning_rate": 4.935824966787818e-06, "loss": 1.0096, "step": 780 }, { "epoch": 0.082138115082756, "grad_norm": 2.6084843786308376, "learning_rate": 4.935636892599741e-06, "loss": 1.0011, "step": 781 }, { "epoch": 0.0822432855246033, "grad_norm": 2.7979653896224375, "learning_rate": 4.935448546819229e-06, "loss": 1.0501, "step": 782 }, { "epoch": 0.08234845596645063, "grad_norm": 3.1927150256696253, "learning_rate": 4.935259929467285e-06, "loss": 1.0239, "step": 783 }, { "epoch": 0.08245362640829795, "grad_norm": 3.5723314450964945, "learning_rate": 4.935071040564942e-06, "loss": 1.06, "step": 784 }, { "epoch": 0.08255879685014526, "grad_norm": 2.951913249700751, "learning_rate": 4.93488188013326e-06, "loss": 1.091, "step": 785 }, { "epoch": 0.08266396729199259, "grad_norm": 2.897178403548757, "learning_rate": 4.9346924481933345e-06, "loss": 0.9997, "step": 786 }, { "epoch": 0.0827691377338399, "grad_norm": 2.976619910088866, "learning_rate": 4.9345027447662876e-06, "loss": 1.0628, "step": 787 }, { "epoch": 0.08287430817568722, "grad_norm": 5.562882028586305, "learning_rate": 4.934312769873273e-06, "loss": 1.0514, "step": 788 }, { "epoch": 0.08297947861753455, "grad_norm": 3.3122007501483086, "learning_rate": 4.934122523535474e-06, "loss": 1.017, "step": 789 }, { "epoch": 0.08308464905938186, "grad_norm": 4.484356852207891, "learning_rate": 4.9339320057741045e-06, "loss": 1.0628, "step": 790 }, { "epoch": 0.08318981950122918, "grad_norm": 2.662851425317841, "learning_rate": 4.933741216610409e-06, "loss": 1.068, "step": 791 }, { "epoch": 0.08329498994307649, "grad_norm": 3.0734598927453765, "learning_rate": 4.933550156065662e-06, "loss": 1.043, "step": 792 }, { "epoch": 0.08340016038492382, "grad_norm": 2.692919866577266, "learning_rate": 4.933358824161167e-06, "loss": 1.0257, "step": 793 }, { "epoch": 0.08350533082677114, "grad_norm": 3.656399597645738, "learning_rate": 4.933167220918262e-06, "loss": 1.0345, "step": 794 }, { "epoch": 0.08361050126861845, "grad_norm": 3.587236612420999, "learning_rate": 4.9329753463583095e-06, "loss": 1.0455, "step": 795 }, { "epoch": 0.08371567171046577, "grad_norm": 3.656718452993634, "learning_rate": 4.932783200502705e-06, "loss": 1.0298, "step": 796 }, { "epoch": 0.0838208421523131, "grad_norm": 3.00036115403809, "learning_rate": 4.932590783372877e-06, "loss": 1.022, "step": 797 }, { "epoch": 0.08392601259416041, "grad_norm": 2.5105535700401242, "learning_rate": 4.9323980949902786e-06, "loss": 1.0868, "step": 798 }, { "epoch": 0.08403118303600773, "grad_norm": 3.7118503835471546, "learning_rate": 4.9322051353763965e-06, "loss": 1.03, "step": 799 }, { "epoch": 0.08413635347785504, "grad_norm": 4.549244874514477, "learning_rate": 4.932011904552749e-06, "loss": 1.0312, "step": 800 }, { "epoch": 0.08424152391970237, "grad_norm": 2.695319366140478, "learning_rate": 4.931818402540881e-06, "loss": 1.0456, "step": 801 }, { "epoch": 0.08434669436154969, "grad_norm": 5.172251410102943, "learning_rate": 4.93162462936237e-06, "loss": 1.06, "step": 802 }, { "epoch": 0.084451864803397, "grad_norm": 3.731288930108174, "learning_rate": 4.931430585038823e-06, "loss": 1.0429, "step": 803 }, { "epoch": 0.08455703524524433, "grad_norm": 2.688017597159252, "learning_rate": 4.931236269591878e-06, "loss": 1.0244, "step": 804 }, { "epoch": 0.08466220568709164, "grad_norm": 2.8764757418451796, "learning_rate": 4.9310416830432025e-06, "loss": 1.0227, "step": 805 }, { "epoch": 0.08476737612893896, "grad_norm": 4.0235649503508455, "learning_rate": 4.930846825414495e-06, "loss": 1.0423, "step": 806 }, { "epoch": 0.08487254657078629, "grad_norm": 3.536335146675604, "learning_rate": 4.930651696727482e-06, "loss": 1.0062, "step": 807 }, { "epoch": 0.0849777170126336, "grad_norm": 2.957564708059679, "learning_rate": 4.930456297003923e-06, "loss": 1.0522, "step": 808 }, { "epoch": 0.08508288745448092, "grad_norm": 3.219817117203254, "learning_rate": 4.930260626265607e-06, "loss": 1.0915, "step": 809 }, { "epoch": 0.08518805789632823, "grad_norm": 2.2575110274834636, "learning_rate": 4.930064684534352e-06, "loss": 1.0395, "step": 810 }, { "epoch": 0.08529322833817556, "grad_norm": 3.301667345209895, "learning_rate": 4.929868471832007e-06, "loss": 1.0394, "step": 811 }, { "epoch": 0.08539839878002288, "grad_norm": 4.501375773442117, "learning_rate": 4.929671988180452e-06, "loss": 1.0537, "step": 812 }, { "epoch": 0.08550356922187019, "grad_norm": 2.9447996895825437, "learning_rate": 4.929475233601595e-06, "loss": 1.0163, "step": 813 }, { "epoch": 0.08560873966371751, "grad_norm": 3.890755121107722, "learning_rate": 4.929278208117378e-06, "loss": 1.0418, "step": 814 }, { "epoch": 0.08571391010556483, "grad_norm": 2.55891195688046, "learning_rate": 4.929080911749769e-06, "loss": 1.0191, "step": 815 }, { "epoch": 0.08581908054741215, "grad_norm": 4.672850050572631, "learning_rate": 4.928883344520768e-06, "loss": 1.0363, "step": 816 }, { "epoch": 0.08592425098925947, "grad_norm": 3.1282491114827575, "learning_rate": 4.928685506452407e-06, "loss": 1.0421, "step": 817 }, { "epoch": 0.08602942143110678, "grad_norm": 2.0646227484923143, "learning_rate": 4.928487397566743e-06, "loss": 1.0407, "step": 818 }, { "epoch": 0.08613459187295411, "grad_norm": 3.2962481364973564, "learning_rate": 4.928289017885871e-06, "loss": 1.0724, "step": 819 }, { "epoch": 0.08623976231480142, "grad_norm": 3.4645243615129897, "learning_rate": 4.92809036743191e-06, "loss": 1.0396, "step": 820 }, { "epoch": 0.08634493275664874, "grad_norm": 2.3605676802931286, "learning_rate": 4.92789144622701e-06, "loss": 0.9944, "step": 821 }, { "epoch": 0.08645010319849607, "grad_norm": 4.514015314071715, "learning_rate": 4.927692254293354e-06, "loss": 1.0792, "step": 822 }, { "epoch": 0.08655527364034338, "grad_norm": 3.364110041018555, "learning_rate": 4.927492791653153e-06, "loss": 1.0558, "step": 823 }, { "epoch": 0.0866604440821907, "grad_norm": 2.1273722043275995, "learning_rate": 4.927293058328647e-06, "loss": 1.0158, "step": 824 }, { "epoch": 0.08676561452403801, "grad_norm": 3.5256816639423327, "learning_rate": 4.92709305434211e-06, "loss": 1.0353, "step": 825 }, { "epoch": 0.08687078496588534, "grad_norm": 3.6339397393250095, "learning_rate": 4.926892779715843e-06, "loss": 1.0626, "step": 826 }, { "epoch": 0.08697595540773266, "grad_norm": 2.9798523498933567, "learning_rate": 4.926692234472178e-06, "loss": 1.0116, "step": 827 }, { "epoch": 0.08708112584957997, "grad_norm": 3.025215020843175, "learning_rate": 4.9264914186334775e-06, "loss": 1.0143, "step": 828 }, { "epoch": 0.0871862962914273, "grad_norm": 2.931478722868882, "learning_rate": 4.926290332222134e-06, "loss": 1.0852, "step": 829 }, { "epoch": 0.08729146673327462, "grad_norm": 3.179330588406845, "learning_rate": 4.9260889752605715e-06, "loss": 1.0382, "step": 830 }, { "epoch": 0.08739663717512193, "grad_norm": 2.7220659147274264, "learning_rate": 4.925887347771241e-06, "loss": 1.0312, "step": 831 }, { "epoch": 0.08750180761696925, "grad_norm": 4.610622268460886, "learning_rate": 4.925685449776627e-06, "loss": 1.046, "step": 832 }, { "epoch": 0.08760697805881656, "grad_norm": 2.720186510106471, "learning_rate": 4.925483281299242e-06, "loss": 1.0246, "step": 833 }, { "epoch": 0.08771214850066389, "grad_norm": 2.4258313763166854, "learning_rate": 4.925280842361628e-06, "loss": 1.0416, "step": 834 }, { "epoch": 0.08781731894251121, "grad_norm": 2.5395043340810464, "learning_rate": 4.925078132986361e-06, "loss": 0.9997, "step": 835 }, { "epoch": 0.08792248938435852, "grad_norm": 3.0748465405966168, "learning_rate": 4.924875153196042e-06, "loss": 1.0486, "step": 836 }, { "epoch": 0.08802765982620585, "grad_norm": 3.676854554009859, "learning_rate": 4.924671903013308e-06, "loss": 1.0313, "step": 837 }, { "epoch": 0.08813283026805316, "grad_norm": 5.11342505319437, "learning_rate": 4.9244683824608205e-06, "loss": 1.0866, "step": 838 }, { "epoch": 0.08823800070990048, "grad_norm": 3.364235859687841, "learning_rate": 4.924264591561275e-06, "loss": 1.0374, "step": 839 }, { "epoch": 0.0883431711517478, "grad_norm": 2.543264664118595, "learning_rate": 4.924060530337394e-06, "loss": 1.036, "step": 840 }, { "epoch": 0.08844834159359512, "grad_norm": 2.2631599527781208, "learning_rate": 4.9238561988119346e-06, "loss": 1.0327, "step": 841 }, { "epoch": 0.08855351203544244, "grad_norm": 3.3000605867237742, "learning_rate": 4.923651597007679e-06, "loss": 1.0459, "step": 842 }, { "epoch": 0.08865868247728975, "grad_norm": 2.7652313110005657, "learning_rate": 4.923446724947443e-06, "loss": 1.0426, "step": 843 }, { "epoch": 0.08876385291913708, "grad_norm": 3.1194029297763133, "learning_rate": 4.923241582654071e-06, "loss": 1.0286, "step": 844 }, { "epoch": 0.0888690233609844, "grad_norm": 4.097579369777372, "learning_rate": 4.923036170150438e-06, "loss": 1.0021, "step": 845 }, { "epoch": 0.08897419380283171, "grad_norm": 2.741301220555694, "learning_rate": 4.922830487459449e-06, "loss": 1.0333, "step": 846 }, { "epoch": 0.08907936424467904, "grad_norm": 3.542962902887874, "learning_rate": 4.92262453460404e-06, "loss": 1.0349, "step": 847 }, { "epoch": 0.08918453468652635, "grad_norm": 2.855697339700191, "learning_rate": 4.922418311607176e-06, "loss": 1.0311, "step": 848 }, { "epoch": 0.08928970512837367, "grad_norm": 2.880135557426853, "learning_rate": 4.922211818491852e-06, "loss": 1.0054, "step": 849 }, { "epoch": 0.089394875570221, "grad_norm": 4.213156822451781, "learning_rate": 4.922005055281094e-06, "loss": 1.0374, "step": 850 }, { "epoch": 0.0895000460120683, "grad_norm": 3.6779910799103632, "learning_rate": 4.921798021997957e-06, "loss": 0.9997, "step": 851 }, { "epoch": 0.08960521645391563, "grad_norm": 4.758221935382132, "learning_rate": 4.921590718665527e-06, "loss": 1.0371, "step": 852 }, { "epoch": 0.08971038689576294, "grad_norm": 3.3975540812884444, "learning_rate": 4.921383145306922e-06, "loss": 1.0318, "step": 853 }, { "epoch": 0.08981555733761026, "grad_norm": 2.530639515419742, "learning_rate": 4.921175301945284e-06, "loss": 1.0652, "step": 854 }, { "epoch": 0.08992072777945759, "grad_norm": 2.8662710938283986, "learning_rate": 4.920967188603794e-06, "loss": 1.024, "step": 855 }, { "epoch": 0.0900258982213049, "grad_norm": 3.4254687930282737, "learning_rate": 4.920758805305654e-06, "loss": 1.0682, "step": 856 }, { "epoch": 0.09013106866315222, "grad_norm": 2.1300497289243707, "learning_rate": 4.920550152074103e-06, "loss": 1.0442, "step": 857 }, { "epoch": 0.09023623910499955, "grad_norm": 2.7404728800806417, "learning_rate": 4.920341228932406e-06, "loss": 1.0344, "step": 858 }, { "epoch": 0.09034140954684686, "grad_norm": 3.579891745395384, "learning_rate": 4.9201320359038595e-06, "loss": 1.016, "step": 859 }, { "epoch": 0.09044657998869418, "grad_norm": 3.5079338774058355, "learning_rate": 4.919922573011791e-06, "loss": 1.017, "step": 860 }, { "epoch": 0.09055175043054149, "grad_norm": 4.474680092802719, "learning_rate": 4.919712840279559e-06, "loss": 1.1165, "step": 861 }, { "epoch": 0.09065692087238882, "grad_norm": 2.7950268131476217, "learning_rate": 4.9195028377305465e-06, "loss": 1.0025, "step": 862 }, { "epoch": 0.09076209131423614, "grad_norm": 3.6168740028611377, "learning_rate": 4.919292565388172e-06, "loss": 1.0431, "step": 863 }, { "epoch": 0.09086726175608345, "grad_norm": 3.569181810804004, "learning_rate": 4.919082023275884e-06, "loss": 0.9941, "step": 864 }, { "epoch": 0.09097243219793077, "grad_norm": 2.687691480063549, "learning_rate": 4.918871211417157e-06, "loss": 1.0376, "step": 865 }, { "epoch": 0.09107760263977809, "grad_norm": 3.481903769104546, "learning_rate": 4.9186601298355e-06, "loss": 1.0285, "step": 866 }, { "epoch": 0.09118277308162541, "grad_norm": 3.2967751447777798, "learning_rate": 4.91844877855445e-06, "loss": 1.0255, "step": 867 }, { "epoch": 0.09128794352347273, "grad_norm": 3.258456644130292, "learning_rate": 4.918237157597574e-06, "loss": 1.009, "step": 868 }, { "epoch": 0.09139311396532004, "grad_norm": 4.155311114794585, "learning_rate": 4.918025266988469e-06, "loss": 1.0554, "step": 869 }, { "epoch": 0.09149828440716737, "grad_norm": 2.719205408216921, "learning_rate": 4.917813106750763e-06, "loss": 1.0526, "step": 870 }, { "epoch": 0.09160345484901468, "grad_norm": 3.7086484713956644, "learning_rate": 4.917600676908114e-06, "loss": 1.0841, "step": 871 }, { "epoch": 0.091708625290862, "grad_norm": 3.6291790131528265, "learning_rate": 4.9173879774842085e-06, "loss": 1.0466, "step": 872 }, { "epoch": 0.09181379573270933, "grad_norm": 4.901385161683924, "learning_rate": 4.917175008502763e-06, "loss": 1.0683, "step": 873 }, { "epoch": 0.09191896617455664, "grad_norm": 2.6391942505342145, "learning_rate": 4.91696176998753e-06, "loss": 1.0368, "step": 874 }, { "epoch": 0.09202413661640396, "grad_norm": 2.92397301023947, "learning_rate": 4.916748261962282e-06, "loss": 1.0502, "step": 875 }, { "epoch": 0.09212930705825127, "grad_norm": 3.3487651603386643, "learning_rate": 4.916534484450829e-06, "loss": 1.0492, "step": 876 }, { "epoch": 0.0922344775000986, "grad_norm": 3.4178169663489615, "learning_rate": 4.9163204374770085e-06, "loss": 1.0182, "step": 877 }, { "epoch": 0.09233964794194592, "grad_norm": 2.660490436372227, "learning_rate": 4.916106121064689e-06, "loss": 1.0636, "step": 878 }, { "epoch": 0.09244481838379323, "grad_norm": 3.647201920237282, "learning_rate": 4.915891535237768e-06, "loss": 1.0018, "step": 879 }, { "epoch": 0.09254998882564056, "grad_norm": 2.7950649078172547, "learning_rate": 4.915676680020173e-06, "loss": 1.0415, "step": 880 }, { "epoch": 0.09265515926748787, "grad_norm": 3.308392412587543, "learning_rate": 4.915461555435863e-06, "loss": 1.0095, "step": 881 }, { "epoch": 0.09276032970933519, "grad_norm": 2.497275944616539, "learning_rate": 4.915246161508825e-06, "loss": 1.0251, "step": 882 }, { "epoch": 0.09286550015118251, "grad_norm": 2.4296994999785975, "learning_rate": 4.915030498263079e-06, "loss": 1.0309, "step": 883 }, { "epoch": 0.09297067059302982, "grad_norm": 4.151821039375308, "learning_rate": 4.914814565722671e-06, "loss": 1.0479, "step": 884 }, { "epoch": 0.09307584103487715, "grad_norm": 2.935706432111893, "learning_rate": 4.91459836391168e-06, "loss": 1.0566, "step": 885 }, { "epoch": 0.09318101147672447, "grad_norm": 3.4020244169255527, "learning_rate": 4.914381892854214e-06, "loss": 1.0392, "step": 886 }, { "epoch": 0.09328618191857178, "grad_norm": 3.4742996375581985, "learning_rate": 4.914165152574412e-06, "loss": 1.0323, "step": 887 }, { "epoch": 0.09339135236041911, "grad_norm": 4.274200371311763, "learning_rate": 4.913948143096442e-06, "loss": 1.0627, "step": 888 }, { "epoch": 0.09349652280226642, "grad_norm": 5.300639145263014, "learning_rate": 4.9137308644445e-06, "loss": 1.015, "step": 889 }, { "epoch": 0.09360169324411374, "grad_norm": 3.5689399063136884, "learning_rate": 4.913513316642818e-06, "loss": 1.0453, "step": 890 }, { "epoch": 0.09370686368596107, "grad_norm": 4.104293284314525, "learning_rate": 4.913295499715651e-06, "loss": 1.039, "step": 891 }, { "epoch": 0.09381203412780838, "grad_norm": 2.9248882626299046, "learning_rate": 4.913077413687289e-06, "loss": 1.0304, "step": 892 }, { "epoch": 0.0939172045696557, "grad_norm": 2.565656733778235, "learning_rate": 4.91285905858205e-06, "loss": 1.0233, "step": 893 }, { "epoch": 0.09402237501150301, "grad_norm": 4.317147708295007, "learning_rate": 4.912640434424283e-06, "loss": 1.0293, "step": 894 }, { "epoch": 0.09412754545335034, "grad_norm": 4.1886808246014615, "learning_rate": 4.912421541238365e-06, "loss": 1.0664, "step": 895 }, { "epoch": 0.09423271589519766, "grad_norm": 3.5710378646260996, "learning_rate": 4.912202379048704e-06, "loss": 1.0582, "step": 896 }, { "epoch": 0.09433788633704497, "grad_norm": 3.8755773448584523, "learning_rate": 4.91198294787974e-06, "loss": 1.0265, "step": 897 }, { "epoch": 0.0944430567788923, "grad_norm": 3.2106737884292484, "learning_rate": 4.91176324775594e-06, "loss": 1.0467, "step": 898 }, { "epoch": 0.0945482272207396, "grad_norm": 2.239928932171705, "learning_rate": 4.911543278701802e-06, "loss": 1.0499, "step": 899 }, { "epoch": 0.09465339766258693, "grad_norm": 4.27992709829204, "learning_rate": 4.9113230407418565e-06, "loss": 1.0314, "step": 900 }, { "epoch": 0.09475856810443425, "grad_norm": 3.7991157017709134, "learning_rate": 4.911102533900659e-06, "loss": 1.019, "step": 901 }, { "epoch": 0.09486373854628156, "grad_norm": 3.2843177713241345, "learning_rate": 4.910881758202799e-06, "loss": 1.0747, "step": 902 }, { "epoch": 0.09496890898812889, "grad_norm": 3.3614094256905336, "learning_rate": 4.910660713672895e-06, "loss": 1.047, "step": 903 }, { "epoch": 0.0950740794299762, "grad_norm": 2.546889529528352, "learning_rate": 4.910439400335595e-06, "loss": 1.0134, "step": 904 }, { "epoch": 0.09517924987182352, "grad_norm": 3.5988909556040927, "learning_rate": 4.910217818215576e-06, "loss": 1.0236, "step": 905 }, { "epoch": 0.09528442031367085, "grad_norm": 4.327738766483183, "learning_rate": 4.909995967337548e-06, "loss": 1.0353, "step": 906 }, { "epoch": 0.09538959075551816, "grad_norm": 4.8588271006072326, "learning_rate": 4.9097738477262466e-06, "loss": 1.0939, "step": 907 }, { "epoch": 0.09549476119736548, "grad_norm": 4.033803872334996, "learning_rate": 4.9095514594064434e-06, "loss": 1.0285, "step": 908 }, { "epoch": 0.09559993163921279, "grad_norm": 3.75933079753237, "learning_rate": 4.9093288024029325e-06, "loss": 1.0483, "step": 909 }, { "epoch": 0.09570510208106012, "grad_norm": 3.0548663786720534, "learning_rate": 4.9091058767405455e-06, "loss": 1.051, "step": 910 }, { "epoch": 0.09581027252290744, "grad_norm": 2.784053866442591, "learning_rate": 4.908882682444137e-06, "loss": 1.059, "step": 911 }, { "epoch": 0.09591544296475475, "grad_norm": 2.130590850889847, "learning_rate": 4.908659219538598e-06, "loss": 1.0208, "step": 912 }, { "epoch": 0.09602061340660208, "grad_norm": 2.3000679174789456, "learning_rate": 4.908435488048844e-06, "loss": 1.0253, "step": 913 }, { "epoch": 0.09612578384844939, "grad_norm": 3.0043936554319193, "learning_rate": 4.908211487999825e-06, "loss": 1.0579, "step": 914 }, { "epoch": 0.09623095429029671, "grad_norm": 2.864142159496675, "learning_rate": 4.9079872194165155e-06, "loss": 1.0353, "step": 915 }, { "epoch": 0.09633612473214404, "grad_norm": 3.8952660611202754, "learning_rate": 4.907762682323926e-06, "loss": 1.0701, "step": 916 }, { "epoch": 0.09644129517399135, "grad_norm": 3.9160674317154913, "learning_rate": 4.907537876747094e-06, "loss": 1.0547, "step": 917 }, { "epoch": 0.09654646561583867, "grad_norm": 2.7081904691236613, "learning_rate": 4.907312802711086e-06, "loss": 0.9969, "step": 918 }, { "epoch": 0.096651636057686, "grad_norm": 4.209552166676453, "learning_rate": 4.907087460240999e-06, "loss": 1.0433, "step": 919 }, { "epoch": 0.0967568064995333, "grad_norm": 2.3073683643555083, "learning_rate": 4.906861849361962e-06, "loss": 1.0019, "step": 920 }, { "epoch": 0.09686197694138063, "grad_norm": 3.94540024204696, "learning_rate": 4.906635970099131e-06, "loss": 1.0257, "step": 921 }, { "epoch": 0.09696714738322794, "grad_norm": 3.9569458882368895, "learning_rate": 4.906409822477695e-06, "loss": 1.0383, "step": 922 }, { "epoch": 0.09707231782507526, "grad_norm": 3.4725916392841043, "learning_rate": 4.906183406522869e-06, "loss": 1.006, "step": 923 }, { "epoch": 0.09717748826692259, "grad_norm": 3.5244971631589577, "learning_rate": 4.9059567222599015e-06, "loss": 0.992, "step": 924 }, { "epoch": 0.0972826587087699, "grad_norm": 3.274079488886004, "learning_rate": 4.90572976971407e-06, "loss": 0.992, "step": 925 }, { "epoch": 0.09738782915061722, "grad_norm": 2.5329141838670455, "learning_rate": 4.905502548910681e-06, "loss": 1.041, "step": 926 }, { "epoch": 0.09749299959246453, "grad_norm": 2.8385757720404765, "learning_rate": 4.90527505987507e-06, "loss": 1.0303, "step": 927 }, { "epoch": 0.09759817003431186, "grad_norm": 2.930548884609321, "learning_rate": 4.905047302632606e-06, "loss": 1.0359, "step": 928 }, { "epoch": 0.09770334047615918, "grad_norm": 4.156000403439952, "learning_rate": 4.904819277208685e-06, "loss": 1.0979, "step": 929 }, { "epoch": 0.09780851091800649, "grad_norm": 2.536688544557416, "learning_rate": 4.904590983628732e-06, "loss": 1.0609, "step": 930 }, { "epoch": 0.09791368135985382, "grad_norm": 3.9076531761075897, "learning_rate": 4.904362421918205e-06, "loss": 1.0202, "step": 931 }, { "epoch": 0.09801885180170113, "grad_norm": 3.5518902322982493, "learning_rate": 4.904133592102591e-06, "loss": 1.0553, "step": 932 }, { "epoch": 0.09812402224354845, "grad_norm": 3.1936214651164696, "learning_rate": 4.9039044942074055e-06, "loss": 1.0646, "step": 933 }, { "epoch": 0.09822919268539577, "grad_norm": 4.135090085556305, "learning_rate": 4.903675128258194e-06, "loss": 1.0229, "step": 934 }, { "epoch": 0.09833436312724309, "grad_norm": 2.2494098003681673, "learning_rate": 4.903445494280534e-06, "loss": 1.0082, "step": 935 }, { "epoch": 0.09843953356909041, "grad_norm": 3.148431264860253, "learning_rate": 4.90321559230003e-06, "loss": 1.03, "step": 936 }, { "epoch": 0.09854470401093772, "grad_norm": 3.4526369350300383, "learning_rate": 4.902985422342319e-06, "loss": 1.0412, "step": 937 }, { "epoch": 0.09864987445278504, "grad_norm": 1.9933016033441238, "learning_rate": 4.902754984433067e-06, "loss": 1.0032, "step": 938 }, { "epoch": 0.09875504489463237, "grad_norm": 2.560711359722772, "learning_rate": 4.902524278597969e-06, "loss": 1.0586, "step": 939 }, { "epoch": 0.09886021533647968, "grad_norm": 3.6578892637833404, "learning_rate": 4.9022933048627496e-06, "loss": 1.0093, "step": 940 }, { "epoch": 0.098965385778327, "grad_norm": 3.2769623438585196, "learning_rate": 4.902062063253165e-06, "loss": 1.0349, "step": 941 }, { "epoch": 0.09907055622017431, "grad_norm": 2.629492634710313, "learning_rate": 4.901830553795001e-06, "loss": 1.0607, "step": 942 }, { "epoch": 0.09917572666202164, "grad_norm": 3.126871970690739, "learning_rate": 4.9015987765140715e-06, "loss": 1.0513, "step": 943 }, { "epoch": 0.09928089710386896, "grad_norm": 5.290623571841186, "learning_rate": 4.901366731436223e-06, "loss": 1.0321, "step": 944 }, { "epoch": 0.09938606754571627, "grad_norm": 2.549896186428418, "learning_rate": 4.901134418587329e-06, "loss": 1.0487, "step": 945 }, { "epoch": 0.0994912379875636, "grad_norm": 3.3781283890627596, "learning_rate": 4.900901837993295e-06, "loss": 1.0193, "step": 946 }, { "epoch": 0.09959640842941092, "grad_norm": 3.3355283132202738, "learning_rate": 4.900668989680055e-06, "loss": 0.9998, "step": 947 }, { "epoch": 0.09970157887125823, "grad_norm": 2.6098798632254003, "learning_rate": 4.900435873673574e-06, "loss": 0.9725, "step": 948 }, { "epoch": 0.09980674931310556, "grad_norm": 2.802666593583616, "learning_rate": 4.900202489999845e-06, "loss": 1.0438, "step": 949 }, { "epoch": 0.09991191975495287, "grad_norm": 2.0938671069598858, "learning_rate": 4.899968838684893e-06, "loss": 1.0205, "step": 950 }, { "epoch": 0.10001709019680019, "grad_norm": 3.6038776689006586, "learning_rate": 4.8997349197547724e-06, "loss": 1.0578, "step": 951 }, { "epoch": 0.10012226063864751, "grad_norm": 4.970829658802114, "learning_rate": 4.899500733235567e-06, "loss": 1.025, "step": 952 }, { "epoch": 0.10022743108049482, "grad_norm": 4.678353320775025, "learning_rate": 4.899266279153388e-06, "loss": 1.0622, "step": 953 }, { "epoch": 0.10033260152234215, "grad_norm": 5.741743419399476, "learning_rate": 4.899031557534383e-06, "loss": 1.0874, "step": 954 }, { "epoch": 0.10043777196418946, "grad_norm": 3.104804866637022, "learning_rate": 4.8987965684047215e-06, "loss": 1.0013, "step": 955 }, { "epoch": 0.10054294240603678, "grad_norm": 3.03354521888864, "learning_rate": 4.898561311790609e-06, "loss": 1.004, "step": 956 }, { "epoch": 0.10064811284788411, "grad_norm": 3.424553202851718, "learning_rate": 4.898325787718277e-06, "loss": 1.0363, "step": 957 }, { "epoch": 0.10075328328973142, "grad_norm": 3.000149075657252, "learning_rate": 4.898089996213988e-06, "loss": 1.0371, "step": 958 }, { "epoch": 0.10085845373157874, "grad_norm": 2.3914498005066194, "learning_rate": 4.897853937304037e-06, "loss": 1.0101, "step": 959 }, { "epoch": 0.10096362417342605, "grad_norm": 3.7269730617951997, "learning_rate": 4.897617611014744e-06, "loss": 1.0741, "step": 960 }, { "epoch": 0.10106879461527338, "grad_norm": 3.4685995382297, "learning_rate": 4.897381017372462e-06, "loss": 1.0635, "step": 961 }, { "epoch": 0.1011739650571207, "grad_norm": 2.414965651072397, "learning_rate": 4.897144156403573e-06, "loss": 1.0601, "step": 962 }, { "epoch": 0.10127913549896801, "grad_norm": 3.3913472210312117, "learning_rate": 4.8969070281344895e-06, "loss": 1.0682, "step": 963 }, { "epoch": 0.10138430594081534, "grad_norm": 2.6184438328601387, "learning_rate": 4.896669632591652e-06, "loss": 1.0295, "step": 964 }, { "epoch": 0.10148947638266265, "grad_norm": 3.3103117280466154, "learning_rate": 4.8964319698015325e-06, "loss": 1.0155, "step": 965 }, { "epoch": 0.10159464682450997, "grad_norm": 2.7095856980343966, "learning_rate": 4.896194039790632e-06, "loss": 1.0172, "step": 966 }, { "epoch": 0.1016998172663573, "grad_norm": 3.606502874104067, "learning_rate": 4.895955842585483e-06, "loss": 1.0368, "step": 967 }, { "epoch": 0.1018049877082046, "grad_norm": 3.7818643892335024, "learning_rate": 4.895717378212644e-06, "loss": 1.0736, "step": 968 }, { "epoch": 0.10191015815005193, "grad_norm": 3.9660970245154865, "learning_rate": 4.895478646698707e-06, "loss": 1.0143, "step": 969 }, { "epoch": 0.10201532859189924, "grad_norm": 3.472975484177083, "learning_rate": 4.895239648070292e-06, "loss": 1.0414, "step": 970 }, { "epoch": 0.10212049903374656, "grad_norm": 3.2923024045337277, "learning_rate": 4.895000382354049e-06, "loss": 1.0605, "step": 971 }, { "epoch": 0.10222566947559389, "grad_norm": 4.414663168893738, "learning_rate": 4.89476084957666e-06, "loss": 1.0273, "step": 972 }, { "epoch": 0.1023308399174412, "grad_norm": 5.295626697084187, "learning_rate": 4.894521049764831e-06, "loss": 1.0586, "step": 973 }, { "epoch": 0.10243601035928852, "grad_norm": 2.9933688401634213, "learning_rate": 4.8942809829453046e-06, "loss": 1.0011, "step": 974 }, { "epoch": 0.10254118080113583, "grad_norm": 3.1480184087773253, "learning_rate": 4.894040649144849e-06, "loss": 1.0485, "step": 975 }, { "epoch": 0.10264635124298316, "grad_norm": 3.0397182716899254, "learning_rate": 4.893800048390264e-06, "loss": 1.0304, "step": 976 }, { "epoch": 0.10275152168483048, "grad_norm": 3.4747456565622787, "learning_rate": 4.893559180708378e-06, "loss": 1.0068, "step": 977 }, { "epoch": 0.10285669212667779, "grad_norm": 3.54567802483446, "learning_rate": 4.8933180461260485e-06, "loss": 1.0231, "step": 978 }, { "epoch": 0.10296186256852512, "grad_norm": 3.4856402750957307, "learning_rate": 4.893076644670166e-06, "loss": 1.0518, "step": 979 }, { "epoch": 0.10306703301037244, "grad_norm": 2.97684659916138, "learning_rate": 4.892834976367647e-06, "loss": 0.9815, "step": 980 }, { "epoch": 0.10317220345221975, "grad_norm": 4.321536953737777, "learning_rate": 4.8925930412454405e-06, "loss": 1.0374, "step": 981 }, { "epoch": 0.10327737389406708, "grad_norm": 3.6585034863508605, "learning_rate": 4.8923508393305224e-06, "loss": 0.9504, "step": 982 }, { "epoch": 0.10338254433591439, "grad_norm": 3.3098381288492074, "learning_rate": 4.892108370649902e-06, "loss": 1.0276, "step": 983 }, { "epoch": 0.10348771477776171, "grad_norm": 3.3462769999723383, "learning_rate": 4.891865635230616e-06, "loss": 1.0415, "step": 984 }, { "epoch": 0.10359288521960903, "grad_norm": 3.1379133999705426, "learning_rate": 4.891622633099731e-06, "loss": 1.0503, "step": 985 }, { "epoch": 0.10369805566145635, "grad_norm": 2.096943884410061, "learning_rate": 4.8913793642843434e-06, "loss": 1.0132, "step": 986 }, { "epoch": 0.10380322610330367, "grad_norm": 3.49899699410188, "learning_rate": 4.89113582881158e-06, "loss": 1.036, "step": 987 }, { "epoch": 0.10390839654515098, "grad_norm": 4.363397554483991, "learning_rate": 4.890892026708596e-06, "loss": 1.0641, "step": 988 }, { "epoch": 0.1040135669869983, "grad_norm": 2.843253723260328, "learning_rate": 4.8906479580025774e-06, "loss": 1.0259, "step": 989 }, { "epoch": 0.10411873742884563, "grad_norm": 3.720334378029494, "learning_rate": 4.890403622720742e-06, "loss": 1.0301, "step": 990 }, { "epoch": 0.10422390787069294, "grad_norm": 3.4282471758253483, "learning_rate": 4.890159020890333e-06, "loss": 1.0388, "step": 991 }, { "epoch": 0.10432907831254026, "grad_norm": 4.570380050609438, "learning_rate": 4.889914152538625e-06, "loss": 1.0445, "step": 992 }, { "epoch": 0.10443424875438757, "grad_norm": 4.005408760979363, "learning_rate": 4.889669017692924e-06, "loss": 1.0779, "step": 993 }, { "epoch": 0.1045394191962349, "grad_norm": 3.317322954004475, "learning_rate": 4.889423616380564e-06, "loss": 1.0502, "step": 994 }, { "epoch": 0.10464458963808222, "grad_norm": 2.196763228351377, "learning_rate": 4.889177948628908e-06, "loss": 1.0265, "step": 995 }, { "epoch": 0.10474976007992953, "grad_norm": 2.877333453688417, "learning_rate": 4.8889320144653525e-06, "loss": 1.0354, "step": 996 }, { "epoch": 0.10485493052177686, "grad_norm": 1.912523311494541, "learning_rate": 4.8886858139173185e-06, "loss": 1.0046, "step": 997 }, { "epoch": 0.10496010096362417, "grad_norm": 4.312918317607742, "learning_rate": 4.88843934701226e-06, "loss": 1.0117, "step": 998 }, { "epoch": 0.10506527140547149, "grad_norm": 3.0476518763567144, "learning_rate": 4.888192613777661e-06, "loss": 1.0288, "step": 999 }, { "epoch": 0.10517044184731882, "grad_norm": 2.9241631966551433, "learning_rate": 4.887945614241034e-06, "loss": 1.0364, "step": 1000 }, { "epoch": 0.10527561228916613, "grad_norm": 2.0938595160121287, "learning_rate": 4.88769834842992e-06, "loss": 0.999, "step": 1001 }, { "epoch": 0.10538078273101345, "grad_norm": 2.9042647406620397, "learning_rate": 4.887450816371892e-06, "loss": 1.0036, "step": 1002 }, { "epoch": 0.10548595317286076, "grad_norm": 3.430761146907295, "learning_rate": 4.887203018094552e-06, "loss": 1.0187, "step": 1003 }, { "epoch": 0.10559112361470809, "grad_norm": 3.564602317410496, "learning_rate": 4.88695495362553e-06, "loss": 1.0255, "step": 1004 }, { "epoch": 0.10569629405655541, "grad_norm": 2.3078908692690443, "learning_rate": 4.886706622992489e-06, "loss": 1.0303, "step": 1005 }, { "epoch": 0.10580146449840272, "grad_norm": 2.761094777536776, "learning_rate": 4.886458026223118e-06, "loss": 1.0259, "step": 1006 }, { "epoch": 0.10590663494025004, "grad_norm": 3.4909322317604388, "learning_rate": 4.88620916334514e-06, "loss": 1.0701, "step": 1007 }, { "epoch": 0.10601180538209737, "grad_norm": 3.042806287394382, "learning_rate": 4.885960034386302e-06, "loss": 1.0188, "step": 1008 }, { "epoch": 0.10611697582394468, "grad_norm": 3.4715361900462773, "learning_rate": 4.885710639374387e-06, "loss": 1.0059, "step": 1009 }, { "epoch": 0.106222146265792, "grad_norm": 2.7625550172439524, "learning_rate": 4.885460978337201e-06, "loss": 1.0592, "step": 1010 }, { "epoch": 0.10632731670763931, "grad_norm": 2.7235208933430597, "learning_rate": 4.885211051302586e-06, "loss": 1.0214, "step": 1011 }, { "epoch": 0.10643248714948664, "grad_norm": 2.7923910284329403, "learning_rate": 4.88496085829841e-06, "loss": 1.0122, "step": 1012 }, { "epoch": 0.10653765759133396, "grad_norm": 4.09723932625378, "learning_rate": 4.884710399352572e-06, "loss": 1.0252, "step": 1013 }, { "epoch": 0.10664282803318127, "grad_norm": 4.277407726951024, "learning_rate": 4.884459674492997e-06, "loss": 1.0445, "step": 1014 }, { "epoch": 0.1067479984750286, "grad_norm": 2.741940824883468, "learning_rate": 4.884208683747647e-06, "loss": 1.0452, "step": 1015 }, { "epoch": 0.1068531689168759, "grad_norm": 2.846019968822997, "learning_rate": 4.883957427144507e-06, "loss": 1.0386, "step": 1016 }, { "epoch": 0.10695833935872323, "grad_norm": 3.0668518235856776, "learning_rate": 4.8837059047115955e-06, "loss": 1.073, "step": 1017 }, { "epoch": 0.10706350980057056, "grad_norm": 2.7044021207813835, "learning_rate": 4.883454116476957e-06, "loss": 1.025, "step": 1018 }, { "epoch": 0.10716868024241787, "grad_norm": 2.9884834738601533, "learning_rate": 4.88320206246867e-06, "loss": 1.0362, "step": 1019 }, { "epoch": 0.10727385068426519, "grad_norm": 2.6459541792130823, "learning_rate": 4.88294974271484e-06, "loss": 1.035, "step": 1020 }, { "epoch": 0.1073790211261125, "grad_norm": 3.296712064083779, "learning_rate": 4.882697157243601e-06, "loss": 1.0486, "step": 1021 }, { "epoch": 0.10748419156795982, "grad_norm": 3.5876433786961885, "learning_rate": 4.882444306083121e-06, "loss": 1.0428, "step": 1022 }, { "epoch": 0.10758936200980715, "grad_norm": 2.4991241646439426, "learning_rate": 4.882191189261592e-06, "loss": 1.0133, "step": 1023 }, { "epoch": 0.10769453245165446, "grad_norm": 3.07812377252626, "learning_rate": 4.881937806807241e-06, "loss": 1.0337, "step": 1024 }, { "epoch": 0.10779970289350178, "grad_norm": 2.229456998154068, "learning_rate": 4.881684158748321e-06, "loss": 1.0208, "step": 1025 }, { "epoch": 0.1079048733353491, "grad_norm": 3.7922511732147144, "learning_rate": 4.881430245113115e-06, "loss": 1.0326, "step": 1026 }, { "epoch": 0.10801004377719642, "grad_norm": 3.1647389772945695, "learning_rate": 4.881176065929938e-06, "loss": 1.0131, "step": 1027 }, { "epoch": 0.10811521421904374, "grad_norm": 2.8627766739221787, "learning_rate": 4.880921621227131e-06, "loss": 1.024, "step": 1028 }, { "epoch": 0.10822038466089105, "grad_norm": 2.5687462636056035, "learning_rate": 4.880666911033068e-06, "loss": 1.0409, "step": 1029 }, { "epoch": 0.10832555510273838, "grad_norm": 2.916734582685727, "learning_rate": 4.880411935376151e-06, "loss": 1.0788, "step": 1030 }, { "epoch": 0.10843072554458569, "grad_norm": 4.000486986234842, "learning_rate": 4.880156694284811e-06, "loss": 1.0599, "step": 1031 }, { "epoch": 0.10853589598643301, "grad_norm": 3.5425566453452295, "learning_rate": 4.87990118778751e-06, "loss": 1.0306, "step": 1032 }, { "epoch": 0.10864106642828034, "grad_norm": 3.259354111862751, "learning_rate": 4.879645415912739e-06, "loss": 1.0667, "step": 1033 }, { "epoch": 0.10874623687012765, "grad_norm": 3.7603711436052585, "learning_rate": 4.8793893786890186e-06, "loss": 1.0272, "step": 1034 }, { "epoch": 0.10885140731197497, "grad_norm": 2.650644535850584, "learning_rate": 4.879133076144898e-06, "loss": 1.0186, "step": 1035 }, { "epoch": 0.1089565777538223, "grad_norm": 3.152266769786039, "learning_rate": 4.8788765083089586e-06, "loss": 1.0406, "step": 1036 }, { "epoch": 0.1090617481956696, "grad_norm": 3.240897604655947, "learning_rate": 4.878619675209809e-06, "loss": 1.0398, "step": 1037 }, { "epoch": 0.10916691863751693, "grad_norm": 2.145650055329262, "learning_rate": 4.8783625768760865e-06, "loss": 1.0149, "step": 1038 }, { "epoch": 0.10927208907936424, "grad_norm": 2.495958954073448, "learning_rate": 4.878105213336462e-06, "loss": 1.0143, "step": 1039 }, { "epoch": 0.10937725952121156, "grad_norm": 2.669064484615587, "learning_rate": 4.877847584619632e-06, "loss": 1.0499, "step": 1040 }, { "epoch": 0.10948242996305889, "grad_norm": 2.877634072763941, "learning_rate": 4.8775896907543245e-06, "loss": 1.0111, "step": 1041 }, { "epoch": 0.1095876004049062, "grad_norm": 2.928402468643089, "learning_rate": 4.877331531769297e-06, "loss": 1.0233, "step": 1042 }, { "epoch": 0.10969277084675352, "grad_norm": 3.1475271796192756, "learning_rate": 4.877073107693336e-06, "loss": 1.0445, "step": 1043 }, { "epoch": 0.10979794128860083, "grad_norm": 2.7416601116581, "learning_rate": 4.876814418555257e-06, "loss": 1.0513, "step": 1044 }, { "epoch": 0.10990311173044816, "grad_norm": 2.468594567435823, "learning_rate": 4.876555464383908e-06, "loss": 1.0396, "step": 1045 }, { "epoch": 0.11000828217229548, "grad_norm": 3.7894959288974306, "learning_rate": 4.876296245208162e-06, "loss": 1.0068, "step": 1046 }, { "epoch": 0.11011345261414279, "grad_norm": 4.315993647092533, "learning_rate": 4.876036761056925e-06, "loss": 1.0244, "step": 1047 }, { "epoch": 0.11021862305599012, "grad_norm": 2.7986247997518605, "learning_rate": 4.875777011959131e-06, "loss": 1.0252, "step": 1048 }, { "epoch": 0.11032379349783743, "grad_norm": 2.872248737665653, "learning_rate": 4.875516997943745e-06, "loss": 1.0191, "step": 1049 }, { "epoch": 0.11042896393968475, "grad_norm": 3.51973767363257, "learning_rate": 4.8752567190397605e-06, "loss": 1.0226, "step": 1050 }, { "epoch": 0.11053413438153208, "grad_norm": 2.793611486570461, "learning_rate": 4.874996175276199e-06, "loss": 1.0259, "step": 1051 }, { "epoch": 0.11063930482337939, "grad_norm": 3.5618775194605243, "learning_rate": 4.8747353666821155e-06, "loss": 1.0744, "step": 1052 }, { "epoch": 0.11074447526522671, "grad_norm": 3.7992122350452693, "learning_rate": 4.8744742932865905e-06, "loss": 1.0727, "step": 1053 }, { "epoch": 0.11084964570707402, "grad_norm": 2.1766727817716065, "learning_rate": 4.874212955118736e-06, "loss": 1.0345, "step": 1054 }, { "epoch": 0.11095481614892135, "grad_norm": 2.988093572670979, "learning_rate": 4.873951352207694e-06, "loss": 0.9793, "step": 1055 }, { "epoch": 0.11105998659076867, "grad_norm": 3.100468284349954, "learning_rate": 4.873689484582634e-06, "loss": 1.1039, "step": 1056 }, { "epoch": 0.11116515703261598, "grad_norm": 2.234883911073863, "learning_rate": 4.873427352272758e-06, "loss": 1.0484, "step": 1057 }, { "epoch": 0.1112703274744633, "grad_norm": 3.034027361644701, "learning_rate": 4.8731649553072945e-06, "loss": 1.0477, "step": 1058 }, { "epoch": 0.11137549791631061, "grad_norm": 2.7701642330357976, "learning_rate": 4.872902293715502e-06, "loss": 0.9995, "step": 1059 }, { "epoch": 0.11148066835815794, "grad_norm": 3.5596220939294545, "learning_rate": 4.872639367526672e-06, "loss": 1.0595, "step": 1060 }, { "epoch": 0.11158583880000526, "grad_norm": 2.4396549311475755, "learning_rate": 4.872376176770121e-06, "loss": 1.0638, "step": 1061 }, { "epoch": 0.11169100924185257, "grad_norm": 2.7581484936556913, "learning_rate": 4.872112721475196e-06, "loss": 1.0511, "step": 1062 }, { "epoch": 0.1117961796836999, "grad_norm": 3.59528480669715, "learning_rate": 4.871849001671276e-06, "loss": 1.0209, "step": 1063 }, { "epoch": 0.11190135012554721, "grad_norm": 3.0507361752299187, "learning_rate": 4.871585017387767e-06, "loss": 1.0476, "step": 1064 }, { "epoch": 0.11200652056739453, "grad_norm": 3.6738695742416176, "learning_rate": 4.871320768654105e-06, "loss": 1.0659, "step": 1065 }, { "epoch": 0.11211169100924186, "grad_norm": 3.8253088021331867, "learning_rate": 4.871056255499758e-06, "loss": 0.9856, "step": 1066 }, { "epoch": 0.11221686145108917, "grad_norm": 3.1580111988558994, "learning_rate": 4.870791477954218e-06, "loss": 1.0082, "step": 1067 }, { "epoch": 0.11232203189293649, "grad_norm": 3.0599335655636146, "learning_rate": 4.87052643604701e-06, "loss": 1.0662, "step": 1068 }, { "epoch": 0.11242720233478382, "grad_norm": 2.2702607865189224, "learning_rate": 4.870261129807692e-06, "loss": 1.0522, "step": 1069 }, { "epoch": 0.11253237277663113, "grad_norm": 2.6723836637796845, "learning_rate": 4.869995559265844e-06, "loss": 1.0286, "step": 1070 }, { "epoch": 0.11263754321847845, "grad_norm": 4.21492156394967, "learning_rate": 4.869729724451081e-06, "loss": 1.0665, "step": 1071 }, { "epoch": 0.11274271366032576, "grad_norm": 2.8588027598894703, "learning_rate": 4.869463625393044e-06, "loss": 1.0236, "step": 1072 }, { "epoch": 0.11284788410217308, "grad_norm": 3.3016899697968882, "learning_rate": 4.869197262121406e-06, "loss": 1.0559, "step": 1073 }, { "epoch": 0.11295305454402041, "grad_norm": 3.419706325986653, "learning_rate": 4.8689306346658704e-06, "loss": 1.011, "step": 1074 }, { "epoch": 0.11305822498586772, "grad_norm": 2.0613879638549, "learning_rate": 4.868663743056165e-06, "loss": 0.9756, "step": 1075 }, { "epoch": 0.11316339542771504, "grad_norm": 3.333749579337557, "learning_rate": 4.868396587322053e-06, "loss": 1.0821, "step": 1076 }, { "epoch": 0.11326856586956235, "grad_norm": 3.0234091587963032, "learning_rate": 4.868129167493322e-06, "loss": 1.0416, "step": 1077 }, { "epoch": 0.11337373631140968, "grad_norm": 3.1722297477760466, "learning_rate": 4.867861483599793e-06, "loss": 1.049, "step": 1078 }, { "epoch": 0.113478906753257, "grad_norm": 3.123942723713283, "learning_rate": 4.867593535671315e-06, "loss": 1.0038, "step": 1079 }, { "epoch": 0.11358407719510431, "grad_norm": 3.3849552815537054, "learning_rate": 4.867325323737765e-06, "loss": 0.9871, "step": 1080 }, { "epoch": 0.11368924763695164, "grad_norm": 3.6000442180720498, "learning_rate": 4.8670568478290515e-06, "loss": 1.0303, "step": 1081 }, { "epoch": 0.11379441807879895, "grad_norm": 3.0065860923788676, "learning_rate": 4.866788107975111e-06, "loss": 1.0284, "step": 1082 }, { "epoch": 0.11389958852064627, "grad_norm": 3.307728595187707, "learning_rate": 4.866519104205911e-06, "loss": 1.0484, "step": 1083 }, { "epoch": 0.1140047589624936, "grad_norm": 2.9561692789631078, "learning_rate": 4.866249836551447e-06, "loss": 1.012, "step": 1084 }, { "epoch": 0.1141099294043409, "grad_norm": 2.5709168983222503, "learning_rate": 4.865980305041746e-06, "loss": 1.024, "step": 1085 }, { "epoch": 0.11421509984618823, "grad_norm": 2.7856609101318104, "learning_rate": 4.865710509706859e-06, "loss": 1.0148, "step": 1086 }, { "epoch": 0.11432027028803554, "grad_norm": 4.12784259860208, "learning_rate": 4.8654404505768735e-06, "loss": 1.0658, "step": 1087 }, { "epoch": 0.11442544072988287, "grad_norm": 3.9354921027135186, "learning_rate": 4.865170127681903e-06, "loss": 1.0093, "step": 1088 }, { "epoch": 0.11453061117173019, "grad_norm": 4.361516591990753, "learning_rate": 4.8648995410520905e-06, "loss": 1.0849, "step": 1089 }, { "epoch": 0.1146357816135775, "grad_norm": 3.874224839358617, "learning_rate": 4.864628690717607e-06, "loss": 1.0004, "step": 1090 }, { "epoch": 0.11474095205542482, "grad_norm": 2.713904318986021, "learning_rate": 4.8643575767086555e-06, "loss": 0.9988, "step": 1091 }, { "epoch": 0.11484612249727214, "grad_norm": 4.127820079366728, "learning_rate": 4.864086199055467e-06, "loss": 1.0265, "step": 1092 }, { "epoch": 0.11495129293911946, "grad_norm": 4.16373977878337, "learning_rate": 4.863814557788303e-06, "loss": 1.0445, "step": 1093 }, { "epoch": 0.11505646338096678, "grad_norm": 4.340273179649079, "learning_rate": 4.863542652937453e-06, "loss": 1.0214, "step": 1094 }, { "epoch": 0.1151616338228141, "grad_norm": 3.5979531619959593, "learning_rate": 4.863270484533237e-06, "loss": 0.9954, "step": 1095 }, { "epoch": 0.11526680426466142, "grad_norm": 3.7563254164355624, "learning_rate": 4.862998052606001e-06, "loss": 1.0446, "step": 1096 }, { "epoch": 0.11537197470650874, "grad_norm": 2.8311030399401904, "learning_rate": 4.862725357186129e-06, "loss": 0.9762, "step": 1097 }, { "epoch": 0.11547714514835605, "grad_norm": 3.26148888613775, "learning_rate": 4.862452398304024e-06, "loss": 1.0667, "step": 1098 }, { "epoch": 0.11558231559020338, "grad_norm": 2.2978414859803964, "learning_rate": 4.862179175990124e-06, "loss": 0.9832, "step": 1099 }, { "epoch": 0.11568748603205069, "grad_norm": 2.8821374960941304, "learning_rate": 4.861905690274896e-06, "loss": 1.0225, "step": 1100 }, { "epoch": 0.11579265647389801, "grad_norm": 2.657398753259678, "learning_rate": 4.861631941188836e-06, "loss": 1.036, "step": 1101 }, { "epoch": 0.11589782691574534, "grad_norm": 1.7911673644478512, "learning_rate": 4.861357928762468e-06, "loss": 1.0355, "step": 1102 }, { "epoch": 0.11600299735759265, "grad_norm": 3.1971189200731316, "learning_rate": 4.8610836530263485e-06, "loss": 1.0348, "step": 1103 }, { "epoch": 0.11610816779943997, "grad_norm": 3.9670686015745438, "learning_rate": 4.860809114011059e-06, "loss": 1.0403, "step": 1104 }, { "epoch": 0.11621333824128728, "grad_norm": 3.4009369343225426, "learning_rate": 4.860534311747215e-06, "loss": 1.0572, "step": 1105 }, { "epoch": 0.1163185086831346, "grad_norm": 4.1414177645044745, "learning_rate": 4.860259246265456e-06, "loss": 1.0702, "step": 1106 }, { "epoch": 0.11642367912498193, "grad_norm": 4.123107172342563, "learning_rate": 4.859983917596458e-06, "loss": 1.0605, "step": 1107 }, { "epoch": 0.11652884956682924, "grad_norm": 3.969640922287132, "learning_rate": 4.859708325770919e-06, "loss": 1.008, "step": 1108 }, { "epoch": 0.11663402000867656, "grad_norm": 2.90888044600641, "learning_rate": 4.859432470819572e-06, "loss": 1.0459, "step": 1109 }, { "epoch": 0.11673919045052387, "grad_norm": 2.831787199338218, "learning_rate": 4.859156352773174e-06, "loss": 1.0261, "step": 1110 }, { "epoch": 0.1168443608923712, "grad_norm": 3.987477080469216, "learning_rate": 4.858879971662518e-06, "loss": 1.0415, "step": 1111 }, { "epoch": 0.11694953133421852, "grad_norm": 3.9586675261870274, "learning_rate": 4.85860332751842e-06, "loss": 1.0675, "step": 1112 }, { "epoch": 0.11705470177606583, "grad_norm": 2.6528061023001386, "learning_rate": 4.858326420371728e-06, "loss": 1.0326, "step": 1113 }, { "epoch": 0.11715987221791316, "grad_norm": 3.2831047642667297, "learning_rate": 4.858049250253321e-06, "loss": 1.0562, "step": 1114 }, { "epoch": 0.11726504265976047, "grad_norm": 3.144259175603822, "learning_rate": 4.8577718171941036e-06, "loss": 1.0119, "step": 1115 }, { "epoch": 0.11737021310160779, "grad_norm": 3.6872799055696244, "learning_rate": 4.857494121225014e-06, "loss": 1.0456, "step": 1116 }, { "epoch": 0.11747538354345512, "grad_norm": 3.1976523712311273, "learning_rate": 4.857216162377015e-06, "loss": 1.0582, "step": 1117 }, { "epoch": 0.11758055398530243, "grad_norm": 2.9008275495280342, "learning_rate": 4.8569379406811034e-06, "loss": 1.0335, "step": 1118 }, { "epoch": 0.11768572442714975, "grad_norm": 3.5671660679987265, "learning_rate": 4.856659456168301e-06, "loss": 1.0251, "step": 1119 }, { "epoch": 0.11779089486899706, "grad_norm": 3.9412822209537426, "learning_rate": 4.856380708869663e-06, "loss": 1.0671, "step": 1120 }, { "epoch": 0.11789606531084439, "grad_norm": 3.191017359982901, "learning_rate": 4.85610169881627e-06, "loss": 1.0434, "step": 1121 }, { "epoch": 0.11800123575269171, "grad_norm": 3.195874922814993, "learning_rate": 4.855822426039236e-06, "loss": 0.9924, "step": 1122 }, { "epoch": 0.11810640619453902, "grad_norm": 2.751515168083024, "learning_rate": 4.855542890569701e-06, "loss": 1.0373, "step": 1123 }, { "epoch": 0.11821157663638635, "grad_norm": 2.896579359878487, "learning_rate": 4.855263092438834e-06, "loss": 1.0463, "step": 1124 }, { "epoch": 0.11831674707823366, "grad_norm": 3.367419965470842, "learning_rate": 4.8549830316778365e-06, "loss": 1.0156, "step": 1125 }, { "epoch": 0.11842191752008098, "grad_norm": 2.2007559598381357, "learning_rate": 4.854702708317937e-06, "loss": 1.0342, "step": 1126 }, { "epoch": 0.1185270879619283, "grad_norm": 2.729565964737428, "learning_rate": 4.8544221223903925e-06, "loss": 1.0192, "step": 1127 }, { "epoch": 0.11863225840377561, "grad_norm": 4.234177007738469, "learning_rate": 4.854141273926492e-06, "loss": 0.9931, "step": 1128 }, { "epoch": 0.11873742884562294, "grad_norm": 4.1159850072468025, "learning_rate": 4.8538601629575525e-06, "loss": 1.048, "step": 1129 }, { "epoch": 0.11884259928747026, "grad_norm": 3.030352704362315, "learning_rate": 4.8535787895149186e-06, "loss": 1.0481, "step": 1130 }, { "epoch": 0.11894776972931757, "grad_norm": 2.416745852348571, "learning_rate": 4.853297153629967e-06, "loss": 1.0426, "step": 1131 }, { "epoch": 0.1190529401711649, "grad_norm": 2.953961483423199, "learning_rate": 4.853015255334101e-06, "loss": 1.0233, "step": 1132 }, { "epoch": 0.11915811061301221, "grad_norm": 3.1479898039953103, "learning_rate": 4.852733094658756e-06, "loss": 1.0074, "step": 1133 }, { "epoch": 0.11926328105485953, "grad_norm": 2.0774548323641397, "learning_rate": 4.852450671635395e-06, "loss": 1.01, "step": 1134 }, { "epoch": 0.11936845149670686, "grad_norm": 2.51715458277697, "learning_rate": 4.852167986295508e-06, "loss": 1.0401, "step": 1135 }, { "epoch": 0.11947362193855417, "grad_norm": 3.6879308333975587, "learning_rate": 4.851885038670618e-06, "loss": 1.0577, "step": 1136 }, { "epoch": 0.11957879238040149, "grad_norm": 3.27065440253345, "learning_rate": 4.851601828792278e-06, "loss": 1.0163, "step": 1137 }, { "epoch": 0.1196839628222488, "grad_norm": 2.2130774677775853, "learning_rate": 4.8513183566920654e-06, "loss": 1.0543, "step": 1138 }, { "epoch": 0.11978913326409613, "grad_norm": 2.8314329893329093, "learning_rate": 4.8510346224015896e-06, "loss": 1.0796, "step": 1139 }, { "epoch": 0.11989430370594345, "grad_norm": 3.7243358852096953, "learning_rate": 4.85075062595249e-06, "loss": 1.0379, "step": 1140 }, { "epoch": 0.11999947414779076, "grad_norm": 3.7144900747943623, "learning_rate": 4.850466367376435e-06, "loss": 1.0259, "step": 1141 }, { "epoch": 0.12010464458963808, "grad_norm": 3.231616766464792, "learning_rate": 4.850181846705121e-06, "loss": 1.0482, "step": 1142 }, { "epoch": 0.1202098150314854, "grad_norm": 3.3082034220519416, "learning_rate": 4.8498970639702745e-06, "loss": 1.0284, "step": 1143 }, { "epoch": 0.12031498547333272, "grad_norm": 2.9443493452344183, "learning_rate": 4.84961201920365e-06, "loss": 1.0511, "step": 1144 }, { "epoch": 0.12042015591518004, "grad_norm": 3.196336464928972, "learning_rate": 4.849326712437033e-06, "loss": 1.0723, "step": 1145 }, { "epoch": 0.12052532635702735, "grad_norm": 2.71928604807591, "learning_rate": 4.849041143702238e-06, "loss": 1.0112, "step": 1146 }, { "epoch": 0.12063049679887468, "grad_norm": 3.533369997436239, "learning_rate": 4.8487553130311065e-06, "loss": 0.9867, "step": 1147 }, { "epoch": 0.12073566724072199, "grad_norm": 2.455468182286396, "learning_rate": 4.848469220455512e-06, "loss": 0.9962, "step": 1148 }, { "epoch": 0.12084083768256931, "grad_norm": 2.3850247504745465, "learning_rate": 4.848182866007356e-06, "loss": 1.0686, "step": 1149 }, { "epoch": 0.12094600812441664, "grad_norm": 2.5387686707247843, "learning_rate": 4.84789624971857e-06, "loss": 1.0425, "step": 1150 }, { "epoch": 0.12105117856626395, "grad_norm": 3.482360328905752, "learning_rate": 4.8476093716211125e-06, "loss": 1.0271, "step": 1151 }, { "epoch": 0.12115634900811127, "grad_norm": 3.6287895041841702, "learning_rate": 4.847322231746973e-06, "loss": 1.0491, "step": 1152 }, { "epoch": 0.12126151944995858, "grad_norm": 2.7406507151068404, "learning_rate": 4.84703483012817e-06, "loss": 1.0579, "step": 1153 }, { "epoch": 0.1213666898918059, "grad_norm": 2.093402317749306, "learning_rate": 4.846747166796751e-06, "loss": 1.0114, "step": 1154 }, { "epoch": 0.12147186033365323, "grad_norm": 2.7833406137397896, "learning_rate": 4.846459241784793e-06, "loss": 1.0164, "step": 1155 }, { "epoch": 0.12157703077550054, "grad_norm": 3.5971545223060626, "learning_rate": 4.846171055124401e-06, "loss": 1.0388, "step": 1156 }, { "epoch": 0.12168220121734787, "grad_norm": 4.388318975491837, "learning_rate": 4.845882606847712e-06, "loss": 1.0544, "step": 1157 }, { "epoch": 0.12178737165919519, "grad_norm": 3.471550657627086, "learning_rate": 4.845593896986888e-06, "loss": 1.0936, "step": 1158 }, { "epoch": 0.1218925421010425, "grad_norm": 3.0123013262369924, "learning_rate": 4.845304925574122e-06, "loss": 0.9964, "step": 1159 }, { "epoch": 0.12199771254288982, "grad_norm": 2.2624703252236453, "learning_rate": 4.84501569264164e-06, "loss": 1.0276, "step": 1160 }, { "epoch": 0.12210288298473713, "grad_norm": 2.675714197722898, "learning_rate": 4.8447261982216905e-06, "loss": 0.9954, "step": 1161 }, { "epoch": 0.12220805342658446, "grad_norm": 4.13088015952959, "learning_rate": 4.8444364423465555e-06, "loss": 0.9864, "step": 1162 }, { "epoch": 0.12231322386843178, "grad_norm": 4.229159717585945, "learning_rate": 4.844146425048545e-06, "loss": 1.0392, "step": 1163 }, { "epoch": 0.1224183943102791, "grad_norm": 3.419828767103554, "learning_rate": 4.843856146359999e-06, "loss": 1.0395, "step": 1164 }, { "epoch": 0.12252356475212642, "grad_norm": 4.317347247396286, "learning_rate": 4.843565606313283e-06, "loss": 1.0242, "step": 1165 }, { "epoch": 0.12262873519397373, "grad_norm": 3.754270500912085, "learning_rate": 4.843274804940798e-06, "loss": 1.052, "step": 1166 }, { "epoch": 0.12273390563582105, "grad_norm": 2.8312910701769294, "learning_rate": 4.8429837422749695e-06, "loss": 1.0404, "step": 1167 }, { "epoch": 0.12283907607766838, "grad_norm": 3.823314861494254, "learning_rate": 4.842692418348251e-06, "loss": 1.0094, "step": 1168 }, { "epoch": 0.12294424651951569, "grad_norm": 3.9925424351563095, "learning_rate": 4.842400833193131e-06, "loss": 1.0336, "step": 1169 }, { "epoch": 0.12304941696136301, "grad_norm": 4.5156463343872755, "learning_rate": 4.84210898684212e-06, "loss": 1.0294, "step": 1170 }, { "epoch": 0.12315458740321032, "grad_norm": 4.134735379560587, "learning_rate": 4.841816879327764e-06, "loss": 1.0681, "step": 1171 }, { "epoch": 0.12325975784505765, "grad_norm": 3.5718684762008586, "learning_rate": 4.8415245106826335e-06, "loss": 1.0457, "step": 1172 }, { "epoch": 0.12336492828690497, "grad_norm": 2.8524025783294835, "learning_rate": 4.841231880939331e-06, "loss": 1.051, "step": 1173 }, { "epoch": 0.12347009872875228, "grad_norm": 2.1112242358528794, "learning_rate": 4.840938990130486e-06, "loss": 0.9749, "step": 1174 }, { "epoch": 0.1235752691705996, "grad_norm": 3.489044644429071, "learning_rate": 4.840645838288759e-06, "loss": 1.0839, "step": 1175 }, { "epoch": 0.12368043961244692, "grad_norm": 3.7851298708992007, "learning_rate": 4.840352425446838e-06, "loss": 1.0008, "step": 1176 }, { "epoch": 0.12378561005429424, "grad_norm": 3.534928614423682, "learning_rate": 4.840058751637441e-06, "loss": 0.9969, "step": 1177 }, { "epoch": 0.12389078049614156, "grad_norm": 3.866088509887153, "learning_rate": 4.839764816893315e-06, "loss": 1.0296, "step": 1178 }, { "epoch": 0.12399595093798887, "grad_norm": 3.8493227086097836, "learning_rate": 4.839470621247235e-06, "loss": 0.9829, "step": 1179 }, { "epoch": 0.1241011213798362, "grad_norm": 3.863296748292079, "learning_rate": 4.839176164732009e-06, "loss": 1.0043, "step": 1180 }, { "epoch": 0.12420629182168351, "grad_norm": 3.044556390384978, "learning_rate": 4.838881447380468e-06, "loss": 1.0635, "step": 1181 }, { "epoch": 0.12431146226353083, "grad_norm": 3.036859307808052, "learning_rate": 4.838586469225477e-06, "loss": 1.068, "step": 1182 }, { "epoch": 0.12441663270537816, "grad_norm": 3.4852695563768936, "learning_rate": 4.838291230299927e-06, "loss": 1.0264, "step": 1183 }, { "epoch": 0.12452180314722547, "grad_norm": 3.541328436650173, "learning_rate": 4.837995730636742e-06, "loss": 1.0692, "step": 1184 }, { "epoch": 0.12462697358907279, "grad_norm": 2.403850262061819, "learning_rate": 4.83769997026887e-06, "loss": 1.0131, "step": 1185 }, { "epoch": 0.12473214403092012, "grad_norm": 2.0179723281866204, "learning_rate": 4.837403949229291e-06, "loss": 1.033, "step": 1186 }, { "epoch": 0.12483731447276743, "grad_norm": 3.288715529975734, "learning_rate": 4.837107667551015e-06, "loss": 1.0143, "step": 1187 }, { "epoch": 0.12494248491461475, "grad_norm": 2.488652116770584, "learning_rate": 4.8368111252670776e-06, "loss": 1.0064, "step": 1188 }, { "epoch": 0.12504765535646206, "grad_norm": 2.099768655554014, "learning_rate": 4.836514322410548e-06, "loss": 1.0311, "step": 1189 }, { "epoch": 0.1251528257983094, "grad_norm": 2.61507115515271, "learning_rate": 4.83621725901452e-06, "loss": 1.0037, "step": 1190 }, { "epoch": 0.1252579962401567, "grad_norm": 3.5627001839530887, "learning_rate": 4.83591993511212e-06, "loss": 1.0104, "step": 1191 }, { "epoch": 0.12536316668200403, "grad_norm": 2.7611171047918854, "learning_rate": 4.835622350736499e-06, "loss": 1.0015, "step": 1192 }, { "epoch": 0.12546833712385133, "grad_norm": 2.6167789911665333, "learning_rate": 4.835324505920845e-06, "loss": 1.0208, "step": 1193 }, { "epoch": 0.12557350756569866, "grad_norm": 3.3090032658379624, "learning_rate": 4.835026400698366e-06, "loss": 1.0339, "step": 1194 }, { "epoch": 0.12567867800754598, "grad_norm": 2.7592196627213172, "learning_rate": 4.8347280351023044e-06, "loss": 1.021, "step": 1195 }, { "epoch": 0.1257838484493933, "grad_norm": 3.529997579629742, "learning_rate": 4.83442940916593e-06, "loss": 1.0269, "step": 1196 }, { "epoch": 0.12588901889124063, "grad_norm": 3.0771754582486324, "learning_rate": 4.834130522922541e-06, "loss": 1.0385, "step": 1197 }, { "epoch": 0.12599418933308792, "grad_norm": 3.019098612661189, "learning_rate": 4.8338313764054676e-06, "loss": 1.0067, "step": 1198 }, { "epoch": 0.12609935977493525, "grad_norm": 3.1455190399414774, "learning_rate": 4.8335319696480655e-06, "loss": 0.9901, "step": 1199 }, { "epoch": 0.12620453021678257, "grad_norm": 2.815592993177342, "learning_rate": 4.833232302683721e-06, "loss": 1.0099, "step": 1200 }, { "epoch": 0.1263097006586299, "grad_norm": 2.1662241166960974, "learning_rate": 4.83293237554585e-06, "loss": 1.0209, "step": 1201 }, { "epoch": 0.12641487110047722, "grad_norm": 2.5510337259609717, "learning_rate": 4.832632188267896e-06, "loss": 1.0417, "step": 1202 }, { "epoch": 0.12652004154232452, "grad_norm": 2.7666819520333146, "learning_rate": 4.832331740883333e-06, "loss": 1.0352, "step": 1203 }, { "epoch": 0.12662521198417184, "grad_norm": 3.0449829988980937, "learning_rate": 4.832031033425663e-06, "loss": 1.0291, "step": 1204 }, { "epoch": 0.12673038242601917, "grad_norm": 2.6400119751853564, "learning_rate": 4.831730065928416e-06, "loss": 1.0378, "step": 1205 }, { "epoch": 0.1268355528678665, "grad_norm": 2.94538447937034, "learning_rate": 4.831428838425153e-06, "loss": 1.0311, "step": 1206 }, { "epoch": 0.12694072330971382, "grad_norm": 2.3594393856071614, "learning_rate": 4.8311273509494635e-06, "loss": 1.0124, "step": 1207 }, { "epoch": 0.1270458937515611, "grad_norm": 2.153915292661592, "learning_rate": 4.830825603534967e-06, "loss": 1.0277, "step": 1208 }, { "epoch": 0.12715106419340844, "grad_norm": 3.456220143305514, "learning_rate": 4.8305235962153075e-06, "loss": 1.0868, "step": 1209 }, { "epoch": 0.12725623463525576, "grad_norm": 2.7043626503920337, "learning_rate": 4.830221329024163e-06, "loss": 1.0537, "step": 1210 }, { "epoch": 0.12736140507710308, "grad_norm": 3.589165033013744, "learning_rate": 4.829918801995239e-06, "loss": 1.0796, "step": 1211 }, { "epoch": 0.1274665755189504, "grad_norm": 3.373260416606364, "learning_rate": 4.829616015162269e-06, "loss": 1.0191, "step": 1212 }, { "epoch": 0.1275717459607977, "grad_norm": 3.5871990636482796, "learning_rate": 4.8293129685590164e-06, "loss": 1.0407, "step": 1213 }, { "epoch": 0.12767691640264503, "grad_norm": 4.2127544793207745, "learning_rate": 4.8290096622192735e-06, "loss": 1.0372, "step": 1214 }, { "epoch": 0.12778208684449235, "grad_norm": 3.511106624322245, "learning_rate": 4.82870609617686e-06, "loss": 1.0408, "step": 1215 }, { "epoch": 0.12788725728633968, "grad_norm": 2.8132894727294055, "learning_rate": 4.828402270465628e-06, "loss": 0.9943, "step": 1216 }, { "epoch": 0.127992427728187, "grad_norm": 2.4459083353657807, "learning_rate": 4.828098185119454e-06, "loss": 1.0259, "step": 1217 }, { "epoch": 0.1280975981700343, "grad_norm": 2.3329129365329337, "learning_rate": 4.827793840172247e-06, "loss": 1.0269, "step": 1218 }, { "epoch": 0.12820276861188162, "grad_norm": 2.8128127508148015, "learning_rate": 4.827489235657944e-06, "loss": 1.0505, "step": 1219 }, { "epoch": 0.12830793905372895, "grad_norm": 2.339660968358773, "learning_rate": 4.827184371610511e-06, "loss": 1.0047, "step": 1220 }, { "epoch": 0.12841310949557627, "grad_norm": 3.3186117188660584, "learning_rate": 4.826879248063943e-06, "loss": 1.0416, "step": 1221 }, { "epoch": 0.1285182799374236, "grad_norm": 3.0496345780537077, "learning_rate": 4.826573865052261e-06, "loss": 1.0285, "step": 1222 }, { "epoch": 0.1286234503792709, "grad_norm": 3.0696030887083134, "learning_rate": 4.8262682226095215e-06, "loss": 1.0221, "step": 1223 }, { "epoch": 0.12872862082111822, "grad_norm": 2.911026501529926, "learning_rate": 4.825962320769804e-06, "loss": 0.996, "step": 1224 }, { "epoch": 0.12883379126296554, "grad_norm": 2.6415248186701414, "learning_rate": 4.825656159567218e-06, "loss": 1.0077, "step": 1225 }, { "epoch": 0.12893896170481287, "grad_norm": 3.015861478144796, "learning_rate": 4.8253497390359035e-06, "loss": 1.0199, "step": 1226 }, { "epoch": 0.1290441321466602, "grad_norm": 2.968448842373254, "learning_rate": 4.82504305921003e-06, "loss": 1.0586, "step": 1227 }, { "epoch": 0.1291493025885075, "grad_norm": 3.6356255518516805, "learning_rate": 4.824736120123794e-06, "loss": 1.0877, "step": 1228 }, { "epoch": 0.1292544730303548, "grad_norm": 2.2178443735382616, "learning_rate": 4.824428921811421e-06, "loss": 1.0224, "step": 1229 }, { "epoch": 0.12935964347220213, "grad_norm": 2.5367079837422497, "learning_rate": 4.824121464307168e-06, "loss": 1.0118, "step": 1230 }, { "epoch": 0.12946481391404946, "grad_norm": 3.130091176692925, "learning_rate": 4.823813747645315e-06, "loss": 1.0146, "step": 1231 }, { "epoch": 0.12956998435589678, "grad_norm": 2.4669259065457902, "learning_rate": 4.823505771860178e-06, "loss": 1.0262, "step": 1232 }, { "epoch": 0.1296751547977441, "grad_norm": 2.3946588572460015, "learning_rate": 4.823197536986098e-06, "loss": 1.0202, "step": 1233 }, { "epoch": 0.1297803252395914, "grad_norm": 3.28718019202575, "learning_rate": 4.822889043057446e-06, "loss": 0.996, "step": 1234 }, { "epoch": 0.12988549568143873, "grad_norm": 3.172840137269599, "learning_rate": 4.82258029010862e-06, "loss": 0.9892, "step": 1235 }, { "epoch": 0.12999066612328605, "grad_norm": 3.469596633957251, "learning_rate": 4.82227127817405e-06, "loss": 1.0587, "step": 1236 }, { "epoch": 0.13009583656513338, "grad_norm": 2.555342312316807, "learning_rate": 4.821962007288191e-06, "loss": 1.0267, "step": 1237 }, { "epoch": 0.1302010070069807, "grad_norm": 2.5256812029218128, "learning_rate": 4.821652477485531e-06, "loss": 1.0289, "step": 1238 }, { "epoch": 0.130306177448828, "grad_norm": 3.1983565580222875, "learning_rate": 4.821342688800586e-06, "loss": 1.0321, "step": 1239 }, { "epoch": 0.13041134789067532, "grad_norm": 3.620474013802864, "learning_rate": 4.821032641267897e-06, "loss": 0.9785, "step": 1240 }, { "epoch": 0.13051651833252265, "grad_norm": 2.661892927357515, "learning_rate": 4.820722334922039e-06, "loss": 1.039, "step": 1241 }, { "epoch": 0.13062168877436997, "grad_norm": 3.34786582341296, "learning_rate": 4.820411769797611e-06, "loss": 1.0484, "step": 1242 }, { "epoch": 0.1307268592162173, "grad_norm": 3.9689276628030985, "learning_rate": 4.820100945929247e-06, "loss": 1.0423, "step": 1243 }, { "epoch": 0.1308320296580646, "grad_norm": 2.883967378632545, "learning_rate": 4.819789863351603e-06, "loss": 1.0503, "step": 1244 }, { "epoch": 0.13093720009991192, "grad_norm": 3.105797028005016, "learning_rate": 4.819478522099369e-06, "loss": 1.0227, "step": 1245 }, { "epoch": 0.13104237054175924, "grad_norm": 1.6808339529344005, "learning_rate": 4.819166922207261e-06, "loss": 1.0181, "step": 1246 }, { "epoch": 0.13114754098360656, "grad_norm": 2.7847369838128326, "learning_rate": 4.818855063710026e-06, "loss": 1.0094, "step": 1247 }, { "epoch": 0.1312527114254539, "grad_norm": 2.8619801152801365, "learning_rate": 4.8185429466424375e-06, "loss": 1.0377, "step": 1248 }, { "epoch": 0.13135788186730118, "grad_norm": 3.00420826947291, "learning_rate": 4.8182305710393e-06, "loss": 1.0415, "step": 1249 }, { "epoch": 0.1314630523091485, "grad_norm": 3.106603153157244, "learning_rate": 4.8179179369354445e-06, "loss": 1.0087, "step": 1250 }, { "epoch": 0.13156822275099583, "grad_norm": 2.9567174545180936, "learning_rate": 4.817605044365733e-06, "loss": 1.0042, "step": 1251 }, { "epoch": 0.13167339319284316, "grad_norm": 3.6684630626815578, "learning_rate": 4.817291893365055e-06, "loss": 1.0208, "step": 1252 }, { "epoch": 0.13177856363469048, "grad_norm": 3.8333923409888406, "learning_rate": 4.8169784839683295e-06, "loss": 1.032, "step": 1253 }, { "epoch": 0.13188373407653778, "grad_norm": 3.076774148237722, "learning_rate": 4.816664816210505e-06, "loss": 1.0345, "step": 1254 }, { "epoch": 0.1319889045183851, "grad_norm": 2.528183859455226, "learning_rate": 4.816350890126556e-06, "loss": 1.0463, "step": 1255 }, { "epoch": 0.13209407496023243, "grad_norm": 3.2080429059373765, "learning_rate": 4.816036705751489e-06, "loss": 1.0083, "step": 1256 }, { "epoch": 0.13219924540207975, "grad_norm": 2.996843724364759, "learning_rate": 4.8157222631203374e-06, "loss": 0.9658, "step": 1257 }, { "epoch": 0.13230441584392708, "grad_norm": 2.823099339288846, "learning_rate": 4.815407562268165e-06, "loss": 1.0478, "step": 1258 }, { "epoch": 0.13240958628577437, "grad_norm": 2.5147629342097635, "learning_rate": 4.815092603230063e-06, "loss": 0.9942, "step": 1259 }, { "epoch": 0.1325147567276217, "grad_norm": 3.1482493227003316, "learning_rate": 4.81477738604115e-06, "loss": 0.9881, "step": 1260 }, { "epoch": 0.13261992716946902, "grad_norm": 3.020760053243842, "learning_rate": 4.814461910736578e-06, "loss": 1.0364, "step": 1261 }, { "epoch": 0.13272509761131634, "grad_norm": 2.221968445422404, "learning_rate": 4.814146177351523e-06, "loss": 1.0265, "step": 1262 }, { "epoch": 0.13283026805316367, "grad_norm": 3.0176270858205565, "learning_rate": 4.8138301859211925e-06, "loss": 1.0519, "step": 1263 }, { "epoch": 0.13293543849501097, "grad_norm": 2.2986812981864317, "learning_rate": 4.813513936480821e-06, "loss": 1.03, "step": 1264 }, { "epoch": 0.1330406089368583, "grad_norm": 2.8952298044973057, "learning_rate": 4.8131974290656745e-06, "loss": 1.0342, "step": 1265 }, { "epoch": 0.13314577937870561, "grad_norm": 2.8246589734902225, "learning_rate": 4.812880663711045e-06, "loss": 1.0491, "step": 1266 }, { "epoch": 0.13325094982055294, "grad_norm": 2.5406616352440134, "learning_rate": 4.812563640452254e-06, "loss": 1.0069, "step": 1267 }, { "epoch": 0.13335612026240026, "grad_norm": 2.3100739684989167, "learning_rate": 4.812246359324653e-06, "loss": 1.002, "step": 1268 }, { "epoch": 0.13346129070424756, "grad_norm": 2.9322165786317997, "learning_rate": 4.811928820363621e-06, "loss": 1.0207, "step": 1269 }, { "epoch": 0.13356646114609488, "grad_norm": 2.3628042524685533, "learning_rate": 4.811611023604564e-06, "loss": 1.0314, "step": 1270 }, { "epoch": 0.1336716315879422, "grad_norm": 3.3682424272817895, "learning_rate": 4.811292969082922e-06, "loss": 0.9997, "step": 1271 }, { "epoch": 0.13377680202978953, "grad_norm": 2.796544714713144, "learning_rate": 4.8109746568341595e-06, "loss": 1.0163, "step": 1272 }, { "epoch": 0.13388197247163686, "grad_norm": 3.2927205255203997, "learning_rate": 4.81065608689377e-06, "loss": 1.0652, "step": 1273 }, { "epoch": 0.13398714291348415, "grad_norm": 4.2986772325983935, "learning_rate": 4.810337259297277e-06, "loss": 1.0425, "step": 1274 }, { "epoch": 0.13409231335533148, "grad_norm": 2.5363498831942675, "learning_rate": 4.810018174080233e-06, "loss": 1.0201, "step": 1275 }, { "epoch": 0.1341974837971788, "grad_norm": 3.5836252312590973, "learning_rate": 4.809698831278217e-06, "loss": 1.0456, "step": 1276 }, { "epoch": 0.13430265423902613, "grad_norm": 3.425235368215492, "learning_rate": 4.80937923092684e-06, "loss": 1.0208, "step": 1277 }, { "epoch": 0.13440782468087345, "grad_norm": 1.9745893346472898, "learning_rate": 4.809059373061737e-06, "loss": 0.9924, "step": 1278 }, { "epoch": 0.13451299512272075, "grad_norm": 3.0646315393918835, "learning_rate": 4.808739257718578e-06, "loss": 0.9939, "step": 1279 }, { "epoch": 0.13461816556456807, "grad_norm": 2.479949101456566, "learning_rate": 4.808418884933056e-06, "loss": 1.0327, "step": 1280 }, { "epoch": 0.1347233360064154, "grad_norm": 2.922429890182779, "learning_rate": 4.808098254740897e-06, "loss": 1.0177, "step": 1281 }, { "epoch": 0.13482850644826272, "grad_norm": 1.924186395677562, "learning_rate": 4.807777367177851e-06, "loss": 0.9983, "step": 1282 }, { "epoch": 0.13493367689011004, "grad_norm": 3.935674804500333, "learning_rate": 4.807456222279703e-06, "loss": 1.0269, "step": 1283 }, { "epoch": 0.13503884733195734, "grad_norm": 2.938713767635199, "learning_rate": 4.80713482008226e-06, "loss": 1.0239, "step": 1284 }, { "epoch": 0.13514401777380466, "grad_norm": 3.1136862313700497, "learning_rate": 4.806813160621362e-06, "loss": 0.9802, "step": 1285 }, { "epoch": 0.135249188215652, "grad_norm": 3.3108785416004722, "learning_rate": 4.806491243932877e-06, "loss": 1.0428, "step": 1286 }, { "epoch": 0.1353543586574993, "grad_norm": 2.1210612289323048, "learning_rate": 4.806169070052701e-06, "loss": 0.9905, "step": 1287 }, { "epoch": 0.13545952909934664, "grad_norm": 3.1193765406401797, "learning_rate": 4.805846639016759e-06, "loss": 1.0397, "step": 1288 }, { "epoch": 0.13556469954119393, "grad_norm": 3.3591628388988757, "learning_rate": 4.805523950861004e-06, "loss": 1.0248, "step": 1289 }, { "epoch": 0.13566986998304126, "grad_norm": 2.4188726077223746, "learning_rate": 4.8052010056214184e-06, "loss": 1.0516, "step": 1290 }, { "epoch": 0.13577504042488858, "grad_norm": 3.4651111802218093, "learning_rate": 4.804877803334013e-06, "loss": 1.0804, "step": 1291 }, { "epoch": 0.1358802108667359, "grad_norm": 2.919536255232778, "learning_rate": 4.8045543440348276e-06, "loss": 1.0286, "step": 1292 }, { "epoch": 0.13598538130858323, "grad_norm": 2.2477800261870344, "learning_rate": 4.804230627759931e-06, "loss": 1.0185, "step": 1293 }, { "epoch": 0.13609055175043056, "grad_norm": 2.571200906414569, "learning_rate": 4.803906654545419e-06, "loss": 1.018, "step": 1294 }, { "epoch": 0.13619572219227785, "grad_norm": 2.760586584220439, "learning_rate": 4.803582424427419e-06, "loss": 1.0139, "step": 1295 }, { "epoch": 0.13630089263412518, "grad_norm": 2.566832369387431, "learning_rate": 4.803257937442082e-06, "loss": 1.0429, "step": 1296 }, { "epoch": 0.1364060630759725, "grad_norm": 4.222863669394409, "learning_rate": 4.802933193625593e-06, "loss": 1.0491, "step": 1297 }, { "epoch": 0.13651123351781982, "grad_norm": 3.8303058287728664, "learning_rate": 4.802608193014164e-06, "loss": 1.0619, "step": 1298 }, { "epoch": 0.13661640395966715, "grad_norm": 2.6093520415311167, "learning_rate": 4.802282935644034e-06, "loss": 1.0233, "step": 1299 }, { "epoch": 0.13672157440151445, "grad_norm": 2.9601576956633706, "learning_rate": 4.8019574215514705e-06, "loss": 1.0153, "step": 1300 }, { "epoch": 0.13682674484336177, "grad_norm": 2.1944445212698986, "learning_rate": 4.801631650772774e-06, "loss": 0.9904, "step": 1301 }, { "epoch": 0.1369319152852091, "grad_norm": 2.6684642895304327, "learning_rate": 4.801305623344268e-06, "loss": 1.0708, "step": 1302 }, { "epoch": 0.13703708572705642, "grad_norm": 2.686121392463823, "learning_rate": 4.800979339302308e-06, "loss": 1.0116, "step": 1303 }, { "epoch": 0.13714225616890374, "grad_norm": 2.8185256267861836, "learning_rate": 4.800652798683277e-06, "loss": 1.055, "step": 1304 }, { "epoch": 0.13724742661075104, "grad_norm": 2.793491565244081, "learning_rate": 4.800326001523586e-06, "loss": 0.9955, "step": 1305 }, { "epoch": 0.13735259705259836, "grad_norm": 4.301260101751436, "learning_rate": 4.799998947859678e-06, "loss": 1.0288, "step": 1306 }, { "epoch": 0.1374577674944457, "grad_norm": 2.1242732201639183, "learning_rate": 4.799671637728019e-06, "loss": 1.0248, "step": 1307 }, { "epoch": 0.137562937936293, "grad_norm": 3.5698968807461364, "learning_rate": 4.799344071165108e-06, "loss": 1.0036, "step": 1308 }, { "epoch": 0.13766810837814034, "grad_norm": 2.5835211133401237, "learning_rate": 4.799016248207472e-06, "loss": 1.0235, "step": 1309 }, { "epoch": 0.13777327881998763, "grad_norm": 3.0261001648609454, "learning_rate": 4.7986881688916646e-06, "loss": 1.0384, "step": 1310 }, { "epoch": 0.13787844926183496, "grad_norm": 3.474891168157717, "learning_rate": 4.798359833254269e-06, "loss": 1.0484, "step": 1311 }, { "epoch": 0.13798361970368228, "grad_norm": 3.1716499888572036, "learning_rate": 4.798031241331898e-06, "loss": 1.0298, "step": 1312 }, { "epoch": 0.1380887901455296, "grad_norm": 2.7269924098282514, "learning_rate": 4.7977023931611916e-06, "loss": 1.0619, "step": 1313 }, { "epoch": 0.13819396058737693, "grad_norm": 3.0594179433515247, "learning_rate": 4.797373288778819e-06, "loss": 1.0194, "step": 1314 }, { "epoch": 0.13829913102922423, "grad_norm": 2.6485215434411704, "learning_rate": 4.797043928221479e-06, "loss": 1.0012, "step": 1315 }, { "epoch": 0.13840430147107155, "grad_norm": 2.387465962136887, "learning_rate": 4.796714311525897e-06, "loss": 0.9964, "step": 1316 }, { "epoch": 0.13850947191291887, "grad_norm": 3.2239747245175883, "learning_rate": 4.796384438728827e-06, "loss": 1.0092, "step": 1317 }, { "epoch": 0.1386146423547662, "grad_norm": 3.378894724491444, "learning_rate": 4.796054309867053e-06, "loss": 1.063, "step": 1318 }, { "epoch": 0.13871981279661352, "grad_norm": 2.9523255723706203, "learning_rate": 4.7957239249773876e-06, "loss": 1.0547, "step": 1319 }, { "epoch": 0.13882498323846082, "grad_norm": 2.7245192269142366, "learning_rate": 4.795393284096671e-06, "loss": 1.0321, "step": 1320 }, { "epoch": 0.13893015368030814, "grad_norm": 3.402757468965259, "learning_rate": 4.7950623872617705e-06, "loss": 1.0365, "step": 1321 }, { "epoch": 0.13903532412215547, "grad_norm": 2.0645918619669907, "learning_rate": 4.7947312345095865e-06, "loss": 1.0378, "step": 1322 }, { "epoch": 0.1391404945640028, "grad_norm": 3.1649331722389027, "learning_rate": 4.794399825877044e-06, "loss": 1.0502, "step": 1323 }, { "epoch": 0.13924566500585012, "grad_norm": 3.0438185017979316, "learning_rate": 4.794068161401097e-06, "loss": 1.0202, "step": 1324 }, { "epoch": 0.1393508354476974, "grad_norm": 2.8567093139911126, "learning_rate": 4.793736241118729e-06, "loss": 0.9863, "step": 1325 }, { "epoch": 0.13945600588954474, "grad_norm": 3.1861700342675783, "learning_rate": 4.793404065066951e-06, "loss": 1.052, "step": 1326 }, { "epoch": 0.13956117633139206, "grad_norm": 2.268534254398867, "learning_rate": 4.7930716332828055e-06, "loss": 1.0266, "step": 1327 }, { "epoch": 0.13966634677323939, "grad_norm": 2.7798949911303215, "learning_rate": 4.792738945803358e-06, "loss": 1.0042, "step": 1328 }, { "epoch": 0.1397715172150867, "grad_norm": 2.9625802390516673, "learning_rate": 4.792406002665708e-06, "loss": 1.0692, "step": 1329 }, { "epoch": 0.139876687656934, "grad_norm": 3.3097071364207076, "learning_rate": 4.792072803906982e-06, "loss": 1.0495, "step": 1330 }, { "epoch": 0.13998185809878133, "grad_norm": 3.3911434469914252, "learning_rate": 4.791739349564332e-06, "loss": 1.0219, "step": 1331 }, { "epoch": 0.14008702854062866, "grad_norm": 2.4415653316509323, "learning_rate": 4.791405639674941e-06, "loss": 1.0159, "step": 1332 }, { "epoch": 0.14019219898247598, "grad_norm": 2.673194071720424, "learning_rate": 4.791071674276021e-06, "loss": 1.0428, "step": 1333 }, { "epoch": 0.1402973694243233, "grad_norm": 3.4270530074992207, "learning_rate": 4.790737453404812e-06, "loss": 1.0427, "step": 1334 }, { "epoch": 0.1404025398661706, "grad_norm": 2.370560923133824, "learning_rate": 4.790402977098583e-06, "loss": 1.0541, "step": 1335 }, { "epoch": 0.14050771030801792, "grad_norm": 3.178115791238882, "learning_rate": 4.7900682453946276e-06, "loss": 1.0314, "step": 1336 }, { "epoch": 0.14061288074986525, "grad_norm": 2.6802658373773176, "learning_rate": 4.7897332583302745e-06, "loss": 1.0589, "step": 1337 }, { "epoch": 0.14071805119171257, "grad_norm": 2.848166188577203, "learning_rate": 4.789398015942875e-06, "loss": 1.0272, "step": 1338 }, { "epoch": 0.1408232216335599, "grad_norm": 3.1907811105881922, "learning_rate": 4.789062518269812e-06, "loss": 1.034, "step": 1339 }, { "epoch": 0.1409283920754072, "grad_norm": 2.9382816317350375, "learning_rate": 4.7887267653484965e-06, "loss": 1.015, "step": 1340 }, { "epoch": 0.14103356251725452, "grad_norm": 4.496979260313411, "learning_rate": 4.788390757216367e-06, "loss": 1.0533, "step": 1341 }, { "epoch": 0.14113873295910184, "grad_norm": 2.6947544952945037, "learning_rate": 4.7880544939108915e-06, "loss": 1.045, "step": 1342 }, { "epoch": 0.14124390340094917, "grad_norm": 2.6878213842926644, "learning_rate": 4.787717975469566e-06, "loss": 1.0424, "step": 1343 }, { "epoch": 0.1413490738427965, "grad_norm": 2.9207345197550323, "learning_rate": 4.787381201929913e-06, "loss": 1.0479, "step": 1344 }, { "epoch": 0.1414542442846438, "grad_norm": 2.8600747613086024, "learning_rate": 4.787044173329489e-06, "loss": 1.0018, "step": 1345 }, { "epoch": 0.1415594147264911, "grad_norm": 2.456864436992996, "learning_rate": 4.7867068897058725e-06, "loss": 1.0043, "step": 1346 }, { "epoch": 0.14166458516833844, "grad_norm": 3.0962033894183087, "learning_rate": 4.7863693510966735e-06, "loss": 0.9958, "step": 1347 }, { "epoch": 0.14176975561018576, "grad_norm": 2.6323675844789016, "learning_rate": 4.786031557539532e-06, "loss": 1.0502, "step": 1348 }, { "epoch": 0.14187492605203308, "grad_norm": 2.694477249584249, "learning_rate": 4.785693509072112e-06, "loss": 1.0372, "step": 1349 }, { "epoch": 0.14198009649388038, "grad_norm": 3.3629384103067452, "learning_rate": 4.785355205732111e-06, "loss": 0.9938, "step": 1350 }, { "epoch": 0.1420852669357277, "grad_norm": 3.334830674838776, "learning_rate": 4.78501664755725e-06, "loss": 1.0405, "step": 1351 }, { "epoch": 0.14219043737757503, "grad_norm": 2.2938533826137455, "learning_rate": 4.784677834585283e-06, "loss": 1.0207, "step": 1352 }, { "epoch": 0.14229560781942235, "grad_norm": 2.5803341129447896, "learning_rate": 4.784338766853989e-06, "loss": 1.0151, "step": 1353 }, { "epoch": 0.14240077826126968, "grad_norm": 2.9122499893707383, "learning_rate": 4.783999444401178e-06, "loss": 1.0644, "step": 1354 }, { "epoch": 0.142505948703117, "grad_norm": 2.091992322725459, "learning_rate": 4.783659867264685e-06, "loss": 0.9981, "step": 1355 }, { "epoch": 0.1426111191449643, "grad_norm": 3.065753050616509, "learning_rate": 4.783320035482378e-06, "loss": 0.9894, "step": 1356 }, { "epoch": 0.14271628958681162, "grad_norm": 3.1525287256847903, "learning_rate": 4.7829799490921485e-06, "loss": 1.077, "step": 1357 }, { "epoch": 0.14282146002865895, "grad_norm": 3.3013362649199065, "learning_rate": 4.7826396081319206e-06, "loss": 1.0439, "step": 1358 }, { "epoch": 0.14292663047050627, "grad_norm": 2.6267213566754295, "learning_rate": 4.782299012639644e-06, "loss": 1.0254, "step": 1359 }, { "epoch": 0.1430318009123536, "grad_norm": 3.664812917190621, "learning_rate": 4.781958162653298e-06, "loss": 1.0252, "step": 1360 }, { "epoch": 0.1431369713542009, "grad_norm": 2.3021182933754103, "learning_rate": 4.781617058210889e-06, "loss": 1.0615, "step": 1361 }, { "epoch": 0.14324214179604822, "grad_norm": 2.881195846130054, "learning_rate": 4.781275699350455e-06, "loss": 1.0005, "step": 1362 }, { "epoch": 0.14334731223789554, "grad_norm": 3.7980632477387632, "learning_rate": 4.780934086110059e-06, "loss": 1.0062, "step": 1363 }, { "epoch": 0.14345248267974287, "grad_norm": 2.4526767159057505, "learning_rate": 4.780592218527793e-06, "loss": 1.0588, "step": 1364 }, { "epoch": 0.1435576531215902, "grad_norm": 2.7801413202576177, "learning_rate": 4.780250096641778e-06, "loss": 1.004, "step": 1365 }, { "epoch": 0.1436628235634375, "grad_norm": 3.2606411773565442, "learning_rate": 4.779907720490164e-06, "loss": 1.0199, "step": 1366 }, { "epoch": 0.1437679940052848, "grad_norm": 3.222765541930548, "learning_rate": 4.779565090111129e-06, "loss": 0.9907, "step": 1367 }, { "epoch": 0.14387316444713213, "grad_norm": 3.1387995556419357, "learning_rate": 4.779222205542877e-06, "loss": 1.0304, "step": 1368 }, { "epoch": 0.14397833488897946, "grad_norm": 2.389261805095194, "learning_rate": 4.778879066823644e-06, "loss": 1.0449, "step": 1369 }, { "epoch": 0.14408350533082678, "grad_norm": 2.724316181670948, "learning_rate": 4.778535673991692e-06, "loss": 1.0199, "step": 1370 }, { "epoch": 0.14418867577267408, "grad_norm": 3.0155113196278585, "learning_rate": 4.7781920270853126e-06, "loss": 1.0333, "step": 1371 }, { "epoch": 0.1442938462145214, "grad_norm": 3.5146591280021164, "learning_rate": 4.777848126142824e-06, "loss": 1.0147, "step": 1372 }, { "epoch": 0.14439901665636873, "grad_norm": 2.61346142352295, "learning_rate": 4.777503971202574e-06, "loss": 1.0076, "step": 1373 }, { "epoch": 0.14450418709821605, "grad_norm": 2.7874977280860618, "learning_rate": 4.77715956230294e-06, "loss": 0.994, "step": 1374 }, { "epoch": 0.14460935754006338, "grad_norm": 3.1736286121545536, "learning_rate": 4.776814899482325e-06, "loss": 1.0834, "step": 1375 }, { "epoch": 0.14471452798191067, "grad_norm": 3.6191087311210732, "learning_rate": 4.776469982779161e-06, "loss": 1.0153, "step": 1376 }, { "epoch": 0.144819698423758, "grad_norm": 3.745985677561027, "learning_rate": 4.7761248122319105e-06, "loss": 0.9798, "step": 1377 }, { "epoch": 0.14492486886560532, "grad_norm": 2.3804794207514406, "learning_rate": 4.775779387879061e-06, "loss": 1.0106, "step": 1378 }, { "epoch": 0.14503003930745265, "grad_norm": 2.0323842954289346, "learning_rate": 4.7754337097591315e-06, "loss": 1.031, "step": 1379 }, { "epoch": 0.14513520974929997, "grad_norm": 2.633921230163831, "learning_rate": 4.7750877779106666e-06, "loss": 1.0546, "step": 1380 }, { "epoch": 0.14524038019114727, "grad_norm": 3.477178349518341, "learning_rate": 4.774741592372242e-06, "loss": 1.0429, "step": 1381 }, { "epoch": 0.1453455506329946, "grad_norm": 3.025833331172734, "learning_rate": 4.774395153182459e-06, "loss": 1.0074, "step": 1382 }, { "epoch": 0.14545072107484192, "grad_norm": 2.8582583766175005, "learning_rate": 4.774048460379947e-06, "loss": 1.0239, "step": 1383 }, { "epoch": 0.14555589151668924, "grad_norm": 3.5823549566490933, "learning_rate": 4.773701514003367e-06, "loss": 1.0257, "step": 1384 }, { "epoch": 0.14566106195853656, "grad_norm": 2.6124292693903612, "learning_rate": 4.773354314091405e-06, "loss": 0.9493, "step": 1385 }, { "epoch": 0.14576623240038386, "grad_norm": 2.8268960445709226, "learning_rate": 4.773006860682777e-06, "loss": 1.0065, "step": 1386 }, { "epoch": 0.14587140284223118, "grad_norm": 3.9272556505800273, "learning_rate": 4.772659153816228e-06, "loss": 1.0455, "step": 1387 }, { "epoch": 0.1459765732840785, "grad_norm": 3.071553519913472, "learning_rate": 4.7723111935305275e-06, "loss": 1.0151, "step": 1388 }, { "epoch": 0.14608174372592583, "grad_norm": 2.713380086603046, "learning_rate": 4.7719629798644775e-06, "loss": 1.0394, "step": 1389 }, { "epoch": 0.14618691416777316, "grad_norm": 2.5543229953092608, "learning_rate": 4.7716145128569054e-06, "loss": 1.0254, "step": 1390 }, { "epoch": 0.14629208460962045, "grad_norm": 2.5719448084092216, "learning_rate": 4.771265792546669e-06, "loss": 0.9972, "step": 1391 }, { "epoch": 0.14639725505146778, "grad_norm": 2.360610533233657, "learning_rate": 4.770916818972653e-06, "loss": 1.0448, "step": 1392 }, { "epoch": 0.1465024254933151, "grad_norm": 2.8007366713065274, "learning_rate": 4.77056759217377e-06, "loss": 1.0058, "step": 1393 }, { "epoch": 0.14660759593516243, "grad_norm": 2.524726744796562, "learning_rate": 4.770218112188964e-06, "loss": 1.0684, "step": 1394 }, { "epoch": 0.14671276637700975, "grad_norm": 3.3903828921485943, "learning_rate": 4.769868379057201e-06, "loss": 1.1003, "step": 1395 }, { "epoch": 0.14681793681885705, "grad_norm": 3.4978205266954037, "learning_rate": 4.7695183928174804e-06, "loss": 1.037, "step": 1396 }, { "epoch": 0.14692310726070437, "grad_norm": 3.3446642465813152, "learning_rate": 4.76916815350883e-06, "loss": 1.0366, "step": 1397 }, { "epoch": 0.1470282777025517, "grad_norm": 2.2392665764959836, "learning_rate": 4.768817661170302e-06, "loss": 1.0026, "step": 1398 }, { "epoch": 0.14713344814439902, "grad_norm": 2.8612244152019963, "learning_rate": 4.768466915840981e-06, "loss": 1.0446, "step": 1399 }, { "epoch": 0.14723861858624634, "grad_norm": 3.3281982579588085, "learning_rate": 4.768115917559976e-06, "loss": 1.0825, "step": 1400 }, { "epoch": 0.14734378902809364, "grad_norm": 2.2985012611056597, "learning_rate": 4.767764666366427e-06, "loss": 1.0283, "step": 1401 }, { "epoch": 0.14744895946994097, "grad_norm": 2.6342734587607755, "learning_rate": 4.767413162299501e-06, "loss": 0.9836, "step": 1402 }, { "epoch": 0.1475541299117883, "grad_norm": 2.555811598919671, "learning_rate": 4.7670614053983945e-06, "loss": 0.9753, "step": 1403 }, { "epoch": 0.14765930035363561, "grad_norm": 4.202733894529233, "learning_rate": 4.766709395702329e-06, "loss": 1.0445, "step": 1404 }, { "epoch": 0.14776447079548294, "grad_norm": 2.8209004214002, "learning_rate": 4.766357133250558e-06, "loss": 1.0223, "step": 1405 }, { "epoch": 0.14786964123733023, "grad_norm": 2.8446733034162657, "learning_rate": 4.766004618082361e-06, "loss": 1.0289, "step": 1406 }, { "epoch": 0.14797481167917756, "grad_norm": 4.068733400424128, "learning_rate": 4.765651850237046e-06, "loss": 1.0016, "step": 1407 }, { "epoch": 0.14807998212102488, "grad_norm": 3.844649508056965, "learning_rate": 4.765298829753949e-06, "loss": 0.9918, "step": 1408 }, { "epoch": 0.1481851525628722, "grad_norm": 2.919584733622149, "learning_rate": 4.764945556672435e-06, "loss": 1.0238, "step": 1409 }, { "epoch": 0.14829032300471953, "grad_norm": 3.7015807840351584, "learning_rate": 4.764592031031898e-06, "loss": 1.0315, "step": 1410 }, { "epoch": 0.14839549344656686, "grad_norm": 4.273538161097895, "learning_rate": 4.7642382528717565e-06, "loss": 1.0426, "step": 1411 }, { "epoch": 0.14850066388841415, "grad_norm": 3.509784403656781, "learning_rate": 4.763884222231461e-06, "loss": 0.9692, "step": 1412 }, { "epoch": 0.14860583433026148, "grad_norm": 3.524891382474383, "learning_rate": 4.763529939150489e-06, "loss": 1.023, "step": 1413 }, { "epoch": 0.1487110047721088, "grad_norm": 2.860810743236083, "learning_rate": 4.763175403668344e-06, "loss": 0.9906, "step": 1414 }, { "epoch": 0.14881617521395613, "grad_norm": 3.051115350337341, "learning_rate": 4.762820615824561e-06, "loss": 1.0535, "step": 1415 }, { "epoch": 0.14892134565580345, "grad_norm": 2.868633232025887, "learning_rate": 4.7624655756587e-06, "loss": 1.0226, "step": 1416 }, { "epoch": 0.14902651609765075, "grad_norm": 3.2091074851672823, "learning_rate": 4.762110283210353e-06, "loss": 1.0125, "step": 1417 }, { "epoch": 0.14913168653949807, "grad_norm": 3.4892610849853614, "learning_rate": 4.761754738519136e-06, "loss": 1.0046, "step": 1418 }, { "epoch": 0.1492368569813454, "grad_norm": 3.5426727194444623, "learning_rate": 4.761398941624696e-06, "loss": 1.012, "step": 1419 }, { "epoch": 0.14934202742319272, "grad_norm": 2.648969769760997, "learning_rate": 4.761042892566707e-06, "loss": 1.0218, "step": 1420 }, { "epoch": 0.14944719786504004, "grad_norm": 3.2305776271347746, "learning_rate": 4.7606865913848725e-06, "loss": 1.0702, "step": 1421 }, { "epoch": 0.14955236830688734, "grad_norm": 3.1832515293962027, "learning_rate": 4.760330038118919e-06, "loss": 1.0464, "step": 1422 }, { "epoch": 0.14965753874873466, "grad_norm": 3.4113754972892743, "learning_rate": 4.759973232808609e-06, "loss": 1.0189, "step": 1423 }, { "epoch": 0.149762709190582, "grad_norm": 2.720125937598134, "learning_rate": 4.759616175493726e-06, "loss": 0.9823, "step": 1424 }, { "epoch": 0.1498678796324293, "grad_norm": 3.4131804917283537, "learning_rate": 4.7592588662140876e-06, "loss": 1.0335, "step": 1425 }, { "epoch": 0.14997305007427664, "grad_norm": 2.5441205030353937, "learning_rate": 4.7589013050095345e-06, "loss": 1.0354, "step": 1426 }, { "epoch": 0.15007822051612393, "grad_norm": 3.064947944270619, "learning_rate": 4.758543491919938e-06, "loss": 1.0068, "step": 1427 }, { "epoch": 0.15018339095797126, "grad_norm": 3.2381546378652417, "learning_rate": 4.7581854269851975e-06, "loss": 1.0831, "step": 1428 }, { "epoch": 0.15028856139981858, "grad_norm": 3.0616456434518002, "learning_rate": 4.75782711024524e-06, "loss": 1.0218, "step": 1429 }, { "epoch": 0.1503937318416659, "grad_norm": 2.923899250348212, "learning_rate": 4.757468541740019e-06, "loss": 1.0323, "step": 1430 }, { "epoch": 0.15049890228351323, "grad_norm": 2.8260428144309735, "learning_rate": 4.75710972150952e-06, "loss": 1.0136, "step": 1431 }, { "epoch": 0.15060407272536053, "grad_norm": 2.9366911956981085, "learning_rate": 4.756750649593753e-06, "loss": 1.0514, "step": 1432 }, { "epoch": 0.15070924316720785, "grad_norm": 2.4211017023085635, "learning_rate": 4.756391326032757e-06, "loss": 1.0258, "step": 1433 }, { "epoch": 0.15081441360905518, "grad_norm": 2.819024865923823, "learning_rate": 4.756031750866601e-06, "loss": 1.0051, "step": 1434 }, { "epoch": 0.1509195840509025, "grad_norm": 3.1999851809479063, "learning_rate": 4.755671924135379e-06, "loss": 1.0053, "step": 1435 }, { "epoch": 0.15102475449274982, "grad_norm": 3.4482793978325272, "learning_rate": 4.755311845879214e-06, "loss": 1.0168, "step": 1436 }, { "epoch": 0.15112992493459712, "grad_norm": 3.2963820376471755, "learning_rate": 4.754951516138259e-06, "loss": 1.0435, "step": 1437 }, { "epoch": 0.15123509537644445, "grad_norm": 2.6226202887722674, "learning_rate": 4.754590934952692e-06, "loss": 1.0022, "step": 1438 }, { "epoch": 0.15134026581829177, "grad_norm": 4.119115148771308, "learning_rate": 4.754230102362723e-06, "loss": 1.0604, "step": 1439 }, { "epoch": 0.1514454362601391, "grad_norm": 3.3530136943675606, "learning_rate": 4.7538690184085845e-06, "loss": 1.0374, "step": 1440 }, { "epoch": 0.15155060670198642, "grad_norm": 3.0643118980396307, "learning_rate": 4.7535076831305425e-06, "loss": 1.0299, "step": 1441 }, { "epoch": 0.15165577714383371, "grad_norm": 2.6808250795383914, "learning_rate": 4.753146096568888e-06, "loss": 1.0616, "step": 1442 }, { "epoch": 0.15176094758568104, "grad_norm": 2.762417572989777, "learning_rate": 4.752784258763941e-06, "loss": 1.043, "step": 1443 }, { "epoch": 0.15186611802752836, "grad_norm": 2.069150811277505, "learning_rate": 4.752422169756048e-06, "loss": 0.9979, "step": 1444 }, { "epoch": 0.1519712884693757, "grad_norm": 2.952492663200309, "learning_rate": 4.7520598295855866e-06, "loss": 1.0638, "step": 1445 }, { "epoch": 0.152076458911223, "grad_norm": 2.4223572836685126, "learning_rate": 4.751697238292959e-06, "loss": 1.0165, "step": 1446 }, { "epoch": 0.1521816293530703, "grad_norm": 3.035954168307138, "learning_rate": 4.751334395918598e-06, "loss": 1.0296, "step": 1447 }, { "epoch": 0.15228679979491763, "grad_norm": 2.5050761120596396, "learning_rate": 4.7509713025029624e-06, "loss": 1.0469, "step": 1448 }, { "epoch": 0.15239197023676496, "grad_norm": 3.6719733529575693, "learning_rate": 4.750607958086541e-06, "loss": 1.0405, "step": 1449 }, { "epoch": 0.15249714067861228, "grad_norm": 2.122334436297666, "learning_rate": 4.750244362709848e-06, "loss": 1.0358, "step": 1450 }, { "epoch": 0.1526023111204596, "grad_norm": 3.307644507612065, "learning_rate": 4.749880516413428e-06, "loss": 1.0013, "step": 1451 }, { "epoch": 0.1527074815623069, "grad_norm": 2.26272280730536, "learning_rate": 4.749516419237854e-06, "loss": 1.0286, "step": 1452 }, { "epoch": 0.15281265200415423, "grad_norm": 3.4033816929506444, "learning_rate": 4.749152071223724e-06, "loss": 1.0483, "step": 1453 }, { "epoch": 0.15291782244600155, "grad_norm": 3.19081203769598, "learning_rate": 4.748787472411665e-06, "loss": 1.054, "step": 1454 }, { "epoch": 0.15302299288784887, "grad_norm": 2.863548719789538, "learning_rate": 4.748422622842335e-06, "loss": 1.0378, "step": 1455 }, { "epoch": 0.1531281633296962, "grad_norm": 2.918561339352643, "learning_rate": 4.748057522556415e-06, "loss": 1.064, "step": 1456 }, { "epoch": 0.1532333337715435, "grad_norm": 3.235112382704938, "learning_rate": 4.747692171594619e-06, "loss": 1.0154, "step": 1457 }, { "epoch": 0.15333850421339082, "grad_norm": 2.9715433832923392, "learning_rate": 4.747326569997684e-06, "loss": 0.9907, "step": 1458 }, { "epoch": 0.15344367465523814, "grad_norm": 3.7107017840666696, "learning_rate": 4.746960717806379e-06, "loss": 0.998, "step": 1459 }, { "epoch": 0.15354884509708547, "grad_norm": 3.1443639363904614, "learning_rate": 4.7465946150615e-06, "loss": 1.0366, "step": 1460 }, { "epoch": 0.1536540155389328, "grad_norm": 3.281239643947147, "learning_rate": 4.746228261803868e-06, "loss": 1.0366, "step": 1461 }, { "epoch": 0.1537591859807801, "grad_norm": 3.382772295417614, "learning_rate": 4.745861658074336e-06, "loss": 1.0344, "step": 1462 }, { "epoch": 0.1538643564226274, "grad_norm": 2.466197221139934, "learning_rate": 4.745494803913781e-06, "loss": 1.0028, "step": 1463 }, { "epoch": 0.15396952686447474, "grad_norm": 3.360579078456302, "learning_rate": 4.745127699363115e-06, "loss": 1.0104, "step": 1464 }, { "epoch": 0.15407469730632206, "grad_norm": 4.356696217441136, "learning_rate": 4.744760344463267e-06, "loss": 1.0674, "step": 1465 }, { "epoch": 0.15417986774816939, "grad_norm": 3.2306718414761044, "learning_rate": 4.744392739255203e-06, "loss": 1.0717, "step": 1466 }, { "epoch": 0.15428503819001668, "grad_norm": 2.2164328768086197, "learning_rate": 4.744024883779915e-06, "loss": 1.0049, "step": 1467 }, { "epoch": 0.154390208631864, "grad_norm": 2.932022946712518, "learning_rate": 4.74365677807842e-06, "loss": 1.0116, "step": 1468 }, { "epoch": 0.15449537907371133, "grad_norm": 2.590086012736109, "learning_rate": 4.743288422191764e-06, "loss": 1.0302, "step": 1469 }, { "epoch": 0.15460054951555866, "grad_norm": 2.1435708761875665, "learning_rate": 4.7429198161610225e-06, "loss": 1.0083, "step": 1470 }, { "epoch": 0.15470571995740598, "grad_norm": 3.630046810094376, "learning_rate": 4.7425509600272974e-06, "loss": 0.9893, "step": 1471 }, { "epoch": 0.1548108903992533, "grad_norm": 2.719889362421328, "learning_rate": 4.742181853831721e-06, "loss": 1.0572, "step": 1472 }, { "epoch": 0.1549160608411006, "grad_norm": 2.327018336326447, "learning_rate": 4.74181249761545e-06, "loss": 0.9962, "step": 1473 }, { "epoch": 0.15502123128294792, "grad_norm": 2.3785350704725112, "learning_rate": 4.74144289141967e-06, "loss": 0.9893, "step": 1474 }, { "epoch": 0.15512640172479525, "grad_norm": 2.59518918661727, "learning_rate": 4.741073035285595e-06, "loss": 0.9894, "step": 1475 }, { "epoch": 0.15523157216664257, "grad_norm": 3.4946347737501915, "learning_rate": 4.7407029292544675e-06, "loss": 1.0119, "step": 1476 }, { "epoch": 0.1553367426084899, "grad_norm": 2.6605925838001627, "learning_rate": 4.740332573367557e-06, "loss": 1.0098, "step": 1477 }, { "epoch": 0.1554419130503372, "grad_norm": 3.8403647907126643, "learning_rate": 4.739961967666161e-06, "loss": 1.0622, "step": 1478 }, { "epoch": 0.15554708349218452, "grad_norm": 2.4061249592344356, "learning_rate": 4.739591112191605e-06, "loss": 0.9919, "step": 1479 }, { "epoch": 0.15565225393403184, "grad_norm": 3.619438134390123, "learning_rate": 4.739220006985243e-06, "loss": 1.0495, "step": 1480 }, { "epoch": 0.15575742437587917, "grad_norm": 2.7851891041711814, "learning_rate": 4.738848652088454e-06, "loss": 1.0299, "step": 1481 }, { "epoch": 0.1558625948177265, "grad_norm": 2.975714954933627, "learning_rate": 4.738477047542649e-06, "loss": 1.0205, "step": 1482 }, { "epoch": 0.1559677652595738, "grad_norm": 3.878474331685192, "learning_rate": 4.738105193389264e-06, "loss": 0.988, "step": 1483 }, { "epoch": 0.1560729357014211, "grad_norm": 3.449910786924835, "learning_rate": 4.737733089669764e-06, "loss": 1.0652, "step": 1484 }, { "epoch": 0.15617810614326844, "grad_norm": 2.3712070448593803, "learning_rate": 4.737360736425641e-06, "loss": 1.0411, "step": 1485 }, { "epoch": 0.15628327658511576, "grad_norm": 2.6454413280340874, "learning_rate": 4.736988133698416e-06, "loss": 1.0204, "step": 1486 }, { "epoch": 0.15638844702696308, "grad_norm": 3.004156559443189, "learning_rate": 4.736615281529635e-06, "loss": 1.0051, "step": 1487 }, { "epoch": 0.15649361746881038, "grad_norm": 3.7915743827388186, "learning_rate": 4.736242179960877e-06, "loss": 1.0261, "step": 1488 }, { "epoch": 0.1565987879106577, "grad_norm": 3.602538359343123, "learning_rate": 4.735868829033744e-06, "loss": 1.0642, "step": 1489 }, { "epoch": 0.15670395835250503, "grad_norm": 1.966550689023779, "learning_rate": 4.735495228789867e-06, "loss": 1.0363, "step": 1490 }, { "epoch": 0.15680912879435235, "grad_norm": 2.511000996166126, "learning_rate": 4.735121379270907e-06, "loss": 1.025, "step": 1491 }, { "epoch": 0.15691429923619968, "grad_norm": 2.563979152422195, "learning_rate": 4.734747280518549e-06, "loss": 1.0361, "step": 1492 }, { "epoch": 0.15701946967804697, "grad_norm": 2.2916478990627946, "learning_rate": 4.73437293257451e-06, "loss": 1.0462, "step": 1493 }, { "epoch": 0.1571246401198943, "grad_norm": 2.2807919186264436, "learning_rate": 4.733998335480532e-06, "loss": 1.0184, "step": 1494 }, { "epoch": 0.15722981056174162, "grad_norm": 2.7351488990346535, "learning_rate": 4.733623489278385e-06, "loss": 1.0148, "step": 1495 }, { "epoch": 0.15733498100358895, "grad_norm": 2.893572157477807, "learning_rate": 4.733248394009867e-06, "loss": 1.0028, "step": 1496 }, { "epoch": 0.15744015144543627, "grad_norm": 2.6060141894891595, "learning_rate": 4.732873049716805e-06, "loss": 0.998, "step": 1497 }, { "epoch": 0.15754532188728357, "grad_norm": 2.7708906663010313, "learning_rate": 4.732497456441052e-06, "loss": 1.0671, "step": 1498 }, { "epoch": 0.1576504923291309, "grad_norm": 3.472148650720611, "learning_rate": 4.732121614224491e-06, "loss": 1.0178, "step": 1499 }, { "epoch": 0.15775566277097822, "grad_norm": 2.5207390547929003, "learning_rate": 4.731745523109029e-06, "loss": 1.0238, "step": 1500 }, { "epoch": 0.15786083321282554, "grad_norm": 3.4771528545364316, "learning_rate": 4.731369183136605e-06, "loss": 1.0706, "step": 1501 }, { "epoch": 0.15796600365467287, "grad_norm": 2.65521118845941, "learning_rate": 4.730992594349183e-06, "loss": 1.0402, "step": 1502 }, { "epoch": 0.15807117409652016, "grad_norm": 3.10957683893925, "learning_rate": 4.730615756788756e-06, "loss": 0.9857, "step": 1503 }, { "epoch": 0.15817634453836749, "grad_norm": 2.6542173527792463, "learning_rate": 4.730238670497345e-06, "loss": 1.0336, "step": 1504 }, { "epoch": 0.1582815149802148, "grad_norm": 3.77211681230834, "learning_rate": 4.729861335516995e-06, "loss": 1.0299, "step": 1505 }, { "epoch": 0.15838668542206213, "grad_norm": 2.6411474196043727, "learning_rate": 4.7294837518897855e-06, "loss": 1.0441, "step": 1506 }, { "epoch": 0.15849185586390946, "grad_norm": 2.99873384055499, "learning_rate": 4.729105919657818e-06, "loss": 1.0002, "step": 1507 }, { "epoch": 0.15859702630575676, "grad_norm": 2.8324948060160904, "learning_rate": 4.728727838863224e-06, "loss": 1.0513, "step": 1508 }, { "epoch": 0.15870219674760408, "grad_norm": 2.455839993218039, "learning_rate": 4.728349509548163e-06, "loss": 1.0082, "step": 1509 }, { "epoch": 0.1588073671894514, "grad_norm": 3.5575923268239027, "learning_rate": 4.7279709317548215e-06, "loss": 1.0516, "step": 1510 }, { "epoch": 0.15891253763129873, "grad_norm": 2.940216951551406, "learning_rate": 4.727592105525413e-06, "loss": 1.0099, "step": 1511 }, { "epoch": 0.15901770807314605, "grad_norm": 2.8061910489333397, "learning_rate": 4.72721303090218e-06, "loss": 1.0284, "step": 1512 }, { "epoch": 0.15912287851499335, "grad_norm": 2.8488993729192567, "learning_rate": 4.726833707927393e-06, "loss": 1.0029, "step": 1513 }, { "epoch": 0.15922804895684067, "grad_norm": 2.3760044861779015, "learning_rate": 4.7264541366433495e-06, "loss": 1.064, "step": 1514 }, { "epoch": 0.159333219398688, "grad_norm": 3.03368894409861, "learning_rate": 4.726074317092373e-06, "loss": 1.057, "step": 1515 }, { "epoch": 0.15943838984053532, "grad_norm": 2.6274139546172273, "learning_rate": 4.7256942493168176e-06, "loss": 1.0468, "step": 1516 }, { "epoch": 0.15954356028238265, "grad_norm": 2.5208665813866533, "learning_rate": 4.725313933359064e-06, "loss": 1.01, "step": 1517 }, { "epoch": 0.15964873072422994, "grad_norm": 3.0144732325896313, "learning_rate": 4.724933369261519e-06, "loss": 1.0093, "step": 1518 }, { "epoch": 0.15975390116607727, "grad_norm": 3.3250090964686274, "learning_rate": 4.72455255706662e-06, "loss": 0.9936, "step": 1519 }, { "epoch": 0.1598590716079246, "grad_norm": 3.306440325363034, "learning_rate": 4.724171496816831e-06, "loss": 1.002, "step": 1520 }, { "epoch": 0.15996424204977192, "grad_norm": 2.978656372246788, "learning_rate": 4.7237901885546405e-06, "loss": 1.0175, "step": 1521 }, { "epoch": 0.16006941249161924, "grad_norm": 3.4879449084728056, "learning_rate": 4.72340863232257e-06, "loss": 1.05, "step": 1522 }, { "epoch": 0.16017458293346654, "grad_norm": 2.7810346030912405, "learning_rate": 4.723026828163164e-06, "loss": 1.0297, "step": 1523 }, { "epoch": 0.16027975337531386, "grad_norm": 2.2616816284442205, "learning_rate": 4.722644776118999e-06, "loss": 1.0186, "step": 1524 }, { "epoch": 0.16038492381716118, "grad_norm": 2.729508624247899, "learning_rate": 4.722262476232674e-06, "loss": 1.0272, "step": 1525 }, { "epoch": 0.1604900942590085, "grad_norm": 3.219956094139774, "learning_rate": 4.72187992854682e-06, "loss": 1.0983, "step": 1526 }, { "epoch": 0.16059526470085583, "grad_norm": 3.9250885931506914, "learning_rate": 4.7214971331040945e-06, "loss": 1.0105, "step": 1527 }, { "epoch": 0.16070043514270313, "grad_norm": 4.60638365380386, "learning_rate": 4.721114089947181e-06, "loss": 1.0343, "step": 1528 }, { "epoch": 0.16080560558455045, "grad_norm": 4.087070072305575, "learning_rate": 4.720730799118792e-06, "loss": 1.072, "step": 1529 }, { "epoch": 0.16091077602639778, "grad_norm": 2.114182974470423, "learning_rate": 4.7203472606616685e-06, "loss": 1.0551, "step": 1530 }, { "epoch": 0.1610159464682451, "grad_norm": 2.7564719333605305, "learning_rate": 4.719963474618576e-06, "loss": 1.0046, "step": 1531 }, { "epoch": 0.16112111691009243, "grad_norm": 2.194526925536452, "learning_rate": 4.719579441032312e-06, "loss": 0.9904, "step": 1532 }, { "epoch": 0.16122628735193975, "grad_norm": 3.069579977353765, "learning_rate": 4.7191951599456974e-06, "loss": 1.0293, "step": 1533 }, { "epoch": 0.16133145779378705, "grad_norm": 2.6614677051330355, "learning_rate": 4.718810631401584e-06, "loss": 0.969, "step": 1534 }, { "epoch": 0.16143662823563437, "grad_norm": 2.0239894866349384, "learning_rate": 4.718425855442847e-06, "loss": 1.0166, "step": 1535 }, { "epoch": 0.1615417986774817, "grad_norm": 2.6814734955623747, "learning_rate": 4.718040832112396e-06, "loss": 0.9991, "step": 1536 }, { "epoch": 0.16164696911932902, "grad_norm": 3.3899909411502156, "learning_rate": 4.717655561453161e-06, "loss": 1.0229, "step": 1537 }, { "epoch": 0.16175213956117634, "grad_norm": 3.6416888404436514, "learning_rate": 4.7172700435081024e-06, "loss": 1.0336, "step": 1538 }, { "epoch": 0.16185731000302364, "grad_norm": 3.297177936886574, "learning_rate": 4.71688427832021e-06, "loss": 1.0382, "step": 1539 }, { "epoch": 0.16196248044487097, "grad_norm": 2.9251719307297983, "learning_rate": 4.716498265932501e-06, "loss": 1.0379, "step": 1540 }, { "epoch": 0.1620676508867183, "grad_norm": 3.1867271581724332, "learning_rate": 4.716112006388015e-06, "loss": 1.0093, "step": 1541 }, { "epoch": 0.16217282132856561, "grad_norm": 2.7382289471102927, "learning_rate": 4.715725499729826e-06, "loss": 1.0596, "step": 1542 }, { "epoch": 0.16227799177041294, "grad_norm": 2.482681348080373, "learning_rate": 4.715338746001031e-06, "loss": 1.0241, "step": 1543 }, { "epoch": 0.16238316221226023, "grad_norm": 3.321185423836697, "learning_rate": 4.7149517452447565e-06, "loss": 1.065, "step": 1544 }, { "epoch": 0.16248833265410756, "grad_norm": 2.2553546129954034, "learning_rate": 4.714564497504156e-06, "loss": 1.0071, "step": 1545 }, { "epoch": 0.16259350309595488, "grad_norm": 2.9289085282753473, "learning_rate": 4.714177002822411e-06, "loss": 1.0756, "step": 1546 }, { "epoch": 0.1626986735378022, "grad_norm": 2.6229389236880514, "learning_rate": 4.7137892612427296e-06, "loss": 0.9867, "step": 1547 }, { "epoch": 0.16280384397964953, "grad_norm": 2.5687055044071756, "learning_rate": 4.713401272808348e-06, "loss": 1.0312, "step": 1548 }, { "epoch": 0.16290901442149683, "grad_norm": 3.075733116731394, "learning_rate": 4.713013037562531e-06, "loss": 0.999, "step": 1549 }, { "epoch": 0.16301418486334415, "grad_norm": 2.4489321077973316, "learning_rate": 4.712624555548568e-06, "loss": 1.0424, "step": 1550 }, { "epoch": 0.16311935530519148, "grad_norm": 2.786568331849303, "learning_rate": 4.712235826809779e-06, "loss": 1.0076, "step": 1551 }, { "epoch": 0.1632245257470388, "grad_norm": 4.261177829125735, "learning_rate": 4.711846851389511e-06, "loss": 1.055, "step": 1552 }, { "epoch": 0.16332969618888613, "grad_norm": 3.0252873549859878, "learning_rate": 4.711457629331136e-06, "loss": 1.0215, "step": 1553 }, { "epoch": 0.16343486663073342, "grad_norm": 2.127523638438989, "learning_rate": 4.711068160678056e-06, "loss": 1.035, "step": 1554 }, { "epoch": 0.16354003707258075, "grad_norm": 3.1748982619719097, "learning_rate": 4.710678445473701e-06, "loss": 1.0526, "step": 1555 }, { "epoch": 0.16364520751442807, "grad_norm": 3.0204839143683087, "learning_rate": 4.710288483761524e-06, "loss": 1.0283, "step": 1556 }, { "epoch": 0.1637503779562754, "grad_norm": 2.5309450768958883, "learning_rate": 4.709898275585013e-06, "loss": 1.0435, "step": 1557 }, { "epoch": 0.16385554839812272, "grad_norm": 2.9972306044536636, "learning_rate": 4.709507820987676e-06, "loss": 0.9908, "step": 1558 }, { "epoch": 0.16396071883997002, "grad_norm": 3.6133112485444188, "learning_rate": 4.709117120013054e-06, "loss": 1.0428, "step": 1559 }, { "epoch": 0.16406588928181734, "grad_norm": 2.2177026935577637, "learning_rate": 4.708726172704712e-06, "loss": 1.0373, "step": 1560 }, { "epoch": 0.16417105972366466, "grad_norm": 3.665100322013887, "learning_rate": 4.708334979106243e-06, "loss": 1.017, "step": 1561 }, { "epoch": 0.164276230165512, "grad_norm": 2.120634042862793, "learning_rate": 4.707943539261269e-06, "loss": 1.0423, "step": 1562 }, { "epoch": 0.1643814006073593, "grad_norm": 3.3096318778798346, "learning_rate": 4.707551853213439e-06, "loss": 1.0324, "step": 1563 }, { "epoch": 0.1644865710492066, "grad_norm": 2.722439105462851, "learning_rate": 4.7071599210064275e-06, "loss": 1.0437, "step": 1564 }, { "epoch": 0.16459174149105393, "grad_norm": 2.9680950262706105, "learning_rate": 4.70676774268394e-06, "loss": 1.0011, "step": 1565 }, { "epoch": 0.16469691193290126, "grad_norm": 2.7946642826043, "learning_rate": 4.706375318289706e-06, "loss": 0.9863, "step": 1566 }, { "epoch": 0.16480208237474858, "grad_norm": 3.008921981513297, "learning_rate": 4.705982647867484e-06, "loss": 1.0011, "step": 1567 }, { "epoch": 0.1649072528165959, "grad_norm": 2.438967195615608, "learning_rate": 4.705589731461061e-06, "loss": 1.0553, "step": 1568 }, { "epoch": 0.1650124232584432, "grad_norm": 2.0590569324727492, "learning_rate": 4.705196569114248e-06, "loss": 1.0026, "step": 1569 }, { "epoch": 0.16511759370029053, "grad_norm": 2.751042822546601, "learning_rate": 4.704803160870888e-06, "loss": 1.0228, "step": 1570 }, { "epoch": 0.16522276414213785, "grad_norm": 1.9114644938599432, "learning_rate": 4.704409506774848e-06, "loss": 1.0235, "step": 1571 }, { "epoch": 0.16532793458398518, "grad_norm": 2.7725485760210433, "learning_rate": 4.7040156068700225e-06, "loss": 1.039, "step": 1572 }, { "epoch": 0.1654331050258325, "grad_norm": 3.3207748456848956, "learning_rate": 4.703621461200337e-06, "loss": 1.0551, "step": 1573 }, { "epoch": 0.1655382754676798, "grad_norm": 2.2142667900370805, "learning_rate": 4.703227069809739e-06, "loss": 1.0284, "step": 1574 }, { "epoch": 0.16564344590952712, "grad_norm": 2.4762983701232666, "learning_rate": 4.702832432742208e-06, "loss": 1.0301, "step": 1575 }, { "epoch": 0.16574861635137444, "grad_norm": 2.7474342790405464, "learning_rate": 4.702437550041749e-06, "loss": 1.0002, "step": 1576 }, { "epoch": 0.16585378679322177, "grad_norm": 2.490632975494078, "learning_rate": 4.702042421752393e-06, "loss": 0.9741, "step": 1577 }, { "epoch": 0.1659589572350691, "grad_norm": 2.7515934794089683, "learning_rate": 4.701647047918202e-06, "loss": 1.0435, "step": 1578 }, { "epoch": 0.1660641276769164, "grad_norm": 2.3948584711688916, "learning_rate": 4.701251428583261e-06, "loss": 1.0609, "step": 1579 }, { "epoch": 0.16616929811876371, "grad_norm": 2.792702623447517, "learning_rate": 4.700855563791686e-06, "loss": 1.0161, "step": 1580 }, { "epoch": 0.16627446856061104, "grad_norm": 2.81166121024242, "learning_rate": 4.700459453587619e-06, "loss": 1.0005, "step": 1581 }, { "epoch": 0.16637963900245836, "grad_norm": 3.2253366610207723, "learning_rate": 4.70006309801523e-06, "loss": 1.0329, "step": 1582 }, { "epoch": 0.1664848094443057, "grad_norm": 3.306319989253321, "learning_rate": 4.699666497118714e-06, "loss": 1.0728, "step": 1583 }, { "epoch": 0.16658997988615298, "grad_norm": 2.3503420314034082, "learning_rate": 4.699269650942296e-06, "loss": 1.006, "step": 1584 }, { "epoch": 0.1666951503280003, "grad_norm": 2.8744464659676634, "learning_rate": 4.6988725595302275e-06, "loss": 1.06, "step": 1585 }, { "epoch": 0.16680032076984763, "grad_norm": 3.3878922195539247, "learning_rate": 4.698475222926788e-06, "loss": 1.0133, "step": 1586 }, { "epoch": 0.16690549121169496, "grad_norm": 2.9490960578463747, "learning_rate": 4.698077641176282e-06, "loss": 0.9812, "step": 1587 }, { "epoch": 0.16701066165354228, "grad_norm": 3.898835572720005, "learning_rate": 4.697679814323044e-06, "loss": 1.0266, "step": 1588 }, { "epoch": 0.16711583209538958, "grad_norm": 2.4198826439223278, "learning_rate": 4.6972817424114335e-06, "loss": 0.9853, "step": 1589 }, { "epoch": 0.1672210025372369, "grad_norm": 3.2157863216591065, "learning_rate": 4.6968834254858405e-06, "loss": 0.9761, "step": 1590 }, { "epoch": 0.16732617297908423, "grad_norm": 3.42945668773291, "learning_rate": 4.69648486359068e-06, "loss": 1.0218, "step": 1591 }, { "epoch": 0.16743134342093155, "grad_norm": 3.8221661628298316, "learning_rate": 4.6960860567703935e-06, "loss": 1.0389, "step": 1592 }, { "epoch": 0.16753651386277887, "grad_norm": 2.5084187696585563, "learning_rate": 4.6956870050694524e-06, "loss": 1.046, "step": 1593 }, { "epoch": 0.1676416843046262, "grad_norm": 2.2140737319132415, "learning_rate": 4.695287708532353e-06, "loss": 1.0125, "step": 1594 }, { "epoch": 0.1677468547464735, "grad_norm": 2.906928468843093, "learning_rate": 4.6948881672036205e-06, "loss": 1.0367, "step": 1595 }, { "epoch": 0.16785202518832082, "grad_norm": 2.9231721544834257, "learning_rate": 4.694488381127808e-06, "loss": 1.0004, "step": 1596 }, { "epoch": 0.16795719563016814, "grad_norm": 2.2518787810723015, "learning_rate": 4.6940883503494925e-06, "loss": 1.0029, "step": 1597 }, { "epoch": 0.16806236607201547, "grad_norm": 2.75505444461088, "learning_rate": 4.693688074913282e-06, "loss": 1.0651, "step": 1598 }, { "epoch": 0.1681675365138628, "grad_norm": 2.9638750280320094, "learning_rate": 4.69328755486381e-06, "loss": 0.981, "step": 1599 }, { "epoch": 0.1682727069557101, "grad_norm": 2.2358315107683038, "learning_rate": 4.692886790245738e-06, "loss": 1.0402, "step": 1600 }, { "epoch": 0.1683778773975574, "grad_norm": 2.554442640835414, "learning_rate": 4.692485781103753e-06, "loss": 0.9991, "step": 1601 }, { "epoch": 0.16848304783940474, "grad_norm": 3.6587203735508886, "learning_rate": 4.692084527482572e-06, "loss": 1.0296, "step": 1602 }, { "epoch": 0.16858821828125206, "grad_norm": 2.874856178936171, "learning_rate": 4.691683029426938e-06, "loss": 1.034, "step": 1603 }, { "epoch": 0.16869338872309939, "grad_norm": 2.0504221476945466, "learning_rate": 4.691281286981619e-06, "loss": 1.0412, "step": 1604 }, { "epoch": 0.16879855916494668, "grad_norm": 4.299453983662296, "learning_rate": 4.690879300191416e-06, "loss": 1.0454, "step": 1605 }, { "epoch": 0.168903729606794, "grad_norm": 3.0645533844066875, "learning_rate": 4.690477069101151e-06, "loss": 1.0127, "step": 1606 }, { "epoch": 0.16900890004864133, "grad_norm": 3.8020279455543893, "learning_rate": 4.690074593755676e-06, "loss": 1.0422, "step": 1607 }, { "epoch": 0.16911407049048865, "grad_norm": 3.0926763271379034, "learning_rate": 4.689671874199871e-06, "loss": 0.9955, "step": 1608 }, { "epoch": 0.16921924093233598, "grad_norm": 3.0460948789795594, "learning_rate": 4.6892689104786425e-06, "loss": 0.9942, "step": 1609 }, { "epoch": 0.16932441137418328, "grad_norm": 2.562365194025863, "learning_rate": 4.6888657026369235e-06, "loss": 0.9697, "step": 1610 }, { "epoch": 0.1694295818160306, "grad_norm": 2.8814563767503456, "learning_rate": 4.688462250719675e-06, "loss": 1.0186, "step": 1611 }, { "epoch": 0.16953475225787792, "grad_norm": 3.0019075219247844, "learning_rate": 4.6880585547718845e-06, "loss": 1.0168, "step": 1612 }, { "epoch": 0.16963992269972525, "grad_norm": 3.3566378728550226, "learning_rate": 4.6876546148385685e-06, "loss": 1.0138, "step": 1613 }, { "epoch": 0.16974509314157257, "grad_norm": 3.815106274251855, "learning_rate": 4.687250430964768e-06, "loss": 1.0534, "step": 1614 }, { "epoch": 0.16985026358341987, "grad_norm": 3.955757242671359, "learning_rate": 4.6868460031955535e-06, "loss": 1.0462, "step": 1615 }, { "epoch": 0.1699554340252672, "grad_norm": 3.1546888936551762, "learning_rate": 4.686441331576021e-06, "loss": 0.9697, "step": 1616 }, { "epoch": 0.17006060446711452, "grad_norm": 2.987499231967973, "learning_rate": 4.686036416151296e-06, "loss": 1.0134, "step": 1617 }, { "epoch": 0.17016577490896184, "grad_norm": 3.2641730627495567, "learning_rate": 4.6856312569665285e-06, "loss": 1.0178, "step": 1618 }, { "epoch": 0.17027094535080917, "grad_norm": 2.5005821127937096, "learning_rate": 4.685225854066897e-06, "loss": 1.0002, "step": 1619 }, { "epoch": 0.17037611579265646, "grad_norm": 2.3448731740329434, "learning_rate": 4.684820207497608e-06, "loss": 1.0644, "step": 1620 }, { "epoch": 0.1704812862345038, "grad_norm": 3.060151169504036, "learning_rate": 4.684414317303894e-06, "loss": 1.0934, "step": 1621 }, { "epoch": 0.1705864566763511, "grad_norm": 2.00150049947049, "learning_rate": 4.6840081835310135e-06, "loss": 1.0504, "step": 1622 }, { "epoch": 0.17069162711819844, "grad_norm": 2.708844063331739, "learning_rate": 4.683601806224255e-06, "loss": 1.0163, "step": 1623 }, { "epoch": 0.17079679756004576, "grad_norm": 3.4472196380160978, "learning_rate": 4.683195185428932e-06, "loss": 0.9911, "step": 1624 }, { "epoch": 0.17090196800189306, "grad_norm": 2.8820764062472026, "learning_rate": 4.6827883211903865e-06, "loss": 1.0349, "step": 1625 }, { "epoch": 0.17100713844374038, "grad_norm": 2.2574433822521067, "learning_rate": 4.682381213553986e-06, "loss": 1.0159, "step": 1626 }, { "epoch": 0.1711123088855877, "grad_norm": 3.089754095729008, "learning_rate": 4.681973862565128e-06, "loss": 1.0047, "step": 1627 }, { "epoch": 0.17121747932743503, "grad_norm": 3.057788643389871, "learning_rate": 4.681566268269233e-06, "loss": 1.0697, "step": 1628 }, { "epoch": 0.17132264976928235, "grad_norm": 2.5667766260752085, "learning_rate": 4.681158430711753e-06, "loss": 1.0402, "step": 1629 }, { "epoch": 0.17142782021112965, "grad_norm": 2.9763268275620116, "learning_rate": 4.680750349938164e-06, "loss": 1.0168, "step": 1630 }, { "epoch": 0.17153299065297697, "grad_norm": 2.9070930547134566, "learning_rate": 4.68034202599397e-06, "loss": 1.0487, "step": 1631 }, { "epoch": 0.1716381610948243, "grad_norm": 2.6307238317161494, "learning_rate": 4.679933458924702e-06, "loss": 1.0303, "step": 1632 }, { "epoch": 0.17174333153667162, "grad_norm": 3.6659807629189913, "learning_rate": 4.67952464877592e-06, "loss": 0.9981, "step": 1633 }, { "epoch": 0.17184850197851895, "grad_norm": 2.736348178417095, "learning_rate": 4.679115595593208e-06, "loss": 0.9939, "step": 1634 }, { "epoch": 0.17195367242036624, "grad_norm": 3.0409494428632993, "learning_rate": 4.67870629942218e-06, "loss": 1.0585, "step": 1635 }, { "epoch": 0.17205884286221357, "grad_norm": 2.4454258516009593, "learning_rate": 4.678296760308474e-06, "loss": 1.0395, "step": 1636 }, { "epoch": 0.1721640133040609, "grad_norm": 2.612925874097498, "learning_rate": 4.677886978297758e-06, "loss": 0.9964, "step": 1637 }, { "epoch": 0.17226918374590822, "grad_norm": 3.0527819045438793, "learning_rate": 4.677476953435725e-06, "loss": 1.0306, "step": 1638 }, { "epoch": 0.17237435418775554, "grad_norm": 3.7722857272845616, "learning_rate": 4.677066685768097e-06, "loss": 1.0256, "step": 1639 }, { "epoch": 0.17247952462960284, "grad_norm": 4.671732853843867, "learning_rate": 4.676656175340621e-06, "loss": 1.0539, "step": 1640 }, { "epoch": 0.17258469507145016, "grad_norm": 3.402158905492146, "learning_rate": 4.676245422199073e-06, "loss": 1.041, "step": 1641 }, { "epoch": 0.17268986551329749, "grad_norm": 2.7043484924050913, "learning_rate": 4.675834426389254e-06, "loss": 0.9748, "step": 1642 }, { "epoch": 0.1727950359551448, "grad_norm": 3.045874999700864, "learning_rate": 4.675423187956995e-06, "loss": 1.0199, "step": 1643 }, { "epoch": 0.17290020639699213, "grad_norm": 2.0391498549902325, "learning_rate": 4.675011706948151e-06, "loss": 0.9807, "step": 1644 }, { "epoch": 0.17300537683883943, "grad_norm": 2.8730116912157517, "learning_rate": 4.674599983408605e-06, "loss": 1.0442, "step": 1645 }, { "epoch": 0.17311054728068676, "grad_norm": 3.6137100268246387, "learning_rate": 4.674188017384269e-06, "loss": 1.0149, "step": 1646 }, { "epoch": 0.17321571772253408, "grad_norm": 3.388019574947697, "learning_rate": 4.673775808921078e-06, "loss": 1.0567, "step": 1647 }, { "epoch": 0.1733208881643814, "grad_norm": 2.7619468248957775, "learning_rate": 4.673363358065e-06, "loss": 0.9992, "step": 1648 }, { "epoch": 0.17342605860622873, "grad_norm": 3.196115448528693, "learning_rate": 4.672950664862022e-06, "loss": 1.0359, "step": 1649 }, { "epoch": 0.17353122904807602, "grad_norm": 3.1251623630514054, "learning_rate": 4.672537729358166e-06, "loss": 1.0346, "step": 1650 }, { "epoch": 0.17363639948992335, "grad_norm": 2.3482592044116024, "learning_rate": 4.672124551599476e-06, "loss": 0.99, "step": 1651 }, { "epoch": 0.17374156993177067, "grad_norm": 3.2990493587317995, "learning_rate": 4.671711131632025e-06, "loss": 1.0203, "step": 1652 }, { "epoch": 0.173846740373618, "grad_norm": 2.2221626940474923, "learning_rate": 4.671297469501912e-06, "loss": 1.0141, "step": 1653 }, { "epoch": 0.17395191081546532, "grad_norm": 3.4954782095000776, "learning_rate": 4.670883565255264e-06, "loss": 1.003, "step": 1654 }, { "epoch": 0.17405708125731265, "grad_norm": 4.077759847846728, "learning_rate": 4.670469418938235e-06, "loss": 1.04, "step": 1655 }, { "epoch": 0.17416225169915994, "grad_norm": 3.7217727086743913, "learning_rate": 4.670055030597004e-06, "loss": 1.0416, "step": 1656 }, { "epoch": 0.17426742214100727, "grad_norm": 2.924757634673209, "learning_rate": 4.66964040027778e-06, "loss": 1.0284, "step": 1657 }, { "epoch": 0.1743725925828546, "grad_norm": 2.5219450852454037, "learning_rate": 4.669225528026797e-06, "loss": 1.0186, "step": 1658 }, { "epoch": 0.17447776302470192, "grad_norm": 2.9803154223037223, "learning_rate": 4.668810413890318e-06, "loss": 1.0058, "step": 1659 }, { "epoch": 0.17458293346654924, "grad_norm": 2.8822355518190004, "learning_rate": 4.668395057914627e-06, "loss": 1.0453, "step": 1660 }, { "epoch": 0.17468810390839654, "grad_norm": 3.6402941948880283, "learning_rate": 4.667979460146045e-06, "loss": 1.0375, "step": 1661 }, { "epoch": 0.17479327435024386, "grad_norm": 2.5715274816378746, "learning_rate": 4.6675636206309105e-06, "loss": 1.013, "step": 1662 }, { "epoch": 0.17489844479209118, "grad_norm": 3.0432398948655734, "learning_rate": 4.667147539415594e-06, "loss": 1.0241, "step": 1663 }, { "epoch": 0.1750036152339385, "grad_norm": 3.395961606680768, "learning_rate": 4.666731216546492e-06, "loss": 0.9814, "step": 1664 }, { "epoch": 0.17510878567578583, "grad_norm": 2.863033871229343, "learning_rate": 4.6663146520700275e-06, "loss": 1.0259, "step": 1665 }, { "epoch": 0.17521395611763313, "grad_norm": 2.905956162367102, "learning_rate": 4.665897846032651e-06, "loss": 1.0223, "step": 1666 }, { "epoch": 0.17531912655948045, "grad_norm": 2.9044821170497563, "learning_rate": 4.66548079848084e-06, "loss": 1.0568, "step": 1667 }, { "epoch": 0.17542429700132778, "grad_norm": 2.5251868732416978, "learning_rate": 4.665063509461098e-06, "loss": 1.0291, "step": 1668 }, { "epoch": 0.1755294674431751, "grad_norm": 1.9745812484711869, "learning_rate": 4.664645979019954e-06, "loss": 1.0088, "step": 1669 }, { "epoch": 0.17563463788502243, "grad_norm": 2.9571167671583933, "learning_rate": 4.6642282072039694e-06, "loss": 1.038, "step": 1670 }, { "epoch": 0.17573980832686972, "grad_norm": 3.162873132329408, "learning_rate": 4.663810194059727e-06, "loss": 1.039, "step": 1671 }, { "epoch": 0.17584497876871705, "grad_norm": 2.707745488716953, "learning_rate": 4.663391939633839e-06, "loss": 1.0412, "step": 1672 }, { "epoch": 0.17595014921056437, "grad_norm": 2.390831077180832, "learning_rate": 4.662973443972943e-06, "loss": 1.0217, "step": 1673 }, { "epoch": 0.1760553196524117, "grad_norm": 2.6697672577896103, "learning_rate": 4.662554707123707e-06, "loss": 0.9758, "step": 1674 }, { "epoch": 0.17616049009425902, "grad_norm": 2.5536432355735417, "learning_rate": 4.662135729132821e-06, "loss": 1.0093, "step": 1675 }, { "epoch": 0.17626566053610632, "grad_norm": 4.454966321168614, "learning_rate": 4.661716510047005e-06, "loss": 1.0656, "step": 1676 }, { "epoch": 0.17637083097795364, "grad_norm": 2.9059225841826586, "learning_rate": 4.661297049913005e-06, "loss": 1.054, "step": 1677 }, { "epoch": 0.17647600141980097, "grad_norm": 2.7078515707870063, "learning_rate": 4.660877348777595e-06, "loss": 0.9475, "step": 1678 }, { "epoch": 0.1765811718616483, "grad_norm": 2.714819257387345, "learning_rate": 4.660457406687574e-06, "loss": 1.008, "step": 1679 }, { "epoch": 0.1766863423034956, "grad_norm": 3.560183070646618, "learning_rate": 4.66003722368977e-06, "loss": 1.0123, "step": 1680 }, { "epoch": 0.1767915127453429, "grad_norm": 4.011571407375856, "learning_rate": 4.659616799831035e-06, "loss": 1.0113, "step": 1681 }, { "epoch": 0.17689668318719023, "grad_norm": 3.512773712732269, "learning_rate": 4.659196135158251e-06, "loss": 1.0847, "step": 1682 }, { "epoch": 0.17700185362903756, "grad_norm": 3.5710928774903783, "learning_rate": 4.658775229718323e-06, "loss": 0.9836, "step": 1683 }, { "epoch": 0.17710702407088488, "grad_norm": 2.576288670084694, "learning_rate": 4.6583540835581885e-06, "loss": 0.9977, "step": 1684 }, { "epoch": 0.1772121945127322, "grad_norm": 2.67065462027682, "learning_rate": 4.657932696724807e-06, "loss": 1.0019, "step": 1685 }, { "epoch": 0.1773173649545795, "grad_norm": 3.0304864854926286, "learning_rate": 4.657511069265166e-06, "loss": 1.0513, "step": 1686 }, { "epoch": 0.17742253539642683, "grad_norm": 2.756757768961673, "learning_rate": 4.6570892012262806e-06, "loss": 1.055, "step": 1687 }, { "epoch": 0.17752770583827415, "grad_norm": 3.3322689645027306, "learning_rate": 4.656667092655192e-06, "loss": 1.0246, "step": 1688 }, { "epoch": 0.17763287628012148, "grad_norm": 2.51855374758776, "learning_rate": 4.65624474359897e-06, "loss": 0.9991, "step": 1689 }, { "epoch": 0.1777380467219688, "grad_norm": 1.793782237595271, "learning_rate": 4.655822154104708e-06, "loss": 0.9936, "step": 1690 }, { "epoch": 0.1778432171638161, "grad_norm": 2.06409532213173, "learning_rate": 4.655399324219529e-06, "loss": 1.0011, "step": 1691 }, { "epoch": 0.17794838760566342, "grad_norm": 3.1724819528325843, "learning_rate": 4.654976253990582e-06, "loss": 1.0609, "step": 1692 }, { "epoch": 0.17805355804751075, "grad_norm": 3.173700150317756, "learning_rate": 4.654552943465042e-06, "loss": 1.0357, "step": 1693 }, { "epoch": 0.17815872848935807, "grad_norm": 2.630712209918402, "learning_rate": 4.654129392690111e-06, "loss": 1.0445, "step": 1694 }, { "epoch": 0.1782638989312054, "grad_norm": 3.195364242863823, "learning_rate": 4.653705601713019e-06, "loss": 1.0546, "step": 1695 }, { "epoch": 0.1783690693730527, "grad_norm": 2.9685233327726737, "learning_rate": 4.653281570581023e-06, "loss": 0.9853, "step": 1696 }, { "epoch": 0.17847423981490002, "grad_norm": 2.683882737708656, "learning_rate": 4.6528572993414036e-06, "loss": 1.0162, "step": 1697 }, { "epoch": 0.17857941025674734, "grad_norm": 3.4527829667595067, "learning_rate": 4.652432788041471e-06, "loss": 1.0187, "step": 1698 }, { "epoch": 0.17868458069859466, "grad_norm": 3.5173119907210735, "learning_rate": 4.652008036728563e-06, "loss": 1.0463, "step": 1699 }, { "epoch": 0.178789751140442, "grad_norm": 3.388433179291527, "learning_rate": 4.651583045450041e-06, "loss": 1.0248, "step": 1700 }, { "epoch": 0.17889492158228928, "grad_norm": 2.1485938375897313, "learning_rate": 4.651157814253295e-06, "loss": 1.0051, "step": 1701 }, { "epoch": 0.1790000920241366, "grad_norm": 3.1044973840896732, "learning_rate": 4.650732343185743e-06, "loss": 1.0274, "step": 1702 }, { "epoch": 0.17910526246598393, "grad_norm": 2.6481193563942718, "learning_rate": 4.6503066322948264e-06, "loss": 1.0212, "step": 1703 }, { "epoch": 0.17921043290783126, "grad_norm": 2.9567702455119496, "learning_rate": 4.649880681628016e-06, "loss": 1.0586, "step": 1704 }, { "epoch": 0.17931560334967858, "grad_norm": 2.847681704947885, "learning_rate": 4.649454491232809e-06, "loss": 1.0353, "step": 1705 }, { "epoch": 0.17942077379152588, "grad_norm": 2.544316932295647, "learning_rate": 4.649028061156728e-06, "loss": 1.0109, "step": 1706 }, { "epoch": 0.1795259442333732, "grad_norm": 2.5651350900643863, "learning_rate": 4.648601391447325e-06, "loss": 1.0665, "step": 1707 }, { "epoch": 0.17963111467522053, "grad_norm": 3.305216400055959, "learning_rate": 4.648174482152176e-06, "loss": 1.0119, "step": 1708 }, { "epoch": 0.17973628511706785, "grad_norm": 2.907862648072626, "learning_rate": 4.647747333318884e-06, "loss": 1.0051, "step": 1709 }, { "epoch": 0.17984145555891518, "grad_norm": 2.7406970823992785, "learning_rate": 4.64731994499508e-06, "loss": 0.9843, "step": 1710 }, { "epoch": 0.17994662600076247, "grad_norm": 2.986361487648303, "learning_rate": 4.646892317228422e-06, "loss": 1.0133, "step": 1711 }, { "epoch": 0.1800517964426098, "grad_norm": 2.897968238151226, "learning_rate": 4.646464450066592e-06, "loss": 1.0311, "step": 1712 }, { "epoch": 0.18015696688445712, "grad_norm": 3.4099728159788008, "learning_rate": 4.646036343557302e-06, "loss": 0.9871, "step": 1713 }, { "epoch": 0.18026213732630444, "grad_norm": 3.7576035434231745, "learning_rate": 4.6456079977482885e-06, "loss": 1.032, "step": 1714 }, { "epoch": 0.18036730776815177, "grad_norm": 2.569078677988013, "learning_rate": 4.645179412687316e-06, "loss": 1.024, "step": 1715 }, { "epoch": 0.1804724782099991, "grad_norm": 3.286151517668889, "learning_rate": 4.644750588422174e-06, "loss": 1.062, "step": 1716 }, { "epoch": 0.1805776486518464, "grad_norm": 3.179805762126629, "learning_rate": 4.644321525000681e-06, "loss": 1.0809, "step": 1717 }, { "epoch": 0.18068281909369371, "grad_norm": 2.154906179564819, "learning_rate": 4.64389222247068e-06, "loss": 1.0338, "step": 1718 }, { "epoch": 0.18078798953554104, "grad_norm": 3.4239950119409195, "learning_rate": 4.643462680880042e-06, "loss": 1.0273, "step": 1719 }, { "epoch": 0.18089315997738836, "grad_norm": 2.9055090835022632, "learning_rate": 4.643032900276664e-06, "loss": 1.0144, "step": 1720 }, { "epoch": 0.1809983304192357, "grad_norm": 2.4602189314080776, "learning_rate": 4.642602880708469e-06, "loss": 0.9683, "step": 1721 }, { "epoch": 0.18110350086108298, "grad_norm": 2.90218657552329, "learning_rate": 4.642172622223409e-06, "loss": 1.0738, "step": 1722 }, { "epoch": 0.1812086713029303, "grad_norm": 2.3552760906435983, "learning_rate": 4.641742124869461e-06, "loss": 0.9947, "step": 1723 }, { "epoch": 0.18131384174477763, "grad_norm": 2.8906978212652032, "learning_rate": 4.641311388694629e-06, "loss": 1.0405, "step": 1724 }, { "epoch": 0.18141901218662496, "grad_norm": 2.2774591530752653, "learning_rate": 4.640880413746942e-06, "loss": 1.0631, "step": 1725 }, { "epoch": 0.18152418262847228, "grad_norm": 2.909505385998881, "learning_rate": 4.640449200074459e-06, "loss": 1.0374, "step": 1726 }, { "epoch": 0.18162935307031958, "grad_norm": 2.2639846101073484, "learning_rate": 4.6400177477252615e-06, "loss": 1.0135, "step": 1727 }, { "epoch": 0.1817345235121669, "grad_norm": 3.409130796181564, "learning_rate": 4.639586056747461e-06, "loss": 1.0132, "step": 1728 }, { "epoch": 0.18183969395401423, "grad_norm": 2.1982637333179915, "learning_rate": 4.639154127189195e-06, "loss": 0.989, "step": 1729 }, { "epoch": 0.18194486439586155, "grad_norm": 3.8360953153189, "learning_rate": 4.638721959098626e-06, "loss": 1.0704, "step": 1730 }, { "epoch": 0.18205003483770887, "grad_norm": 2.4398191847689934, "learning_rate": 4.638289552523944e-06, "loss": 1.0277, "step": 1731 }, { "epoch": 0.18215520527955617, "grad_norm": 2.6670166772713646, "learning_rate": 4.637856907513366e-06, "loss": 1.0189, "step": 1732 }, { "epoch": 0.1822603757214035, "grad_norm": 2.6746238004492495, "learning_rate": 4.637424024115136e-06, "loss": 1.0083, "step": 1733 }, { "epoch": 0.18236554616325082, "grad_norm": 4.189496479103646, "learning_rate": 4.636990902377523e-06, "loss": 1.0884, "step": 1734 }, { "epoch": 0.18247071660509814, "grad_norm": 3.617312839139946, "learning_rate": 4.636557542348823e-06, "loss": 1.02, "step": 1735 }, { "epoch": 0.18257588704694547, "grad_norm": 2.7303718078659025, "learning_rate": 4.6361239440773595e-06, "loss": 1.033, "step": 1736 }, { "epoch": 0.18268105748879276, "grad_norm": 2.7591255551753426, "learning_rate": 4.635690107611483e-06, "loss": 1.0249, "step": 1737 }, { "epoch": 0.1827862279306401, "grad_norm": 2.698775360170187, "learning_rate": 4.635256032999569e-06, "loss": 1.0181, "step": 1738 }, { "epoch": 0.1828913983724874, "grad_norm": 2.9039375552443776, "learning_rate": 4.63482172029002e-06, "loss": 1.0174, "step": 1739 }, { "epoch": 0.18299656881433474, "grad_norm": 2.819682186528183, "learning_rate": 4.6343871695312646e-06, "loss": 0.9973, "step": 1740 }, { "epoch": 0.18310173925618206, "grad_norm": 2.752650866563116, "learning_rate": 4.63395238077176e-06, "loss": 0.9959, "step": 1741 }, { "epoch": 0.18320690969802936, "grad_norm": 2.037156561103997, "learning_rate": 4.6335173540599875e-06, "loss": 1.0232, "step": 1742 }, { "epoch": 0.18331208013987668, "grad_norm": 2.954334928188035, "learning_rate": 4.633082089444457e-06, "loss": 0.9881, "step": 1743 }, { "epoch": 0.183417250581724, "grad_norm": 2.7393783209030946, "learning_rate": 4.632646586973702e-06, "loss": 1.0439, "step": 1744 }, { "epoch": 0.18352242102357133, "grad_norm": 3.1124274526696385, "learning_rate": 4.6322108466962865e-06, "loss": 0.9901, "step": 1745 }, { "epoch": 0.18362759146541865, "grad_norm": 2.827497506273816, "learning_rate": 4.631774868660798e-06, "loss": 1.0557, "step": 1746 }, { "epoch": 0.18373276190726595, "grad_norm": 2.7113839252801495, "learning_rate": 4.631338652915851e-06, "loss": 1.0363, "step": 1747 }, { "epoch": 0.18383793234911328, "grad_norm": 3.0202399372617896, "learning_rate": 4.6309021995100875e-06, "loss": 0.9892, "step": 1748 }, { "epoch": 0.1839431027909606, "grad_norm": 3.0853410173378117, "learning_rate": 4.630465508492176e-06, "loss": 0.9995, "step": 1749 }, { "epoch": 0.18404827323280792, "grad_norm": 3.8771840606848347, "learning_rate": 4.630028579910809e-06, "loss": 1.0325, "step": 1750 }, { "epoch": 0.18415344367465525, "grad_norm": 2.924444671474204, "learning_rate": 4.629591413814709e-06, "loss": 1.0269, "step": 1751 }, { "epoch": 0.18425861411650254, "grad_norm": 3.855054949681042, "learning_rate": 4.629154010252624e-06, "loss": 1.0047, "step": 1752 }, { "epoch": 0.18436378455834987, "grad_norm": 2.94820330025766, "learning_rate": 4.628716369273326e-06, "loss": 1.0513, "step": 1753 }, { "epoch": 0.1844689550001972, "grad_norm": 3.1410382438209057, "learning_rate": 4.628278490925617e-06, "loss": 1.0178, "step": 1754 }, { "epoch": 0.18457412544204452, "grad_norm": 3.430775164691166, "learning_rate": 4.6278403752583235e-06, "loss": 1.0179, "step": 1755 }, { "epoch": 0.18467929588389184, "grad_norm": 3.935774284929484, "learning_rate": 4.627402022320298e-06, "loss": 1.0026, "step": 1756 }, { "epoch": 0.18478446632573914, "grad_norm": 4.002646697081782, "learning_rate": 4.626963432160421e-06, "loss": 1.0347, "step": 1757 }, { "epoch": 0.18488963676758646, "grad_norm": 2.5892402072879275, "learning_rate": 4.626524604827598e-06, "loss": 1.0222, "step": 1758 }, { "epoch": 0.1849948072094338, "grad_norm": 3.069956064773937, "learning_rate": 4.6260855403707625e-06, "loss": 1.0151, "step": 1759 }, { "epoch": 0.1850999776512811, "grad_norm": 2.7469752838222714, "learning_rate": 4.625646238838873e-06, "loss": 1.0214, "step": 1760 }, { "epoch": 0.18520514809312844, "grad_norm": 2.5645297933313875, "learning_rate": 4.6252067002809155e-06, "loss": 1.007, "step": 1761 }, { "epoch": 0.18531031853497573, "grad_norm": 3.4751336669098194, "learning_rate": 4.6247669247459015e-06, "loss": 1.0049, "step": 1762 }, { "epoch": 0.18541548897682306, "grad_norm": 2.641984983795442, "learning_rate": 4.62432691228287e-06, "loss": 0.9999, "step": 1763 }, { "epoch": 0.18552065941867038, "grad_norm": 2.4784360351295875, "learning_rate": 4.623886662940885e-06, "loss": 0.9663, "step": 1764 }, { "epoch": 0.1856258298605177, "grad_norm": 3.7083059171189814, "learning_rate": 4.6234461767690384e-06, "loss": 0.9815, "step": 1765 }, { "epoch": 0.18573100030236503, "grad_norm": 3.085101182950583, "learning_rate": 4.623005453816447e-06, "loss": 1.0183, "step": 1766 }, { "epoch": 0.18583617074421233, "grad_norm": 3.8717416513609164, "learning_rate": 4.622564494132256e-06, "loss": 1.0394, "step": 1767 }, { "epoch": 0.18594134118605965, "grad_norm": 2.6220312567865682, "learning_rate": 4.622123297765636e-06, "loss": 0.9954, "step": 1768 }, { "epoch": 0.18604651162790697, "grad_norm": 3.2456038569135313, "learning_rate": 4.621681864765783e-06, "loss": 1.0068, "step": 1769 }, { "epoch": 0.1861516820697543, "grad_norm": 2.99049524146547, "learning_rate": 4.621240195181918e-06, "loss": 1.0466, "step": 1770 }, { "epoch": 0.18625685251160162, "grad_norm": 3.7023703331482074, "learning_rate": 4.620798289063295e-06, "loss": 1.028, "step": 1771 }, { "epoch": 0.18636202295344895, "grad_norm": 2.284086379818769, "learning_rate": 4.620356146459187e-06, "loss": 1.0119, "step": 1772 }, { "epoch": 0.18646719339529624, "grad_norm": 3.7163945954226514, "learning_rate": 4.619913767418898e-06, "loss": 1.0508, "step": 1773 }, { "epoch": 0.18657236383714357, "grad_norm": 3.8367678119936324, "learning_rate": 4.619471151991755e-06, "loss": 1.0105, "step": 1774 }, { "epoch": 0.1866775342789909, "grad_norm": 2.8688910090189923, "learning_rate": 4.619028300227114e-06, "loss": 1.0038, "step": 1775 }, { "epoch": 0.18678270472083822, "grad_norm": 2.650967656775131, "learning_rate": 4.618585212174357e-06, "loss": 0.9438, "step": 1776 }, { "epoch": 0.18688787516268554, "grad_norm": 4.345745813091422, "learning_rate": 4.618141887882891e-06, "loss": 1.025, "step": 1777 }, { "epoch": 0.18699304560453284, "grad_norm": 2.868036629003295, "learning_rate": 4.6176983274021495e-06, "loss": 1.0249, "step": 1778 }, { "epoch": 0.18709821604638016, "grad_norm": 2.372708709831453, "learning_rate": 4.617254530781594e-06, "loss": 1.0195, "step": 1779 }, { "epoch": 0.18720338648822749, "grad_norm": 3.358894346037336, "learning_rate": 4.6168104980707105e-06, "loss": 1.0665, "step": 1780 }, { "epoch": 0.1873085569300748, "grad_norm": 3.3819142018228643, "learning_rate": 4.6163662293190135e-06, "loss": 1.0316, "step": 1781 }, { "epoch": 0.18741372737192213, "grad_norm": 2.7812505645074777, "learning_rate": 4.61592172457604e-06, "loss": 1.0267, "step": 1782 }, { "epoch": 0.18751889781376943, "grad_norm": 2.666048271062157, "learning_rate": 4.615476983891359e-06, "loss": 1.0273, "step": 1783 }, { "epoch": 0.18762406825561675, "grad_norm": 2.8496484883881994, "learning_rate": 4.61503200731456e-06, "loss": 0.9927, "step": 1784 }, { "epoch": 0.18772923869746408, "grad_norm": 2.604297527248957, "learning_rate": 4.6145867948952605e-06, "loss": 1.0363, "step": 1785 }, { "epoch": 0.1878344091393114, "grad_norm": 2.25491398075674, "learning_rate": 4.614141346683109e-06, "loss": 1.038, "step": 1786 }, { "epoch": 0.18793957958115873, "grad_norm": 2.264636873435953, "learning_rate": 4.6136956627277725e-06, "loss": 0.9992, "step": 1787 }, { "epoch": 0.18804475002300602, "grad_norm": 2.704549825776999, "learning_rate": 4.61324974307895e-06, "loss": 1.0082, "step": 1788 }, { "epoch": 0.18814992046485335, "grad_norm": 2.353105958418805, "learning_rate": 4.612803587786366e-06, "loss": 1.0102, "step": 1789 }, { "epoch": 0.18825509090670067, "grad_norm": 2.948441358243538, "learning_rate": 4.612357196899768e-06, "loss": 1.033, "step": 1790 }, { "epoch": 0.188360261348548, "grad_norm": 1.9059658097973773, "learning_rate": 4.611910570468933e-06, "loss": 1.002, "step": 1791 }, { "epoch": 0.18846543179039532, "grad_norm": 2.6032758990156712, "learning_rate": 4.611463708543665e-06, "loss": 1.0658, "step": 1792 }, { "epoch": 0.18857060223224262, "grad_norm": 2.230556596976156, "learning_rate": 4.61101661117379e-06, "loss": 1.0214, "step": 1793 }, { "epoch": 0.18867577267408994, "grad_norm": 3.0702976065770664, "learning_rate": 4.610569278409164e-06, "loss": 1.0007, "step": 1794 }, { "epoch": 0.18878094311593727, "grad_norm": 3.007760617807363, "learning_rate": 4.610121710299668e-06, "loss": 1.0181, "step": 1795 }, { "epoch": 0.1888861135577846, "grad_norm": 2.7344843254797127, "learning_rate": 4.609673906895208e-06, "loss": 1.0361, "step": 1796 }, { "epoch": 0.18899128399963191, "grad_norm": 1.9281901603612084, "learning_rate": 4.609225868245721e-06, "loss": 1.0006, "step": 1797 }, { "epoch": 0.1890964544414792, "grad_norm": 3.2114203189511152, "learning_rate": 4.608777594401164e-06, "loss": 1.0043, "step": 1798 }, { "epoch": 0.18920162488332654, "grad_norm": 2.145414047326945, "learning_rate": 4.608329085411523e-06, "loss": 1.0424, "step": 1799 }, { "epoch": 0.18930679532517386, "grad_norm": 2.51024154366095, "learning_rate": 4.607880341326812e-06, "loss": 1.0284, "step": 1800 }, { "epoch": 0.18941196576702118, "grad_norm": 3.0454022502767804, "learning_rate": 4.607431362197067e-06, "loss": 0.9817, "step": 1801 }, { "epoch": 0.1895171362088685, "grad_norm": 3.050295307561754, "learning_rate": 4.6069821480723545e-06, "loss": 1.0539, "step": 1802 }, { "epoch": 0.1896223066507158, "grad_norm": 4.406095657800138, "learning_rate": 4.6065326990027656e-06, "loss": 1.0129, "step": 1803 }, { "epoch": 0.18972747709256313, "grad_norm": 2.37443582116452, "learning_rate": 4.606083015038416e-06, "loss": 0.9949, "step": 1804 }, { "epoch": 0.18983264753441045, "grad_norm": 3.2183447058232253, "learning_rate": 4.6056330962294496e-06, "loss": 1.0636, "step": 1805 }, { "epoch": 0.18993781797625778, "grad_norm": 2.8177933591262407, "learning_rate": 4.605182942626037e-06, "loss": 1.0327, "step": 1806 }, { "epoch": 0.1900429884181051, "grad_norm": 2.3566286140592654, "learning_rate": 4.604732554278371e-06, "loss": 1.0276, "step": 1807 }, { "epoch": 0.1901481588599524, "grad_norm": 3.0654404715127024, "learning_rate": 4.604281931236675e-06, "loss": 1.0586, "step": 1808 }, { "epoch": 0.19025332930179972, "grad_norm": 2.099265216978414, "learning_rate": 4.6038310735511985e-06, "loss": 1.0363, "step": 1809 }, { "epoch": 0.19035849974364705, "grad_norm": 2.521236583898647, "learning_rate": 4.603379981272213e-06, "loss": 0.9839, "step": 1810 }, { "epoch": 0.19046367018549437, "grad_norm": 2.528668148495521, "learning_rate": 4.6029286544500205e-06, "loss": 1.009, "step": 1811 }, { "epoch": 0.1905688406273417, "grad_norm": 3.7980630261576125, "learning_rate": 4.602477093134947e-06, "loss": 0.9996, "step": 1812 }, { "epoch": 0.190674011069189, "grad_norm": 2.965801661136027, "learning_rate": 4.602025297377345e-06, "loss": 1.0368, "step": 1813 }, { "epoch": 0.19077918151103632, "grad_norm": 2.5523632367350104, "learning_rate": 4.6015732672275925e-06, "loss": 0.9936, "step": 1814 }, { "epoch": 0.19088435195288364, "grad_norm": 3.02733722515206, "learning_rate": 4.601121002736095e-06, "loss": 1.0129, "step": 1815 }, { "epoch": 0.19098952239473097, "grad_norm": 4.357521284066693, "learning_rate": 4.600668503953285e-06, "loss": 1.0212, "step": 1816 }, { "epoch": 0.1910946928365783, "grad_norm": 3.1172216407619318, "learning_rate": 4.600215770929617e-06, "loss": 1.093, "step": 1817 }, { "epoch": 0.19119986327842559, "grad_norm": 2.6332985511775493, "learning_rate": 4.599762803715576e-06, "loss": 1.0391, "step": 1818 }, { "epoch": 0.1913050337202729, "grad_norm": 2.569865786330344, "learning_rate": 4.599309602361671e-06, "loss": 1.0291, "step": 1819 }, { "epoch": 0.19141020416212023, "grad_norm": 3.2827113014482445, "learning_rate": 4.5988561669184376e-06, "loss": 1.0382, "step": 1820 }, { "epoch": 0.19151537460396756, "grad_norm": 2.1719148645999584, "learning_rate": 4.598402497436436e-06, "loss": 1.0192, "step": 1821 }, { "epoch": 0.19162054504581488, "grad_norm": 2.2765741322316546, "learning_rate": 4.597948593966256e-06, "loss": 1.0286, "step": 1822 }, { "epoch": 0.19172571548766218, "grad_norm": 4.114857140083318, "learning_rate": 4.59749445655851e-06, "loss": 1.045, "step": 1823 }, { "epoch": 0.1918308859295095, "grad_norm": 2.997696029602926, "learning_rate": 4.597040085263838e-06, "loss": 1.04, "step": 1824 }, { "epoch": 0.19193605637135683, "grad_norm": 3.047085040895484, "learning_rate": 4.596585480132906e-06, "loss": 1.0447, "step": 1825 }, { "epoch": 0.19204122681320415, "grad_norm": 3.9271378946979847, "learning_rate": 4.596130641216406e-06, "loss": 1.008, "step": 1826 }, { "epoch": 0.19214639725505148, "grad_norm": 2.0196952752323414, "learning_rate": 4.595675568565058e-06, "loss": 0.9986, "step": 1827 }, { "epoch": 0.19225156769689877, "grad_norm": 3.228450782508349, "learning_rate": 4.5952202622296015e-06, "loss": 1.0224, "step": 1828 }, { "epoch": 0.1923567381387461, "grad_norm": 3.398130308359955, "learning_rate": 4.594764722260812e-06, "loss": 1.0222, "step": 1829 }, { "epoch": 0.19246190858059342, "grad_norm": 2.3897945567374648, "learning_rate": 4.594308948709482e-06, "loss": 1.0258, "step": 1830 }, { "epoch": 0.19256707902244075, "grad_norm": 2.867204854790798, "learning_rate": 4.593852941626435e-06, "loss": 0.9945, "step": 1831 }, { "epoch": 0.19267224946428807, "grad_norm": 3.1790669432232317, "learning_rate": 4.59339670106252e-06, "loss": 1.0426, "step": 1832 }, { "epoch": 0.1927774199061354, "grad_norm": 3.0542457539999694, "learning_rate": 4.59294022706861e-06, "loss": 1.0483, "step": 1833 }, { "epoch": 0.1928825903479827, "grad_norm": 3.2664531106906685, "learning_rate": 4.592483519695606e-06, "loss": 0.989, "step": 1834 }, { "epoch": 0.19298776078983002, "grad_norm": 2.4685050890530436, "learning_rate": 4.592026578994435e-06, "loss": 1.0167, "step": 1835 }, { "epoch": 0.19309293123167734, "grad_norm": 2.9309814017337197, "learning_rate": 4.59156940501605e-06, "loss": 1.0402, "step": 1836 }, { "epoch": 0.19319810167352466, "grad_norm": 3.121589794261374, "learning_rate": 4.591111997811427e-06, "loss": 1.0245, "step": 1837 }, { "epoch": 0.193303272115372, "grad_norm": 3.620780997081411, "learning_rate": 4.590654357431573e-06, "loss": 1.0185, "step": 1838 }, { "epoch": 0.19340844255721928, "grad_norm": 2.5408636903460775, "learning_rate": 4.590196483927517e-06, "loss": 1.0493, "step": 1839 }, { "epoch": 0.1935136129990666, "grad_norm": 2.642817968832206, "learning_rate": 4.589738377350316e-06, "loss": 1.0139, "step": 1840 }, { "epoch": 0.19361878344091393, "grad_norm": 2.461137436664243, "learning_rate": 4.589280037751052e-06, "loss": 1.048, "step": 1841 }, { "epoch": 0.19372395388276126, "grad_norm": 2.2490071042179625, "learning_rate": 4.5888214651808325e-06, "loss": 1.014, "step": 1842 }, { "epoch": 0.19382912432460858, "grad_norm": 2.4662682458707255, "learning_rate": 4.5883626596907945e-06, "loss": 1.0227, "step": 1843 }, { "epoch": 0.19393429476645588, "grad_norm": 4.025300912872622, "learning_rate": 4.587903621332097e-06, "loss": 1.0394, "step": 1844 }, { "epoch": 0.1940394652083032, "grad_norm": 3.0814943588959722, "learning_rate": 4.5874443501559265e-06, "loss": 1.0244, "step": 1845 }, { "epoch": 0.19414463565015053, "grad_norm": 3.5317457168193074, "learning_rate": 4.586984846213494e-06, "loss": 1.0279, "step": 1846 }, { "epoch": 0.19424980609199785, "grad_norm": 3.5877904213974934, "learning_rate": 4.586525109556039e-06, "loss": 1.0146, "step": 1847 }, { "epoch": 0.19435497653384518, "grad_norm": 3.2081496657086075, "learning_rate": 4.586065140234827e-06, "loss": 1.022, "step": 1848 }, { "epoch": 0.19446014697569247, "grad_norm": 2.0788025293634678, "learning_rate": 4.585604938301146e-06, "loss": 1.0051, "step": 1849 }, { "epoch": 0.1945653174175398, "grad_norm": 2.894336944770527, "learning_rate": 4.585144503806312e-06, "loss": 1.0676, "step": 1850 }, { "epoch": 0.19467048785938712, "grad_norm": 2.877696747200756, "learning_rate": 4.584683836801669e-06, "loss": 1.0292, "step": 1851 }, { "epoch": 0.19477565830123444, "grad_norm": 4.55585031976607, "learning_rate": 4.584222937338584e-06, "loss": 1.0615, "step": 1852 }, { "epoch": 0.19488082874308177, "grad_norm": 1.7959609995555694, "learning_rate": 4.583761805468449e-06, "loss": 0.967, "step": 1853 }, { "epoch": 0.19498599918492907, "grad_norm": 2.742630215241344, "learning_rate": 4.583300441242688e-06, "loss": 0.9956, "step": 1854 }, { "epoch": 0.1950911696267764, "grad_norm": 2.6981352318462974, "learning_rate": 4.582838844712741e-06, "loss": 1.0238, "step": 1855 }, { "epoch": 0.1951963400686237, "grad_norm": 3.463326781342451, "learning_rate": 4.582377015930085e-06, "loss": 1.0759, "step": 1856 }, { "epoch": 0.19530151051047104, "grad_norm": 2.4494104426262138, "learning_rate": 4.581914954946215e-06, "loss": 1.013, "step": 1857 }, { "epoch": 0.19540668095231836, "grad_norm": 2.2066591737539865, "learning_rate": 4.581452661812655e-06, "loss": 1.0223, "step": 1858 }, { "epoch": 0.19551185139416566, "grad_norm": 3.3773728745426803, "learning_rate": 4.5809901365809524e-06, "loss": 1.0246, "step": 1859 }, { "epoch": 0.19561702183601298, "grad_norm": 2.5954532792428218, "learning_rate": 4.580527379302685e-06, "loss": 1.0264, "step": 1860 }, { "epoch": 0.1957221922778603, "grad_norm": 2.651276443421835, "learning_rate": 4.580064390029452e-06, "loss": 1.0487, "step": 1861 }, { "epoch": 0.19582736271970763, "grad_norm": 2.6830451292812834, "learning_rate": 4.579601168812882e-06, "loss": 1.037, "step": 1862 }, { "epoch": 0.19593253316155496, "grad_norm": 3.2055413372462813, "learning_rate": 4.579137715704626e-06, "loss": 1.0315, "step": 1863 }, { "epoch": 0.19603770360340225, "grad_norm": 2.144505459852746, "learning_rate": 4.578674030756364e-06, "loss": 0.9836, "step": 1864 }, { "epoch": 0.19614287404524958, "grad_norm": 2.630253444185954, "learning_rate": 4.578210114019799e-06, "loss": 1.0294, "step": 1865 }, { "epoch": 0.1962480444870969, "grad_norm": 3.8376888541418306, "learning_rate": 4.577745965546662e-06, "loss": 0.9988, "step": 1866 }, { "epoch": 0.19635321492894423, "grad_norm": 2.068518479777229, "learning_rate": 4.577281585388711e-06, "loss": 1.0126, "step": 1867 }, { "epoch": 0.19645838537079155, "grad_norm": 2.4680664224880497, "learning_rate": 4.576816973597725e-06, "loss": 1.013, "step": 1868 }, { "epoch": 0.19656355581263885, "grad_norm": 3.2990236331625615, "learning_rate": 4.576352130225513e-06, "loss": 1.0228, "step": 1869 }, { "epoch": 0.19666872625448617, "grad_norm": 2.772397789423621, "learning_rate": 4.5758870553239095e-06, "loss": 0.9804, "step": 1870 }, { "epoch": 0.1967738966963335, "grad_norm": 3.1931620564914747, "learning_rate": 4.575421748944773e-06, "loss": 1.0534, "step": 1871 }, { "epoch": 0.19687906713818082, "grad_norm": 2.4312586968940426, "learning_rate": 4.574956211139989e-06, "loss": 1.0221, "step": 1872 }, { "epoch": 0.19698423758002814, "grad_norm": 1.9836984160196345, "learning_rate": 4.574490441961469e-06, "loss": 1.0625, "step": 1873 }, { "epoch": 0.19708940802187544, "grad_norm": 2.687082682874049, "learning_rate": 4.57402444146115e-06, "loss": 1.0262, "step": 1874 }, { "epoch": 0.19719457846372276, "grad_norm": 3.29954290699376, "learning_rate": 4.573558209690993e-06, "loss": 1.0292, "step": 1875 }, { "epoch": 0.1972997489055701, "grad_norm": 2.5564424315027505, "learning_rate": 4.573091746702988e-06, "loss": 1.0151, "step": 1876 }, { "epoch": 0.1974049193474174, "grad_norm": 3.4128408599390716, "learning_rate": 4.572625052549149e-06, "loss": 1.0509, "step": 1877 }, { "epoch": 0.19751008978926474, "grad_norm": 2.754362006103999, "learning_rate": 4.572158127281516e-06, "loss": 0.9928, "step": 1878 }, { "epoch": 0.19761526023111203, "grad_norm": 2.737808240634724, "learning_rate": 4.571690970952155e-06, "loss": 1.0393, "step": 1879 }, { "epoch": 0.19772043067295936, "grad_norm": 2.5757192677948813, "learning_rate": 4.571223583613157e-06, "loss": 1.0617, "step": 1880 }, { "epoch": 0.19782560111480668, "grad_norm": 2.622124067123704, "learning_rate": 4.570755965316639e-06, "loss": 0.9885, "step": 1881 }, { "epoch": 0.197930771556654, "grad_norm": 2.5421671476785, "learning_rate": 4.570288116114745e-06, "loss": 0.9976, "step": 1882 }, { "epoch": 0.19803594199850133, "grad_norm": 2.8283659577999583, "learning_rate": 4.569820036059644e-06, "loss": 1.054, "step": 1883 }, { "epoch": 0.19814111244034863, "grad_norm": 2.4526889476942695, "learning_rate": 4.56935172520353e-06, "loss": 1.0015, "step": 1884 }, { "epoch": 0.19824628288219595, "grad_norm": 1.8002824330773355, "learning_rate": 4.568883183598623e-06, "loss": 1.0031, "step": 1885 }, { "epoch": 0.19835145332404328, "grad_norm": 1.7744689934585205, "learning_rate": 4.56841441129717e-06, "loss": 1.0815, "step": 1886 }, { "epoch": 0.1984566237658906, "grad_norm": 3.315507706913469, "learning_rate": 4.567945408351441e-06, "loss": 1.007, "step": 1887 }, { "epoch": 0.19856179420773792, "grad_norm": 2.5040098416445953, "learning_rate": 4.567476174813735e-06, "loss": 1.0166, "step": 1888 }, { "epoch": 0.19866696464958522, "grad_norm": 3.0590946518561544, "learning_rate": 4.567006710736375e-06, "loss": 1.0333, "step": 1889 }, { "epoch": 0.19877213509143254, "grad_norm": 1.9729874214632432, "learning_rate": 4.5665370161717095e-06, "loss": 1.0099, "step": 1890 }, { "epoch": 0.19887730553327987, "grad_norm": 2.646128060765525, "learning_rate": 4.566067091172114e-06, "loss": 0.9994, "step": 1891 }, { "epoch": 0.1989824759751272, "grad_norm": 2.39440777880957, "learning_rate": 4.565596935789987e-06, "loss": 1.0348, "step": 1892 }, { "epoch": 0.19908764641697452, "grad_norm": 3.5153787075984586, "learning_rate": 4.5651265500777564e-06, "loss": 1.0805, "step": 1893 }, { "epoch": 0.19919281685882184, "grad_norm": 2.5557058180370955, "learning_rate": 4.564655934087873e-06, "loss": 1.0109, "step": 1894 }, { "epoch": 0.19929798730066914, "grad_norm": 2.541551289434198, "learning_rate": 4.564185087872814e-06, "loss": 1.0578, "step": 1895 }, { "epoch": 0.19940315774251646, "grad_norm": 2.222619391054327, "learning_rate": 4.563714011485082e-06, "loss": 1.0541, "step": 1896 }, { "epoch": 0.1995083281843638, "grad_norm": 3.635232836192664, "learning_rate": 4.563242704977206e-06, "loss": 1.0528, "step": 1897 }, { "epoch": 0.1996134986262111, "grad_norm": 2.804337831993397, "learning_rate": 4.562771168401742e-06, "loss": 1.033, "step": 1898 }, { "epoch": 0.19971866906805844, "grad_norm": 2.940386059188817, "learning_rate": 4.562299401811268e-06, "loss": 1.0787, "step": 1899 }, { "epoch": 0.19982383950990573, "grad_norm": 2.516512929364484, "learning_rate": 4.56182740525839e-06, "loss": 1.0043, "step": 1900 }, { "epoch": 0.19992900995175306, "grad_norm": 2.8419554559032596, "learning_rate": 4.561355178795739e-06, "loss": 1.0395, "step": 1901 }, { "epoch": 0.20003418039360038, "grad_norm": 3.203241148860607, "learning_rate": 4.560882722475973e-06, "loss": 1.0354, "step": 1902 }, { "epoch": 0.2001393508354477, "grad_norm": 2.9994247587115233, "learning_rate": 4.560410036351774e-06, "loss": 1.0102, "step": 1903 }, { "epoch": 0.20024452127729503, "grad_norm": 2.5106634446501164, "learning_rate": 4.559937120475849e-06, "loss": 1.0177, "step": 1904 }, { "epoch": 0.20034969171914233, "grad_norm": 3.201430008264517, "learning_rate": 4.559463974900934e-06, "loss": 1.0222, "step": 1905 }, { "epoch": 0.20045486216098965, "grad_norm": 3.5626303781607938, "learning_rate": 4.558990599679787e-06, "loss": 1.021, "step": 1906 }, { "epoch": 0.20056003260283697, "grad_norm": 2.3292692746627024, "learning_rate": 4.558516994865194e-06, "loss": 1.0364, "step": 1907 }, { "epoch": 0.2006652030446843, "grad_norm": 2.747377159004723, "learning_rate": 4.558043160509964e-06, "loss": 1.0101, "step": 1908 }, { "epoch": 0.20077037348653162, "grad_norm": 2.876129833597873, "learning_rate": 4.557569096666934e-06, "loss": 0.9977, "step": 1909 }, { "epoch": 0.20087554392837892, "grad_norm": 3.8381922934160744, "learning_rate": 4.5570948033889675e-06, "loss": 1.0311, "step": 1910 }, { "epoch": 0.20098071437022624, "grad_norm": 2.5502637033749447, "learning_rate": 4.5566202807289485e-06, "loss": 1.0031, "step": 1911 }, { "epoch": 0.20108588481207357, "grad_norm": 2.07505839155443, "learning_rate": 4.556145528739793e-06, "loss": 1.0599, "step": 1912 }, { "epoch": 0.2011910552539209, "grad_norm": 2.8177451858148355, "learning_rate": 4.555670547474438e-06, "loss": 1.0686, "step": 1913 }, { "epoch": 0.20129622569576822, "grad_norm": 4.173041129558708, "learning_rate": 4.555195336985848e-06, "loss": 1.0301, "step": 1914 }, { "epoch": 0.2014013961376155, "grad_norm": 2.552074293804037, "learning_rate": 4.554719897327013e-06, "loss": 1.0075, "step": 1915 }, { "epoch": 0.20150656657946284, "grad_norm": 3.3799408110891975, "learning_rate": 4.554244228550947e-06, "loss": 1.0219, "step": 1916 }, { "epoch": 0.20161173702131016, "grad_norm": 2.9673353024375415, "learning_rate": 4.5537683307106924e-06, "loss": 1.0484, "step": 1917 }, { "epoch": 0.20171690746315749, "grad_norm": 2.8301509680066084, "learning_rate": 4.553292203859314e-06, "loss": 1.0586, "step": 1918 }, { "epoch": 0.2018220779050048, "grad_norm": 3.757513825220459, "learning_rate": 4.552815848049904e-06, "loss": 0.9888, "step": 1919 }, { "epoch": 0.2019272483468521, "grad_norm": 2.158466766123102, "learning_rate": 4.552339263335581e-06, "loss": 1.0265, "step": 1920 }, { "epoch": 0.20203241878869943, "grad_norm": 4.206006355483029, "learning_rate": 4.551862449769487e-06, "loss": 1.0384, "step": 1921 }, { "epoch": 0.20213758923054675, "grad_norm": 2.713465885037005, "learning_rate": 4.5513854074047905e-06, "loss": 1.0314, "step": 1922 }, { "epoch": 0.20224275967239408, "grad_norm": 2.159072474282689, "learning_rate": 4.550908136294685e-06, "loss": 1.0454, "step": 1923 }, { "epoch": 0.2023479301142414, "grad_norm": 2.5490149894200846, "learning_rate": 4.55043063649239e-06, "loss": 1.0143, "step": 1924 }, { "epoch": 0.2024531005560887, "grad_norm": 2.9457442371208815, "learning_rate": 4.549952908051151e-06, "loss": 1.0303, "step": 1925 }, { "epoch": 0.20255827099793602, "grad_norm": 3.717268820987839, "learning_rate": 4.549474951024238e-06, "loss": 1.0308, "step": 1926 }, { "epoch": 0.20266344143978335, "grad_norm": 2.6781806036533222, "learning_rate": 4.548996765464947e-06, "loss": 1.0291, "step": 1927 }, { "epoch": 0.20276861188163067, "grad_norm": 2.18546757845666, "learning_rate": 4.5485183514266004e-06, "loss": 1.0024, "step": 1928 }, { "epoch": 0.202873782323478, "grad_norm": 2.510233734668508, "learning_rate": 4.548039708962544e-06, "loss": 1.0261, "step": 1929 }, { "epoch": 0.2029789527653253, "grad_norm": 2.704997908914001, "learning_rate": 4.547560838126149e-06, "loss": 0.9926, "step": 1930 }, { "epoch": 0.20308412320717262, "grad_norm": 2.5107441763705647, "learning_rate": 4.5470817389708155e-06, "loss": 1.0371, "step": 1931 }, { "epoch": 0.20318929364901994, "grad_norm": 3.774440786379843, "learning_rate": 4.546602411549966e-06, "loss": 1.0473, "step": 1932 }, { "epoch": 0.20329446409086727, "grad_norm": 2.3929427473890987, "learning_rate": 4.546122855917049e-06, "loss": 1.0211, "step": 1933 }, { "epoch": 0.2033996345327146, "grad_norm": 2.150887513168131, "learning_rate": 4.5456430721255384e-06, "loss": 0.9851, "step": 1934 }, { "epoch": 0.2035048049745619, "grad_norm": 2.4012826520584274, "learning_rate": 4.545163060228934e-06, "loss": 1.0105, "step": 1935 }, { "epoch": 0.2036099754164092, "grad_norm": 2.1182604676351495, "learning_rate": 4.544682820280762e-06, "loss": 1.0485, "step": 1936 }, { "epoch": 0.20371514585825654, "grad_norm": 3.9728966306261673, "learning_rate": 4.54420235233457e-06, "loss": 1.0238, "step": 1937 }, { "epoch": 0.20382031630010386, "grad_norm": 3.2576390659692103, "learning_rate": 4.543721656443938e-06, "loss": 1.0105, "step": 1938 }, { "epoch": 0.20392548674195118, "grad_norm": 2.7319424380077, "learning_rate": 4.5432407326624635e-06, "loss": 1.0294, "step": 1939 }, { "epoch": 0.20403065718379848, "grad_norm": 3.6616075268684134, "learning_rate": 4.542759581043775e-06, "loss": 1.0375, "step": 1940 }, { "epoch": 0.2041358276256458, "grad_norm": 2.286876999823875, "learning_rate": 4.5422782016415255e-06, "loss": 0.972, "step": 1941 }, { "epoch": 0.20424099806749313, "grad_norm": 3.5108681231659533, "learning_rate": 4.541796594509391e-06, "loss": 1.0733, "step": 1942 }, { "epoch": 0.20434616850934045, "grad_norm": 1.916935649789232, "learning_rate": 4.541314759701075e-06, "loss": 1.0334, "step": 1943 }, { "epoch": 0.20445133895118778, "grad_norm": 3.1369552191984047, "learning_rate": 4.540832697270305e-06, "loss": 1.0138, "step": 1944 }, { "epoch": 0.20455650939303507, "grad_norm": 2.8904365660760942, "learning_rate": 4.540350407270836e-06, "loss": 1.0579, "step": 1945 }, { "epoch": 0.2046616798348824, "grad_norm": 2.678633223567234, "learning_rate": 4.539867889756447e-06, "loss": 1.0582, "step": 1946 }, { "epoch": 0.20476685027672972, "grad_norm": 2.5400319091221664, "learning_rate": 4.539385144780942e-06, "loss": 1.0247, "step": 1947 }, { "epoch": 0.20487202071857705, "grad_norm": 1.8249180058750547, "learning_rate": 4.538902172398151e-06, "loss": 1.0025, "step": 1948 }, { "epoch": 0.20497719116042437, "grad_norm": 2.39557256136971, "learning_rate": 4.5384189726619285e-06, "loss": 1.0428, "step": 1949 }, { "epoch": 0.20508236160227167, "grad_norm": 2.0902532581196818, "learning_rate": 4.537935545626156e-06, "loss": 1.0141, "step": 1950 }, { "epoch": 0.205187532044119, "grad_norm": 2.6266101963885817, "learning_rate": 4.5374518913447384e-06, "loss": 1.0491, "step": 1951 }, { "epoch": 0.20529270248596632, "grad_norm": 1.8895661108791229, "learning_rate": 4.536968009871608e-06, "loss": 1.0318, "step": 1952 }, { "epoch": 0.20539787292781364, "grad_norm": 2.1586887149934095, "learning_rate": 4.536483901260721e-06, "loss": 1.0529, "step": 1953 }, { "epoch": 0.20550304336966096, "grad_norm": 3.064938688866919, "learning_rate": 4.535999565566058e-06, "loss": 0.9881, "step": 1954 }, { "epoch": 0.2056082138115083, "grad_norm": 3.4278129755911415, "learning_rate": 4.535515002841628e-06, "loss": 1.0366, "step": 1955 }, { "epoch": 0.20571338425335559, "grad_norm": 2.442730036564765, "learning_rate": 4.535030213141462e-06, "loss": 1.0062, "step": 1956 }, { "epoch": 0.2058185546952029, "grad_norm": 2.6315188808426813, "learning_rate": 4.5345451965196196e-06, "loss": 0.9853, "step": 1957 }, { "epoch": 0.20592372513705023, "grad_norm": 3.773145979161978, "learning_rate": 4.5340599530301826e-06, "loss": 1.0389, "step": 1958 }, { "epoch": 0.20602889557889756, "grad_norm": 2.7833840302672614, "learning_rate": 4.53357448272726e-06, "loss": 1.0276, "step": 1959 }, { "epoch": 0.20613406602074488, "grad_norm": 3.1775766358378053, "learning_rate": 4.5330887856649845e-06, "loss": 0.9929, "step": 1960 }, { "epoch": 0.20623923646259218, "grad_norm": 4.593991119183918, "learning_rate": 4.532602861897516e-06, "loss": 1.0597, "step": 1961 }, { "epoch": 0.2063444069044395, "grad_norm": 2.310336282250504, "learning_rate": 4.532116711479039e-06, "loss": 1.0174, "step": 1962 }, { "epoch": 0.20644957734628683, "grad_norm": 3.9022900668265668, "learning_rate": 4.531630334463762e-06, "loss": 1.079, "step": 1963 }, { "epoch": 0.20655474778813415, "grad_norm": 3.760113016517964, "learning_rate": 4.53114373090592e-06, "loss": 0.992, "step": 1964 }, { "epoch": 0.20665991822998148, "grad_norm": 2.583786767339631, "learning_rate": 4.5306569008597745e-06, "loss": 1.0444, "step": 1965 }, { "epoch": 0.20676508867182877, "grad_norm": 1.7472575830950914, "learning_rate": 4.53016984437961e-06, "loss": 0.9853, "step": 1966 }, { "epoch": 0.2068702591136761, "grad_norm": 2.6276414742188097, "learning_rate": 4.529682561519736e-06, "loss": 1.0232, "step": 1967 }, { "epoch": 0.20697542955552342, "grad_norm": 2.4241124519098705, "learning_rate": 4.52919505233449e-06, "loss": 1.017, "step": 1968 }, { "epoch": 0.20708059999737075, "grad_norm": 3.4436315276636815, "learning_rate": 4.528707316878233e-06, "loss": 1.0483, "step": 1969 }, { "epoch": 0.20718577043921807, "grad_norm": 3.6255766199501904, "learning_rate": 4.528219355205349e-06, "loss": 1.007, "step": 1970 }, { "epoch": 0.20729094088106537, "grad_norm": 2.5750935689757917, "learning_rate": 4.527731167370252e-06, "loss": 0.991, "step": 1971 }, { "epoch": 0.2073961113229127, "grad_norm": 3.527871448351104, "learning_rate": 4.527242753427378e-06, "loss": 0.9817, "step": 1972 }, { "epoch": 0.20750128176476001, "grad_norm": 3.0602580136672124, "learning_rate": 4.526754113431188e-06, "loss": 1.0392, "step": 1973 }, { "epoch": 0.20760645220660734, "grad_norm": 2.7611033572598025, "learning_rate": 4.526265247436171e-06, "loss": 0.9994, "step": 1974 }, { "epoch": 0.20771162264845466, "grad_norm": 2.6859585185179067, "learning_rate": 4.525776155496838e-06, "loss": 1.0119, "step": 1975 }, { "epoch": 0.20781679309030196, "grad_norm": 2.782628475167393, "learning_rate": 4.525286837667726e-06, "loss": 1.0515, "step": 1976 }, { "epoch": 0.20792196353214928, "grad_norm": 2.9155913927480275, "learning_rate": 4.5247972940034e-06, "loss": 0.9998, "step": 1977 }, { "epoch": 0.2080271339739966, "grad_norm": 2.9307391999616335, "learning_rate": 4.524307524558446e-06, "loss": 1.0204, "step": 1978 }, { "epoch": 0.20813230441584393, "grad_norm": 2.400026037357351, "learning_rate": 4.523817529387478e-06, "loss": 1.0126, "step": 1979 }, { "epoch": 0.20823747485769126, "grad_norm": 2.116341446271874, "learning_rate": 4.523327308545133e-06, "loss": 1.0167, "step": 1980 }, { "epoch": 0.20834264529953855, "grad_norm": 3.6019941589879414, "learning_rate": 4.522836862086076e-06, "loss": 1.0122, "step": 1981 }, { "epoch": 0.20844781574138588, "grad_norm": 2.530136205218369, "learning_rate": 4.5223461900649945e-06, "loss": 1.0279, "step": 1982 }, { "epoch": 0.2085529861832332, "grad_norm": 2.5456318862932923, "learning_rate": 4.521855292536603e-06, "loss": 1.0164, "step": 1983 }, { "epoch": 0.20865815662508053, "grad_norm": 3.2036037703970095, "learning_rate": 4.52136416955564e-06, "loss": 1.0244, "step": 1984 }, { "epoch": 0.20876332706692785, "grad_norm": 2.700422512134757, "learning_rate": 4.5208728211768696e-06, "loss": 1.0144, "step": 1985 }, { "epoch": 0.20886849750877515, "grad_norm": 2.4414664331229265, "learning_rate": 4.520381247455081e-06, "loss": 1.0219, "step": 1986 }, { "epoch": 0.20897366795062247, "grad_norm": 2.3814233051604368, "learning_rate": 4.519889448445088e-06, "loss": 1.0393, "step": 1987 }, { "epoch": 0.2090788383924698, "grad_norm": 2.6970110365603985, "learning_rate": 4.519397424201731e-06, "loss": 1.0227, "step": 1988 }, { "epoch": 0.20918400883431712, "grad_norm": 3.0579544571066384, "learning_rate": 4.518905174779874e-06, "loss": 1.0041, "step": 1989 }, { "epoch": 0.20928917927616444, "grad_norm": 3.0701954087018533, "learning_rate": 4.518412700234407e-06, "loss": 1.0392, "step": 1990 }, { "epoch": 0.20939434971801174, "grad_norm": 3.10208553663707, "learning_rate": 4.5179200006202425e-06, "loss": 0.9866, "step": 1991 }, { "epoch": 0.20949952015985907, "grad_norm": 2.5379999852239687, "learning_rate": 4.517427075992325e-06, "loss": 0.9833, "step": 1992 }, { "epoch": 0.2096046906017064, "grad_norm": 2.898863425583052, "learning_rate": 4.516933926405614e-06, "loss": 1.024, "step": 1993 }, { "epoch": 0.2097098610435537, "grad_norm": 2.5016600905979134, "learning_rate": 4.516440551915103e-06, "loss": 1.0243, "step": 1994 }, { "epoch": 0.20981503148540104, "grad_norm": 2.37701429193729, "learning_rate": 4.5159469525758065e-06, "loss": 1.0574, "step": 1995 }, { "epoch": 0.20992020192724833, "grad_norm": 3.349993368242468, "learning_rate": 4.515453128442764e-06, "loss": 0.9991, "step": 1996 }, { "epoch": 0.21002537236909566, "grad_norm": 3.041271688381229, "learning_rate": 4.514959079571042e-06, "loss": 1.0306, "step": 1997 }, { "epoch": 0.21013054281094298, "grad_norm": 3.7606519650863452, "learning_rate": 4.514464806015729e-06, "loss": 1.0277, "step": 1998 }, { "epoch": 0.2102357132527903, "grad_norm": 2.8528069179863307, "learning_rate": 4.513970307831941e-06, "loss": 0.9865, "step": 1999 }, { "epoch": 0.21034088369463763, "grad_norm": 3.3798634505812974, "learning_rate": 4.513475585074819e-06, "loss": 1.0434, "step": 2000 }, { "epoch": 0.21044605413648493, "grad_norm": 3.446977094091343, "learning_rate": 4.512980637799529e-06, "loss": 1.0129, "step": 2001 }, { "epoch": 0.21055122457833225, "grad_norm": 2.5663183477604483, "learning_rate": 4.512485466061258e-06, "loss": 1.0074, "step": 2002 }, { "epoch": 0.21065639502017958, "grad_norm": 2.713416190814693, "learning_rate": 4.511990069915226e-06, "loss": 1.0175, "step": 2003 }, { "epoch": 0.2107615654620269, "grad_norm": 3.017467404478425, "learning_rate": 4.511494449416671e-06, "loss": 1.0022, "step": 2004 }, { "epoch": 0.21086673590387423, "grad_norm": 2.869407764453783, "learning_rate": 4.510998604620859e-06, "loss": 1.0091, "step": 2005 }, { "epoch": 0.21097190634572152, "grad_norm": 2.2914275171439624, "learning_rate": 4.510502535583081e-06, "loss": 1.027, "step": 2006 }, { "epoch": 0.21107707678756885, "grad_norm": 3.1289559026795413, "learning_rate": 4.510006242358652e-06, "loss": 1.0192, "step": 2007 }, { "epoch": 0.21118224722941617, "grad_norm": 3.365076590082289, "learning_rate": 4.509509725002913e-06, "loss": 1.0314, "step": 2008 }, { "epoch": 0.2112874176712635, "grad_norm": 2.619162513458224, "learning_rate": 4.5090129835712286e-06, "loss": 0.9736, "step": 2009 }, { "epoch": 0.21139258811311082, "grad_norm": 2.6581918245692973, "learning_rate": 4.50851601811899e-06, "loss": 0.9946, "step": 2010 }, { "epoch": 0.21149775855495812, "grad_norm": 3.2402527360258713, "learning_rate": 4.508018828701613e-06, "loss": 1.0187, "step": 2011 }, { "epoch": 0.21160292899680544, "grad_norm": 3.561750316675759, "learning_rate": 4.5075214153745375e-06, "loss": 1.0131, "step": 2012 }, { "epoch": 0.21170809943865276, "grad_norm": 3.6081378607990815, "learning_rate": 4.507023778193229e-06, "loss": 1.0536, "step": 2013 }, { "epoch": 0.2118132698805001, "grad_norm": 3.98061138379272, "learning_rate": 4.506525917213178e-06, "loss": 0.9886, "step": 2014 }, { "epoch": 0.2119184403223474, "grad_norm": 3.6286847213673656, "learning_rate": 4.506027832489901e-06, "loss": 1.0802, "step": 2015 }, { "epoch": 0.21202361076419474, "grad_norm": 2.970920799122295, "learning_rate": 4.505529524078936e-06, "loss": 0.9943, "step": 2016 }, { "epoch": 0.21212878120604203, "grad_norm": 3.1998864392209034, "learning_rate": 4.505030992035851e-06, "loss": 1.0541, "step": 2017 }, { "epoch": 0.21223395164788936, "grad_norm": 3.5531939185269117, "learning_rate": 4.504532236416234e-06, "loss": 1.0057, "step": 2018 }, { "epoch": 0.21233912208973668, "grad_norm": 2.6807118334186173, "learning_rate": 4.504033257275701e-06, "loss": 0.989, "step": 2019 }, { "epoch": 0.212444292531584, "grad_norm": 3.709260634562878, "learning_rate": 4.5035340546698915e-06, "loss": 1.0069, "step": 2020 }, { "epoch": 0.21254946297343133, "grad_norm": 3.0800163674767655, "learning_rate": 4.503034628654472e-06, "loss": 1.0188, "step": 2021 }, { "epoch": 0.21265463341527863, "grad_norm": 2.827473390632371, "learning_rate": 4.50253497928513e-06, "loss": 1.0189, "step": 2022 }, { "epoch": 0.21275980385712595, "grad_norm": 3.293367596776558, "learning_rate": 4.502035106617583e-06, "loss": 0.9761, "step": 2023 }, { "epoch": 0.21286497429897328, "grad_norm": 2.9764561882025093, "learning_rate": 4.501535010707569e-06, "loss": 1.0117, "step": 2024 }, { "epoch": 0.2129701447408206, "grad_norm": 1.8974311339827867, "learning_rate": 4.501034691610852e-06, "loss": 1.0089, "step": 2025 }, { "epoch": 0.21307531518266792, "grad_norm": 3.113046982013976, "learning_rate": 4.500534149383222e-06, "loss": 0.9953, "step": 2026 }, { "epoch": 0.21318048562451522, "grad_norm": 2.5218275940026307, "learning_rate": 4.5000333840804945e-06, "loss": 1.0573, "step": 2027 }, { "epoch": 0.21328565606636254, "grad_norm": 2.1504903869654974, "learning_rate": 4.499532395758507e-06, "loss": 1.0295, "step": 2028 }, { "epoch": 0.21339082650820987, "grad_norm": 2.116485409339894, "learning_rate": 4.499031184473125e-06, "loss": 1.0145, "step": 2029 }, { "epoch": 0.2134959969500572, "grad_norm": 2.1971557306143583, "learning_rate": 4.498529750280237e-06, "loss": 0.9975, "step": 2030 }, { "epoch": 0.21360116739190452, "grad_norm": 2.441106240316848, "learning_rate": 4.4980280932357565e-06, "loss": 1.0324, "step": 2031 }, { "epoch": 0.2137063378337518, "grad_norm": 1.8648832005177987, "learning_rate": 4.4975262133956235e-06, "loss": 0.976, "step": 2032 }, { "epoch": 0.21381150827559914, "grad_norm": 2.393559217891105, "learning_rate": 4.497024110815799e-06, "loss": 1.0236, "step": 2033 }, { "epoch": 0.21391667871744646, "grad_norm": 3.4843636423614175, "learning_rate": 4.496521785552273e-06, "loss": 1.0423, "step": 2034 }, { "epoch": 0.2140218491592938, "grad_norm": 2.9511317676008297, "learning_rate": 4.496019237661059e-06, "loss": 1.0201, "step": 2035 }, { "epoch": 0.2141270196011411, "grad_norm": 2.554507983535873, "learning_rate": 4.495516467198193e-06, "loss": 0.9985, "step": 2036 }, { "epoch": 0.2142321900429884, "grad_norm": 2.5051574205030973, "learning_rate": 4.49501347421974e-06, "loss": 0.9961, "step": 2037 }, { "epoch": 0.21433736048483573, "grad_norm": 2.148199842957758, "learning_rate": 4.494510258781788e-06, "loss": 0.9899, "step": 2038 }, { "epoch": 0.21444253092668306, "grad_norm": 3.4634861798471466, "learning_rate": 4.4940068209404465e-06, "loss": 1.0501, "step": 2039 }, { "epoch": 0.21454770136853038, "grad_norm": 3.022603989678606, "learning_rate": 4.493503160751855e-06, "loss": 0.999, "step": 2040 }, { "epoch": 0.2146528718103777, "grad_norm": 2.595687398370017, "learning_rate": 4.4929992782721755e-06, "loss": 1.0535, "step": 2041 }, { "epoch": 0.214758042252225, "grad_norm": 2.3610745008748033, "learning_rate": 4.492495173557594e-06, "loss": 0.9964, "step": 2042 }, { "epoch": 0.21486321269407233, "grad_norm": 2.523798173897767, "learning_rate": 4.491990846664322e-06, "loss": 1.0458, "step": 2043 }, { "epoch": 0.21496838313591965, "grad_norm": 2.768262718642694, "learning_rate": 4.4914862976485975e-06, "loss": 1.0412, "step": 2044 }, { "epoch": 0.21507355357776697, "grad_norm": 3.5933411780857605, "learning_rate": 4.4909815265666786e-06, "loss": 1.0408, "step": 2045 }, { "epoch": 0.2151787240196143, "grad_norm": 1.859821990629818, "learning_rate": 4.490476533474854e-06, "loss": 1.033, "step": 2046 }, { "epoch": 0.2152838944614616, "grad_norm": 2.952652314844579, "learning_rate": 4.4899713184294336e-06, "loss": 1.013, "step": 2047 }, { "epoch": 0.21538906490330892, "grad_norm": 3.0877949687525, "learning_rate": 4.489465881486753e-06, "loss": 1.065, "step": 2048 }, { "epoch": 0.21549423534515624, "grad_norm": 2.1468116186309043, "learning_rate": 4.48896022270317e-06, "loss": 0.9845, "step": 2049 }, { "epoch": 0.21559940578700357, "grad_norm": 3.0848353636050745, "learning_rate": 4.488454342135074e-06, "loss": 1.0412, "step": 2050 }, { "epoch": 0.2157045762288509, "grad_norm": 2.827181553896088, "learning_rate": 4.487948239838869e-06, "loss": 0.957, "step": 2051 }, { "epoch": 0.2158097466706982, "grad_norm": 2.722663933189815, "learning_rate": 4.487441915870995e-06, "loss": 0.9992, "step": 2052 }, { "epoch": 0.2159149171125455, "grad_norm": 2.996172795915466, "learning_rate": 4.486935370287907e-06, "loss": 1.0284, "step": 2053 }, { "epoch": 0.21602008755439284, "grad_norm": 3.6047336301727637, "learning_rate": 4.48642860314609e-06, "loss": 1.0249, "step": 2054 }, { "epoch": 0.21612525799624016, "grad_norm": 2.867533696478624, "learning_rate": 4.485921614502054e-06, "loss": 1.0379, "step": 2055 }, { "epoch": 0.21623042843808749, "grad_norm": 3.176123229551431, "learning_rate": 4.485414404412329e-06, "loss": 1.0427, "step": 2056 }, { "epoch": 0.21633559887993478, "grad_norm": 2.9533715243960965, "learning_rate": 4.484906972933476e-06, "loss": 0.9631, "step": 2057 }, { "epoch": 0.2164407693217821, "grad_norm": 2.8688315629005716, "learning_rate": 4.484399320122075e-06, "loss": 1.0105, "step": 2058 }, { "epoch": 0.21654593976362943, "grad_norm": 3.5024671051619136, "learning_rate": 4.483891446034736e-06, "loss": 1.0587, "step": 2059 }, { "epoch": 0.21665111020547675, "grad_norm": 1.901326742128866, "learning_rate": 4.4833833507280884e-06, "loss": 1.0266, "step": 2060 }, { "epoch": 0.21675628064732408, "grad_norm": 2.5650512457429966, "learning_rate": 4.4828750342587895e-06, "loss": 1.0043, "step": 2061 }, { "epoch": 0.21686145108917138, "grad_norm": 2.448189051315377, "learning_rate": 4.482366496683521e-06, "loss": 1.0479, "step": 2062 }, { "epoch": 0.2169666215310187, "grad_norm": 2.824309733500601, "learning_rate": 4.481857738058989e-06, "loss": 0.988, "step": 2063 }, { "epoch": 0.21707179197286602, "grad_norm": 3.056046040580293, "learning_rate": 4.481348758441923e-06, "loss": 1.017, "step": 2064 }, { "epoch": 0.21717696241471335, "grad_norm": 2.0164976145859885, "learning_rate": 4.480839557889079e-06, "loss": 1.0477, "step": 2065 }, { "epoch": 0.21728213285656067, "grad_norm": 2.9759205852579274, "learning_rate": 4.480330136457237e-06, "loss": 1.0313, "step": 2066 }, { "epoch": 0.21738730329840797, "grad_norm": 4.396630703796807, "learning_rate": 4.4798204942032005e-06, "loss": 1.0554, "step": 2067 }, { "epoch": 0.2174924737402553, "grad_norm": 3.2600670571924453, "learning_rate": 4.4793106311838e-06, "loss": 1.0142, "step": 2068 }, { "epoch": 0.21759764418210262, "grad_norm": 2.554456195461736, "learning_rate": 4.478800547455887e-06, "loss": 1.033, "step": 2069 }, { "epoch": 0.21770281462394994, "grad_norm": 1.8748589681364354, "learning_rate": 4.478290243076342e-06, "loss": 0.9785, "step": 2070 }, { "epoch": 0.21780798506579727, "grad_norm": 3.558459347898174, "learning_rate": 4.4777797181020675e-06, "loss": 1.0148, "step": 2071 }, { "epoch": 0.2179131555076446, "grad_norm": 2.4480561013052253, "learning_rate": 4.477268972589989e-06, "loss": 1.0506, "step": 2072 }, { "epoch": 0.2180183259494919, "grad_norm": 3.167847874819189, "learning_rate": 4.476758006597061e-06, "loss": 1.0134, "step": 2073 }, { "epoch": 0.2181234963913392, "grad_norm": 2.035096373195341, "learning_rate": 4.476246820180259e-06, "loss": 0.9828, "step": 2074 }, { "epoch": 0.21822866683318654, "grad_norm": 3.0180664267026978, "learning_rate": 4.475735413396585e-06, "loss": 0.9717, "step": 2075 }, { "epoch": 0.21833383727503386, "grad_norm": 2.937593336372816, "learning_rate": 4.475223786303064e-06, "loss": 1.0418, "step": 2076 }, { "epoch": 0.21843900771688118, "grad_norm": 3.1853321519938125, "learning_rate": 4.474711938956747e-06, "loss": 1.0308, "step": 2077 }, { "epoch": 0.21854417815872848, "grad_norm": 2.781640730371229, "learning_rate": 4.4741998714147085e-06, "loss": 0.9788, "step": 2078 }, { "epoch": 0.2186493486005758, "grad_norm": 2.97926045372165, "learning_rate": 4.473687583734048e-06, "loss": 1.0543, "step": 2079 }, { "epoch": 0.21875451904242313, "grad_norm": 3.427682301198862, "learning_rate": 4.473175075971889e-06, "loss": 1.0172, "step": 2080 }, { "epoch": 0.21885968948427045, "grad_norm": 2.594403559813018, "learning_rate": 4.472662348185382e-06, "loss": 1.031, "step": 2081 }, { "epoch": 0.21896485992611778, "grad_norm": 3.1273306327310193, "learning_rate": 4.472149400431699e-06, "loss": 1.0469, "step": 2082 }, { "epoch": 0.21907003036796507, "grad_norm": 2.604066679542293, "learning_rate": 4.471636232768038e-06, "loss": 0.9904, "step": 2083 }, { "epoch": 0.2191752008098124, "grad_norm": 2.4026133544836767, "learning_rate": 4.47112284525162e-06, "loss": 1.0221, "step": 2084 }, { "epoch": 0.21928037125165972, "grad_norm": 3.599921055468287, "learning_rate": 4.470609237939693e-06, "loss": 0.9989, "step": 2085 }, { "epoch": 0.21938554169350705, "grad_norm": 2.131221331900976, "learning_rate": 4.470095410889528e-06, "loss": 0.9947, "step": 2086 }, { "epoch": 0.21949071213535437, "grad_norm": 2.1497035619992104, "learning_rate": 4.46958136415842e-06, "loss": 1.0332, "step": 2087 }, { "epoch": 0.21959588257720167, "grad_norm": 2.3616472688438406, "learning_rate": 4.469067097803689e-06, "loss": 1.0208, "step": 2088 }, { "epoch": 0.219701053019049, "grad_norm": 2.6359552878027674, "learning_rate": 4.46855261188268e-06, "loss": 1.0618, "step": 2089 }, { "epoch": 0.21980622346089632, "grad_norm": 1.9018280297999302, "learning_rate": 4.468037906452765e-06, "loss": 1.006, "step": 2090 }, { "epoch": 0.21991139390274364, "grad_norm": 2.5721362754088517, "learning_rate": 4.4675229815713324e-06, "loss": 1.0456, "step": 2091 }, { "epoch": 0.22001656434459096, "grad_norm": 2.954980814994551, "learning_rate": 4.467007837295805e-06, "loss": 1.0167, "step": 2092 }, { "epoch": 0.22012173478643826, "grad_norm": 3.3860189739598643, "learning_rate": 4.466492473683624e-06, "loss": 1.0261, "step": 2093 }, { "epoch": 0.22022690522828559, "grad_norm": 2.9650931599791757, "learning_rate": 4.465976890792254e-06, "loss": 1.0466, "step": 2094 }, { "epoch": 0.2203320756701329, "grad_norm": 3.104064301699326, "learning_rate": 4.465461088679189e-06, "loss": 1.0256, "step": 2095 }, { "epoch": 0.22043724611198023, "grad_norm": 2.636686526867292, "learning_rate": 4.464945067401945e-06, "loss": 1.0324, "step": 2096 }, { "epoch": 0.22054241655382756, "grad_norm": 2.9763466587493896, "learning_rate": 4.464428827018062e-06, "loss": 1.0048, "step": 2097 }, { "epoch": 0.22064758699567485, "grad_norm": 2.658483317192696, "learning_rate": 4.4639123675851035e-06, "loss": 1.0172, "step": 2098 }, { "epoch": 0.22075275743752218, "grad_norm": 2.70559891063467, "learning_rate": 4.46339568916066e-06, "loss": 1.0571, "step": 2099 }, { "epoch": 0.2208579278793695, "grad_norm": 2.880535256255276, "learning_rate": 4.462878791802345e-06, "loss": 1.0431, "step": 2100 }, { "epoch": 0.22096309832121683, "grad_norm": 3.233937870847447, "learning_rate": 4.4623616755677965e-06, "loss": 0.9888, "step": 2101 }, { "epoch": 0.22106826876306415, "grad_norm": 2.804108237608663, "learning_rate": 4.461844340514678e-06, "loss": 1.0465, "step": 2102 }, { "epoch": 0.22117343920491145, "grad_norm": 3.2318628882501295, "learning_rate": 4.4613267867006745e-06, "loss": 1.0353, "step": 2103 }, { "epoch": 0.22127860964675877, "grad_norm": 3.0591146643434786, "learning_rate": 4.460809014183498e-06, "loss": 1.0467, "step": 2104 }, { "epoch": 0.2213837800886061, "grad_norm": 3.1219953722891525, "learning_rate": 4.460291023020885e-06, "loss": 1.0335, "step": 2105 }, { "epoch": 0.22148895053045342, "grad_norm": 2.837039568315429, "learning_rate": 4.4597728132705944e-06, "loss": 1.0017, "step": 2106 }, { "epoch": 0.22159412097230075, "grad_norm": 2.401393140037482, "learning_rate": 4.459254384990411e-06, "loss": 1.0577, "step": 2107 }, { "epoch": 0.22169929141414804, "grad_norm": 2.8215140434580905, "learning_rate": 4.458735738238145e-06, "loss": 0.9908, "step": 2108 }, { "epoch": 0.22180446185599537, "grad_norm": 3.800737740073966, "learning_rate": 4.4582168730716264e-06, "loss": 1.0633, "step": 2109 }, { "epoch": 0.2219096322978427, "grad_norm": 2.755738180922399, "learning_rate": 4.457697789548715e-06, "loss": 1.0299, "step": 2110 }, { "epoch": 0.22201480273969001, "grad_norm": 3.5390554379093477, "learning_rate": 4.457178487727292e-06, "loss": 1.0707, "step": 2111 }, { "epoch": 0.22211997318153734, "grad_norm": 4.020240288638399, "learning_rate": 4.456658967665265e-06, "loss": 1.0235, "step": 2112 }, { "epoch": 0.22222514362338464, "grad_norm": 2.7427373031139166, "learning_rate": 4.4561392294205615e-06, "loss": 1.0166, "step": 2113 }, { "epoch": 0.22233031406523196, "grad_norm": 3.0778943699917094, "learning_rate": 4.455619273051139e-06, "loss": 1.035, "step": 2114 }, { "epoch": 0.22243548450707928, "grad_norm": 2.747240418850475, "learning_rate": 4.455099098614975e-06, "loss": 0.9829, "step": 2115 }, { "epoch": 0.2225406549489266, "grad_norm": 3.7757497910125486, "learning_rate": 4.454578706170075e-06, "loss": 1.0399, "step": 2116 }, { "epoch": 0.22264582539077393, "grad_norm": 3.683295020630938, "learning_rate": 4.454058095774465e-06, "loss": 1.0256, "step": 2117 }, { "epoch": 0.22275099583262123, "grad_norm": 2.9562137863553235, "learning_rate": 4.453537267486197e-06, "loss": 1.027, "step": 2118 }, { "epoch": 0.22285616627446855, "grad_norm": 3.658729786664914, "learning_rate": 4.453016221363349e-06, "loss": 1.0615, "step": 2119 }, { "epoch": 0.22296133671631588, "grad_norm": 2.7902924257034796, "learning_rate": 4.452494957464021e-06, "loss": 1.0576, "step": 2120 }, { "epoch": 0.2230665071581632, "grad_norm": 2.7787504278125286, "learning_rate": 4.451973475846337e-06, "loss": 0.9904, "step": 2121 }, { "epoch": 0.22317167760001053, "grad_norm": 3.234868103009726, "learning_rate": 4.451451776568446e-06, "loss": 1.0415, "step": 2122 }, { "epoch": 0.22327684804185782, "grad_norm": 3.1870487546909323, "learning_rate": 4.450929859688524e-06, "loss": 1.0222, "step": 2123 }, { "epoch": 0.22338201848370515, "grad_norm": 2.4299350284656125, "learning_rate": 4.450407725264768e-06, "loss": 1.0192, "step": 2124 }, { "epoch": 0.22348718892555247, "grad_norm": 2.0691045775857115, "learning_rate": 4.449885373355398e-06, "loss": 1.0423, "step": 2125 }, { "epoch": 0.2235923593673998, "grad_norm": 2.2233933117212628, "learning_rate": 4.449362804018661e-06, "loss": 0.9819, "step": 2126 }, { "epoch": 0.22369752980924712, "grad_norm": 3.019428274676109, "learning_rate": 4.44884001731283e-06, "loss": 1.0295, "step": 2127 }, { "epoch": 0.22380270025109442, "grad_norm": 2.659057630935619, "learning_rate": 4.4483170132961964e-06, "loss": 0.9956, "step": 2128 }, { "epoch": 0.22390787069294174, "grad_norm": 2.830751803296575, "learning_rate": 4.447793792027082e-06, "loss": 1.0471, "step": 2129 }, { "epoch": 0.22401304113478906, "grad_norm": 3.464512110587927, "learning_rate": 4.447270353563828e-06, "loss": 1.0079, "step": 2130 }, { "epoch": 0.2241182115766364, "grad_norm": 3.3402290513696724, "learning_rate": 4.446746697964803e-06, "loss": 1.0274, "step": 2131 }, { "epoch": 0.2242233820184837, "grad_norm": 2.6721352938404586, "learning_rate": 4.446222825288398e-06, "loss": 0.9991, "step": 2132 }, { "epoch": 0.22432855246033104, "grad_norm": 3.3974985750450815, "learning_rate": 4.44569873559303e-06, "loss": 1.0168, "step": 2133 }, { "epoch": 0.22443372290217833, "grad_norm": 2.272453274425081, "learning_rate": 4.445174428937137e-06, "loss": 1.0018, "step": 2134 }, { "epoch": 0.22453889334402566, "grad_norm": 2.733296898300132, "learning_rate": 4.444649905379184e-06, "loss": 1.0517, "step": 2135 }, { "epoch": 0.22464406378587298, "grad_norm": 3.0055966087106563, "learning_rate": 4.444125164977662e-06, "loss": 1.0395, "step": 2136 }, { "epoch": 0.2247492342277203, "grad_norm": 2.7417512364939682, "learning_rate": 4.44360020779108e-06, "loss": 1.0283, "step": 2137 }, { "epoch": 0.22485440466956763, "grad_norm": 2.136270049883368, "learning_rate": 4.443075033877978e-06, "loss": 1.0313, "step": 2138 }, { "epoch": 0.22495957511141493, "grad_norm": 2.623520046793813, "learning_rate": 4.442549643296915e-06, "loss": 1.0285, "step": 2139 }, { "epoch": 0.22506474555326225, "grad_norm": 2.6185982076249306, "learning_rate": 4.442024036106476e-06, "loss": 1.0296, "step": 2140 }, { "epoch": 0.22516991599510958, "grad_norm": 2.953502401243642, "learning_rate": 4.441498212365271e-06, "loss": 1.0403, "step": 2141 }, { "epoch": 0.2252750864369569, "grad_norm": 5.131915620587942, "learning_rate": 4.440972172131933e-06, "loss": 1.0135, "step": 2142 }, { "epoch": 0.22538025687880422, "grad_norm": 3.2510064788855537, "learning_rate": 4.440445915465119e-06, "loss": 1.0226, "step": 2143 }, { "epoch": 0.22548542732065152, "grad_norm": 2.2690579029077007, "learning_rate": 4.439919442423513e-06, "loss": 1.0519, "step": 2144 }, { "epoch": 0.22559059776249885, "grad_norm": 3.028677542365834, "learning_rate": 4.439392753065818e-06, "loss": 1.0209, "step": 2145 }, { "epoch": 0.22569576820434617, "grad_norm": 2.6183652601612795, "learning_rate": 4.438865847450766e-06, "loss": 1.0234, "step": 2146 }, { "epoch": 0.2258009386461935, "grad_norm": 2.655490949700378, "learning_rate": 4.438338725637111e-06, "loss": 0.9981, "step": 2147 }, { "epoch": 0.22590610908804082, "grad_norm": 3.0919304406899104, "learning_rate": 4.43781138768363e-06, "loss": 1.0164, "step": 2148 }, { "epoch": 0.22601127952988811, "grad_norm": 2.930774431302077, "learning_rate": 4.437283833649125e-06, "loss": 0.9999, "step": 2149 }, { "epoch": 0.22611644997173544, "grad_norm": 1.6428635453927467, "learning_rate": 4.436756063592424e-06, "loss": 0.9752, "step": 2150 }, { "epoch": 0.22622162041358276, "grad_norm": 2.3696606960139546, "learning_rate": 4.4362280775723775e-06, "loss": 1.0179, "step": 2151 }, { "epoch": 0.2263267908554301, "grad_norm": 3.3313866190682817, "learning_rate": 4.4356998756478585e-06, "loss": 0.9765, "step": 2152 }, { "epoch": 0.2264319612972774, "grad_norm": 2.577906154713765, "learning_rate": 4.435171457877766e-06, "loss": 0.97, "step": 2153 }, { "epoch": 0.2265371317391247, "grad_norm": 2.503718726560151, "learning_rate": 4.434642824321023e-06, "loss": 0.9966, "step": 2154 }, { "epoch": 0.22664230218097203, "grad_norm": 2.7189315507014298, "learning_rate": 4.434113975036577e-06, "loss": 1.0341, "step": 2155 }, { "epoch": 0.22674747262281936, "grad_norm": 2.858149879919419, "learning_rate": 4.433584910083397e-06, "loss": 1.0271, "step": 2156 }, { "epoch": 0.22685264306466668, "grad_norm": 2.2845656463626094, "learning_rate": 4.4330556295204796e-06, "loss": 0.9709, "step": 2157 }, { "epoch": 0.226957813506514, "grad_norm": 2.2220751802170122, "learning_rate": 4.432526133406843e-06, "loss": 0.9941, "step": 2158 }, { "epoch": 0.2270629839483613, "grad_norm": 2.710945616604557, "learning_rate": 4.43199642180153e-06, "loss": 1.0139, "step": 2159 }, { "epoch": 0.22716815439020863, "grad_norm": 2.3634868040227235, "learning_rate": 4.4314664947636075e-06, "loss": 0.9786, "step": 2160 }, { "epoch": 0.22727332483205595, "grad_norm": 2.4612164449548977, "learning_rate": 4.430936352352167e-06, "loss": 1.0174, "step": 2161 }, { "epoch": 0.22737849527390327, "grad_norm": 2.0768737166088878, "learning_rate": 4.430405994626323e-06, "loss": 1.013, "step": 2162 }, { "epoch": 0.2274836657157506, "grad_norm": 2.3532257856103374, "learning_rate": 4.429875421645214e-06, "loss": 1.0091, "step": 2163 }, { "epoch": 0.2275888361575979, "grad_norm": 3.0325316947217362, "learning_rate": 4.429344633468005e-06, "loss": 1.028, "step": 2164 }, { "epoch": 0.22769400659944522, "grad_norm": 3.017583155543902, "learning_rate": 4.428813630153881e-06, "loss": 1.0273, "step": 2165 }, { "epoch": 0.22779917704129254, "grad_norm": 2.9673310296779034, "learning_rate": 4.428282411762054e-06, "loss": 1.0312, "step": 2166 }, { "epoch": 0.22790434748313987, "grad_norm": 2.756113051254936, "learning_rate": 4.427750978351758e-06, "loss": 1.0329, "step": 2167 }, { "epoch": 0.2280095179249872, "grad_norm": 3.197164204980888, "learning_rate": 4.427219329982253e-06, "loss": 0.999, "step": 2168 }, { "epoch": 0.2281146883668345, "grad_norm": 2.8107489657605944, "learning_rate": 4.426687466712822e-06, "loss": 1.02, "step": 2169 }, { "epoch": 0.2282198588086818, "grad_norm": 2.7507863352021547, "learning_rate": 4.426155388602771e-06, "loss": 0.9941, "step": 2170 }, { "epoch": 0.22832502925052914, "grad_norm": 2.0778977549643103, "learning_rate": 4.425623095711431e-06, "loss": 1.0075, "step": 2171 }, { "epoch": 0.22843019969237646, "grad_norm": 2.5218199437660487, "learning_rate": 4.425090588098158e-06, "loss": 1.0061, "step": 2172 }, { "epoch": 0.2285353701342238, "grad_norm": 1.8947896556435786, "learning_rate": 4.424557865822329e-06, "loss": 1.0319, "step": 2173 }, { "epoch": 0.22864054057607108, "grad_norm": 2.8580135935444715, "learning_rate": 4.424024928943347e-06, "loss": 1.0138, "step": 2174 }, { "epoch": 0.2287457110179184, "grad_norm": 1.9185065863265085, "learning_rate": 4.42349177752064e-06, "loss": 1.0251, "step": 2175 }, { "epoch": 0.22885088145976573, "grad_norm": 2.2887816350371484, "learning_rate": 4.4229584116136574e-06, "loss": 1.0304, "step": 2176 }, { "epoch": 0.22895605190161306, "grad_norm": 2.114307828197282, "learning_rate": 4.422424831281873e-06, "loss": 1.0362, "step": 2177 }, { "epoch": 0.22906122234346038, "grad_norm": 3.6302171769750364, "learning_rate": 4.421891036584787e-06, "loss": 1.0028, "step": 2178 }, { "epoch": 0.22916639278530768, "grad_norm": 3.4568600537373784, "learning_rate": 4.421357027581921e-06, "loss": 0.9796, "step": 2179 }, { "epoch": 0.229271563227155, "grad_norm": 3.3494243915757016, "learning_rate": 4.42082280433282e-06, "loss": 1.0287, "step": 2180 }, { "epoch": 0.22937673366900233, "grad_norm": 3.815143472620564, "learning_rate": 4.420288366897055e-06, "loss": 1.0211, "step": 2181 }, { "epoch": 0.22948190411084965, "grad_norm": 2.134153845893804, "learning_rate": 4.419753715334219e-06, "loss": 1.032, "step": 2182 }, { "epoch": 0.22958707455269697, "grad_norm": 2.6909666767089333, "learning_rate": 4.419218849703933e-06, "loss": 0.9872, "step": 2183 }, { "epoch": 0.22969224499454427, "grad_norm": 2.3377268022126994, "learning_rate": 4.418683770065834e-06, "loss": 1.0023, "step": 2184 }, { "epoch": 0.2297974154363916, "grad_norm": 3.1485034054066783, "learning_rate": 4.41814847647959e-06, "loss": 1.0084, "step": 2185 }, { "epoch": 0.22990258587823892, "grad_norm": 2.354211954549771, "learning_rate": 4.4176129690048905e-06, "loss": 1.006, "step": 2186 }, { "epoch": 0.23000775632008624, "grad_norm": 2.463169295196819, "learning_rate": 4.417077247701449e-06, "loss": 1.0514, "step": 2187 }, { "epoch": 0.23011292676193357, "grad_norm": 2.130669171298515, "learning_rate": 4.4165413126290015e-06, "loss": 1.0368, "step": 2188 }, { "epoch": 0.23021809720378086, "grad_norm": 2.6129366976545105, "learning_rate": 4.416005163847309e-06, "loss": 0.9739, "step": 2189 }, { "epoch": 0.2303232676456282, "grad_norm": 2.84939027329353, "learning_rate": 4.415468801416158e-06, "loss": 0.993, "step": 2190 }, { "epoch": 0.2304284380874755, "grad_norm": 2.426269871426963, "learning_rate": 4.4149322253953545e-06, "loss": 1.0335, "step": 2191 }, { "epoch": 0.23053360852932284, "grad_norm": 2.379802288203194, "learning_rate": 4.414395435844732e-06, "loss": 0.9636, "step": 2192 }, { "epoch": 0.23063877897117016, "grad_norm": 2.400251646859452, "learning_rate": 4.413858432824147e-06, "loss": 1.0368, "step": 2193 }, { "epoch": 0.23074394941301749, "grad_norm": 2.8763286217663677, "learning_rate": 4.41332121639348e-06, "loss": 1.0031, "step": 2194 }, { "epoch": 0.23084911985486478, "grad_norm": 3.2398926109360526, "learning_rate": 4.412783786612634e-06, "loss": 1.0469, "step": 2195 }, { "epoch": 0.2309542902967121, "grad_norm": 2.465573849120777, "learning_rate": 4.412246143541536e-06, "loss": 1.015, "step": 2196 }, { "epoch": 0.23105946073855943, "grad_norm": 2.7811425882506873, "learning_rate": 4.4117082872401395e-06, "loss": 0.9901, "step": 2197 }, { "epoch": 0.23116463118040675, "grad_norm": 2.990852585631329, "learning_rate": 4.411170217768417e-06, "loss": 1.0293, "step": 2198 }, { "epoch": 0.23126980162225408, "grad_norm": 3.261551379361568, "learning_rate": 4.410631935186369e-06, "loss": 1.0079, "step": 2199 }, { "epoch": 0.23137497206410138, "grad_norm": 2.7992235343655527, "learning_rate": 4.410093439554019e-06, "loss": 1.0397, "step": 2200 }, { "epoch": 0.2314801425059487, "grad_norm": 2.4192682778964336, "learning_rate": 4.409554730931412e-06, "loss": 1.0289, "step": 2201 }, { "epoch": 0.23158531294779602, "grad_norm": 2.5957977370487235, "learning_rate": 4.4090158093786175e-06, "loss": 1.0366, "step": 2202 }, { "epoch": 0.23169048338964335, "grad_norm": 2.8959476464448923, "learning_rate": 4.408476674955732e-06, "loss": 1.0785, "step": 2203 }, { "epoch": 0.23179565383149067, "grad_norm": 2.644482199192657, "learning_rate": 4.407937327722871e-06, "loss": 1.0254, "step": 2204 }, { "epoch": 0.23190082427333797, "grad_norm": 3.442594882430092, "learning_rate": 4.407397767740176e-06, "loss": 1.0397, "step": 2205 }, { "epoch": 0.2320059947151853, "grad_norm": 2.5341412795753566, "learning_rate": 4.406857995067813e-06, "loss": 1.006, "step": 2206 }, { "epoch": 0.23211116515703262, "grad_norm": 2.2043374296304994, "learning_rate": 4.406318009765971e-06, "loss": 1.0111, "step": 2207 }, { "epoch": 0.23221633559887994, "grad_norm": 3.0723931658417487, "learning_rate": 4.405777811894861e-06, "loss": 1.043, "step": 2208 }, { "epoch": 0.23232150604072727, "grad_norm": 2.364705705885201, "learning_rate": 4.405237401514722e-06, "loss": 1.0548, "step": 2209 }, { "epoch": 0.23242667648257456, "grad_norm": 3.1983834328748246, "learning_rate": 4.404696778685811e-06, "loss": 1.0139, "step": 2210 }, { "epoch": 0.2325318469244219, "grad_norm": 2.746332139516773, "learning_rate": 4.4041559434684135e-06, "loss": 1.02, "step": 2211 }, { "epoch": 0.2326370173662692, "grad_norm": 2.274325848928587, "learning_rate": 4.4036148959228365e-06, "loss": 1.0089, "step": 2212 }, { "epoch": 0.23274218780811654, "grad_norm": 3.0278107573054336, "learning_rate": 4.40307363610941e-06, "loss": 1.0081, "step": 2213 }, { "epoch": 0.23284735824996386, "grad_norm": 2.165972003847509, "learning_rate": 4.40253216408849e-06, "loss": 1.0138, "step": 2214 }, { "epoch": 0.23295252869181116, "grad_norm": 2.8880774293438334, "learning_rate": 4.401990479920455e-06, "loss": 0.9974, "step": 2215 }, { "epoch": 0.23305769913365848, "grad_norm": 2.105729606603633, "learning_rate": 4.401448583665706e-06, "loss": 1.0269, "step": 2216 }, { "epoch": 0.2331628695755058, "grad_norm": 2.5957248301304108, "learning_rate": 4.4009064753846685e-06, "loss": 1.0, "step": 2217 }, { "epoch": 0.23326804001735313, "grad_norm": 3.0915416585982336, "learning_rate": 4.400364155137793e-06, "loss": 1.0225, "step": 2218 }, { "epoch": 0.23337321045920045, "grad_norm": 2.174158247873258, "learning_rate": 4.3998216229855504e-06, "loss": 0.9767, "step": 2219 }, { "epoch": 0.23347838090104775, "grad_norm": 1.6915485879041205, "learning_rate": 4.3992788789884385e-06, "loss": 0.9691, "step": 2220 }, { "epoch": 0.23358355134289507, "grad_norm": 3.1989567531239733, "learning_rate": 4.398735923206978e-06, "loss": 1.0219, "step": 2221 }, { "epoch": 0.2336887217847424, "grad_norm": 2.4905020268160234, "learning_rate": 4.398192755701713e-06, "loss": 1.0105, "step": 2222 }, { "epoch": 0.23379389222658972, "grad_norm": 3.061385255458901, "learning_rate": 4.397649376533209e-06, "loss": 1.0154, "step": 2223 }, { "epoch": 0.23389906266843705, "grad_norm": 2.1173291128116647, "learning_rate": 4.397105785762059e-06, "loss": 1.0071, "step": 2224 }, { "epoch": 0.23400423311028434, "grad_norm": 2.4753927815387264, "learning_rate": 4.396561983448876e-06, "loss": 1.0191, "step": 2225 }, { "epoch": 0.23410940355213167, "grad_norm": 2.6295178770980465, "learning_rate": 4.396017969654299e-06, "loss": 1.0107, "step": 2226 }, { "epoch": 0.234214573993979, "grad_norm": 2.555842843500082, "learning_rate": 4.395473744438989e-06, "loss": 1.0316, "step": 2227 }, { "epoch": 0.23431974443582632, "grad_norm": 2.698211855081485, "learning_rate": 4.394929307863633e-06, "loss": 1.0002, "step": 2228 }, { "epoch": 0.23442491487767364, "grad_norm": 3.0360295797113963, "learning_rate": 4.394384659988938e-06, "loss": 0.9551, "step": 2229 }, { "epoch": 0.23453008531952094, "grad_norm": 3.1523775915462826, "learning_rate": 4.393839800875638e-06, "loss": 0.9874, "step": 2230 }, { "epoch": 0.23463525576136826, "grad_norm": 2.900257288408909, "learning_rate": 4.3932947305844875e-06, "loss": 0.9747, "step": 2231 }, { "epoch": 0.23474042620321559, "grad_norm": 3.0099034185771005, "learning_rate": 4.392749449176268e-06, "loss": 1.0314, "step": 2232 }, { "epoch": 0.2348455966450629, "grad_norm": 3.82061371167963, "learning_rate": 4.39220395671178e-06, "loss": 1.0465, "step": 2233 }, { "epoch": 0.23495076708691023, "grad_norm": 2.833323567535811, "learning_rate": 4.391658253251853e-06, "loss": 1.0376, "step": 2234 }, { "epoch": 0.23505593752875753, "grad_norm": 4.222429513815148, "learning_rate": 4.391112338857335e-06, "loss": 0.9761, "step": 2235 }, { "epoch": 0.23516110797060485, "grad_norm": 2.8239821014267354, "learning_rate": 4.390566213589101e-06, "loss": 1.0447, "step": 2236 }, { "epoch": 0.23526627841245218, "grad_norm": 2.47061316638576, "learning_rate": 4.390019877508048e-06, "loss": 1.0365, "step": 2237 }, { "epoch": 0.2353714488542995, "grad_norm": 2.504173310706496, "learning_rate": 4.389473330675096e-06, "loss": 1.0194, "step": 2238 }, { "epoch": 0.23547661929614683, "grad_norm": 3.4045598206190935, "learning_rate": 4.38892657315119e-06, "loss": 1.0359, "step": 2239 }, { "epoch": 0.23558178973799412, "grad_norm": 3.5442386693147396, "learning_rate": 4.388379604997297e-06, "loss": 0.9705, "step": 2240 }, { "epoch": 0.23568696017984145, "grad_norm": 3.938459399524716, "learning_rate": 4.387832426274409e-06, "loss": 1.0083, "step": 2241 }, { "epoch": 0.23579213062168877, "grad_norm": 2.7251348273983993, "learning_rate": 4.38728503704354e-06, "loss": 1.0247, "step": 2242 }, { "epoch": 0.2358973010635361, "grad_norm": 3.119137566805799, "learning_rate": 4.386737437365729e-06, "loss": 0.9493, "step": 2243 }, { "epoch": 0.23600247150538342, "grad_norm": 2.526082167109994, "learning_rate": 4.386189627302037e-06, "loss": 1.008, "step": 2244 }, { "epoch": 0.23610764194723072, "grad_norm": 2.5608821293229105, "learning_rate": 4.385641606913548e-06, "loss": 0.9885, "step": 2245 }, { "epoch": 0.23621281238907804, "grad_norm": 2.682222019417078, "learning_rate": 4.385093376261373e-06, "loss": 1.0404, "step": 2246 }, { "epoch": 0.23631798283092537, "grad_norm": 2.258432603781061, "learning_rate": 4.384544935406641e-06, "loss": 1.0196, "step": 2247 }, { "epoch": 0.2364231532727727, "grad_norm": 2.671642366757651, "learning_rate": 4.383996284410511e-06, "loss": 1.024, "step": 2248 }, { "epoch": 0.23652832371462001, "grad_norm": 3.1327248445757463, "learning_rate": 4.383447423334159e-06, "loss": 1.0182, "step": 2249 }, { "epoch": 0.2366334941564673, "grad_norm": 2.723883446970864, "learning_rate": 4.382898352238788e-06, "loss": 0.9918, "step": 2250 }, { "epoch": 0.23673866459831464, "grad_norm": 3.0308816991498118, "learning_rate": 4.382349071185624e-06, "loss": 1.0059, "step": 2251 }, { "epoch": 0.23684383504016196, "grad_norm": 2.223810923909976, "learning_rate": 4.3817995802359155e-06, "loss": 1.0591, "step": 2252 }, { "epoch": 0.23694900548200928, "grad_norm": 2.198306040920323, "learning_rate": 4.3812498794509365e-06, "loss": 1.0037, "step": 2253 }, { "epoch": 0.2370541759238566, "grad_norm": 2.823104481198863, "learning_rate": 4.38069996889198e-06, "loss": 1.0164, "step": 2254 }, { "epoch": 0.23715934636570393, "grad_norm": 2.3927642257493975, "learning_rate": 4.380149848620369e-06, "loss": 1.0045, "step": 2255 }, { "epoch": 0.23726451680755123, "grad_norm": 2.7176141928001334, "learning_rate": 4.379599518697444e-06, "loss": 0.9935, "step": 2256 }, { "epoch": 0.23736968724939855, "grad_norm": 2.386268088932299, "learning_rate": 4.379048979184572e-06, "loss": 1.0106, "step": 2257 }, { "epoch": 0.23747485769124588, "grad_norm": 2.327011792366536, "learning_rate": 4.37849823014314e-06, "loss": 0.9859, "step": 2258 }, { "epoch": 0.2375800281330932, "grad_norm": 2.310094818927127, "learning_rate": 4.377947271634564e-06, "loss": 1.0425, "step": 2259 }, { "epoch": 0.23768519857494053, "grad_norm": 3.7252277158290026, "learning_rate": 4.3773961037202784e-06, "loss": 1.0013, "step": 2260 }, { "epoch": 0.23779036901678782, "grad_norm": 2.864574664034268, "learning_rate": 4.376844726461743e-06, "loss": 0.9962, "step": 2261 }, { "epoch": 0.23789553945863515, "grad_norm": 3.3321853074983405, "learning_rate": 4.376293139920441e-06, "loss": 1.0182, "step": 2262 }, { "epoch": 0.23800070990048247, "grad_norm": 2.2402296812420173, "learning_rate": 4.375741344157879e-06, "loss": 1.0232, "step": 2263 }, { "epoch": 0.2381058803423298, "grad_norm": 3.332880747822815, "learning_rate": 4.375189339235585e-06, "loss": 0.9578, "step": 2264 }, { "epoch": 0.23821105078417712, "grad_norm": 2.4681929360535872, "learning_rate": 4.374637125215113e-06, "loss": 0.9791, "step": 2265 }, { "epoch": 0.23831622122602442, "grad_norm": 2.404293531281789, "learning_rate": 4.374084702158039e-06, "loss": 1.0218, "step": 2266 }, { "epoch": 0.23842139166787174, "grad_norm": 2.4396191881750577, "learning_rate": 4.373532070125962e-06, "loss": 1.0236, "step": 2267 }, { "epoch": 0.23852656210971906, "grad_norm": 2.9682403601811367, "learning_rate": 4.3729792291805055e-06, "loss": 1.0397, "step": 2268 }, { "epoch": 0.2386317325515664, "grad_norm": 3.1442251603576037, "learning_rate": 4.372426179383315e-06, "loss": 0.9759, "step": 2269 }, { "epoch": 0.2387369029934137, "grad_norm": 2.7480986026706664, "learning_rate": 4.371872920796059e-06, "loss": 1.0032, "step": 2270 }, { "epoch": 0.238842073435261, "grad_norm": 3.520061750412933, "learning_rate": 4.371319453480431e-06, "loss": 1.0073, "step": 2271 }, { "epoch": 0.23894724387710833, "grad_norm": 2.622605154078674, "learning_rate": 4.370765777498149e-06, "loss": 1.0505, "step": 2272 }, { "epoch": 0.23905241431895566, "grad_norm": 2.995803194412082, "learning_rate": 4.370211892910949e-06, "loss": 1.0239, "step": 2273 }, { "epoch": 0.23915758476080298, "grad_norm": 3.0130918185780664, "learning_rate": 4.369657799780595e-06, "loss": 1.0446, "step": 2274 }, { "epoch": 0.2392627552026503, "grad_norm": 2.2351494176209736, "learning_rate": 4.369103498168872e-06, "loss": 1.0292, "step": 2275 }, { "epoch": 0.2393679256444976, "grad_norm": 2.710072469645011, "learning_rate": 4.368548988137589e-06, "loss": 1.045, "step": 2276 }, { "epoch": 0.23947309608634493, "grad_norm": 2.3805553686213075, "learning_rate": 4.367994269748579e-06, "loss": 0.9925, "step": 2277 }, { "epoch": 0.23957826652819225, "grad_norm": 3.089542808347871, "learning_rate": 4.367439343063696e-06, "loss": 1.0466, "step": 2278 }, { "epoch": 0.23968343697003958, "grad_norm": 1.6810878146896986, "learning_rate": 4.3668842081448206e-06, "loss": 1.0232, "step": 2279 }, { "epoch": 0.2397886074118869, "grad_norm": 2.8351406317406522, "learning_rate": 4.366328865053853e-06, "loss": 1.0121, "step": 2280 }, { "epoch": 0.2398937778537342, "grad_norm": 2.2094407063080648, "learning_rate": 4.365773313852718e-06, "loss": 1.0166, "step": 2281 }, { "epoch": 0.23999894829558152, "grad_norm": 2.816907367682445, "learning_rate": 4.365217554603365e-06, "loss": 1.0322, "step": 2282 }, { "epoch": 0.24010411873742885, "grad_norm": 3.0356646249148382, "learning_rate": 4.364661587367765e-06, "loss": 0.9952, "step": 2283 }, { "epoch": 0.24020928917927617, "grad_norm": 3.4033015359175898, "learning_rate": 4.364105412207914e-06, "loss": 1.043, "step": 2284 }, { "epoch": 0.2403144596211235, "grad_norm": 3.068575576133874, "learning_rate": 4.3635490291858285e-06, "loss": 1.0411, "step": 2285 }, { "epoch": 0.2404196300629708, "grad_norm": 2.863399553567626, "learning_rate": 4.362992438363549e-06, "loss": 0.9437, "step": 2286 }, { "epoch": 0.24052480050481811, "grad_norm": 3.0443021645648094, "learning_rate": 4.362435639803141e-06, "loss": 0.9712, "step": 2287 }, { "epoch": 0.24062997094666544, "grad_norm": 2.100247783006265, "learning_rate": 4.361878633566691e-06, "loss": 0.9919, "step": 2288 }, { "epoch": 0.24073514138851276, "grad_norm": 2.0331761263216053, "learning_rate": 4.361321419716309e-06, "loss": 1.0414, "step": 2289 }, { "epoch": 0.2408403118303601, "grad_norm": 2.4507988887485017, "learning_rate": 4.3607639983141305e-06, "loss": 1.0253, "step": 2290 }, { "epoch": 0.24094548227220738, "grad_norm": 2.3383275471123235, "learning_rate": 4.360206369422311e-06, "loss": 0.9672, "step": 2291 }, { "epoch": 0.2410506527140547, "grad_norm": 2.985680116206487, "learning_rate": 4.35964853310303e-06, "loss": 1.0054, "step": 2292 }, { "epoch": 0.24115582315590203, "grad_norm": 3.0674737355369768, "learning_rate": 4.359090489418492e-06, "loss": 1.0293, "step": 2293 }, { "epoch": 0.24126099359774936, "grad_norm": 3.179508934892898, "learning_rate": 4.358532238430922e-06, "loss": 1.0232, "step": 2294 }, { "epoch": 0.24136616403959668, "grad_norm": 2.68704103252696, "learning_rate": 4.35797378020257e-06, "loss": 1.0297, "step": 2295 }, { "epoch": 0.24147133448144398, "grad_norm": 3.002150660941129, "learning_rate": 4.357415114795709e-06, "loss": 1.0245, "step": 2296 }, { "epoch": 0.2415765049232913, "grad_norm": 2.258390927515721, "learning_rate": 4.356856242272634e-06, "loss": 1.0379, "step": 2297 }, { "epoch": 0.24168167536513863, "grad_norm": 2.24898790122129, "learning_rate": 4.356297162695662e-06, "loss": 0.9842, "step": 2298 }, { "epoch": 0.24178684580698595, "grad_norm": 2.0293860274575515, "learning_rate": 4.3557378761271365e-06, "loss": 0.9971, "step": 2299 }, { "epoch": 0.24189201624883327, "grad_norm": 2.049737362518493, "learning_rate": 4.3551783826294225e-06, "loss": 0.9803, "step": 2300 }, { "epoch": 0.24199718669068057, "grad_norm": 2.82131721819364, "learning_rate": 4.3546186822649074e-06, "loss": 1.0371, "step": 2301 }, { "epoch": 0.2421023571325279, "grad_norm": 2.6674301569769794, "learning_rate": 4.3540587750960015e-06, "loss": 1.0335, "step": 2302 }, { "epoch": 0.24220752757437522, "grad_norm": 2.5133457003010315, "learning_rate": 4.353498661185139e-06, "loss": 1.0216, "step": 2303 }, { "epoch": 0.24231269801622254, "grad_norm": 2.4664866984104425, "learning_rate": 4.352938340594778e-06, "loss": 0.9618, "step": 2304 }, { "epoch": 0.24241786845806987, "grad_norm": 2.5932114027385276, "learning_rate": 4.352377813387398e-06, "loss": 0.9812, "step": 2305 }, { "epoch": 0.24252303889991716, "grad_norm": 2.9653618102280928, "learning_rate": 4.351817079625502e-06, "loss": 1.0269, "step": 2306 }, { "epoch": 0.2426282093417645, "grad_norm": 2.724639989437581, "learning_rate": 4.351256139371616e-06, "loss": 1.0426, "step": 2307 }, { "epoch": 0.2427333797836118, "grad_norm": 2.475922956963675, "learning_rate": 4.350694992688289e-06, "loss": 1.0309, "step": 2308 }, { "epoch": 0.24283855022545914, "grad_norm": 2.3209110579795906, "learning_rate": 4.350133639638094e-06, "loss": 0.9894, "step": 2309 }, { "epoch": 0.24294372066730646, "grad_norm": 3.9449479325880183, "learning_rate": 4.349572080283626e-06, "loss": 1.0032, "step": 2310 }, { "epoch": 0.24304889110915376, "grad_norm": 3.591306291688279, "learning_rate": 4.349010314687503e-06, "loss": 1.0259, "step": 2311 }, { "epoch": 0.24315406155100108, "grad_norm": 2.805682382653168, "learning_rate": 4.348448342912365e-06, "loss": 1.0302, "step": 2312 }, { "epoch": 0.2432592319928484, "grad_norm": 2.540123317376257, "learning_rate": 4.347886165020879e-06, "loss": 1.0104, "step": 2313 }, { "epoch": 0.24336440243469573, "grad_norm": 2.305099301071069, "learning_rate": 4.347323781075731e-06, "loss": 0.9888, "step": 2314 }, { "epoch": 0.24346957287654306, "grad_norm": 3.599970133750129, "learning_rate": 4.34676119113963e-06, "loss": 1.0483, "step": 2315 }, { "epoch": 0.24357474331839038, "grad_norm": 3.770809111412246, "learning_rate": 4.3461983952753105e-06, "loss": 1.018, "step": 2316 }, { "epoch": 0.24367991376023768, "grad_norm": 3.0932950050010013, "learning_rate": 4.3456353935455285e-06, "loss": 0.9798, "step": 2317 }, { "epoch": 0.243785084202085, "grad_norm": 2.2871843195096364, "learning_rate": 4.345072186013063e-06, "loss": 1.0237, "step": 2318 }, { "epoch": 0.24389025464393232, "grad_norm": 2.957870000494886, "learning_rate": 4.344508772740714e-06, "loss": 1.01, "step": 2319 }, { "epoch": 0.24399542508577965, "grad_norm": 1.8700377021220989, "learning_rate": 4.343945153791309e-06, "loss": 0.9877, "step": 2320 }, { "epoch": 0.24410059552762697, "grad_norm": 1.7017015392411625, "learning_rate": 4.343381329227695e-06, "loss": 0.9863, "step": 2321 }, { "epoch": 0.24420576596947427, "grad_norm": 2.0639304419621642, "learning_rate": 4.342817299112744e-06, "loss": 0.9882, "step": 2322 }, { "epoch": 0.2443109364113216, "grad_norm": 3.6180446386514125, "learning_rate": 4.342253063509348e-06, "loss": 1.0368, "step": 2323 }, { "epoch": 0.24441610685316892, "grad_norm": 2.843315371054092, "learning_rate": 4.341688622480423e-06, "loss": 1.0174, "step": 2324 }, { "epoch": 0.24452127729501624, "grad_norm": 2.315783436822103, "learning_rate": 4.341123976088912e-06, "loss": 1.0636, "step": 2325 }, { "epoch": 0.24462644773686357, "grad_norm": 2.584613916276836, "learning_rate": 4.340559124397774e-06, "loss": 1.0318, "step": 2326 }, { "epoch": 0.24473161817871086, "grad_norm": 3.106997533126451, "learning_rate": 4.339994067469996e-06, "loss": 0.9838, "step": 2327 }, { "epoch": 0.2448367886205582, "grad_norm": 2.931771272566417, "learning_rate": 4.339428805368586e-06, "loss": 1.026, "step": 2328 }, { "epoch": 0.2449419590624055, "grad_norm": 3.097355703100136, "learning_rate": 4.338863338156575e-06, "loss": 1.0357, "step": 2329 }, { "epoch": 0.24504712950425284, "grad_norm": 2.163293960868166, "learning_rate": 4.338297665897016e-06, "loss": 1.0129, "step": 2330 }, { "epoch": 0.24515229994610016, "grad_norm": 2.6395195900185966, "learning_rate": 4.3377317886529875e-06, "loss": 1.0473, "step": 2331 }, { "epoch": 0.24525747038794746, "grad_norm": 2.857130604203511, "learning_rate": 4.337165706487589e-06, "loss": 1.0317, "step": 2332 }, { "epoch": 0.24536264082979478, "grad_norm": 2.49538696157863, "learning_rate": 4.336599419463943e-06, "loss": 0.9921, "step": 2333 }, { "epoch": 0.2454678112716421, "grad_norm": 3.3688127588253645, "learning_rate": 4.336032927645194e-06, "loss": 1.0419, "step": 2334 }, { "epoch": 0.24557298171348943, "grad_norm": 2.4873830553852536, "learning_rate": 4.3354662310945105e-06, "loss": 1.0384, "step": 2335 }, { "epoch": 0.24567815215533675, "grad_norm": 4.367565068639842, "learning_rate": 4.334899329875084e-06, "loss": 1.0342, "step": 2336 }, { "epoch": 0.24578332259718405, "grad_norm": 2.3864309309189227, "learning_rate": 4.3343322240501276e-06, "loss": 0.9888, "step": 2337 }, { "epoch": 0.24588849303903137, "grad_norm": 2.2279995974847298, "learning_rate": 4.33376491368288e-06, "loss": 1.0035, "step": 2338 }, { "epoch": 0.2459936634808787, "grad_norm": 2.8286281778671287, "learning_rate": 4.333197398836598e-06, "loss": 0.9848, "step": 2339 }, { "epoch": 0.24609883392272602, "grad_norm": 1.9274410042281873, "learning_rate": 4.332629679574566e-06, "loss": 1.0054, "step": 2340 }, { "epoch": 0.24620400436457335, "grad_norm": 2.5323024007120893, "learning_rate": 4.332061755960089e-06, "loss": 1.0341, "step": 2341 }, { "epoch": 0.24630917480642064, "grad_norm": 2.8771741834278948, "learning_rate": 4.331493628056494e-06, "loss": 1.0287, "step": 2342 }, { "epoch": 0.24641434524826797, "grad_norm": 3.559675628437571, "learning_rate": 4.33092529592713e-06, "loss": 1.0325, "step": 2343 }, { "epoch": 0.2465195156901153, "grad_norm": 2.4388512232776023, "learning_rate": 4.330356759635374e-06, "loss": 1.0221, "step": 2344 }, { "epoch": 0.24662468613196262, "grad_norm": 2.682329033109531, "learning_rate": 4.329788019244621e-06, "loss": 0.9862, "step": 2345 }, { "epoch": 0.24672985657380994, "grad_norm": 2.5463446910483367, "learning_rate": 4.329219074818288e-06, "loss": 1.0284, "step": 2346 }, { "epoch": 0.24683502701565724, "grad_norm": 2.0296846553885253, "learning_rate": 4.32864992641982e-06, "loss": 1.0119, "step": 2347 }, { "epoch": 0.24694019745750456, "grad_norm": 2.048540561932806, "learning_rate": 4.328080574112678e-06, "loss": 1.01, "step": 2348 }, { "epoch": 0.2470453678993519, "grad_norm": 3.4200047911393745, "learning_rate": 4.327511017960353e-06, "loss": 1.0236, "step": 2349 }, { "epoch": 0.2471505383411992, "grad_norm": 2.777887461388932, "learning_rate": 4.326941258026352e-06, "loss": 1.0413, "step": 2350 }, { "epoch": 0.24725570878304653, "grad_norm": 2.6334314381177175, "learning_rate": 4.326371294374207e-06, "loss": 1.0092, "step": 2351 }, { "epoch": 0.24736087922489383, "grad_norm": 2.880908366865062, "learning_rate": 4.325801127067477e-06, "loss": 1.0373, "step": 2352 }, { "epoch": 0.24746604966674116, "grad_norm": 3.588695174592861, "learning_rate": 4.325230756169737e-06, "loss": 1.0001, "step": 2353 }, { "epoch": 0.24757122010858848, "grad_norm": 3.0543565117198535, "learning_rate": 4.324660181744589e-06, "loss": 0.9983, "step": 2354 }, { "epoch": 0.2476763905504358, "grad_norm": 2.650395143277798, "learning_rate": 4.324089403855657e-06, "loss": 1.0016, "step": 2355 }, { "epoch": 0.24778156099228313, "grad_norm": 2.339046826958962, "learning_rate": 4.323518422566586e-06, "loss": 1.021, "step": 2356 }, { "epoch": 0.24788673143413043, "grad_norm": 2.6212259533171647, "learning_rate": 4.322947237941045e-06, "loss": 0.9759, "step": 2357 }, { "epoch": 0.24799190187597775, "grad_norm": 3.383293390834341, "learning_rate": 4.322375850042726e-06, "loss": 0.9904, "step": 2358 }, { "epoch": 0.24809707231782507, "grad_norm": 2.670237539236923, "learning_rate": 4.321804258935343e-06, "loss": 1.0199, "step": 2359 }, { "epoch": 0.2482022427596724, "grad_norm": 3.250184677256926, "learning_rate": 4.3212324646826334e-06, "loss": 0.9948, "step": 2360 }, { "epoch": 0.24830741320151972, "grad_norm": 2.5906224312948916, "learning_rate": 4.320660467348355e-06, "loss": 1.0054, "step": 2361 }, { "epoch": 0.24841258364336702, "grad_norm": 2.3329999607620695, "learning_rate": 4.320088266996292e-06, "loss": 1.0111, "step": 2362 }, { "epoch": 0.24851775408521434, "grad_norm": 3.94765814630664, "learning_rate": 4.3195158636902475e-06, "loss": 1.0006, "step": 2363 }, { "epoch": 0.24862292452706167, "grad_norm": 2.247678998715992, "learning_rate": 4.3189432574940506e-06, "loss": 0.9504, "step": 2364 }, { "epoch": 0.248728094968909, "grad_norm": 2.638346815741289, "learning_rate": 4.3183704484715496e-06, "loss": 1.016, "step": 2365 }, { "epoch": 0.24883326541075632, "grad_norm": 2.9361318540962094, "learning_rate": 4.3177974366866185e-06, "loss": 1.0447, "step": 2366 }, { "epoch": 0.2489384358526036, "grad_norm": 3.1042757839794515, "learning_rate": 4.317224222203152e-06, "loss": 1.0061, "step": 2367 }, { "epoch": 0.24904360629445094, "grad_norm": 2.2154767610485426, "learning_rate": 4.316650805085068e-06, "loss": 1.005, "step": 2368 }, { "epoch": 0.24914877673629826, "grad_norm": 3.623072584794595, "learning_rate": 4.316077185396307e-06, "loss": 1.0467, "step": 2369 }, { "epoch": 0.24925394717814559, "grad_norm": 2.33674243869589, "learning_rate": 4.3155033632008316e-06, "loss": 1.0021, "step": 2370 }, { "epoch": 0.2493591176199929, "grad_norm": 2.8007703287989907, "learning_rate": 4.314929338562629e-06, "loss": 1.0135, "step": 2371 }, { "epoch": 0.24946428806184023, "grad_norm": 3.2196932437208923, "learning_rate": 4.314355111545705e-06, "loss": 0.9289, "step": 2372 }, { "epoch": 0.24956945850368753, "grad_norm": 3.334365342086359, "learning_rate": 4.313780682214093e-06, "loss": 1.0507, "step": 2373 }, { "epoch": 0.24967462894553485, "grad_norm": 2.4745573038357915, "learning_rate": 4.313206050631844e-06, "loss": 0.9509, "step": 2374 }, { "epoch": 0.24977979938738218, "grad_norm": 2.0261724785390123, "learning_rate": 4.312631216863035e-06, "loss": 1.0073, "step": 2375 }, { "epoch": 0.2498849698292295, "grad_norm": 3.786354346442156, "learning_rate": 4.312056180971764e-06, "loss": 1.0409, "step": 2376 }, { "epoch": 0.24999014027107683, "grad_norm": 1.8293455049956973, "learning_rate": 4.3114809430221534e-06, "loss": 0.983, "step": 2377 }, { "epoch": 0.2500953107129241, "grad_norm": 2.7551244030560023, "learning_rate": 4.3109055030783445e-06, "loss": 1.0099, "step": 2378 }, { "epoch": 0.25020048115477145, "grad_norm": 3.713006572607176, "learning_rate": 4.310329861204505e-06, "loss": 0.9658, "step": 2379 }, { "epoch": 0.2503056515966188, "grad_norm": 3.841601007385603, "learning_rate": 4.309754017464823e-06, "loss": 1.0511, "step": 2380 }, { "epoch": 0.2504108220384661, "grad_norm": 1.7189850598284027, "learning_rate": 4.309177971923509e-06, "loss": 1.0225, "step": 2381 }, { "epoch": 0.2505159924803134, "grad_norm": 2.3106913101555326, "learning_rate": 4.308601724644797e-06, "loss": 1.0436, "step": 2382 }, { "epoch": 0.25062116292216075, "grad_norm": 1.685769997718884, "learning_rate": 4.308025275692943e-06, "loss": 0.9828, "step": 2383 }, { "epoch": 0.25072633336400807, "grad_norm": 2.605151083100969, "learning_rate": 4.307448625132225e-06, "loss": 0.9986, "step": 2384 }, { "epoch": 0.25083150380585534, "grad_norm": 2.070105328779204, "learning_rate": 4.306871773026945e-06, "loss": 0.9713, "step": 2385 }, { "epoch": 0.25093667424770266, "grad_norm": 2.1511866343693007, "learning_rate": 4.306294719441426e-06, "loss": 1.0466, "step": 2386 }, { "epoch": 0.25104184468955, "grad_norm": 2.7021103206947936, "learning_rate": 4.305717464440015e-06, "loss": 1.042, "step": 2387 }, { "epoch": 0.2511470151313973, "grad_norm": 2.264135547093338, "learning_rate": 4.305140008087078e-06, "loss": 1.0355, "step": 2388 }, { "epoch": 0.25125218557324464, "grad_norm": 2.6174496430229994, "learning_rate": 4.304562350447008e-06, "loss": 1.0403, "step": 2389 }, { "epoch": 0.25135735601509196, "grad_norm": 2.5030793337590103, "learning_rate": 4.303984491584217e-06, "loss": 0.9831, "step": 2390 }, { "epoch": 0.2514625264569393, "grad_norm": 3.246883328280729, "learning_rate": 4.303406431563142e-06, "loss": 1.0792, "step": 2391 }, { "epoch": 0.2515676968987866, "grad_norm": 2.7687583296326004, "learning_rate": 4.30282817044824e-06, "loss": 1.0085, "step": 2392 }, { "epoch": 0.25167286734063393, "grad_norm": 2.3272942288985554, "learning_rate": 4.302249708303993e-06, "loss": 1.0151, "step": 2393 }, { "epoch": 0.25177803778248126, "grad_norm": 2.266231486995089, "learning_rate": 4.301671045194902e-06, "loss": 1.0047, "step": 2394 }, { "epoch": 0.2518832082243285, "grad_norm": 2.754988205132692, "learning_rate": 4.301092181185495e-06, "loss": 1.0253, "step": 2395 }, { "epoch": 0.25198837866617585, "grad_norm": 2.6929620972255255, "learning_rate": 4.300513116340317e-06, "loss": 1.0054, "step": 2396 }, { "epoch": 0.2520935491080232, "grad_norm": 2.336085325218602, "learning_rate": 4.2999338507239395e-06, "loss": 0.9848, "step": 2397 }, { "epoch": 0.2521987195498705, "grad_norm": 2.461142882427421, "learning_rate": 4.299354384400957e-06, "loss": 1.0059, "step": 2398 }, { "epoch": 0.2523038899917178, "grad_norm": 2.532391792296108, "learning_rate": 4.2987747174359805e-06, "loss": 0.9754, "step": 2399 }, { "epoch": 0.25240906043356515, "grad_norm": 2.621685341882318, "learning_rate": 4.298194849893651e-06, "loss": 1.0266, "step": 2400 }, { "epoch": 0.25251423087541247, "grad_norm": 2.3181343970978356, "learning_rate": 4.297614781838627e-06, "loss": 1.0272, "step": 2401 }, { "epoch": 0.2526194013172598, "grad_norm": 2.05334980419308, "learning_rate": 4.297034513335591e-06, "loss": 0.9885, "step": 2402 }, { "epoch": 0.2527245717591071, "grad_norm": 2.364971791424021, "learning_rate": 4.296454044449245e-06, "loss": 1.009, "step": 2403 }, { "epoch": 0.25282974220095444, "grad_norm": 2.0265896786829805, "learning_rate": 4.295873375244319e-06, "loss": 1.0018, "step": 2404 }, { "epoch": 0.25293491264280177, "grad_norm": 4.311217157785901, "learning_rate": 4.295292505785561e-06, "loss": 1.0421, "step": 2405 }, { "epoch": 0.25304008308464904, "grad_norm": 2.629094308586008, "learning_rate": 4.294711436137742e-06, "loss": 1.0348, "step": 2406 }, { "epoch": 0.25314525352649636, "grad_norm": 2.680285943391119, "learning_rate": 4.294130166365656e-06, "loss": 1.057, "step": 2407 }, { "epoch": 0.2532504239683437, "grad_norm": 2.5244192892893267, "learning_rate": 4.293548696534119e-06, "loss": 1.0008, "step": 2408 }, { "epoch": 0.253355594410191, "grad_norm": 2.1567089358080076, "learning_rate": 4.2929670267079695e-06, "loss": 1.0427, "step": 2409 }, { "epoch": 0.25346076485203833, "grad_norm": 2.4191444936394277, "learning_rate": 4.292385156952069e-06, "loss": 1.0596, "step": 2410 }, { "epoch": 0.25356593529388566, "grad_norm": 2.260012880541108, "learning_rate": 4.291803087331299e-06, "loss": 1.0076, "step": 2411 }, { "epoch": 0.253671105735733, "grad_norm": 1.9375892516696325, "learning_rate": 4.291220817910566e-06, "loss": 1.036, "step": 2412 }, { "epoch": 0.2537762761775803, "grad_norm": 2.425680002791181, "learning_rate": 4.290638348754797e-06, "loss": 1.0195, "step": 2413 }, { "epoch": 0.25388144661942763, "grad_norm": 2.1474551573197074, "learning_rate": 4.2900556799289415e-06, "loss": 0.9866, "step": 2414 }, { "epoch": 0.25398661706127496, "grad_norm": 2.3436746364584877, "learning_rate": 4.289472811497972e-06, "loss": 1.0447, "step": 2415 }, { "epoch": 0.2540917875031222, "grad_norm": 2.114334641282732, "learning_rate": 4.288889743526884e-06, "loss": 1.0044, "step": 2416 }, { "epoch": 0.25419695794496955, "grad_norm": 2.5018267220432846, "learning_rate": 4.288306476080691e-06, "loss": 1.061, "step": 2417 }, { "epoch": 0.2543021283868169, "grad_norm": 3.079700459263164, "learning_rate": 4.287723009224436e-06, "loss": 1.0394, "step": 2418 }, { "epoch": 0.2544072988286642, "grad_norm": 2.6029768657221672, "learning_rate": 4.2871393430231775e-06, "loss": 1.0305, "step": 2419 }, { "epoch": 0.2545124692705115, "grad_norm": 3.4032647927169966, "learning_rate": 4.2865554775419985e-06, "loss": 1.0707, "step": 2420 }, { "epoch": 0.25461763971235885, "grad_norm": 2.9725437594396964, "learning_rate": 4.2859714128460065e-06, "loss": 0.9889, "step": 2421 }, { "epoch": 0.25472281015420617, "grad_norm": 1.908112429787501, "learning_rate": 4.2853871490003265e-06, "loss": 0.9918, "step": 2422 }, { "epoch": 0.2548279805960535, "grad_norm": 2.4844690233952913, "learning_rate": 4.2848026860701104e-06, "loss": 1.0429, "step": 2423 }, { "epoch": 0.2549331510379008, "grad_norm": 2.7736489003252034, "learning_rate": 4.284218024120531e-06, "loss": 1.0414, "step": 2424 }, { "epoch": 0.25503832147974814, "grad_norm": 2.532322414340705, "learning_rate": 4.28363316321678e-06, "loss": 1.001, "step": 2425 }, { "epoch": 0.2551434919215954, "grad_norm": 2.970052877513903, "learning_rate": 4.283048103424077e-06, "loss": 1.0803, "step": 2426 }, { "epoch": 0.25524866236344274, "grad_norm": 3.4153519083462185, "learning_rate": 4.282462844807659e-06, "loss": 1.01, "step": 2427 }, { "epoch": 0.25535383280529006, "grad_norm": 3.1053802029271513, "learning_rate": 4.281877387432787e-06, "loss": 1.051, "step": 2428 }, { "epoch": 0.2554590032471374, "grad_norm": 2.6052490168349975, "learning_rate": 4.281291731364744e-06, "loss": 0.9926, "step": 2429 }, { "epoch": 0.2555641736889847, "grad_norm": 2.1996478485684903, "learning_rate": 4.280705876668836e-06, "loss": 1.0286, "step": 2430 }, { "epoch": 0.25566934413083203, "grad_norm": 2.4448585973957906, "learning_rate": 4.2801198234103895e-06, "loss": 1.0489, "step": 2431 }, { "epoch": 0.25577451457267936, "grad_norm": 2.605626730548949, "learning_rate": 4.279533571654754e-06, "loss": 1.0234, "step": 2432 }, { "epoch": 0.2558796850145267, "grad_norm": 2.2571136636808773, "learning_rate": 4.2789471214673e-06, "loss": 1.049, "step": 2433 }, { "epoch": 0.255984855456374, "grad_norm": 2.9495392792916615, "learning_rate": 4.278360472913424e-06, "loss": 0.9787, "step": 2434 }, { "epoch": 0.25609002589822133, "grad_norm": 2.0896220524241884, "learning_rate": 4.277773626058542e-06, "loss": 1.0116, "step": 2435 }, { "epoch": 0.2561951963400686, "grad_norm": 3.163234673182296, "learning_rate": 4.277186580968088e-06, "loss": 1.0366, "step": 2436 }, { "epoch": 0.2563003667819159, "grad_norm": 2.462962363222071, "learning_rate": 4.276599337707525e-06, "loss": 1.0296, "step": 2437 }, { "epoch": 0.25640553722376325, "grad_norm": 2.1865954009193524, "learning_rate": 4.276011896342336e-06, "loss": 1.0365, "step": 2438 }, { "epoch": 0.25651070766561057, "grad_norm": 2.3965938792682144, "learning_rate": 4.2754242569380226e-06, "loss": 1.0514, "step": 2439 }, { "epoch": 0.2566158781074579, "grad_norm": 2.428078705061389, "learning_rate": 4.2748364195601135e-06, "loss": 1.0428, "step": 2440 }, { "epoch": 0.2567210485493052, "grad_norm": 2.7590755653461203, "learning_rate": 4.274248384274156e-06, "loss": 0.9769, "step": 2441 }, { "epoch": 0.25682621899115254, "grad_norm": 1.9677366099461735, "learning_rate": 4.27366015114572e-06, "loss": 0.985, "step": 2442 }, { "epoch": 0.25693138943299987, "grad_norm": 2.209987282212459, "learning_rate": 4.2730717202404e-06, "loss": 1.0309, "step": 2443 }, { "epoch": 0.2570365598748472, "grad_norm": 1.9968714101047118, "learning_rate": 4.272483091623809e-06, "loss": 1.0394, "step": 2444 }, { "epoch": 0.2571417303166945, "grad_norm": 2.579965658298572, "learning_rate": 4.271894265361584e-06, "loss": 0.9979, "step": 2445 }, { "epoch": 0.2572469007585418, "grad_norm": 1.8660957651544707, "learning_rate": 4.2713052415193844e-06, "loss": 1.0358, "step": 2446 }, { "epoch": 0.2573520712003891, "grad_norm": 2.4422132950894757, "learning_rate": 4.27071602016289e-06, "loss": 1.0568, "step": 2447 }, { "epoch": 0.25745724164223643, "grad_norm": 3.129384441162122, "learning_rate": 4.270126601357804e-06, "loss": 1.085, "step": 2448 }, { "epoch": 0.25756241208408376, "grad_norm": 2.813236158519783, "learning_rate": 4.269536985169851e-06, "loss": 1.0045, "step": 2449 }, { "epoch": 0.2576675825259311, "grad_norm": 2.4100320645639863, "learning_rate": 4.268947171664778e-06, "loss": 1.0047, "step": 2450 }, { "epoch": 0.2577727529677784, "grad_norm": 2.939524262190411, "learning_rate": 4.268357160908354e-06, "loss": 1.0213, "step": 2451 }, { "epoch": 0.25787792340962573, "grad_norm": 2.450057161789921, "learning_rate": 4.267766952966369e-06, "loss": 1.0147, "step": 2452 }, { "epoch": 0.25798309385147306, "grad_norm": 3.105852562406945, "learning_rate": 4.267176547904636e-06, "loss": 0.9983, "step": 2453 }, { "epoch": 0.2580882642933204, "grad_norm": 3.0495170539629894, "learning_rate": 4.266585945788991e-06, "loss": 0.9887, "step": 2454 }, { "epoch": 0.2581934347351677, "grad_norm": 1.9555953731591447, "learning_rate": 4.26599514668529e-06, "loss": 1.0166, "step": 2455 }, { "epoch": 0.258298605177015, "grad_norm": 2.345172111269998, "learning_rate": 4.26540415065941e-06, "loss": 1.0157, "step": 2456 }, { "epoch": 0.2584037756188623, "grad_norm": 2.648580133103009, "learning_rate": 4.264812957777253e-06, "loss": 1.059, "step": 2457 }, { "epoch": 0.2585089460607096, "grad_norm": 2.8657246045783755, "learning_rate": 4.2642215681047425e-06, "loss": 1.0181, "step": 2458 }, { "epoch": 0.25861411650255695, "grad_norm": 2.593705422659073, "learning_rate": 4.263629981707823e-06, "loss": 1.0275, "step": 2459 }, { "epoch": 0.25871928694440427, "grad_norm": 2.733876182487878, "learning_rate": 4.263038198652459e-06, "loss": 0.9784, "step": 2460 }, { "epoch": 0.2588244573862516, "grad_norm": 3.183024692281709, "learning_rate": 4.26244621900464e-06, "loss": 1.0185, "step": 2461 }, { "epoch": 0.2589296278280989, "grad_norm": 2.7344593577023715, "learning_rate": 4.261854042830377e-06, "loss": 1.0289, "step": 2462 }, { "epoch": 0.25903479826994624, "grad_norm": 1.9703850254076136, "learning_rate": 4.261261670195701e-06, "loss": 0.9684, "step": 2463 }, { "epoch": 0.25913996871179357, "grad_norm": 2.910924468557452, "learning_rate": 4.2606691011666675e-06, "loss": 1.0207, "step": 2464 }, { "epoch": 0.2592451391536409, "grad_norm": 3.5563615470552548, "learning_rate": 4.260076335809351e-06, "loss": 1.0237, "step": 2465 }, { "epoch": 0.2593503095954882, "grad_norm": 3.665977688640787, "learning_rate": 4.259483374189851e-06, "loss": 0.9983, "step": 2466 }, { "epoch": 0.2594554800373355, "grad_norm": 3.749669082591241, "learning_rate": 4.258890216374286e-06, "loss": 1.0253, "step": 2467 }, { "epoch": 0.2595606504791828, "grad_norm": 2.2413255969070303, "learning_rate": 4.258296862428799e-06, "loss": 1.0029, "step": 2468 }, { "epoch": 0.25966582092103013, "grad_norm": 2.267636209673735, "learning_rate": 4.257703312419553e-06, "loss": 1.0052, "step": 2469 }, { "epoch": 0.25977099136287746, "grad_norm": 2.2484357476994177, "learning_rate": 4.257109566412733e-06, "loss": 1.045, "step": 2470 }, { "epoch": 0.2598761618047248, "grad_norm": 2.933825598950081, "learning_rate": 4.256515624474547e-06, "loss": 1.0228, "step": 2471 }, { "epoch": 0.2599813322465721, "grad_norm": 2.404569611194018, "learning_rate": 4.255921486671223e-06, "loss": 1.0117, "step": 2472 }, { "epoch": 0.26008650268841943, "grad_norm": 2.6554997150272595, "learning_rate": 4.255327153069014e-06, "loss": 1.0379, "step": 2473 }, { "epoch": 0.26019167313026675, "grad_norm": 2.912876480944375, "learning_rate": 4.25473262373419e-06, "loss": 1.0201, "step": 2474 }, { "epoch": 0.2602968435721141, "grad_norm": 2.4808723598184925, "learning_rate": 4.254137898733048e-06, "loss": 1.0086, "step": 2475 }, { "epoch": 0.2604020140139614, "grad_norm": 3.0077031830519627, "learning_rate": 4.253542978131904e-06, "loss": 1.0183, "step": 2476 }, { "epoch": 0.26050718445580867, "grad_norm": 2.119327566527547, "learning_rate": 4.252947861997096e-06, "loss": 1.0165, "step": 2477 }, { "epoch": 0.260612354897656, "grad_norm": 2.9506512336323585, "learning_rate": 4.2523525503949835e-06, "loss": 1.0186, "step": 2478 }, { "epoch": 0.2607175253395033, "grad_norm": 3.4059461780090414, "learning_rate": 4.251757043391949e-06, "loss": 1.0065, "step": 2479 }, { "epoch": 0.26082269578135064, "grad_norm": 3.159826449070252, "learning_rate": 4.251161341054396e-06, "loss": 1.0176, "step": 2480 }, { "epoch": 0.26092786622319797, "grad_norm": 3.0165983438656765, "learning_rate": 4.25056544344875e-06, "loss": 1.0177, "step": 2481 }, { "epoch": 0.2610330366650453, "grad_norm": 2.909172263879279, "learning_rate": 4.249969350641459e-06, "loss": 1.0157, "step": 2482 }, { "epoch": 0.2611382071068926, "grad_norm": 2.629076914941897, "learning_rate": 4.2493730626989905e-06, "loss": 1.0504, "step": 2483 }, { "epoch": 0.26124337754873994, "grad_norm": 2.790385279161908, "learning_rate": 4.248776579687835e-06, "loss": 1.0102, "step": 2484 }, { "epoch": 0.26134854799058727, "grad_norm": 3.4197241373079224, "learning_rate": 4.248179901674508e-06, "loss": 1.0153, "step": 2485 }, { "epoch": 0.2614537184324346, "grad_norm": 2.7065753512349597, "learning_rate": 4.247583028725539e-06, "loss": 0.9738, "step": 2486 }, { "epoch": 0.26155888887428186, "grad_norm": 2.0539337784982474, "learning_rate": 4.246985960907488e-06, "loss": 1.0371, "step": 2487 }, { "epoch": 0.2616640593161292, "grad_norm": 3.5039520400652115, "learning_rate": 4.2463886982869316e-06, "loss": 1.0311, "step": 2488 }, { "epoch": 0.2617692297579765, "grad_norm": 2.260518449174576, "learning_rate": 4.245791240930469e-06, "loss": 1.0418, "step": 2489 }, { "epoch": 0.26187440019982383, "grad_norm": 2.593178309237441, "learning_rate": 4.245193588904721e-06, "loss": 0.9884, "step": 2490 }, { "epoch": 0.26197957064167116, "grad_norm": 2.5646604102114807, "learning_rate": 4.244595742276329e-06, "loss": 1.0198, "step": 2491 }, { "epoch": 0.2620847410835185, "grad_norm": 2.1573764067407284, "learning_rate": 4.243997701111961e-06, "loss": 1.0283, "step": 2492 }, { "epoch": 0.2621899115253658, "grad_norm": 2.706034557337528, "learning_rate": 4.243399465478302e-06, "loss": 0.9915, "step": 2493 }, { "epoch": 0.26229508196721313, "grad_norm": 2.8335990248067704, "learning_rate": 4.242801035442059e-06, "loss": 0.9332, "step": 2494 }, { "epoch": 0.26240025240906045, "grad_norm": 2.633396099123692, "learning_rate": 4.242202411069961e-06, "loss": 0.9786, "step": 2495 }, { "epoch": 0.2625054228509078, "grad_norm": 3.360271499168492, "learning_rate": 4.241603592428761e-06, "loss": 1.0477, "step": 2496 }, { "epoch": 0.26261059329275505, "grad_norm": 2.148918489099196, "learning_rate": 4.241004579585231e-06, "loss": 1.0063, "step": 2497 }, { "epoch": 0.26271576373460237, "grad_norm": 3.2367845205717, "learning_rate": 4.240405372606165e-06, "loss": 1.0015, "step": 2498 }, { "epoch": 0.2628209341764497, "grad_norm": 2.7895205043470988, "learning_rate": 4.239805971558381e-06, "loss": 1.0499, "step": 2499 }, { "epoch": 0.262926104618297, "grad_norm": 1.9537882626779242, "learning_rate": 4.239206376508716e-06, "loss": 1.0381, "step": 2500 }, { "epoch": 0.26303127506014434, "grad_norm": 2.784638128216924, "learning_rate": 4.2386065875240295e-06, "loss": 1.0069, "step": 2501 }, { "epoch": 0.26313644550199167, "grad_norm": 2.9037401874074096, "learning_rate": 4.238006604671202e-06, "loss": 0.997, "step": 2502 }, { "epoch": 0.263241615943839, "grad_norm": 3.7907319889378095, "learning_rate": 4.2374064280171375e-06, "loss": 1.0153, "step": 2503 }, { "epoch": 0.2633467863856863, "grad_norm": 2.42146316099506, "learning_rate": 4.2368060576287586e-06, "loss": 0.9756, "step": 2504 }, { "epoch": 0.26345195682753364, "grad_norm": 3.3487340200239943, "learning_rate": 4.236205493573013e-06, "loss": 1.0098, "step": 2505 }, { "epoch": 0.26355712726938096, "grad_norm": 2.3850429416001337, "learning_rate": 4.235604735916867e-06, "loss": 1.0055, "step": 2506 }, { "epoch": 0.26366229771122823, "grad_norm": 2.849867107035556, "learning_rate": 4.235003784727311e-06, "loss": 1.0175, "step": 2507 }, { "epoch": 0.26376746815307556, "grad_norm": 3.019032768395256, "learning_rate": 4.234402640071355e-06, "loss": 1.0389, "step": 2508 }, { "epoch": 0.2638726385949229, "grad_norm": 1.6410605133020546, "learning_rate": 4.23380130201603e-06, "loss": 1.0201, "step": 2509 }, { "epoch": 0.2639778090367702, "grad_norm": 2.8300981371727656, "learning_rate": 4.233199770628392e-06, "loss": 1.0188, "step": 2510 }, { "epoch": 0.26408297947861753, "grad_norm": 2.5403242188674, "learning_rate": 4.2325980459755155e-06, "loss": 1.0789, "step": 2511 }, { "epoch": 0.26418814992046485, "grad_norm": 2.6388880305238147, "learning_rate": 4.231996128124498e-06, "loss": 0.9696, "step": 2512 }, { "epoch": 0.2642933203623122, "grad_norm": 3.679088388538602, "learning_rate": 4.231394017142458e-06, "loss": 1.0035, "step": 2513 }, { "epoch": 0.2643984908041595, "grad_norm": 2.153085146074569, "learning_rate": 4.230791713096534e-06, "loss": 0.9416, "step": 2514 }, { "epoch": 0.2645036612460068, "grad_norm": 2.9035280609022696, "learning_rate": 4.2301892160538895e-06, "loss": 1.0398, "step": 2515 }, { "epoch": 0.26460883168785415, "grad_norm": 3.382690191668748, "learning_rate": 4.229586526081707e-06, "loss": 1.0457, "step": 2516 }, { "epoch": 0.2647140021297014, "grad_norm": 2.7928016735192167, "learning_rate": 4.22898364324719e-06, "loss": 0.9843, "step": 2517 }, { "epoch": 0.26481917257154874, "grad_norm": 1.3854879435295921, "learning_rate": 4.228380567617566e-06, "loss": 1.06, "step": 2518 }, { "epoch": 0.26492434301339607, "grad_norm": 2.3894512440051945, "learning_rate": 4.227777299260083e-06, "loss": 1.0271, "step": 2519 }, { "epoch": 0.2650295134552434, "grad_norm": 2.409763930345137, "learning_rate": 4.227173838242008e-06, "loss": 1.0086, "step": 2520 }, { "epoch": 0.2651346838970907, "grad_norm": 2.300365291967152, "learning_rate": 4.2265701846306336e-06, "loss": 1.0139, "step": 2521 }, { "epoch": 0.26523985433893804, "grad_norm": 2.9413706238859936, "learning_rate": 4.225966338493272e-06, "loss": 1.0238, "step": 2522 }, { "epoch": 0.26534502478078537, "grad_norm": 3.3804230105479385, "learning_rate": 4.225362299897254e-06, "loss": 1.0536, "step": 2523 }, { "epoch": 0.2654501952226327, "grad_norm": 2.708985307345623, "learning_rate": 4.2247580689099376e-06, "loss": 0.9965, "step": 2524 }, { "epoch": 0.26555536566448, "grad_norm": 2.8007246157283494, "learning_rate": 4.224153645598698e-06, "loss": 1.033, "step": 2525 }, { "epoch": 0.26566053610632734, "grad_norm": 2.552503024762742, "learning_rate": 4.2235490300309324e-06, "loss": 1.0094, "step": 2526 }, { "epoch": 0.26576570654817466, "grad_norm": 2.761858798258985, "learning_rate": 4.222944222274062e-06, "loss": 1.0003, "step": 2527 }, { "epoch": 0.26587087699002193, "grad_norm": 2.5736511815926844, "learning_rate": 4.222339222395526e-06, "loss": 0.9731, "step": 2528 }, { "epoch": 0.26597604743186926, "grad_norm": 2.6976780654834744, "learning_rate": 4.221734030462787e-06, "loss": 1.0515, "step": 2529 }, { "epoch": 0.2660812178737166, "grad_norm": 3.4009547412639187, "learning_rate": 4.22112864654333e-06, "loss": 1.02, "step": 2530 }, { "epoch": 0.2661863883155639, "grad_norm": 2.060001991686356, "learning_rate": 4.220523070704657e-06, "loss": 0.967, "step": 2531 }, { "epoch": 0.26629155875741123, "grad_norm": 3.326975711226428, "learning_rate": 4.219917303014297e-06, "loss": 1.0475, "step": 2532 }, { "epoch": 0.26639672919925855, "grad_norm": 3.6695180566563743, "learning_rate": 4.219311343539797e-06, "loss": 1.0338, "step": 2533 }, { "epoch": 0.2665018996411059, "grad_norm": 3.402820484725127, "learning_rate": 4.218705192348725e-06, "loss": 1.0143, "step": 2534 }, { "epoch": 0.2666070700829532, "grad_norm": 2.611669788607023, "learning_rate": 4.2180988495086745e-06, "loss": 1.0101, "step": 2535 }, { "epoch": 0.2667122405248005, "grad_norm": 2.1839819006885364, "learning_rate": 4.217492315087255e-06, "loss": 1.0269, "step": 2536 }, { "epoch": 0.26681741096664785, "grad_norm": 2.6726735252473715, "learning_rate": 4.2168855891520995e-06, "loss": 1.0632, "step": 2537 }, { "epoch": 0.2669225814084951, "grad_norm": 2.8199626949758305, "learning_rate": 4.216278671770865e-06, "loss": 1.0688, "step": 2538 }, { "epoch": 0.26702775185034244, "grad_norm": 3.170146232476931, "learning_rate": 4.2156715630112245e-06, "loss": 0.9838, "step": 2539 }, { "epoch": 0.26713292229218977, "grad_norm": 3.0560237745595513, "learning_rate": 4.215064262940878e-06, "loss": 1.0116, "step": 2540 }, { "epoch": 0.2672380927340371, "grad_norm": 2.510504357329309, "learning_rate": 4.214456771627542e-06, "loss": 0.9855, "step": 2541 }, { "epoch": 0.2673432631758844, "grad_norm": 2.5311289381637163, "learning_rate": 4.213849089138959e-06, "loss": 1.0087, "step": 2542 }, { "epoch": 0.26744843361773174, "grad_norm": 2.244207627746343, "learning_rate": 4.213241215542888e-06, "loss": 0.9875, "step": 2543 }, { "epoch": 0.26755360405957906, "grad_norm": 1.9425305151864143, "learning_rate": 4.2126331509071125e-06, "loss": 0.9958, "step": 2544 }, { "epoch": 0.2676587745014264, "grad_norm": 2.895384366283835, "learning_rate": 4.212024895299437e-06, "loss": 0.9963, "step": 2545 }, { "epoch": 0.2677639449432737, "grad_norm": 2.6493035965793443, "learning_rate": 4.211416448787686e-06, "loss": 1.0128, "step": 2546 }, { "epoch": 0.26786911538512104, "grad_norm": 2.303330006776538, "learning_rate": 4.210807811439707e-06, "loss": 1.0284, "step": 2547 }, { "epoch": 0.2679742858269683, "grad_norm": 2.6701335223547833, "learning_rate": 4.210198983323366e-06, "loss": 1.0172, "step": 2548 }, { "epoch": 0.26807945626881563, "grad_norm": 2.548255245355431, "learning_rate": 4.209589964506553e-06, "loss": 1.0094, "step": 2549 }, { "epoch": 0.26818462671066295, "grad_norm": 2.5662458724791417, "learning_rate": 4.2089807550571786e-06, "loss": 1.0284, "step": 2550 }, { "epoch": 0.2682897971525103, "grad_norm": 2.7555215764610255, "learning_rate": 4.208371355043174e-06, "loss": 0.9936, "step": 2551 }, { "epoch": 0.2683949675943576, "grad_norm": 1.6391645215285977, "learning_rate": 4.207761764532493e-06, "loss": 1.0162, "step": 2552 }, { "epoch": 0.2685001380362049, "grad_norm": 2.861451794419541, "learning_rate": 4.207151983593109e-06, "loss": 1.0493, "step": 2553 }, { "epoch": 0.26860530847805225, "grad_norm": 3.000252612225211, "learning_rate": 4.206542012293016e-06, "loss": 0.9965, "step": 2554 }, { "epoch": 0.2687104789198996, "grad_norm": 3.0326862394669813, "learning_rate": 4.205931850700232e-06, "loss": 1.0026, "step": 2555 }, { "epoch": 0.2688156493617469, "grad_norm": 2.6432936556834026, "learning_rate": 4.205321498882795e-06, "loss": 1.0279, "step": 2556 }, { "epoch": 0.2689208198035942, "grad_norm": 2.9469947026091146, "learning_rate": 4.204710956908763e-06, "loss": 0.9982, "step": 2557 }, { "epoch": 0.2690259902454415, "grad_norm": 3.0253527353338447, "learning_rate": 4.204100224846217e-06, "loss": 1.0196, "step": 2558 }, { "epoch": 0.2691311606872888, "grad_norm": 2.8517981562593753, "learning_rate": 4.203489302763258e-06, "loss": 1.0275, "step": 2559 }, { "epoch": 0.26923633112913614, "grad_norm": 2.4348277733854418, "learning_rate": 4.202878190728009e-06, "loss": 0.9938, "step": 2560 }, { "epoch": 0.26934150157098347, "grad_norm": 3.561918888528251, "learning_rate": 4.202266888808613e-06, "loss": 1.041, "step": 2561 }, { "epoch": 0.2694466720128308, "grad_norm": 2.0281988514230394, "learning_rate": 4.201655397073234e-06, "loss": 1.0193, "step": 2562 }, { "epoch": 0.2695518424546781, "grad_norm": 2.6293480142232317, "learning_rate": 4.2010437155900605e-06, "loss": 1.0125, "step": 2563 }, { "epoch": 0.26965701289652544, "grad_norm": 3.250687419475308, "learning_rate": 4.200431844427299e-06, "loss": 0.993, "step": 2564 }, { "epoch": 0.26976218333837276, "grad_norm": 2.42448998982233, "learning_rate": 4.199819783653177e-06, "loss": 1.0116, "step": 2565 }, { "epoch": 0.2698673537802201, "grad_norm": 2.6337946712929936, "learning_rate": 4.199207533335944e-06, "loss": 0.9941, "step": 2566 }, { "epoch": 0.2699725242220674, "grad_norm": 2.710054204566471, "learning_rate": 4.198595093543871e-06, "loss": 0.9949, "step": 2567 }, { "epoch": 0.2700776946639147, "grad_norm": 2.697631288571504, "learning_rate": 4.197982464345251e-06, "loss": 0.9725, "step": 2568 }, { "epoch": 0.270182865105762, "grad_norm": 2.5276904086562926, "learning_rate": 4.197369645808394e-06, "loss": 1.0171, "step": 2569 }, { "epoch": 0.27028803554760933, "grad_norm": 2.46942882217501, "learning_rate": 4.196756638001638e-06, "loss": 1.0111, "step": 2570 }, { "epoch": 0.27039320598945665, "grad_norm": 2.657116364982858, "learning_rate": 4.196143440993335e-06, "loss": 1.0135, "step": 2571 }, { "epoch": 0.270498376431304, "grad_norm": 2.173209952959072, "learning_rate": 4.195530054851863e-06, "loss": 0.9677, "step": 2572 }, { "epoch": 0.2706035468731513, "grad_norm": 2.22871510749567, "learning_rate": 4.1949164796456174e-06, "loss": 1.0164, "step": 2573 }, { "epoch": 0.2707087173149986, "grad_norm": 1.903168210740841, "learning_rate": 4.194302715443018e-06, "loss": 0.9581, "step": 2574 }, { "epoch": 0.27081388775684595, "grad_norm": 1.8872564936631262, "learning_rate": 4.193688762312504e-06, "loss": 0.9807, "step": 2575 }, { "epoch": 0.2709190581986933, "grad_norm": 2.456914305309123, "learning_rate": 4.193074620322536e-06, "loss": 1.0195, "step": 2576 }, { "epoch": 0.2710242286405406, "grad_norm": 3.185302938492518, "learning_rate": 4.192460289541596e-06, "loss": 0.9971, "step": 2577 }, { "epoch": 0.27112939908238787, "grad_norm": 3.314584198817348, "learning_rate": 4.191845770038186e-06, "loss": 0.9779, "step": 2578 }, { "epoch": 0.2712345695242352, "grad_norm": 2.876042479155621, "learning_rate": 4.19123106188083e-06, "loss": 0.9777, "step": 2579 }, { "epoch": 0.2713397399660825, "grad_norm": 2.446043436959705, "learning_rate": 4.1906161651380725e-06, "loss": 1.0048, "step": 2580 }, { "epoch": 0.27144491040792984, "grad_norm": 2.376061110547315, "learning_rate": 4.19000107987848e-06, "loss": 1.0115, "step": 2581 }, { "epoch": 0.27155008084977716, "grad_norm": 2.681106684018155, "learning_rate": 4.189385806170637e-06, "loss": 1.027, "step": 2582 }, { "epoch": 0.2716552512916245, "grad_norm": 3.098522232486938, "learning_rate": 4.188770344083155e-06, "loss": 0.9921, "step": 2583 }, { "epoch": 0.2717604217334718, "grad_norm": 2.90935844056823, "learning_rate": 4.188154693684659e-06, "loss": 1.0271, "step": 2584 }, { "epoch": 0.27186559217531914, "grad_norm": 3.1110671923281936, "learning_rate": 4.187538855043802e-06, "loss": 1.0198, "step": 2585 }, { "epoch": 0.27197076261716646, "grad_norm": 2.682316070595872, "learning_rate": 4.186922828229254e-06, "loss": 1.0307, "step": 2586 }, { "epoch": 0.2720759330590138, "grad_norm": 3.015097650865152, "learning_rate": 4.186306613309704e-06, "loss": 1.0326, "step": 2587 }, { "epoch": 0.2721811035008611, "grad_norm": 2.366488727509157, "learning_rate": 4.185690210353869e-06, "loss": 1.0285, "step": 2588 }, { "epoch": 0.2722862739427084, "grad_norm": 2.6756347836291523, "learning_rate": 4.185073619430479e-06, "loss": 1.0118, "step": 2589 }, { "epoch": 0.2723914443845557, "grad_norm": 2.001448901496582, "learning_rate": 4.184456840608291e-06, "loss": 1.0067, "step": 2590 }, { "epoch": 0.272496614826403, "grad_norm": 2.425491517107179, "learning_rate": 4.183839873956081e-06, "loss": 0.9963, "step": 2591 }, { "epoch": 0.27260178526825035, "grad_norm": 3.4036870716254417, "learning_rate": 4.183222719542643e-06, "loss": 0.9999, "step": 2592 }, { "epoch": 0.2727069557100977, "grad_norm": 2.720894946053787, "learning_rate": 4.182605377436797e-06, "loss": 1.0125, "step": 2593 }, { "epoch": 0.272812126151945, "grad_norm": 2.6441136268156793, "learning_rate": 4.18198784770738e-06, "loss": 1.0377, "step": 2594 }, { "epoch": 0.2729172965937923, "grad_norm": 3.0457102136253273, "learning_rate": 4.1813701304232515e-06, "loss": 1.0487, "step": 2595 }, { "epoch": 0.27302246703563965, "grad_norm": 2.559547849904084, "learning_rate": 4.1807522256532925e-06, "loss": 0.9605, "step": 2596 }, { "epoch": 0.273127637477487, "grad_norm": 1.8862838127732708, "learning_rate": 4.1801341334664035e-06, "loss": 0.9979, "step": 2597 }, { "epoch": 0.2732328079193343, "grad_norm": 3.6119148619521337, "learning_rate": 4.179515853931507e-06, "loss": 1.0307, "step": 2598 }, { "epoch": 0.27333797836118157, "grad_norm": 3.286509646757304, "learning_rate": 4.178897387117547e-06, "loss": 0.9789, "step": 2599 }, { "epoch": 0.2734431488030289, "grad_norm": 2.7213519725866573, "learning_rate": 4.178278733093485e-06, "loss": 1.0111, "step": 2600 }, { "epoch": 0.2735483192448762, "grad_norm": 2.352945473282292, "learning_rate": 4.177659891928307e-06, "loss": 1.0025, "step": 2601 }, { "epoch": 0.27365348968672354, "grad_norm": 2.5229083023446015, "learning_rate": 4.1770408636910185e-06, "loss": 1.0267, "step": 2602 }, { "epoch": 0.27375866012857086, "grad_norm": 2.818848626261983, "learning_rate": 4.176421648450646e-06, "loss": 0.9997, "step": 2603 }, { "epoch": 0.2738638305704182, "grad_norm": 2.737597916134103, "learning_rate": 4.175802246276237e-06, "loss": 1.0182, "step": 2604 }, { "epoch": 0.2739690010122655, "grad_norm": 2.3272831659875393, "learning_rate": 4.1751826572368596e-06, "loss": 1.0206, "step": 2605 }, { "epoch": 0.27407417145411284, "grad_norm": 3.029868785399325, "learning_rate": 4.174562881401602e-06, "loss": 1.0163, "step": 2606 }, { "epoch": 0.27417934189596016, "grad_norm": 3.336473793800413, "learning_rate": 4.173942918839576e-06, "loss": 0.976, "step": 2607 }, { "epoch": 0.2742845123378075, "grad_norm": 2.6514956429533942, "learning_rate": 4.17332276961991e-06, "loss": 1.0177, "step": 2608 }, { "epoch": 0.27438968277965475, "grad_norm": 2.679420445962811, "learning_rate": 4.172702433811756e-06, "loss": 1.0326, "step": 2609 }, { "epoch": 0.2744948532215021, "grad_norm": 3.6351143708377616, "learning_rate": 4.172081911484287e-06, "loss": 1.005, "step": 2610 }, { "epoch": 0.2746000236633494, "grad_norm": 2.9056209865867184, "learning_rate": 4.171461202706696e-06, "loss": 1.0271, "step": 2611 }, { "epoch": 0.2747051941051967, "grad_norm": 2.417998566001529, "learning_rate": 4.170840307548196e-06, "loss": 1.02, "step": 2612 }, { "epoch": 0.27481036454704405, "grad_norm": 2.9211736184925505, "learning_rate": 4.170219226078023e-06, "loss": 1.0503, "step": 2613 }, { "epoch": 0.2749155349888914, "grad_norm": 2.418767483745108, "learning_rate": 4.169597958365431e-06, "loss": 1.0352, "step": 2614 }, { "epoch": 0.2750207054307387, "grad_norm": 1.9237097504393978, "learning_rate": 4.1689765044796965e-06, "loss": 1.0098, "step": 2615 }, { "epoch": 0.275125875872586, "grad_norm": 2.6838641339374902, "learning_rate": 4.168354864490117e-06, "loss": 1.0082, "step": 2616 }, { "epoch": 0.27523104631443335, "grad_norm": 2.577137912600681, "learning_rate": 4.16773303846601e-06, "loss": 1.0114, "step": 2617 }, { "epoch": 0.27533621675628067, "grad_norm": 3.0299539149461783, "learning_rate": 4.167111026476714e-06, "loss": 1.0265, "step": 2618 }, { "epoch": 0.27544138719812794, "grad_norm": 2.3354635781135595, "learning_rate": 4.166488828591587e-06, "loss": 1.0034, "step": 2619 }, { "epoch": 0.27554655763997526, "grad_norm": 3.2046716724463673, "learning_rate": 4.1658664448800105e-06, "loss": 1.0174, "step": 2620 }, { "epoch": 0.2756517280818226, "grad_norm": 2.4271263749473597, "learning_rate": 4.165243875411385e-06, "loss": 1.038, "step": 2621 }, { "epoch": 0.2757568985236699, "grad_norm": 2.946133587548101, "learning_rate": 4.16462112025513e-06, "loss": 1.0256, "step": 2622 }, { "epoch": 0.27586206896551724, "grad_norm": 2.4337734453214823, "learning_rate": 4.163998179480689e-06, "loss": 1.0307, "step": 2623 }, { "epoch": 0.27596723940736456, "grad_norm": 2.551909935384673, "learning_rate": 4.163375053157526e-06, "loss": 1.0084, "step": 2624 }, { "epoch": 0.2760724098492119, "grad_norm": 3.4353272254389964, "learning_rate": 4.162751741355122e-06, "loss": 1.0516, "step": 2625 }, { "epoch": 0.2761775802910592, "grad_norm": 2.070361498390321, "learning_rate": 4.1621282441429824e-06, "loss": 0.9945, "step": 2626 }, { "epoch": 0.27628275073290653, "grad_norm": 2.6297532263169803, "learning_rate": 4.161504561590632e-06, "loss": 1.0422, "step": 2627 }, { "epoch": 0.27638792117475386, "grad_norm": 2.225063604883171, "learning_rate": 4.1608806937676156e-06, "loss": 1.0377, "step": 2628 }, { "epoch": 0.2764930916166011, "grad_norm": 2.1970819975722082, "learning_rate": 4.160256640743499e-06, "loss": 1.0056, "step": 2629 }, { "epoch": 0.27659826205844845, "grad_norm": 1.8843326423778457, "learning_rate": 4.159632402587871e-06, "loss": 1.0027, "step": 2630 }, { "epoch": 0.2767034325002958, "grad_norm": 2.384132121607812, "learning_rate": 4.159007979370337e-06, "loss": 0.9889, "step": 2631 }, { "epoch": 0.2768086029421431, "grad_norm": 2.3542048671824496, "learning_rate": 4.158383371160526e-06, "loss": 0.9994, "step": 2632 }, { "epoch": 0.2769137733839904, "grad_norm": 1.9109594616784749, "learning_rate": 4.157758578028086e-06, "loss": 0.9851, "step": 2633 }, { "epoch": 0.27701894382583775, "grad_norm": 2.719043073466606, "learning_rate": 4.1571336000426865e-06, "loss": 0.9938, "step": 2634 }, { "epoch": 0.2771241142676851, "grad_norm": 2.3398878575871818, "learning_rate": 4.156508437274017e-06, "loss": 0.9909, "step": 2635 }, { "epoch": 0.2772292847095324, "grad_norm": 2.6076473869071077, "learning_rate": 4.15588308979179e-06, "loss": 1.0094, "step": 2636 }, { "epoch": 0.2773344551513797, "grad_norm": 2.9186482297651373, "learning_rate": 4.155257557665734e-06, "loss": 1.0107, "step": 2637 }, { "epoch": 0.27743962559322705, "grad_norm": 2.924548529106016, "learning_rate": 4.154631840965603e-06, "loss": 1.0273, "step": 2638 }, { "epoch": 0.2775447960350743, "grad_norm": 2.79169988893619, "learning_rate": 4.154005939761167e-06, "loss": 1.045, "step": 2639 }, { "epoch": 0.27764996647692164, "grad_norm": 2.5444212624663476, "learning_rate": 4.15337985412222e-06, "loss": 0.9968, "step": 2640 }, { "epoch": 0.27775513691876896, "grad_norm": 2.770240170620273, "learning_rate": 4.152753584118576e-06, "loss": 1.0392, "step": 2641 }, { "epoch": 0.2778603073606163, "grad_norm": 3.112319778752494, "learning_rate": 4.152127129820067e-06, "loss": 1.0249, "step": 2642 }, { "epoch": 0.2779654778024636, "grad_norm": 2.310464927042958, "learning_rate": 4.151500491296551e-06, "loss": 0.9965, "step": 2643 }, { "epoch": 0.27807064824431094, "grad_norm": 3.2829488475899558, "learning_rate": 4.150873668617899e-06, "loss": 0.9908, "step": 2644 }, { "epoch": 0.27817581868615826, "grad_norm": 2.176507323056151, "learning_rate": 4.150246661854009e-06, "loss": 0.9991, "step": 2645 }, { "epoch": 0.2782809891280056, "grad_norm": 1.5655734274913256, "learning_rate": 4.149619471074797e-06, "loss": 0.9418, "step": 2646 }, { "epoch": 0.2783861595698529, "grad_norm": 2.108505326156569, "learning_rate": 4.148992096350199e-06, "loss": 1.0599, "step": 2647 }, { "epoch": 0.27849133001170023, "grad_norm": 3.3136337973670202, "learning_rate": 4.1483645377501726e-06, "loss": 1.0149, "step": 2648 }, { "epoch": 0.27859650045354756, "grad_norm": 2.54494923113061, "learning_rate": 4.147736795344695e-06, "loss": 0.9897, "step": 2649 }, { "epoch": 0.2787016708953948, "grad_norm": 3.225755330401636, "learning_rate": 4.147108869203765e-06, "loss": 1.0042, "step": 2650 }, { "epoch": 0.27880684133724215, "grad_norm": 2.7331184633216123, "learning_rate": 4.146480759397401e-06, "loss": 1.0194, "step": 2651 }, { "epoch": 0.2789120117790895, "grad_norm": 3.0050939408575794, "learning_rate": 4.145852465995642e-06, "loss": 0.9754, "step": 2652 }, { "epoch": 0.2790171822209368, "grad_norm": 1.8464056831154667, "learning_rate": 4.145223989068547e-06, "loss": 0.9801, "step": 2653 }, { "epoch": 0.2791223526627841, "grad_norm": 3.288289011526489, "learning_rate": 4.1445953286861976e-06, "loss": 1.0099, "step": 2654 }, { "epoch": 0.27922752310463145, "grad_norm": 2.9001394289482763, "learning_rate": 4.143966484918692e-06, "loss": 1.0246, "step": 2655 }, { "epoch": 0.27933269354647877, "grad_norm": 2.7075837385137715, "learning_rate": 4.143337457836154e-06, "loss": 1.0293, "step": 2656 }, { "epoch": 0.2794378639883261, "grad_norm": 3.4531421732250114, "learning_rate": 4.142708247508723e-06, "loss": 1.015, "step": 2657 }, { "epoch": 0.2795430344301734, "grad_norm": 2.473925702976424, "learning_rate": 4.142078854006561e-06, "loss": 1.0234, "step": 2658 }, { "epoch": 0.27964820487202074, "grad_norm": 2.533831100531155, "learning_rate": 4.141449277399852e-06, "loss": 1.04, "step": 2659 }, { "epoch": 0.279753375313868, "grad_norm": 2.9141485074580458, "learning_rate": 4.140819517758796e-06, "loss": 1.0459, "step": 2660 }, { "epoch": 0.27985854575571534, "grad_norm": 1.6275648359890877, "learning_rate": 4.1401895751536185e-06, "loss": 1.0112, "step": 2661 }, { "epoch": 0.27996371619756266, "grad_norm": 2.3074858640345064, "learning_rate": 4.139559449654561e-06, "loss": 1.0349, "step": 2662 }, { "epoch": 0.28006888663941, "grad_norm": 2.266029848888559, "learning_rate": 4.138929141331888e-06, "loss": 1.0054, "step": 2663 }, { "epoch": 0.2801740570812573, "grad_norm": 2.30011851501213, "learning_rate": 4.138298650255885e-06, "loss": 1.0183, "step": 2664 }, { "epoch": 0.28027922752310463, "grad_norm": 2.0840813611244378, "learning_rate": 4.137667976496855e-06, "loss": 1.0502, "step": 2665 }, { "epoch": 0.28038439796495196, "grad_norm": 2.1068525223894343, "learning_rate": 4.137037120125125e-06, "loss": 0.9381, "step": 2666 }, { "epoch": 0.2804895684067993, "grad_norm": 2.7828354238745967, "learning_rate": 4.136406081211038e-06, "loss": 1.019, "step": 2667 }, { "epoch": 0.2805947388486466, "grad_norm": 2.6442032647808733, "learning_rate": 4.135774859824962e-06, "loss": 1.0206, "step": 2668 }, { "epoch": 0.28069990929049393, "grad_norm": 2.5457929175843677, "learning_rate": 4.13514345603728e-06, "loss": 0.9947, "step": 2669 }, { "epoch": 0.2808050797323412, "grad_norm": 3.348381030141236, "learning_rate": 4.134511869918402e-06, "loss": 1.0436, "step": 2670 }, { "epoch": 0.2809102501741885, "grad_norm": 1.8730510261518738, "learning_rate": 4.133880101538753e-06, "loss": 0.9471, "step": 2671 }, { "epoch": 0.28101542061603585, "grad_norm": 2.39799470254033, "learning_rate": 4.13324815096878e-06, "loss": 1.0186, "step": 2672 }, { "epoch": 0.2811205910578832, "grad_norm": 2.626761498640316, "learning_rate": 4.132616018278951e-06, "loss": 0.9936, "step": 2673 }, { "epoch": 0.2812257614997305, "grad_norm": 2.7240523919341504, "learning_rate": 4.1319837035397525e-06, "loss": 0.9984, "step": 2674 }, { "epoch": 0.2813309319415778, "grad_norm": 2.856921462750168, "learning_rate": 4.131351206821694e-06, "loss": 1.0513, "step": 2675 }, { "epoch": 0.28143610238342515, "grad_norm": 1.9943496303192936, "learning_rate": 4.130718528195303e-06, "loss": 1.0222, "step": 2676 }, { "epoch": 0.28154127282527247, "grad_norm": 4.2289456038778, "learning_rate": 4.130085667731129e-06, "loss": 1.0211, "step": 2677 }, { "epoch": 0.2816464432671198, "grad_norm": 2.9894225702147543, "learning_rate": 4.129452625499739e-06, "loss": 1.0277, "step": 2678 }, { "epoch": 0.2817516137089671, "grad_norm": 1.992493935234122, "learning_rate": 4.128819401571724e-06, "loss": 1.0171, "step": 2679 }, { "epoch": 0.2818567841508144, "grad_norm": 2.652762924314855, "learning_rate": 4.128185996017692e-06, "loss": 1.0551, "step": 2680 }, { "epoch": 0.2819619545926617, "grad_norm": 2.604810015626862, "learning_rate": 4.127552408908274e-06, "loss": 1.0353, "step": 2681 }, { "epoch": 0.28206712503450904, "grad_norm": 2.7095942486504505, "learning_rate": 4.126918640314118e-06, "loss": 1.0172, "step": 2682 }, { "epoch": 0.28217229547635636, "grad_norm": 3.2119845572549863, "learning_rate": 4.1262846903058966e-06, "loss": 0.9824, "step": 2683 }, { "epoch": 0.2822774659182037, "grad_norm": 2.4305337457552003, "learning_rate": 4.1256505589542976e-06, "loss": 1.0247, "step": 2684 }, { "epoch": 0.282382636360051, "grad_norm": 2.0472831189235277, "learning_rate": 4.125016246330034e-06, "loss": 1.0256, "step": 2685 }, { "epoch": 0.28248780680189833, "grad_norm": 2.1373038919099687, "learning_rate": 4.124381752503834e-06, "loss": 1.0105, "step": 2686 }, { "epoch": 0.28259297724374566, "grad_norm": 1.5615996486183839, "learning_rate": 4.123747077546451e-06, "loss": 1.0378, "step": 2687 }, { "epoch": 0.282698147685593, "grad_norm": 1.8388087927806853, "learning_rate": 4.123112221528654e-06, "loss": 1.003, "step": 2688 }, { "epoch": 0.2828033181274403, "grad_norm": 3.487408850014367, "learning_rate": 4.122477184521237e-06, "loss": 1.0202, "step": 2689 }, { "epoch": 0.2829084885692876, "grad_norm": 2.666547606564312, "learning_rate": 4.121841966595009e-06, "loss": 1.042, "step": 2690 }, { "epoch": 0.2830136590111349, "grad_norm": 2.99015276586862, "learning_rate": 4.121206567820803e-06, "loss": 1.0559, "step": 2691 }, { "epoch": 0.2831188294529822, "grad_norm": 2.866974473703025, "learning_rate": 4.120570988269472e-06, "loss": 0.9848, "step": 2692 }, { "epoch": 0.28322399989482955, "grad_norm": 2.50728052143124, "learning_rate": 4.119935228011885e-06, "loss": 1.0286, "step": 2693 }, { "epoch": 0.28332917033667687, "grad_norm": 2.3872886212034095, "learning_rate": 4.119299287118937e-06, "loss": 1.0258, "step": 2694 }, { "epoch": 0.2834343407785242, "grad_norm": 1.7175832511041358, "learning_rate": 4.118663165661538e-06, "loss": 1.0324, "step": 2695 }, { "epoch": 0.2835395112203715, "grad_norm": 2.657674607317461, "learning_rate": 4.118026863710623e-06, "loss": 1.0573, "step": 2696 }, { "epoch": 0.28364468166221884, "grad_norm": 2.5589768139758755, "learning_rate": 4.117390381337144e-06, "loss": 0.9864, "step": 2697 }, { "epoch": 0.28374985210406617, "grad_norm": 3.1191553451509093, "learning_rate": 4.116753718612072e-06, "loss": 0.9765, "step": 2698 }, { "epoch": 0.2838550225459135, "grad_norm": 3.6343579637339842, "learning_rate": 4.116116875606402e-06, "loss": 1.0286, "step": 2699 }, { "epoch": 0.28396019298776076, "grad_norm": 2.884280278934574, "learning_rate": 4.1154798523911446e-06, "loss": 1.0308, "step": 2700 }, { "epoch": 0.2840653634296081, "grad_norm": 2.2973711289546603, "learning_rate": 4.114842649037335e-06, "loss": 0.965, "step": 2701 }, { "epoch": 0.2841705338714554, "grad_norm": 2.870025826884082, "learning_rate": 4.114205265616026e-06, "loss": 0.9826, "step": 2702 }, { "epoch": 0.28427570431330274, "grad_norm": 2.8685291174085332, "learning_rate": 4.1135677021982885e-06, "loss": 1.0087, "step": 2703 }, { "epoch": 0.28438087475515006, "grad_norm": 3.000969023465314, "learning_rate": 4.11292995885522e-06, "loss": 1.0142, "step": 2704 }, { "epoch": 0.2844860451969974, "grad_norm": 2.382052707066313, "learning_rate": 4.11229203565793e-06, "loss": 0.9957, "step": 2705 }, { "epoch": 0.2845912156388447, "grad_norm": 2.5092385915195723, "learning_rate": 4.111653932677553e-06, "loss": 0.97, "step": 2706 }, { "epoch": 0.28469638608069203, "grad_norm": 2.83339755995173, "learning_rate": 4.111015649985243e-06, "loss": 0.9873, "step": 2707 }, { "epoch": 0.28480155652253936, "grad_norm": 2.7074863691662046, "learning_rate": 4.110377187652174e-06, "loss": 0.987, "step": 2708 }, { "epoch": 0.2849067269643867, "grad_norm": 1.9740060538086266, "learning_rate": 4.109738545749538e-06, "loss": 0.9809, "step": 2709 }, { "epoch": 0.285011897406234, "grad_norm": 3.3206180620517114, "learning_rate": 4.1090997243485494e-06, "loss": 1.0318, "step": 2710 }, { "epoch": 0.2851170678480813, "grad_norm": 3.4193647547163435, "learning_rate": 4.108460723520441e-06, "loss": 1.0323, "step": 2711 }, { "epoch": 0.2852222382899286, "grad_norm": 2.461999394482132, "learning_rate": 4.107821543336468e-06, "loss": 0.969, "step": 2712 }, { "epoch": 0.2853274087317759, "grad_norm": 2.033813622679507, "learning_rate": 4.107182183867903e-06, "loss": 0.9876, "step": 2713 }, { "epoch": 0.28543257917362325, "grad_norm": 2.7246823900429673, "learning_rate": 4.106542645186039e-06, "loss": 0.9927, "step": 2714 }, { "epoch": 0.28553774961547057, "grad_norm": 2.869297378241626, "learning_rate": 4.10590292736219e-06, "loss": 1.0259, "step": 2715 }, { "epoch": 0.2856429200573179, "grad_norm": 2.521121687829408, "learning_rate": 4.105263030467689e-06, "loss": 1.0261, "step": 2716 }, { "epoch": 0.2857480904991652, "grad_norm": 2.599567696407617, "learning_rate": 4.10462295457389e-06, "loss": 1.0017, "step": 2717 }, { "epoch": 0.28585326094101254, "grad_norm": 2.024220267749111, "learning_rate": 4.103982699752167e-06, "loss": 0.9992, "step": 2718 }, { "epoch": 0.28595843138285987, "grad_norm": 2.6696411790056565, "learning_rate": 4.103342266073913e-06, "loss": 1.0149, "step": 2719 }, { "epoch": 0.2860636018247072, "grad_norm": 1.8522315223803922, "learning_rate": 4.102701653610541e-06, "loss": 1.0337, "step": 2720 }, { "epoch": 0.28616877226655446, "grad_norm": 2.7949963553772075, "learning_rate": 4.102060862433484e-06, "loss": 0.9954, "step": 2721 }, { "epoch": 0.2862739427084018, "grad_norm": 2.229728603318455, "learning_rate": 4.101419892614195e-06, "loss": 1.0195, "step": 2722 }, { "epoch": 0.2863791131502491, "grad_norm": 3.3540545324036914, "learning_rate": 4.10077874422415e-06, "loss": 0.9692, "step": 2723 }, { "epoch": 0.28648428359209643, "grad_norm": 2.7750001031832183, "learning_rate": 4.100137417334838e-06, "loss": 1.0112, "step": 2724 }, { "epoch": 0.28658945403394376, "grad_norm": 2.8776802158198462, "learning_rate": 4.099495912017773e-06, "loss": 1.0133, "step": 2725 }, { "epoch": 0.2866946244757911, "grad_norm": 2.728837469705898, "learning_rate": 4.09885422834449e-06, "loss": 1.0062, "step": 2726 }, { "epoch": 0.2867997949176384, "grad_norm": 3.151019351944655, "learning_rate": 4.0982123663865394e-06, "loss": 1.0328, "step": 2727 }, { "epoch": 0.28690496535948573, "grad_norm": 2.278189959064822, "learning_rate": 4.097570326215495e-06, "loss": 0.9912, "step": 2728 }, { "epoch": 0.28701013580133306, "grad_norm": 3.0741949886346394, "learning_rate": 4.096928107902949e-06, "loss": 1.0505, "step": 2729 }, { "epoch": 0.2871153062431804, "grad_norm": 2.2526302976277495, "learning_rate": 4.096285711520513e-06, "loss": 1.0154, "step": 2730 }, { "epoch": 0.28722047668502765, "grad_norm": 2.4076837142276974, "learning_rate": 4.09564313713982e-06, "loss": 1.0188, "step": 2731 }, { "epoch": 0.287325647126875, "grad_norm": 1.8879117425120007, "learning_rate": 4.095000384832522e-06, "loss": 1.0139, "step": 2732 }, { "epoch": 0.2874308175687223, "grad_norm": 3.1660647193201643, "learning_rate": 4.09435745467029e-06, "loss": 0.9862, "step": 2733 }, { "epoch": 0.2875359880105696, "grad_norm": 2.4093841491214922, "learning_rate": 4.0937143467248176e-06, "loss": 1.038, "step": 2734 }, { "epoch": 0.28764115845241695, "grad_norm": 2.3022115233534937, "learning_rate": 4.093071061067815e-06, "loss": 1.0139, "step": 2735 }, { "epoch": 0.28774632889426427, "grad_norm": 2.285699802794061, "learning_rate": 4.092427597771013e-06, "loss": 0.9595, "step": 2736 }, { "epoch": 0.2878514993361116, "grad_norm": 2.7549603896627004, "learning_rate": 4.091783956906164e-06, "loss": 1.0187, "step": 2737 }, { "epoch": 0.2879566697779589, "grad_norm": 3.483788671598276, "learning_rate": 4.091140138545037e-06, "loss": 1.0093, "step": 2738 }, { "epoch": 0.28806184021980624, "grad_norm": 3.2320477916369437, "learning_rate": 4.090496142759425e-06, "loss": 1.0223, "step": 2739 }, { "epoch": 0.28816701066165357, "grad_norm": 2.4540897960845, "learning_rate": 4.089851969621138e-06, "loss": 1.0203, "step": 2740 }, { "epoch": 0.28827218110350084, "grad_norm": 3.241726113530304, "learning_rate": 4.089207619202006e-06, "loss": 1.015, "step": 2741 }, { "epoch": 0.28837735154534816, "grad_norm": 1.8502699365222972, "learning_rate": 4.088563091573879e-06, "loss": 1.0269, "step": 2742 }, { "epoch": 0.2884825219871955, "grad_norm": 2.5427369122444423, "learning_rate": 4.087918386808627e-06, "loss": 1.0118, "step": 2743 }, { "epoch": 0.2885876924290428, "grad_norm": 2.301419718364066, "learning_rate": 4.087273504978139e-06, "loss": 1.0222, "step": 2744 }, { "epoch": 0.28869286287089013, "grad_norm": 2.917540225698338, "learning_rate": 4.086628446154325e-06, "loss": 1.0672, "step": 2745 }, { "epoch": 0.28879803331273746, "grad_norm": 3.578547174167657, "learning_rate": 4.085983210409114e-06, "loss": 0.9953, "step": 2746 }, { "epoch": 0.2889032037545848, "grad_norm": 3.3534719860895152, "learning_rate": 4.085337797814455e-06, "loss": 0.9982, "step": 2747 }, { "epoch": 0.2890083741964321, "grad_norm": 2.676661209030138, "learning_rate": 4.084692208442316e-06, "loss": 1.0266, "step": 2748 }, { "epoch": 0.28911354463827943, "grad_norm": 3.20816985126381, "learning_rate": 4.084046442364686e-06, "loss": 1.0083, "step": 2749 }, { "epoch": 0.28921871508012675, "grad_norm": 4.219244894983786, "learning_rate": 4.0834004996535706e-06, "loss": 1.0561, "step": 2750 }, { "epoch": 0.289323885521974, "grad_norm": 2.1890256985234298, "learning_rate": 4.082754380381001e-06, "loss": 0.9447, "step": 2751 }, { "epoch": 0.28942905596382135, "grad_norm": 2.8643683405510396, "learning_rate": 4.082108084619021e-06, "loss": 0.9889, "step": 2752 }, { "epoch": 0.28953422640566867, "grad_norm": 2.51789630360384, "learning_rate": 4.0814616124397015e-06, "loss": 0.991, "step": 2753 }, { "epoch": 0.289639396847516, "grad_norm": 2.0882264359630436, "learning_rate": 4.080814963915125e-06, "loss": 0.9913, "step": 2754 }, { "epoch": 0.2897445672893633, "grad_norm": 2.214555512262277, "learning_rate": 4.0801681391174005e-06, "loss": 0.9323, "step": 2755 }, { "epoch": 0.28984973773121064, "grad_norm": 2.4270616412613424, "learning_rate": 4.079521138118654e-06, "loss": 1.0091, "step": 2756 }, { "epoch": 0.28995490817305797, "grad_norm": 2.5107109147244344, "learning_rate": 4.07887396099103e-06, "loss": 1.0413, "step": 2757 }, { "epoch": 0.2900600786149053, "grad_norm": 2.599915906438244, "learning_rate": 4.078226607806694e-06, "loss": 0.9893, "step": 2758 }, { "epoch": 0.2901652490567526, "grad_norm": 2.2767518525790287, "learning_rate": 4.077579078637831e-06, "loss": 1.0241, "step": 2759 }, { "epoch": 0.29027041949859994, "grad_norm": 2.794647276010669, "learning_rate": 4.076931373556646e-06, "loss": 1.037, "step": 2760 }, { "epoch": 0.2903755899404472, "grad_norm": 3.253264621576212, "learning_rate": 4.076283492635362e-06, "loss": 1.0122, "step": 2761 }, { "epoch": 0.29048076038229453, "grad_norm": 2.499980883018592, "learning_rate": 4.075635435946225e-06, "loss": 1.0027, "step": 2762 }, { "epoch": 0.29058593082414186, "grad_norm": 2.972899947742262, "learning_rate": 4.074987203561497e-06, "loss": 0.9985, "step": 2763 }, { "epoch": 0.2906911012659892, "grad_norm": 1.9737479143234022, "learning_rate": 4.074338795553459e-06, "loss": 1.0032, "step": 2764 }, { "epoch": 0.2907962717078365, "grad_norm": 2.584186588539805, "learning_rate": 4.073690211994417e-06, "loss": 1.0373, "step": 2765 }, { "epoch": 0.29090144214968383, "grad_norm": 3.1651907150502154, "learning_rate": 4.0730414529566905e-06, "loss": 0.9922, "step": 2766 }, { "epoch": 0.29100661259153116, "grad_norm": 2.6878955915114253, "learning_rate": 4.072392518512623e-06, "loss": 1.0367, "step": 2767 }, { "epoch": 0.2911117830333785, "grad_norm": 2.489228399908619, "learning_rate": 4.071743408734574e-06, "loss": 1.0302, "step": 2768 }, { "epoch": 0.2912169534752258, "grad_norm": 3.060067782653973, "learning_rate": 4.071094123694926e-06, "loss": 0.9873, "step": 2769 }, { "epoch": 0.29132212391707313, "grad_norm": 2.9513075554961388, "learning_rate": 4.070444663466079e-06, "loss": 1.0038, "step": 2770 }, { "epoch": 0.29142729435892045, "grad_norm": 2.495540383919349, "learning_rate": 4.069795028120452e-06, "loss": 0.9954, "step": 2771 }, { "epoch": 0.2915324648007677, "grad_norm": 4.0277271187757, "learning_rate": 4.069145217730484e-06, "loss": 0.9957, "step": 2772 }, { "epoch": 0.29163763524261505, "grad_norm": 2.511687233295335, "learning_rate": 4.068495232368635e-06, "loss": 0.9685, "step": 2773 }, { "epoch": 0.29174280568446237, "grad_norm": 2.1591231007772085, "learning_rate": 4.067845072107384e-06, "loss": 1.0597, "step": 2774 }, { "epoch": 0.2918479761263097, "grad_norm": 2.4755744056169635, "learning_rate": 4.0671947370192264e-06, "loss": 1.0009, "step": 2775 }, { "epoch": 0.291953146568157, "grad_norm": 1.7500400514158185, "learning_rate": 4.066544227176683e-06, "loss": 1.0042, "step": 2776 }, { "epoch": 0.29205831701000434, "grad_norm": 2.7751869337926225, "learning_rate": 4.065893542652288e-06, "loss": 0.9955, "step": 2777 }, { "epoch": 0.29216348745185167, "grad_norm": 2.0427141686414445, "learning_rate": 4.065242683518599e-06, "loss": 0.9956, "step": 2778 }, { "epoch": 0.292268657893699, "grad_norm": 2.582374430746467, "learning_rate": 4.0645916498481905e-06, "loss": 0.9875, "step": 2779 }, { "epoch": 0.2923738283355463, "grad_norm": 2.1183619829247573, "learning_rate": 4.06394044171366e-06, "loss": 0.9543, "step": 2780 }, { "epoch": 0.29247899877739364, "grad_norm": 2.288872193618224, "learning_rate": 4.063289059187621e-06, "loss": 1.0294, "step": 2781 }, { "epoch": 0.2925841692192409, "grad_norm": 2.9861250226649356, "learning_rate": 4.062637502342708e-06, "loss": 1.0364, "step": 2782 }, { "epoch": 0.29268933966108823, "grad_norm": 2.263444569755448, "learning_rate": 4.061985771251573e-06, "loss": 1.0157, "step": 2783 }, { "epoch": 0.29279451010293556, "grad_norm": 3.25482041267685, "learning_rate": 4.061333865986892e-06, "loss": 1.0169, "step": 2784 }, { "epoch": 0.2928996805447829, "grad_norm": 2.993652291933884, "learning_rate": 4.060681786621357e-06, "loss": 1.0432, "step": 2785 }, { "epoch": 0.2930048509866302, "grad_norm": 2.5325196207248517, "learning_rate": 4.060029533227678e-06, "loss": 0.9786, "step": 2786 }, { "epoch": 0.29311002142847753, "grad_norm": 3.072755160863761, "learning_rate": 4.059377105878586e-06, "loss": 1.0081, "step": 2787 }, { "epoch": 0.29321519187032485, "grad_norm": 2.9059852060216436, "learning_rate": 4.058724504646834e-06, "loss": 0.999, "step": 2788 }, { "epoch": 0.2933203623121722, "grad_norm": 2.1664281660182154, "learning_rate": 4.058071729605191e-06, "loss": 1.0101, "step": 2789 }, { "epoch": 0.2934255327540195, "grad_norm": 2.6755938701735014, "learning_rate": 4.057418780826448e-06, "loss": 0.9915, "step": 2790 }, { "epoch": 0.2935307031958668, "grad_norm": 1.7269644244858031, "learning_rate": 4.05676565838341e-06, "loss": 1.0071, "step": 2791 }, { "epoch": 0.2936358736377141, "grad_norm": 2.74362018348315, "learning_rate": 4.0561123623489096e-06, "loss": 1.0314, "step": 2792 }, { "epoch": 0.2937410440795614, "grad_norm": 2.340557452024337, "learning_rate": 4.0554588927957925e-06, "loss": 0.9678, "step": 2793 }, { "epoch": 0.29384621452140874, "grad_norm": 2.066040916919864, "learning_rate": 4.054805249796925e-06, "loss": 1.0263, "step": 2794 }, { "epoch": 0.29395138496325607, "grad_norm": 2.8805200149811956, "learning_rate": 4.054151433425194e-06, "loss": 1.0603, "step": 2795 }, { "epoch": 0.2940565554051034, "grad_norm": 2.434123603671788, "learning_rate": 4.053497443753505e-06, "loss": 1.0042, "step": 2796 }, { "epoch": 0.2941617258469507, "grad_norm": 3.618193905329278, "learning_rate": 4.052843280854783e-06, "loss": 0.9702, "step": 2797 }, { "epoch": 0.29426689628879804, "grad_norm": 3.1682364893441193, "learning_rate": 4.052188944801972e-06, "loss": 1.0143, "step": 2798 }, { "epoch": 0.29437206673064537, "grad_norm": 2.3744837915482484, "learning_rate": 4.051534435668038e-06, "loss": 0.9982, "step": 2799 }, { "epoch": 0.2944772371724927, "grad_norm": 2.1386699249557872, "learning_rate": 4.050879753525959e-06, "loss": 1.0242, "step": 2800 }, { "epoch": 0.29458240761434, "grad_norm": 2.6446899519304314, "learning_rate": 4.050224898448741e-06, "loss": 0.9404, "step": 2801 }, { "epoch": 0.2946875780561873, "grad_norm": 2.381644444210151, "learning_rate": 4.049569870509404e-06, "loss": 1.0128, "step": 2802 }, { "epoch": 0.2947927484980346, "grad_norm": 2.0924851058105287, "learning_rate": 4.048914669780989e-06, "loss": 1.0263, "step": 2803 }, { "epoch": 0.29489791893988193, "grad_norm": 2.8128059213176666, "learning_rate": 4.048259296336556e-06, "loss": 1.0109, "step": 2804 }, { "epoch": 0.29500308938172926, "grad_norm": 2.230026087010622, "learning_rate": 4.047603750249184e-06, "loss": 0.9928, "step": 2805 }, { "epoch": 0.2951082598235766, "grad_norm": 3.651123695031622, "learning_rate": 4.0469480315919714e-06, "loss": 1.0337, "step": 2806 }, { "epoch": 0.2952134302654239, "grad_norm": 3.794948500887568, "learning_rate": 4.0462921404380376e-06, "loss": 1.0177, "step": 2807 }, { "epoch": 0.29531860070727123, "grad_norm": 2.717172683131933, "learning_rate": 4.045636076860517e-06, "loss": 0.9645, "step": 2808 }, { "epoch": 0.29542377114911855, "grad_norm": 2.8432922404185557, "learning_rate": 4.044979840932567e-06, "loss": 1.0147, "step": 2809 }, { "epoch": 0.2955289415909659, "grad_norm": 2.466690276825208, "learning_rate": 4.044323432727363e-06, "loss": 1.04, "step": 2810 }, { "epoch": 0.2956341120328132, "grad_norm": 1.898880562225239, "learning_rate": 4.0436668523180985e-06, "loss": 0.9796, "step": 2811 }, { "epoch": 0.29573928247466047, "grad_norm": 2.2401687192743265, "learning_rate": 4.04301009977799e-06, "loss": 1.0075, "step": 2812 }, { "epoch": 0.2958444529165078, "grad_norm": 2.779156433583653, "learning_rate": 4.042353175180268e-06, "loss": 1.0163, "step": 2813 }, { "epoch": 0.2959496233583551, "grad_norm": 2.6233755857676617, "learning_rate": 4.041696078598185e-06, "loss": 1.0308, "step": 2814 }, { "epoch": 0.29605479380020244, "grad_norm": 2.89215727215323, "learning_rate": 4.041038810105014e-06, "loss": 1.039, "step": 2815 }, { "epoch": 0.29615996424204977, "grad_norm": 2.2937244141847275, "learning_rate": 4.040381369774045e-06, "loss": 0.9985, "step": 2816 }, { "epoch": 0.2962651346838971, "grad_norm": 2.2753449571613507, "learning_rate": 4.039723757678585e-06, "loss": 0.9842, "step": 2817 }, { "epoch": 0.2963703051257444, "grad_norm": 2.825575794098944, "learning_rate": 4.0390659738919665e-06, "loss": 1.0148, "step": 2818 }, { "epoch": 0.29647547556759174, "grad_norm": 2.1237158283334088, "learning_rate": 4.0384080184875355e-06, "loss": 0.9896, "step": 2819 }, { "epoch": 0.29658064600943906, "grad_norm": 2.8266979036329123, "learning_rate": 4.037749891538661e-06, "loss": 1.0097, "step": 2820 }, { "epoch": 0.2966858164512864, "grad_norm": 2.2004271894165557, "learning_rate": 4.037091593118726e-06, "loss": 1.0418, "step": 2821 }, { "epoch": 0.2967909868931337, "grad_norm": 2.0681432005760914, "learning_rate": 4.036433123301139e-06, "loss": 0.9897, "step": 2822 }, { "epoch": 0.296896157334981, "grad_norm": 2.8211256820827404, "learning_rate": 4.035774482159323e-06, "loss": 1.0378, "step": 2823 }, { "epoch": 0.2970013277768283, "grad_norm": 2.6606426976386675, "learning_rate": 4.035115669766721e-06, "loss": 1.0247, "step": 2824 }, { "epoch": 0.29710649821867563, "grad_norm": 2.5354918405939366, "learning_rate": 4.034456686196798e-06, "loss": 0.9795, "step": 2825 }, { "epoch": 0.29721166866052295, "grad_norm": 3.4840884239739487, "learning_rate": 4.033797531523034e-06, "loss": 0.9938, "step": 2826 }, { "epoch": 0.2973168391023703, "grad_norm": 2.9035094968158135, "learning_rate": 4.033138205818931e-06, "loss": 0.9971, "step": 2827 }, { "epoch": 0.2974220095442176, "grad_norm": 2.388147959432155, "learning_rate": 4.032478709158007e-06, "loss": 0.9786, "step": 2828 }, { "epoch": 0.2975271799860649, "grad_norm": 2.5043218419305657, "learning_rate": 4.0318190416138024e-06, "loss": 1.013, "step": 2829 }, { "epoch": 0.29763235042791225, "grad_norm": 2.630395705298903, "learning_rate": 4.031159203259876e-06, "loss": 0.9978, "step": 2830 }, { "epoch": 0.2977375208697596, "grad_norm": 3.048566956011636, "learning_rate": 4.030499194169803e-06, "loss": 1.022, "step": 2831 }, { "epoch": 0.2978426913116069, "grad_norm": 2.5379425325683656, "learning_rate": 4.029839014417181e-06, "loss": 1.0029, "step": 2832 }, { "epoch": 0.29794786175345417, "grad_norm": 2.6960366196246857, "learning_rate": 4.029178664075625e-06, "loss": 1.0183, "step": 2833 }, { "epoch": 0.2980530321953015, "grad_norm": 3.224224287628243, "learning_rate": 4.028518143218768e-06, "loss": 1.073, "step": 2834 }, { "epoch": 0.2981582026371488, "grad_norm": 2.429803063051489, "learning_rate": 4.027857451920264e-06, "loss": 0.9988, "step": 2835 }, { "epoch": 0.29826337307899614, "grad_norm": 3.126241311853215, "learning_rate": 4.027196590253786e-06, "loss": 1.0351, "step": 2836 }, { "epoch": 0.29836854352084347, "grad_norm": 2.3886966033688557, "learning_rate": 4.026535558293024e-06, "loss": 1.0146, "step": 2837 }, { "epoch": 0.2984737139626908, "grad_norm": 2.1691315598429544, "learning_rate": 4.025874356111689e-06, "loss": 0.9821, "step": 2838 }, { "epoch": 0.2985788844045381, "grad_norm": 3.2536165239458774, "learning_rate": 4.025212983783511e-06, "loss": 1.0239, "step": 2839 }, { "epoch": 0.29868405484638544, "grad_norm": 2.361345102232033, "learning_rate": 4.024551441382235e-06, "loss": 1.0125, "step": 2840 }, { "epoch": 0.29878922528823276, "grad_norm": 2.797329995864098, "learning_rate": 4.023889728981631e-06, "loss": 1.0278, "step": 2841 }, { "epoch": 0.2988943957300801, "grad_norm": 2.053294094600003, "learning_rate": 4.023227846655484e-06, "loss": 0.9636, "step": 2842 }, { "epoch": 0.29899956617192736, "grad_norm": 2.0068541001608, "learning_rate": 4.0225657944776e-06, "loss": 0.9924, "step": 2843 }, { "epoch": 0.2991047366137747, "grad_norm": 3.0489588147562947, "learning_rate": 4.021903572521802e-06, "loss": 1.059, "step": 2844 }, { "epoch": 0.299209907055622, "grad_norm": 2.416659465285543, "learning_rate": 4.021241180861933e-06, "loss": 1.0491, "step": 2845 }, { "epoch": 0.29931507749746933, "grad_norm": 1.8189123954689044, "learning_rate": 4.0205786195718545e-06, "loss": 0.997, "step": 2846 }, { "epoch": 0.29942024793931665, "grad_norm": 2.2393968626632126, "learning_rate": 4.0199158887254484e-06, "loss": 1.0253, "step": 2847 }, { "epoch": 0.299525418381164, "grad_norm": 2.7113732338095438, "learning_rate": 4.019252988396613e-06, "loss": 0.9651, "step": 2848 }, { "epoch": 0.2996305888230113, "grad_norm": 3.219458618141807, "learning_rate": 4.018589918659267e-06, "loss": 1.047, "step": 2849 }, { "epoch": 0.2997357592648586, "grad_norm": 2.485215246867124, "learning_rate": 4.01792667958735e-06, "loss": 0.995, "step": 2850 }, { "epoch": 0.29984092970670595, "grad_norm": 2.5877817382570276, "learning_rate": 4.0172632712548145e-06, "loss": 0.9823, "step": 2851 }, { "epoch": 0.2999461001485533, "grad_norm": 2.8920656586696043, "learning_rate": 4.016599693735639e-06, "loss": 1.0172, "step": 2852 }, { "epoch": 0.30005127059040054, "grad_norm": 2.1377145970196025, "learning_rate": 4.015935947103816e-06, "loss": 0.9841, "step": 2853 }, { "epoch": 0.30015644103224787, "grad_norm": 2.357987310739265, "learning_rate": 4.015272031433358e-06, "loss": 0.9969, "step": 2854 }, { "epoch": 0.3002616114740952, "grad_norm": 2.6065064666162927, "learning_rate": 4.014607946798298e-06, "loss": 0.9742, "step": 2855 }, { "epoch": 0.3003667819159425, "grad_norm": 2.5201558670387443, "learning_rate": 4.013943693272686e-06, "loss": 0.9801, "step": 2856 }, { "epoch": 0.30047195235778984, "grad_norm": 3.0654655143598166, "learning_rate": 4.013279270930592e-06, "loss": 1.0204, "step": 2857 }, { "epoch": 0.30057712279963716, "grad_norm": 4.142146640718669, "learning_rate": 4.012614679846103e-06, "loss": 1.0169, "step": 2858 }, { "epoch": 0.3006822932414845, "grad_norm": 1.7013966226721964, "learning_rate": 4.011949920093327e-06, "loss": 1.0133, "step": 2859 }, { "epoch": 0.3007874636833318, "grad_norm": 2.3493733773934036, "learning_rate": 4.0112849917463905e-06, "loss": 1.0396, "step": 2860 }, { "epoch": 0.30089263412517914, "grad_norm": 2.9932760739027167, "learning_rate": 4.010619894879436e-06, "loss": 1.0482, "step": 2861 }, { "epoch": 0.30099780456702646, "grad_norm": 3.4913376390190747, "learning_rate": 4.009954629566629e-06, "loss": 1.0478, "step": 2862 }, { "epoch": 0.30110297500887373, "grad_norm": 2.4522472564185223, "learning_rate": 4.0092891958821515e-06, "loss": 1.0164, "step": 2863 }, { "epoch": 0.30120814545072105, "grad_norm": 2.7224329510455996, "learning_rate": 4.0086235939002024e-06, "loss": 1.0279, "step": 2864 }, { "epoch": 0.3013133158925684, "grad_norm": 2.7001843459526396, "learning_rate": 4.007957823695005e-06, "loss": 1.0319, "step": 2865 }, { "epoch": 0.3014184863344157, "grad_norm": 2.9328100471570813, "learning_rate": 4.007291885340796e-06, "loss": 0.9669, "step": 2866 }, { "epoch": 0.301523656776263, "grad_norm": 2.8953214503968705, "learning_rate": 4.006625778911831e-06, "loss": 0.9993, "step": 2867 }, { "epoch": 0.30162882721811035, "grad_norm": 3.0833861909775333, "learning_rate": 4.005959504482389e-06, "loss": 0.9904, "step": 2868 }, { "epoch": 0.3017339976599577, "grad_norm": 2.706249563006177, "learning_rate": 4.005293062126764e-06, "loss": 1.0137, "step": 2869 }, { "epoch": 0.301839168101805, "grad_norm": 2.6523322513613996, "learning_rate": 4.004626451919268e-06, "loss": 1.022, "step": 2870 }, { "epoch": 0.3019443385436523, "grad_norm": 2.363970677643061, "learning_rate": 4.003959673934235e-06, "loss": 0.9987, "step": 2871 }, { "epoch": 0.30204950898549965, "grad_norm": 2.6706791504734824, "learning_rate": 4.003292728246015e-06, "loss": 1.0179, "step": 2872 }, { "epoch": 0.3021546794273469, "grad_norm": 3.0479510186456977, "learning_rate": 4.002625614928978e-06, "loss": 0.956, "step": 2873 }, { "epoch": 0.30225984986919424, "grad_norm": 1.8651319495727035, "learning_rate": 4.001958334057512e-06, "loss": 1.0283, "step": 2874 }, { "epoch": 0.30236502031104157, "grad_norm": 2.065753591700377, "learning_rate": 4.001290885706023e-06, "loss": 0.9982, "step": 2875 }, { "epoch": 0.3024701907528889, "grad_norm": 3.420963266297305, "learning_rate": 4.0006232699489385e-06, "loss": 0.9814, "step": 2876 }, { "epoch": 0.3025753611947362, "grad_norm": 2.598806589411273, "learning_rate": 3.9999554868607036e-06, "loss": 1.0091, "step": 2877 }, { "epoch": 0.30268053163658354, "grad_norm": 2.691137805944449, "learning_rate": 3.999287536515778e-06, "loss": 1.0782, "step": 2878 }, { "epoch": 0.30278570207843086, "grad_norm": 2.5676564139134976, "learning_rate": 3.998619418988646e-06, "loss": 1.0563, "step": 2879 }, { "epoch": 0.3028908725202782, "grad_norm": 2.5833344941215235, "learning_rate": 3.997951134353808e-06, "loss": 1.0403, "step": 2880 }, { "epoch": 0.3029960429621255, "grad_norm": 2.1282219410524914, "learning_rate": 3.99728268268578e-06, "loss": 0.9861, "step": 2881 }, { "epoch": 0.30310121340397284, "grad_norm": 3.4275643393055333, "learning_rate": 3.996614064059104e-06, "loss": 1.0173, "step": 2882 }, { "epoch": 0.30320638384582016, "grad_norm": 2.2265143102649647, "learning_rate": 3.995945278548331e-06, "loss": 1.0226, "step": 2883 }, { "epoch": 0.30331155428766743, "grad_norm": 2.8938023915675264, "learning_rate": 3.99527632622804e-06, "loss": 1.0185, "step": 2884 }, { "epoch": 0.30341672472951475, "grad_norm": 2.854330050298019, "learning_rate": 3.994607207172823e-06, "loss": 1.0003, "step": 2885 }, { "epoch": 0.3035218951713621, "grad_norm": 2.878769782756261, "learning_rate": 3.993937921457292e-06, "loss": 0.9944, "step": 2886 }, { "epoch": 0.3036270656132094, "grad_norm": 2.8721086441370294, "learning_rate": 3.993268469156077e-06, "loss": 0.9825, "step": 2887 }, { "epoch": 0.3037322360550567, "grad_norm": 2.2081747573437074, "learning_rate": 3.992598850343827e-06, "loss": 0.9873, "step": 2888 }, { "epoch": 0.30383740649690405, "grad_norm": 2.9007140299444822, "learning_rate": 3.991929065095211e-06, "loss": 1.0232, "step": 2889 }, { "epoch": 0.3039425769387514, "grad_norm": 1.3415032258707018, "learning_rate": 3.991259113484916e-06, "loss": 0.9871, "step": 2890 }, { "epoch": 0.3040477473805987, "grad_norm": 2.7105364628823456, "learning_rate": 3.990588995587643e-06, "loss": 1.0259, "step": 2891 }, { "epoch": 0.304152917822446, "grad_norm": 2.31954600362432, "learning_rate": 3.989918711478118e-06, "loss": 1.042, "step": 2892 }, { "epoch": 0.30425808826429335, "grad_norm": 2.7954636771354346, "learning_rate": 3.989248261231084e-06, "loss": 1.0302, "step": 2893 }, { "epoch": 0.3043632587061406, "grad_norm": 2.7537708698276813, "learning_rate": 3.988577644921299e-06, "loss": 1.0587, "step": 2894 }, { "epoch": 0.30446842914798794, "grad_norm": 3.0962306715267935, "learning_rate": 3.9879068626235425e-06, "loss": 0.9759, "step": 2895 }, { "epoch": 0.30457359958983526, "grad_norm": 2.3957225896167746, "learning_rate": 3.987235914412614e-06, "loss": 1.0023, "step": 2896 }, { "epoch": 0.3046787700316826, "grad_norm": 2.771932833415502, "learning_rate": 3.986564800363326e-06, "loss": 1.0512, "step": 2897 }, { "epoch": 0.3047839404735299, "grad_norm": 2.3606896051469537, "learning_rate": 3.985893520550516e-06, "loss": 1.0107, "step": 2898 }, { "epoch": 0.30488911091537724, "grad_norm": 2.4174906103885943, "learning_rate": 3.985222075049035e-06, "loss": 0.9982, "step": 2899 }, { "epoch": 0.30499428135722456, "grad_norm": 1.6550177073693109, "learning_rate": 3.984550463933754e-06, "loss": 0.9569, "step": 2900 }, { "epoch": 0.3050994517990719, "grad_norm": 3.4294334384914213, "learning_rate": 3.983878687279565e-06, "loss": 1.0138, "step": 2901 }, { "epoch": 0.3052046222409192, "grad_norm": 2.56368532622028, "learning_rate": 3.9832067451613755e-06, "loss": 1.0517, "step": 2902 }, { "epoch": 0.30530979268276653, "grad_norm": 2.8083843993980953, "learning_rate": 3.982534637654112e-06, "loss": 0.9687, "step": 2903 }, { "epoch": 0.3054149631246138, "grad_norm": 2.7297876966125156, "learning_rate": 3.981862364832718e-06, "loss": 1.014, "step": 2904 }, { "epoch": 0.3055201335664611, "grad_norm": 2.103857181645903, "learning_rate": 3.981189926772161e-06, "loss": 0.9635, "step": 2905 }, { "epoch": 0.30562530400830845, "grad_norm": 2.5474502602534175, "learning_rate": 3.980517323547419e-06, "loss": 1.0005, "step": 2906 }, { "epoch": 0.3057304744501558, "grad_norm": 2.4631842991491637, "learning_rate": 3.979844555233496e-06, "loss": 1.0259, "step": 2907 }, { "epoch": 0.3058356448920031, "grad_norm": 3.829976049615588, "learning_rate": 3.979171621905409e-06, "loss": 1.0053, "step": 2908 }, { "epoch": 0.3059408153338504, "grad_norm": 3.0337382004641404, "learning_rate": 3.978498523638194e-06, "loss": 1.0447, "step": 2909 }, { "epoch": 0.30604598577569775, "grad_norm": 2.4115719177112105, "learning_rate": 3.97782526050691e-06, "loss": 0.9999, "step": 2910 }, { "epoch": 0.3061511562175451, "grad_norm": 2.5527608779229696, "learning_rate": 3.977151832586628e-06, "loss": 1.0281, "step": 2911 }, { "epoch": 0.3062563266593924, "grad_norm": 2.0029391608583262, "learning_rate": 3.976478239952444e-06, "loss": 1.0366, "step": 2912 }, { "epoch": 0.3063614971012397, "grad_norm": 2.447326314753849, "learning_rate": 3.975804482679464e-06, "loss": 1.0104, "step": 2913 }, { "epoch": 0.306466667543087, "grad_norm": 2.2765266252277203, "learning_rate": 3.975130560842821e-06, "loss": 0.979, "step": 2914 }, { "epoch": 0.3065718379849343, "grad_norm": 2.615082713312339, "learning_rate": 3.974456474517661e-06, "loss": 1.0355, "step": 2915 }, { "epoch": 0.30667700842678164, "grad_norm": 2.380276345939876, "learning_rate": 3.97378222377915e-06, "loss": 1.0034, "step": 2916 }, { "epoch": 0.30678217886862896, "grad_norm": 2.3073048459869168, "learning_rate": 3.973107808702472e-06, "loss": 1.0268, "step": 2917 }, { "epoch": 0.3068873493104763, "grad_norm": 1.8941823202478179, "learning_rate": 3.9724332293628295e-06, "loss": 1.0306, "step": 2918 }, { "epoch": 0.3069925197523236, "grad_norm": 2.896280116503032, "learning_rate": 3.9717584858354454e-06, "loss": 1.008, "step": 2919 }, { "epoch": 0.30709769019417094, "grad_norm": 2.7180851301598103, "learning_rate": 3.971083578195556e-06, "loss": 1.0215, "step": 2920 }, { "epoch": 0.30720286063601826, "grad_norm": 2.0401389712432487, "learning_rate": 3.970408506518419e-06, "loss": 0.9842, "step": 2921 }, { "epoch": 0.3073080310778656, "grad_norm": 2.3667558391152426, "learning_rate": 3.969733270879313e-06, "loss": 0.9797, "step": 2922 }, { "epoch": 0.3074132015197129, "grad_norm": 3.307599231062686, "learning_rate": 3.969057871353529e-06, "loss": 0.9895, "step": 2923 }, { "epoch": 0.3075183719615602, "grad_norm": 2.8646693785344555, "learning_rate": 3.96838230801638e-06, "loss": 0.9618, "step": 2924 }, { "epoch": 0.3076235424034075, "grad_norm": 2.2316449558106974, "learning_rate": 3.967706580943197e-06, "loss": 1.0306, "step": 2925 }, { "epoch": 0.3077287128452548, "grad_norm": 2.771790047908696, "learning_rate": 3.9670306902093286e-06, "loss": 1.0021, "step": 2926 }, { "epoch": 0.30783388328710215, "grad_norm": 2.7193389805440207, "learning_rate": 3.9663546358901415e-06, "loss": 0.9975, "step": 2927 }, { "epoch": 0.3079390537289495, "grad_norm": 2.174531390393959, "learning_rate": 3.965678418061023e-06, "loss": 1.0174, "step": 2928 }, { "epoch": 0.3080442241707968, "grad_norm": 2.473148623762536, "learning_rate": 3.965002036797374e-06, "loss": 0.9751, "step": 2929 }, { "epoch": 0.3081493946126441, "grad_norm": 2.523481125448685, "learning_rate": 3.9643254921746176e-06, "loss": 1.0486, "step": 2930 }, { "epoch": 0.30825456505449145, "grad_norm": 2.4231464073038103, "learning_rate": 3.963648784268193e-06, "loss": 1.0308, "step": 2931 }, { "epoch": 0.30835973549633877, "grad_norm": 2.908693220754445, "learning_rate": 3.9629719131535595e-06, "loss": 1.019, "step": 2932 }, { "epoch": 0.3084649059381861, "grad_norm": 2.831569138155086, "learning_rate": 3.9622948789061935e-06, "loss": 1.0282, "step": 2933 }, { "epoch": 0.30857007638003336, "grad_norm": 2.1822044513817533, "learning_rate": 3.961617681601588e-06, "loss": 1.0471, "step": 2934 }, { "epoch": 0.3086752468218807, "grad_norm": 1.968984775871048, "learning_rate": 3.960940321315257e-06, "loss": 0.9938, "step": 2935 }, { "epoch": 0.308780417263728, "grad_norm": 3.0742676631782015, "learning_rate": 3.960262798122731e-06, "loss": 1.0384, "step": 2936 }, { "epoch": 0.30888558770557534, "grad_norm": 1.7715236826173646, "learning_rate": 3.95958511209956e-06, "loss": 1.017, "step": 2937 }, { "epoch": 0.30899075814742266, "grad_norm": 2.6875698356975297, "learning_rate": 3.95890726332131e-06, "loss": 1.0554, "step": 2938 }, { "epoch": 0.30909592858927, "grad_norm": 1.9619216486997688, "learning_rate": 3.958229251863567e-06, "loss": 0.9878, "step": 2939 }, { "epoch": 0.3092010990311173, "grad_norm": 2.7541620784435406, "learning_rate": 3.957551077801935e-06, "loss": 0.9557, "step": 2940 }, { "epoch": 0.30930626947296463, "grad_norm": 1.8909424032199753, "learning_rate": 3.956872741212035e-06, "loss": 1.0064, "step": 2941 }, { "epoch": 0.30941143991481196, "grad_norm": 2.623868633279091, "learning_rate": 3.956194242169506e-06, "loss": 1.0086, "step": 2942 }, { "epoch": 0.3095166103566593, "grad_norm": 3.8473610355629195, "learning_rate": 3.955515580750008e-06, "loss": 1.0438, "step": 2943 }, { "epoch": 0.3096217807985066, "grad_norm": 3.550700597509594, "learning_rate": 3.954836757029214e-06, "loss": 1.0497, "step": 2944 }, { "epoch": 0.3097269512403539, "grad_norm": 2.9530235516637013, "learning_rate": 3.9541577710828225e-06, "loss": 1.0073, "step": 2945 }, { "epoch": 0.3098321216822012, "grad_norm": 2.5132191578181384, "learning_rate": 3.953478622986542e-06, "loss": 1.0157, "step": 2946 }, { "epoch": 0.3099372921240485, "grad_norm": 2.815147625525899, "learning_rate": 3.952799312816105e-06, "loss": 1.0341, "step": 2947 }, { "epoch": 0.31004246256589585, "grad_norm": 2.3437305046069077, "learning_rate": 3.9521198406472575e-06, "loss": 1.0106, "step": 2948 }, { "epoch": 0.3101476330077432, "grad_norm": 2.7478630820842724, "learning_rate": 3.9514402065557675e-06, "loss": 0.9561, "step": 2949 }, { "epoch": 0.3102528034495905, "grad_norm": 3.1039712218296445, "learning_rate": 3.950760410617421e-06, "loss": 1.0222, "step": 2950 }, { "epoch": 0.3103579738914378, "grad_norm": 2.758211476967507, "learning_rate": 3.950080452908016e-06, "loss": 0.9854, "step": 2951 }, { "epoch": 0.31046314433328515, "grad_norm": 3.118482777087128, "learning_rate": 3.949400333503378e-06, "loss": 1.0438, "step": 2952 }, { "epoch": 0.31056831477513247, "grad_norm": 2.3959007914603077, "learning_rate": 3.948720052479343e-06, "loss": 1.002, "step": 2953 }, { "epoch": 0.3106734852169798, "grad_norm": 1.9575742067090993, "learning_rate": 3.948039609911768e-06, "loss": 0.9846, "step": 2954 }, { "epoch": 0.31077865565882706, "grad_norm": 2.218966006005466, "learning_rate": 3.947359005876527e-06, "loss": 1.0369, "step": 2955 }, { "epoch": 0.3108838261006744, "grad_norm": 2.957402283333697, "learning_rate": 3.946678240449515e-06, "loss": 0.9891, "step": 2956 }, { "epoch": 0.3109889965425217, "grad_norm": 2.6314748348639805, "learning_rate": 3.94599731370664e-06, "loss": 1.0428, "step": 2957 }, { "epoch": 0.31109416698436904, "grad_norm": 2.7377714026833866, "learning_rate": 3.945316225723832e-06, "loss": 1.035, "step": 2958 }, { "epoch": 0.31119933742621636, "grad_norm": 2.053832855562837, "learning_rate": 3.944634976577036e-06, "loss": 0.9603, "step": 2959 }, { "epoch": 0.3113045078680637, "grad_norm": 3.1158025154221125, "learning_rate": 3.943953566342219e-06, "loss": 0.9998, "step": 2960 }, { "epoch": 0.311409678309911, "grad_norm": 2.396662606226732, "learning_rate": 3.9432719950953625e-06, "loss": 1.003, "step": 2961 }, { "epoch": 0.31151484875175833, "grad_norm": 2.477520494380087, "learning_rate": 3.942590262912466e-06, "loss": 1.0024, "step": 2962 }, { "epoch": 0.31162001919360566, "grad_norm": 2.966774835153069, "learning_rate": 3.941908369869549e-06, "loss": 1.024, "step": 2963 }, { "epoch": 0.311725189635453, "grad_norm": 2.3612255349598286, "learning_rate": 3.941226316042648e-06, "loss": 0.9814, "step": 2964 }, { "epoch": 0.31183036007730025, "grad_norm": 3.0645177496274334, "learning_rate": 3.940544101507817e-06, "loss": 0.9985, "step": 2965 }, { "epoch": 0.3119355305191476, "grad_norm": 2.695073034424229, "learning_rate": 3.939861726341128e-06, "loss": 0.9781, "step": 2966 }, { "epoch": 0.3120407009609949, "grad_norm": 2.731332674190935, "learning_rate": 3.939179190618671e-06, "loss": 0.9987, "step": 2967 }, { "epoch": 0.3121458714028422, "grad_norm": 1.620215381519614, "learning_rate": 3.938496494416554e-06, "loss": 0.9743, "step": 2968 }, { "epoch": 0.31225104184468955, "grad_norm": 2.3891831524051295, "learning_rate": 3.937813637810904e-06, "loss": 1.0118, "step": 2969 }, { "epoch": 0.31235621228653687, "grad_norm": 2.2376707736140067, "learning_rate": 3.937130620877863e-06, "loss": 0.9824, "step": 2970 }, { "epoch": 0.3124613827283842, "grad_norm": 2.4199521650953866, "learning_rate": 3.936447443693595e-06, "loss": 0.9938, "step": 2971 }, { "epoch": 0.3125665531702315, "grad_norm": 2.3031295788347226, "learning_rate": 3.935764106334278e-06, "loss": 1.0102, "step": 2972 }, { "epoch": 0.31267172361207884, "grad_norm": 3.5924980543328506, "learning_rate": 3.935080608876109e-06, "loss": 0.9912, "step": 2973 }, { "epoch": 0.31277689405392617, "grad_norm": 2.8358224243128154, "learning_rate": 3.934396951395305e-06, "loss": 1.004, "step": 2974 }, { "epoch": 0.31288206449577344, "grad_norm": 2.094776426203808, "learning_rate": 3.933713133968097e-06, "loss": 0.96, "step": 2975 }, { "epoch": 0.31298723493762076, "grad_norm": 2.8795819218722722, "learning_rate": 3.933029156670738e-06, "loss": 0.99, "step": 2976 }, { "epoch": 0.3130924053794681, "grad_norm": 3.1871471127409756, "learning_rate": 3.9323450195794954e-06, "loss": 0.9891, "step": 2977 }, { "epoch": 0.3131975758213154, "grad_norm": 2.573983278943729, "learning_rate": 3.9316607227706564e-06, "loss": 0.995, "step": 2978 }, { "epoch": 0.31330274626316273, "grad_norm": 2.4060298355716445, "learning_rate": 3.930976266320525e-06, "loss": 1.0275, "step": 2979 }, { "epoch": 0.31340791670501006, "grad_norm": 2.4230905246024435, "learning_rate": 3.930291650305424e-06, "loss": 0.997, "step": 2980 }, { "epoch": 0.3135130871468574, "grad_norm": 1.8561383044323279, "learning_rate": 3.929606874801694e-06, "loss": 0.9847, "step": 2981 }, { "epoch": 0.3136182575887047, "grad_norm": 2.549475061635563, "learning_rate": 3.9289219398856905e-06, "loss": 0.9506, "step": 2982 }, { "epoch": 0.31372342803055203, "grad_norm": 2.220462170609673, "learning_rate": 3.928236845633791e-06, "loss": 1.0019, "step": 2983 }, { "epoch": 0.31382859847239936, "grad_norm": 2.856787147753265, "learning_rate": 3.927551592122389e-06, "loss": 1.0285, "step": 2984 }, { "epoch": 0.3139337689142466, "grad_norm": 3.083922842525906, "learning_rate": 3.926866179427894e-06, "loss": 1.0602, "step": 2985 }, { "epoch": 0.31403893935609395, "grad_norm": 2.597249942858582, "learning_rate": 3.926180607626735e-06, "loss": 1.0163, "step": 2986 }, { "epoch": 0.3141441097979413, "grad_norm": 2.3456341225046278, "learning_rate": 3.92549487679536e-06, "loss": 1.0348, "step": 2987 }, { "epoch": 0.3142492802397886, "grad_norm": 3.092563769825593, "learning_rate": 3.924808987010234e-06, "loss": 1.0002, "step": 2988 }, { "epoch": 0.3143544506816359, "grad_norm": 2.1068768689816983, "learning_rate": 3.924122938347835e-06, "loss": 0.994, "step": 2989 }, { "epoch": 0.31445962112348325, "grad_norm": 2.5596294401722846, "learning_rate": 3.923436730884668e-06, "loss": 1.0405, "step": 2990 }, { "epoch": 0.31456479156533057, "grad_norm": 2.5979688517209687, "learning_rate": 3.922750364697246e-06, "loss": 1.0104, "step": 2991 }, { "epoch": 0.3146699620071779, "grad_norm": 3.0395592306666175, "learning_rate": 3.922063839862107e-06, "loss": 1.0278, "step": 2992 }, { "epoch": 0.3147751324490252, "grad_norm": 3.095491872011433, "learning_rate": 3.921377156455802e-06, "loss": 1.0113, "step": 2993 }, { "epoch": 0.31488030289087254, "grad_norm": 3.2475174637972857, "learning_rate": 3.920690314554903e-06, "loss": 1.0456, "step": 2994 }, { "epoch": 0.3149854733327198, "grad_norm": 2.4088622405130926, "learning_rate": 3.9200033142359975e-06, "loss": 1.0592, "step": 2995 }, { "epoch": 0.31509064377456714, "grad_norm": 2.71257741566834, "learning_rate": 3.919316155575692e-06, "loss": 1.0116, "step": 2996 }, { "epoch": 0.31519581421641446, "grad_norm": 2.450376163606724, "learning_rate": 3.918628838650609e-06, "loss": 1.0195, "step": 2997 }, { "epoch": 0.3153009846582618, "grad_norm": 3.3118566816217427, "learning_rate": 3.9179413635373895e-06, "loss": 0.968, "step": 2998 }, { "epoch": 0.3154061551001091, "grad_norm": 2.242881590172938, "learning_rate": 3.917253730312694e-06, "loss": 0.9813, "step": 2999 }, { "epoch": 0.31551132554195643, "grad_norm": 2.427670249904579, "learning_rate": 3.916565939053198e-06, "loss": 1.018, "step": 3000 }, { "epoch": 0.31561649598380376, "grad_norm": 3.0529942785276436, "learning_rate": 3.915877989835595e-06, "loss": 1.03, "step": 3001 }, { "epoch": 0.3157216664256511, "grad_norm": 2.5578272107417317, "learning_rate": 3.915189882736597e-06, "loss": 1.0433, "step": 3002 }, { "epoch": 0.3158268368674984, "grad_norm": 2.585770865143952, "learning_rate": 3.914501617832935e-06, "loss": 1.0069, "step": 3003 }, { "epoch": 0.31593200730934573, "grad_norm": 3.2812648373996893, "learning_rate": 3.9138131952013535e-06, "loss": 1.0027, "step": 3004 }, { "epoch": 0.31603717775119305, "grad_norm": 3.3571648099609654, "learning_rate": 3.913124614918617e-06, "loss": 1.0312, "step": 3005 }, { "epoch": 0.3161423481930403, "grad_norm": 1.7227150762858836, "learning_rate": 3.9124358770615094e-06, "loss": 0.9955, "step": 3006 }, { "epoch": 0.31624751863488765, "grad_norm": 3.5117489508527022, "learning_rate": 3.911746981706829e-06, "loss": 1.0127, "step": 3007 }, { "epoch": 0.31635268907673497, "grad_norm": 2.651233249155203, "learning_rate": 3.911057928931394e-06, "loss": 0.9847, "step": 3008 }, { "epoch": 0.3164578595185823, "grad_norm": 3.406708098715362, "learning_rate": 3.910368718812037e-06, "loss": 1.0204, "step": 3009 }, { "epoch": 0.3165630299604296, "grad_norm": 2.6490697167241666, "learning_rate": 3.909679351425612e-06, "loss": 1.0232, "step": 3010 }, { "epoch": 0.31666820040227694, "grad_norm": 3.0367731314433857, "learning_rate": 3.90898982684899e-06, "loss": 1.0102, "step": 3011 }, { "epoch": 0.31677337084412427, "grad_norm": 1.99254564411588, "learning_rate": 3.908300145159055e-06, "loss": 0.9834, "step": 3012 }, { "epoch": 0.3168785412859716, "grad_norm": 3.2342199206410482, "learning_rate": 3.907610306432716e-06, "loss": 0.9951, "step": 3013 }, { "epoch": 0.3169837117278189, "grad_norm": 2.322687279586599, "learning_rate": 3.906920310746891e-06, "loss": 1.0255, "step": 3014 }, { "epoch": 0.31708888216966624, "grad_norm": 2.2910797838024104, "learning_rate": 3.906230158178523e-06, "loss": 1.0091, "step": 3015 }, { "epoch": 0.3171940526115135, "grad_norm": 2.5171087691012386, "learning_rate": 3.9055398488045685e-06, "loss": 0.935, "step": 3016 }, { "epoch": 0.31729922305336083, "grad_norm": 1.9921759243129207, "learning_rate": 3.9048493827020015e-06, "loss": 1.0218, "step": 3017 }, { "epoch": 0.31740439349520816, "grad_norm": 2.1147110832212928, "learning_rate": 3.904158759947816e-06, "loss": 1.0058, "step": 3018 }, { "epoch": 0.3175095639370555, "grad_norm": 3.0475021542334555, "learning_rate": 3.9034679806190204e-06, "loss": 1.0187, "step": 3019 }, { "epoch": 0.3176147343789028, "grad_norm": 2.745517766757522, "learning_rate": 3.902777044792642e-06, "loss": 1.0115, "step": 3020 }, { "epoch": 0.31771990482075013, "grad_norm": 1.5206699032468878, "learning_rate": 3.902085952545726e-06, "loss": 0.9619, "step": 3021 }, { "epoch": 0.31782507526259746, "grad_norm": 2.4452139648992395, "learning_rate": 3.901394703955335e-06, "loss": 0.9903, "step": 3022 }, { "epoch": 0.3179302457044448, "grad_norm": 2.5184421966703594, "learning_rate": 3.900703299098548e-06, "loss": 0.9858, "step": 3023 }, { "epoch": 0.3180354161462921, "grad_norm": 1.9652874285496023, "learning_rate": 3.900011738052463e-06, "loss": 1.0252, "step": 3024 }, { "epoch": 0.31814058658813943, "grad_norm": 2.392255529966947, "learning_rate": 3.899320020894192e-06, "loss": 1.0227, "step": 3025 }, { "epoch": 0.3182457570299867, "grad_norm": 2.1611953399013153, "learning_rate": 3.898628147700869e-06, "loss": 0.9738, "step": 3026 }, { "epoch": 0.318350927471834, "grad_norm": 2.2430395870179063, "learning_rate": 3.8979361185496426e-06, "loss": 0.9749, "step": 3027 }, { "epoch": 0.31845609791368135, "grad_norm": 3.541913920661668, "learning_rate": 3.897243933517679e-06, "loss": 1.0152, "step": 3028 }, { "epoch": 0.31856126835552867, "grad_norm": 1.7600013581050258, "learning_rate": 3.896551592682164e-06, "loss": 0.9925, "step": 3029 }, { "epoch": 0.318666438797376, "grad_norm": 3.0382688857854387, "learning_rate": 3.895859096120296e-06, "loss": 1.005, "step": 3030 }, { "epoch": 0.3187716092392233, "grad_norm": 1.7559359887388504, "learning_rate": 3.8951664439092966e-06, "loss": 1.0027, "step": 3031 }, { "epoch": 0.31887677968107064, "grad_norm": 2.41513756248943, "learning_rate": 3.8944736361263996e-06, "loss": 1.0058, "step": 3032 }, { "epoch": 0.31898195012291797, "grad_norm": 3.824958571683499, "learning_rate": 3.89378067284886e-06, "loss": 0.9754, "step": 3033 }, { "epoch": 0.3190871205647653, "grad_norm": 2.8869062987377174, "learning_rate": 3.893087554153948e-06, "loss": 0.974, "step": 3034 }, { "epoch": 0.3191922910066126, "grad_norm": 3.3376996294364782, "learning_rate": 3.892394280118952e-06, "loss": 1.0322, "step": 3035 }, { "epoch": 0.3192974614484599, "grad_norm": 2.1493242739855316, "learning_rate": 3.891700850821178e-06, "loss": 0.9793, "step": 3036 }, { "epoch": 0.3194026318903072, "grad_norm": 2.0600928702389694, "learning_rate": 3.891007266337947e-06, "loss": 0.9667, "step": 3037 }, { "epoch": 0.31950780233215453, "grad_norm": 2.422518969459341, "learning_rate": 3.8903135267466e-06, "loss": 1.0283, "step": 3038 }, { "epoch": 0.31961297277400186, "grad_norm": 2.7095503103612764, "learning_rate": 3.889619632124495e-06, "loss": 1.0291, "step": 3039 }, { "epoch": 0.3197181432158492, "grad_norm": 2.6136848210226833, "learning_rate": 3.888925582549006e-06, "loss": 1.0488, "step": 3040 }, { "epoch": 0.3198233136576965, "grad_norm": 3.3309108315349962, "learning_rate": 3.888231378097525e-06, "loss": 1.0006, "step": 3041 }, { "epoch": 0.31992848409954383, "grad_norm": 2.9848323149363716, "learning_rate": 3.8875370188474606e-06, "loss": 1.0346, "step": 3042 }, { "epoch": 0.32003365454139115, "grad_norm": 1.8815097278144668, "learning_rate": 3.88684250487624e-06, "loss": 0.9791, "step": 3043 }, { "epoch": 0.3201388249832385, "grad_norm": 2.975369543509321, "learning_rate": 3.886147836261307e-06, "loss": 1.0097, "step": 3044 }, { "epoch": 0.3202439954250858, "grad_norm": 1.9729548982996215, "learning_rate": 3.8854530130801226e-06, "loss": 1.0049, "step": 3045 }, { "epoch": 0.32034916586693307, "grad_norm": 2.964599401521195, "learning_rate": 3.884758035410165e-06, "loss": 0.9898, "step": 3046 }, { "epoch": 0.3204543363087804, "grad_norm": 2.240600934404886, "learning_rate": 3.884062903328929e-06, "loss": 0.9923, "step": 3047 }, { "epoch": 0.3205595067506277, "grad_norm": 2.598966467579801, "learning_rate": 3.883367616913927e-06, "loss": 1.0003, "step": 3048 }, { "epoch": 0.32066467719247504, "grad_norm": 2.2641338646409883, "learning_rate": 3.8826721762426885e-06, "loss": 1.002, "step": 3049 }, { "epoch": 0.32076984763432237, "grad_norm": 2.2997383920918897, "learning_rate": 3.881976581392763e-06, "loss": 1.0243, "step": 3050 }, { "epoch": 0.3208750180761697, "grad_norm": 2.1550910823149136, "learning_rate": 3.881280832441712e-06, "loss": 0.991, "step": 3051 }, { "epoch": 0.320980188518017, "grad_norm": 2.855557332734093, "learning_rate": 3.880584929467119e-06, "loss": 0.9982, "step": 3052 }, { "epoch": 0.32108535895986434, "grad_norm": 3.5691659687435897, "learning_rate": 3.879888872546581e-06, "loss": 1.0354, "step": 3053 }, { "epoch": 0.32119052940171167, "grad_norm": 2.2916019493138817, "learning_rate": 3.879192661757715e-06, "loss": 0.9934, "step": 3054 }, { "epoch": 0.321295699843559, "grad_norm": 2.473847967975587, "learning_rate": 3.878496297178151e-06, "loss": 1.0425, "step": 3055 }, { "epoch": 0.32140087028540626, "grad_norm": 3.677982616082223, "learning_rate": 3.8777997788855435e-06, "loss": 0.9895, "step": 3056 }, { "epoch": 0.3215060407272536, "grad_norm": 2.7264358103840376, "learning_rate": 3.877103106957559e-06, "loss": 1.0241, "step": 3057 }, { "epoch": 0.3216112111691009, "grad_norm": 2.2211588348254185, "learning_rate": 3.876406281471877e-06, "loss": 1.0182, "step": 3058 }, { "epoch": 0.32171638161094823, "grad_norm": 3.091295524354873, "learning_rate": 3.875709302506204e-06, "loss": 1.0349, "step": 3059 }, { "epoch": 0.32182155205279556, "grad_norm": 2.236128132871001, "learning_rate": 3.875012170138256e-06, "loss": 0.9954, "step": 3060 }, { "epoch": 0.3219267224946429, "grad_norm": 2.66801073802577, "learning_rate": 3.874314884445771e-06, "loss": 0.9817, "step": 3061 }, { "epoch": 0.3220318929364902, "grad_norm": 2.6169139547610816, "learning_rate": 3.873617445506499e-06, "loss": 1.0343, "step": 3062 }, { "epoch": 0.32213706337833753, "grad_norm": 2.709426123429328, "learning_rate": 3.872919853398212e-06, "loss": 1.0071, "step": 3063 }, { "epoch": 0.32224223382018485, "grad_norm": 2.299843384495024, "learning_rate": 3.872222108198696e-06, "loss": 1.0012, "step": 3064 }, { "epoch": 0.3223474042620322, "grad_norm": 2.636252219736122, "learning_rate": 3.871524209985755e-06, "loss": 1.0427, "step": 3065 }, { "epoch": 0.3224525747038795, "grad_norm": 3.117900115113946, "learning_rate": 3.87082615883721e-06, "loss": 1.0041, "step": 3066 }, { "epoch": 0.32255774514572677, "grad_norm": 2.2357890840611465, "learning_rate": 3.8701279548309e-06, "loss": 0.9666, "step": 3067 }, { "epoch": 0.3226629155875741, "grad_norm": 2.566115912683812, "learning_rate": 3.869429598044679e-06, "loss": 1.0166, "step": 3068 }, { "epoch": 0.3227680860294214, "grad_norm": 2.7172969124996165, "learning_rate": 3.868731088556419e-06, "loss": 1.0099, "step": 3069 }, { "epoch": 0.32287325647126874, "grad_norm": 1.5483818724179716, "learning_rate": 3.868032426444012e-06, "loss": 0.9894, "step": 3070 }, { "epoch": 0.32297842691311607, "grad_norm": 2.318650017542439, "learning_rate": 3.867333611785361e-06, "loss": 1.05, "step": 3071 }, { "epoch": 0.3230835973549634, "grad_norm": 1.917805124510381, "learning_rate": 3.86663464465839e-06, "loss": 0.9977, "step": 3072 }, { "epoch": 0.3231887677968107, "grad_norm": 2.3161890326941275, "learning_rate": 3.86593552514104e-06, "loss": 1.003, "step": 3073 }, { "epoch": 0.32329393823865804, "grad_norm": 2.1693043396128093, "learning_rate": 3.865236253311268e-06, "loss": 0.9695, "step": 3074 }, { "epoch": 0.32339910868050536, "grad_norm": 2.8668301075021914, "learning_rate": 3.864536829247049e-06, "loss": 0.9888, "step": 3075 }, { "epoch": 0.3235042791223527, "grad_norm": 2.4444547149619296, "learning_rate": 3.863837253026372e-06, "loss": 1.0067, "step": 3076 }, { "epoch": 0.32360944956419996, "grad_norm": 3.3217478770609925, "learning_rate": 3.863137524727248e-06, "loss": 1.018, "step": 3077 }, { "epoch": 0.3237146200060473, "grad_norm": 3.375894933254088, "learning_rate": 3.862437644427699e-06, "loss": 1.0314, "step": 3078 }, { "epoch": 0.3238197904478946, "grad_norm": 3.1024834482299135, "learning_rate": 3.86173761220577e-06, "loss": 0.9857, "step": 3079 }, { "epoch": 0.32392496088974193, "grad_norm": 1.8859603281941375, "learning_rate": 3.8610374281395205e-06, "loss": 1.0021, "step": 3080 }, { "epoch": 0.32403013133158926, "grad_norm": 2.181930740279361, "learning_rate": 3.860337092307023e-06, "loss": 1.0024, "step": 3081 }, { "epoch": 0.3241353017734366, "grad_norm": 2.2394462965094806, "learning_rate": 3.859636604786372e-06, "loss": 1.0499, "step": 3082 }, { "epoch": 0.3242404722152839, "grad_norm": 2.429116428790114, "learning_rate": 3.8589359656556775e-06, "loss": 1.0016, "step": 3083 }, { "epoch": 0.32434564265713123, "grad_norm": 3.2486086179267697, "learning_rate": 3.858235174993067e-06, "loss": 0.9778, "step": 3084 }, { "epoch": 0.32445081309897855, "grad_norm": 2.168312151384131, "learning_rate": 3.857534232876684e-06, "loss": 0.9809, "step": 3085 }, { "epoch": 0.3245559835408259, "grad_norm": 2.894850817884378, "learning_rate": 3.856833139384687e-06, "loss": 1.0092, "step": 3086 }, { "epoch": 0.32466115398267315, "grad_norm": 3.1973922575609697, "learning_rate": 3.856131894595255e-06, "loss": 1.0425, "step": 3087 }, { "epoch": 0.32476632442452047, "grad_norm": 2.6786962321285883, "learning_rate": 3.855430498586582e-06, "loss": 0.9848, "step": 3088 }, { "epoch": 0.3248714948663678, "grad_norm": 1.7068699911559382, "learning_rate": 3.8547289514368795e-06, "loss": 0.9864, "step": 3089 }, { "epoch": 0.3249766653082151, "grad_norm": 1.762520608645227, "learning_rate": 3.854027253224375e-06, "loss": 0.9843, "step": 3090 }, { "epoch": 0.32508183575006244, "grad_norm": 1.756341086493816, "learning_rate": 3.853325404027313e-06, "loss": 0.9738, "step": 3091 }, { "epoch": 0.32518700619190977, "grad_norm": 1.9319056465036724, "learning_rate": 3.852623403923955e-06, "loss": 1.0034, "step": 3092 }, { "epoch": 0.3252921766337571, "grad_norm": 2.627736625312551, "learning_rate": 3.851921252992581e-06, "loss": 1.0046, "step": 3093 }, { "epoch": 0.3253973470756044, "grad_norm": 2.387276425999192, "learning_rate": 3.851218951311484e-06, "loss": 1.0061, "step": 3094 }, { "epoch": 0.32550251751745174, "grad_norm": 2.6657811160762868, "learning_rate": 3.850516498958979e-06, "loss": 0.989, "step": 3095 }, { "epoch": 0.32560768795929906, "grad_norm": 3.7563246886039785, "learning_rate": 3.849813896013392e-06, "loss": 0.9911, "step": 3096 }, { "epoch": 0.32571285840114633, "grad_norm": 3.186401005858273, "learning_rate": 3.849111142553069e-06, "loss": 0.9781, "step": 3097 }, { "epoch": 0.32581802884299366, "grad_norm": 2.8120268250532128, "learning_rate": 3.848408238656375e-06, "loss": 1.0248, "step": 3098 }, { "epoch": 0.325923199284841, "grad_norm": 2.5367126653996985, "learning_rate": 3.8477051844016865e-06, "loss": 1.0108, "step": 3099 }, { "epoch": 0.3260283697266883, "grad_norm": 2.4405235194407804, "learning_rate": 3.847001979867401e-06, "loss": 0.9774, "step": 3100 }, { "epoch": 0.32613354016853563, "grad_norm": 1.8950206598525756, "learning_rate": 3.846298625131931e-06, "loss": 1.0017, "step": 3101 }, { "epoch": 0.32623871061038295, "grad_norm": 2.7265934900210964, "learning_rate": 3.845595120273706e-06, "loss": 1.0165, "step": 3102 }, { "epoch": 0.3263438810522303, "grad_norm": 2.205211886185536, "learning_rate": 3.8448914653711725e-06, "loss": 1.037, "step": 3103 }, { "epoch": 0.3264490514940776, "grad_norm": 1.9366743565239208, "learning_rate": 3.844187660502792e-06, "loss": 0.9738, "step": 3104 }, { "epoch": 0.3265542219359249, "grad_norm": 2.593512907813824, "learning_rate": 3.843483705747045e-06, "loss": 1.0458, "step": 3105 }, { "epoch": 0.32665939237777225, "grad_norm": 2.0167179831155115, "learning_rate": 3.84277960118243e-06, "loss": 1.0399, "step": 3106 }, { "epoch": 0.3267645628196195, "grad_norm": 2.334539077805553, "learning_rate": 3.842075346887457e-06, "loss": 1.0077, "step": 3107 }, { "epoch": 0.32686973326146684, "grad_norm": 2.604441795888962, "learning_rate": 3.841370942940657e-06, "loss": 1.0063, "step": 3108 }, { "epoch": 0.32697490370331417, "grad_norm": 1.8763989090691047, "learning_rate": 3.840666389420578e-06, "loss": 1.0246, "step": 3109 }, { "epoch": 0.3270800741451615, "grad_norm": 2.1829721118576337, "learning_rate": 3.839961686405782e-06, "loss": 1.0295, "step": 3110 }, { "epoch": 0.3271852445870088, "grad_norm": 2.235743716349407, "learning_rate": 3.839256833974848e-06, "loss": 0.9813, "step": 3111 }, { "epoch": 0.32729041502885614, "grad_norm": 2.682923955392616, "learning_rate": 3.838551832206373e-06, "loss": 0.9701, "step": 3112 }, { "epoch": 0.32739558547070347, "grad_norm": 2.564345157324214, "learning_rate": 3.8378466811789714e-06, "loss": 1.0052, "step": 3113 }, { "epoch": 0.3275007559125508, "grad_norm": 2.745614634832843, "learning_rate": 3.837141380971271e-06, "loss": 0.9663, "step": 3114 }, { "epoch": 0.3276059263543981, "grad_norm": 4.102665613851121, "learning_rate": 3.83643593166192e-06, "loss": 1.0145, "step": 3115 }, { "epoch": 0.32771109679624544, "grad_norm": 1.9187924233062632, "learning_rate": 3.835730333329581e-06, "loss": 1.0537, "step": 3116 }, { "epoch": 0.3278162672380927, "grad_norm": 2.0366789642980856, "learning_rate": 3.835024586052933e-06, "loss": 1.0184, "step": 3117 }, { "epoch": 0.32792143767994003, "grad_norm": 2.584247749331405, "learning_rate": 3.834318689910673e-06, "loss": 1.0199, "step": 3118 }, { "epoch": 0.32802660812178736, "grad_norm": 2.3869440994924385, "learning_rate": 3.833612644981514e-06, "loss": 1.0261, "step": 3119 }, { "epoch": 0.3281317785636347, "grad_norm": 1.744981904406401, "learning_rate": 3.832906451344185e-06, "loss": 0.9887, "step": 3120 }, { "epoch": 0.328236949005482, "grad_norm": 2.205884774883593, "learning_rate": 3.832200109077433e-06, "loss": 1.0138, "step": 3121 }, { "epoch": 0.32834211944732933, "grad_norm": 2.8960434627989566, "learning_rate": 3.831493618260019e-06, "loss": 1.0183, "step": 3122 }, { "epoch": 0.32844728988917665, "grad_norm": 2.6153093943252803, "learning_rate": 3.830786978970723e-06, "loss": 1.0074, "step": 3123 }, { "epoch": 0.328552460331024, "grad_norm": 3.2605234910491787, "learning_rate": 3.830080191288342e-06, "loss": 0.9695, "step": 3124 }, { "epoch": 0.3286576307728713, "grad_norm": 2.3212838057371696, "learning_rate": 3.829373255291686e-06, "loss": 1.0006, "step": 3125 }, { "epoch": 0.3287628012147186, "grad_norm": 3.1048512567149156, "learning_rate": 3.828666171059586e-06, "loss": 1.0273, "step": 3126 }, { "epoch": 0.32886797165656595, "grad_norm": 3.432285163654643, "learning_rate": 3.827958938670886e-06, "loss": 1.0458, "step": 3127 }, { "epoch": 0.3289731420984132, "grad_norm": 2.819849325554979, "learning_rate": 3.827251558204449e-06, "loss": 0.9533, "step": 3128 }, { "epoch": 0.32907831254026054, "grad_norm": 1.6507578523510593, "learning_rate": 3.826544029739152e-06, "loss": 0.9536, "step": 3129 }, { "epoch": 0.32918348298210787, "grad_norm": 2.4830132435715537, "learning_rate": 3.825836353353891e-06, "loss": 1.0009, "step": 3130 }, { "epoch": 0.3292886534239552, "grad_norm": 2.4797089042570533, "learning_rate": 3.825128529127577e-06, "loss": 0.9801, "step": 3131 }, { "epoch": 0.3293938238658025, "grad_norm": 2.4239798237039594, "learning_rate": 3.824420557139139e-06, "loss": 1.0182, "step": 3132 }, { "epoch": 0.32949899430764984, "grad_norm": 1.945405865543695, "learning_rate": 3.82371243746752e-06, "loss": 1.0321, "step": 3133 }, { "epoch": 0.32960416474949716, "grad_norm": 2.084748306095817, "learning_rate": 3.823004170191681e-06, "loss": 1.0155, "step": 3134 }, { "epoch": 0.3297093351913445, "grad_norm": 2.251126194835179, "learning_rate": 3.8222957553906e-06, "loss": 1.0392, "step": 3135 }, { "epoch": 0.3298145056331918, "grad_norm": 2.2034664670180626, "learning_rate": 3.821587193143269e-06, "loss": 0.9897, "step": 3136 }, { "epoch": 0.32991967607503914, "grad_norm": 2.9409945673975626, "learning_rate": 3.8208784835287e-06, "loss": 0.9982, "step": 3137 }, { "epoch": 0.3300248465168864, "grad_norm": 3.058005825612, "learning_rate": 3.82016962662592e-06, "loss": 1.0317, "step": 3138 }, { "epoch": 0.33013001695873373, "grad_norm": 3.490847243165153, "learning_rate": 3.819460622513971e-06, "loss": 1.0036, "step": 3139 }, { "epoch": 0.33023518740058105, "grad_norm": 2.071993271550719, "learning_rate": 3.818751471271912e-06, "loss": 0.9886, "step": 3140 }, { "epoch": 0.3303403578424284, "grad_norm": 2.799536363219469, "learning_rate": 3.818042172978821e-06, "loss": 0.9794, "step": 3141 }, { "epoch": 0.3304455282842757, "grad_norm": 3.262735659956398, "learning_rate": 3.8173327277137876e-06, "loss": 1.0189, "step": 3142 }, { "epoch": 0.330550698726123, "grad_norm": 2.1874156683363664, "learning_rate": 3.816623135555921e-06, "loss": 0.963, "step": 3143 }, { "epoch": 0.33065586916797035, "grad_norm": 2.487129588775537, "learning_rate": 3.815913396584348e-06, "loss": 1.0097, "step": 3144 }, { "epoch": 0.3307610396098177, "grad_norm": 2.8581708249255096, "learning_rate": 3.815203510878209e-06, "loss": 1.0382, "step": 3145 }, { "epoch": 0.330866210051665, "grad_norm": 2.353074717228548, "learning_rate": 3.814493478516661e-06, "loss": 1.0092, "step": 3146 }, { "epoch": 0.3309713804935123, "grad_norm": 2.7540007639438375, "learning_rate": 3.813783299578879e-06, "loss": 1.0189, "step": 3147 }, { "epoch": 0.3310765509353596, "grad_norm": 2.4522274567147657, "learning_rate": 3.8130729741440536e-06, "loss": 1.0141, "step": 3148 }, { "epoch": 0.3311817213772069, "grad_norm": 2.849670426659251, "learning_rate": 3.8123625022913915e-06, "loss": 1.0118, "step": 3149 }, { "epoch": 0.33128689181905424, "grad_norm": 2.3301835319177373, "learning_rate": 3.811651884100115e-06, "loss": 0.9961, "step": 3150 }, { "epoch": 0.33139206226090157, "grad_norm": 2.3601708357586006, "learning_rate": 3.8109411196494638e-06, "loss": 0.9735, "step": 3151 }, { "epoch": 0.3314972327027489, "grad_norm": 2.0573991989526474, "learning_rate": 3.810230209018694e-06, "loss": 1.021, "step": 3152 }, { "epoch": 0.3316024031445962, "grad_norm": 2.9354196677378037, "learning_rate": 3.809519152287079e-06, "loss": 0.992, "step": 3153 }, { "epoch": 0.33170757358644354, "grad_norm": 2.544344310226173, "learning_rate": 3.8088079495339046e-06, "loss": 0.957, "step": 3154 }, { "epoch": 0.33181274402829086, "grad_norm": 2.2971377313074393, "learning_rate": 3.8080966008384775e-06, "loss": 1.0143, "step": 3155 }, { "epoch": 0.3319179144701382, "grad_norm": 2.9198098245236026, "learning_rate": 3.807385106280117e-06, "loss": 0.966, "step": 3156 }, { "epoch": 0.3320230849119855, "grad_norm": 2.787741411848439, "learning_rate": 3.806673465938161e-06, "loss": 1.0119, "step": 3157 }, { "epoch": 0.3321282553538328, "grad_norm": 2.139566525593868, "learning_rate": 3.805961679891964e-06, "loss": 1.0094, "step": 3158 }, { "epoch": 0.3322334257956801, "grad_norm": 2.55889761213254, "learning_rate": 3.805249748220893e-06, "loss": 1.0046, "step": 3159 }, { "epoch": 0.33233859623752743, "grad_norm": 2.811994900697266, "learning_rate": 3.804537671004337e-06, "loss": 1.0026, "step": 3160 }, { "epoch": 0.33244376667937475, "grad_norm": 2.6857582832549745, "learning_rate": 3.8038254483216962e-06, "loss": 0.9833, "step": 3161 }, { "epoch": 0.3325489371212221, "grad_norm": 2.6228532392360866, "learning_rate": 3.8031130802523896e-06, "loss": 1.0335, "step": 3162 }, { "epoch": 0.3326541075630694, "grad_norm": 2.6508818240324836, "learning_rate": 3.802400566875851e-06, "loss": 1.0416, "step": 3163 }, { "epoch": 0.3327592780049167, "grad_norm": 2.708891165068549, "learning_rate": 3.8016879082715326e-06, "loss": 1.0311, "step": 3164 }, { "epoch": 0.33286444844676405, "grad_norm": 1.9791714208493958, "learning_rate": 3.8009751045189004e-06, "loss": 1.0022, "step": 3165 }, { "epoch": 0.3329696188886114, "grad_norm": 3.4480804252433592, "learning_rate": 3.8002621556974367e-06, "loss": 1.0225, "step": 3166 }, { "epoch": 0.3330747893304587, "grad_norm": 1.9335962357260972, "learning_rate": 3.7995490618866425e-06, "loss": 0.9969, "step": 3167 }, { "epoch": 0.33317995977230597, "grad_norm": 2.388818127758868, "learning_rate": 3.7988358231660333e-06, "loss": 1.023, "step": 3168 }, { "epoch": 0.3332851302141533, "grad_norm": 3.4063184395962445, "learning_rate": 3.7981224396151393e-06, "loss": 1.0006, "step": 3169 }, { "epoch": 0.3333903006560006, "grad_norm": 3.1228965688498764, "learning_rate": 3.7974089113135094e-06, "loss": 1.0256, "step": 3170 }, { "epoch": 0.33349547109784794, "grad_norm": 2.969409956688769, "learning_rate": 3.7966952383407075e-06, "loss": 0.9796, "step": 3171 }, { "epoch": 0.33360064153969526, "grad_norm": 1.772482770164931, "learning_rate": 3.7959814207763134e-06, "loss": 0.9964, "step": 3172 }, { "epoch": 0.3337058119815426, "grad_norm": 2.5201047196046034, "learning_rate": 3.7952674586999226e-06, "loss": 1.0227, "step": 3173 }, { "epoch": 0.3338109824233899, "grad_norm": 2.848018681472792, "learning_rate": 3.794553352191149e-06, "loss": 1.0123, "step": 3174 }, { "epoch": 0.33391615286523724, "grad_norm": 3.1213302281106547, "learning_rate": 3.7938391013296193e-06, "loss": 1.0099, "step": 3175 }, { "epoch": 0.33402132330708456, "grad_norm": 2.132897712038548, "learning_rate": 3.79312470619498e-06, "loss": 1.0306, "step": 3176 }, { "epoch": 0.3341264937489319, "grad_norm": 1.6980061002378293, "learning_rate": 3.79241016686689e-06, "loss": 0.9981, "step": 3177 }, { "epoch": 0.33423166419077915, "grad_norm": 2.133136371300042, "learning_rate": 3.791695483425026e-06, "loss": 0.9986, "step": 3178 }, { "epoch": 0.3343368346326265, "grad_norm": 2.1135831702313888, "learning_rate": 3.7909806559490827e-06, "loss": 0.9966, "step": 3179 }, { "epoch": 0.3344420050744738, "grad_norm": 2.517571111822009, "learning_rate": 3.790265684518767e-06, "loss": 1.0228, "step": 3180 }, { "epoch": 0.3345471755163211, "grad_norm": 2.98686980884583, "learning_rate": 3.7895505692138045e-06, "loss": 1.0223, "step": 3181 }, { "epoch": 0.33465234595816845, "grad_norm": 2.261356671644225, "learning_rate": 3.7888353101139353e-06, "loss": 1.0245, "step": 3182 }, { "epoch": 0.3347575164000158, "grad_norm": 2.409763840683323, "learning_rate": 3.7881199072989176e-06, "loss": 0.9886, "step": 3183 }, { "epoch": 0.3348626868418631, "grad_norm": 1.9087618932889743, "learning_rate": 3.7874043608485234e-06, "loss": 1.0211, "step": 3184 }, { "epoch": 0.3349678572837104, "grad_norm": 1.998174510767556, "learning_rate": 3.7866886708425427e-06, "loss": 1.0038, "step": 3185 }, { "epoch": 0.33507302772555775, "grad_norm": 1.9103622576526025, "learning_rate": 3.785972837360779e-06, "loss": 1.0043, "step": 3186 }, { "epoch": 0.3351781981674051, "grad_norm": 1.9379810577362075, "learning_rate": 3.785256860483054e-06, "loss": 1.0059, "step": 3187 }, { "epoch": 0.3352833686092524, "grad_norm": 2.9610384556218, "learning_rate": 3.7845407402892066e-06, "loss": 0.9998, "step": 3188 }, { "epoch": 0.33538853905109967, "grad_norm": 2.2025225245305076, "learning_rate": 3.7838244768590866e-06, "loss": 1.0089, "step": 3189 }, { "epoch": 0.335493709492947, "grad_norm": 2.8328379269098076, "learning_rate": 3.7831080702725643e-06, "loss": 1.0317, "step": 3190 }, { "epoch": 0.3355988799347943, "grad_norm": 1.9624846245151617, "learning_rate": 3.7823915206095246e-06, "loss": 0.9584, "step": 3191 }, { "epoch": 0.33570405037664164, "grad_norm": 2.3934098571822546, "learning_rate": 3.7816748279498687e-06, "loss": 1.0102, "step": 3192 }, { "epoch": 0.33580922081848896, "grad_norm": 2.1927978381480053, "learning_rate": 3.780957992373513e-06, "loss": 1.0098, "step": 3193 }, { "epoch": 0.3359143912603363, "grad_norm": 2.7690159806961896, "learning_rate": 3.7802410139603908e-06, "loss": 0.9457, "step": 3194 }, { "epoch": 0.3360195617021836, "grad_norm": 2.5415137912355137, "learning_rate": 3.7795238927904497e-06, "loss": 0.9811, "step": 3195 }, { "epoch": 0.33612473214403094, "grad_norm": 2.895165075185927, "learning_rate": 3.778806628943655e-06, "loss": 1.0291, "step": 3196 }, { "epoch": 0.33622990258587826, "grad_norm": 3.0795576587129676, "learning_rate": 3.778089222499987e-06, "loss": 1.0233, "step": 3197 }, { "epoch": 0.3363350730277256, "grad_norm": 2.4751867790109943, "learning_rate": 3.7773716735394415e-06, "loss": 1.0516, "step": 3198 }, { "epoch": 0.33644024346957285, "grad_norm": 2.4430066060566134, "learning_rate": 3.776653982142033e-06, "loss": 0.9791, "step": 3199 }, { "epoch": 0.3365454139114202, "grad_norm": 2.5324655109273606, "learning_rate": 3.7759361483877865e-06, "loss": 0.9815, "step": 3200 }, { "epoch": 0.3366505843532675, "grad_norm": 2.7523454023910365, "learning_rate": 3.7752181723567484e-06, "loss": 1.0079, "step": 3201 }, { "epoch": 0.3367557547951148, "grad_norm": 2.3630412261657607, "learning_rate": 3.7745000541289777e-06, "loss": 1.0065, "step": 3202 }, { "epoch": 0.33686092523696215, "grad_norm": 2.775762382934883, "learning_rate": 3.7737817937845504e-06, "loss": 0.9977, "step": 3203 }, { "epoch": 0.3369660956788095, "grad_norm": 2.4895504075619606, "learning_rate": 3.7730633914035585e-06, "loss": 1.0495, "step": 3204 }, { "epoch": 0.3370712661206568, "grad_norm": 2.721766046225593, "learning_rate": 3.7723448470661084e-06, "loss": 0.949, "step": 3205 }, { "epoch": 0.3371764365625041, "grad_norm": 2.9668981245557053, "learning_rate": 3.771626160852324e-06, "loss": 0.9979, "step": 3206 }, { "epoch": 0.33728160700435145, "grad_norm": 3.793048897027738, "learning_rate": 3.770907332842344e-06, "loss": 1.0005, "step": 3207 }, { "epoch": 0.33738677744619877, "grad_norm": 2.2379198509213, "learning_rate": 3.770188363116324e-06, "loss": 0.9751, "step": 3208 }, { "epoch": 0.33749194788804604, "grad_norm": 3.13559481810738, "learning_rate": 3.769469251754434e-06, "loss": 0.9865, "step": 3209 }, { "epoch": 0.33759711832989336, "grad_norm": 2.717848142691034, "learning_rate": 3.7687499988368613e-06, "loss": 1.0444, "step": 3210 }, { "epoch": 0.3377022887717407, "grad_norm": 3.9961109065173073, "learning_rate": 3.7680306044438074e-06, "loss": 1.0281, "step": 3211 }, { "epoch": 0.337807459213588, "grad_norm": 2.3111397830579867, "learning_rate": 3.7673110686554915e-06, "loss": 0.9546, "step": 3212 }, { "epoch": 0.33791262965543534, "grad_norm": 2.556022750068411, "learning_rate": 3.766591391552146e-06, "loss": 1.0199, "step": 3213 }, { "epoch": 0.33801780009728266, "grad_norm": 2.8546999903249555, "learning_rate": 3.7658715732140206e-06, "loss": 0.985, "step": 3214 }, { "epoch": 0.33812297053913, "grad_norm": 3.3523241411809104, "learning_rate": 3.7651516137213816e-06, "loss": 1.0066, "step": 3215 }, { "epoch": 0.3382281409809773, "grad_norm": 1.9532386436425269, "learning_rate": 3.7644315131545096e-06, "loss": 0.972, "step": 3216 }, { "epoch": 0.33833331142282463, "grad_norm": 2.721629663326664, "learning_rate": 3.763711271593702e-06, "loss": 0.9979, "step": 3217 }, { "epoch": 0.33843848186467196, "grad_norm": 3.4541692663759904, "learning_rate": 3.76299088911927e-06, "loss": 1.0571, "step": 3218 }, { "epoch": 0.3385436523065192, "grad_norm": 2.976934045713143, "learning_rate": 3.7622703658115435e-06, "loss": 1.0059, "step": 3219 }, { "epoch": 0.33864882274836655, "grad_norm": 3.0140365934725843, "learning_rate": 3.761549701750865e-06, "loss": 1.0244, "step": 3220 }, { "epoch": 0.3387539931902139, "grad_norm": 1.9982975993871346, "learning_rate": 3.7608288970175943e-06, "loss": 1.0215, "step": 3221 }, { "epoch": 0.3388591636320612, "grad_norm": 2.8744873417988788, "learning_rate": 3.7601079516921076e-06, "loss": 1.0015, "step": 3222 }, { "epoch": 0.3389643340739085, "grad_norm": 2.150725684186714, "learning_rate": 3.759386865854795e-06, "loss": 1.0129, "step": 3223 }, { "epoch": 0.33906950451575585, "grad_norm": 2.6319253916407, "learning_rate": 3.758665639586064e-06, "loss": 0.9982, "step": 3224 }, { "epoch": 0.3391746749576032, "grad_norm": 2.1192499513393357, "learning_rate": 3.7579442729663364e-06, "loss": 0.9852, "step": 3225 }, { "epoch": 0.3392798453994505, "grad_norm": 1.842379992675114, "learning_rate": 3.75722276607605e-06, "loss": 0.9908, "step": 3226 }, { "epoch": 0.3393850158412978, "grad_norm": 2.055975701068765, "learning_rate": 3.7565011189956597e-06, "loss": 0.9898, "step": 3227 }, { "epoch": 0.33949018628314515, "grad_norm": 2.4934374887026474, "learning_rate": 3.7557793318056323e-06, "loss": 0.9996, "step": 3228 }, { "epoch": 0.3395953567249924, "grad_norm": 2.845049089637477, "learning_rate": 3.755057404586455e-06, "loss": 1.0016, "step": 3229 }, { "epoch": 0.33970052716683974, "grad_norm": 3.0414746615738957, "learning_rate": 3.7543353374186274e-06, "loss": 1.0384, "step": 3230 }, { "epoch": 0.33980569760868706, "grad_norm": 2.3969907781571793, "learning_rate": 3.753613130382666e-06, "loss": 1.0169, "step": 3231 }, { "epoch": 0.3399108680505344, "grad_norm": 3.100902023873532, "learning_rate": 3.752890783559102e-06, "loss": 0.9903, "step": 3232 }, { "epoch": 0.3400160384923817, "grad_norm": 2.515669648178997, "learning_rate": 3.7521682970284827e-06, "loss": 1.014, "step": 3233 }, { "epoch": 0.34012120893422904, "grad_norm": 2.0868339076531215, "learning_rate": 3.7514456708713717e-06, "loss": 0.9637, "step": 3234 }, { "epoch": 0.34022637937607636, "grad_norm": 2.837174645842647, "learning_rate": 3.750722905168346e-06, "loss": 0.9769, "step": 3235 }, { "epoch": 0.3403315498179237, "grad_norm": 2.3872202244360547, "learning_rate": 3.7500000000000005e-06, "loss": 1.003, "step": 3236 }, { "epoch": 0.340436720259771, "grad_norm": 2.512997542175295, "learning_rate": 3.7492769554469443e-06, "loss": 0.9801, "step": 3237 }, { "epoch": 0.34054189070161833, "grad_norm": 2.2402243033296507, "learning_rate": 3.7485537715898036e-06, "loss": 1.0344, "step": 3238 }, { "epoch": 0.3406470611434656, "grad_norm": 1.788467788663251, "learning_rate": 3.7478304485092174e-06, "loss": 0.9945, "step": 3239 }, { "epoch": 0.3407522315853129, "grad_norm": 2.438950326014985, "learning_rate": 3.7471069862858446e-06, "loss": 1.0212, "step": 3240 }, { "epoch": 0.34085740202716025, "grad_norm": 2.625325390678325, "learning_rate": 3.7463833850003532e-06, "loss": 1.0471, "step": 3241 }, { "epoch": 0.3409625724690076, "grad_norm": 2.298721915181398, "learning_rate": 3.7456596447334324e-06, "loss": 1.0091, "step": 3242 }, { "epoch": 0.3410677429108549, "grad_norm": 2.393036922682323, "learning_rate": 3.7449357655657854e-06, "loss": 1.0367, "step": 3243 }, { "epoch": 0.3411729133527022, "grad_norm": 2.439476214622686, "learning_rate": 3.744211747578129e-06, "loss": 1.0245, "step": 3244 }, { "epoch": 0.34127808379454955, "grad_norm": 2.3404240819465536, "learning_rate": 3.743487590851197e-06, "loss": 0.9768, "step": 3245 }, { "epoch": 0.34138325423639687, "grad_norm": 2.4310810439248467, "learning_rate": 3.742763295465739e-06, "loss": 1.0063, "step": 3246 }, { "epoch": 0.3414884246782442, "grad_norm": 2.7737923645649105, "learning_rate": 3.7420388615025194e-06, "loss": 1.004, "step": 3247 }, { "epoch": 0.3415935951200915, "grad_norm": 2.525162719248003, "learning_rate": 3.7413142890423187e-06, "loss": 1.0317, "step": 3248 }, { "epoch": 0.34169876556193884, "grad_norm": 2.453175198423566, "learning_rate": 3.740589578165932e-06, "loss": 1.0053, "step": 3249 }, { "epoch": 0.3418039360037861, "grad_norm": 2.412643165410365, "learning_rate": 3.7398647289541703e-06, "loss": 1.0517, "step": 3250 }, { "epoch": 0.34190910644563344, "grad_norm": 1.8217035736085005, "learning_rate": 3.7391397414878583e-06, "loss": 0.9601, "step": 3251 }, { "epoch": 0.34201427688748076, "grad_norm": 2.542322284034248, "learning_rate": 3.73841461584784e-06, "loss": 0.9978, "step": 3252 }, { "epoch": 0.3421194473293281, "grad_norm": 1.9724168119288237, "learning_rate": 3.7376893521149713e-06, "loss": 0.9739, "step": 3253 }, { "epoch": 0.3422246177711754, "grad_norm": 2.3405127012102325, "learning_rate": 3.736963950370126e-06, "loss": 1.0283, "step": 3254 }, { "epoch": 0.34232978821302273, "grad_norm": 2.373486349876508, "learning_rate": 3.7362384106941906e-06, "loss": 0.9996, "step": 3255 }, { "epoch": 0.34243495865487006, "grad_norm": 2.312609174057517, "learning_rate": 3.7355127331680695e-06, "loss": 1.0166, "step": 3256 }, { "epoch": 0.3425401290967174, "grad_norm": 1.9211058932583842, "learning_rate": 3.7347869178726804e-06, "loss": 0.9955, "step": 3257 }, { "epoch": 0.3426452995385647, "grad_norm": 6.881539412158539, "learning_rate": 3.7340609648889575e-06, "loss": 1.0193, "step": 3258 }, { "epoch": 0.34275046998041203, "grad_norm": 2.0321978311810445, "learning_rate": 3.7333348742978515e-06, "loss": 0.9756, "step": 3259 }, { "epoch": 0.3428556404222593, "grad_norm": 2.6613297784968886, "learning_rate": 3.7326086461803247e-06, "loss": 0.9983, "step": 3260 }, { "epoch": 0.3429608108641066, "grad_norm": 2.0304428187193784, "learning_rate": 3.7318822806173594e-06, "loss": 1.0042, "step": 3261 }, { "epoch": 0.34306598130595395, "grad_norm": 2.5112230342073842, "learning_rate": 3.73115577768995e-06, "loss": 0.9995, "step": 3262 }, { "epoch": 0.3431711517478013, "grad_norm": 2.0313985757227706, "learning_rate": 3.7304291374791085e-06, "loss": 0.9986, "step": 3263 }, { "epoch": 0.3432763221896486, "grad_norm": 2.5690827796961737, "learning_rate": 3.7297023600658586e-06, "loss": 0.9691, "step": 3264 }, { "epoch": 0.3433814926314959, "grad_norm": 2.193452123288259, "learning_rate": 3.7289754455312434e-06, "loss": 0.9885, "step": 3265 }, { "epoch": 0.34348666307334325, "grad_norm": 2.6601267632676575, "learning_rate": 3.728248393956319e-06, "loss": 1.0152, "step": 3266 }, { "epoch": 0.34359183351519057, "grad_norm": 2.42078911309029, "learning_rate": 3.7275212054221566e-06, "loss": 1.0108, "step": 3267 }, { "epoch": 0.3436970039570379, "grad_norm": 2.269243127837986, "learning_rate": 3.7267938800098454e-06, "loss": 0.9881, "step": 3268 }, { "epoch": 0.3438021743988852, "grad_norm": 2.694411424356494, "learning_rate": 3.7260664178004858e-06, "loss": 1.0177, "step": 3269 }, { "epoch": 0.3439073448407325, "grad_norm": 3.1436582660691896, "learning_rate": 3.725338818875196e-06, "loss": 1.0144, "step": 3270 }, { "epoch": 0.3440125152825798, "grad_norm": 3.099436287839511, "learning_rate": 3.724611083315109e-06, "loss": 1.0101, "step": 3271 }, { "epoch": 0.34411768572442714, "grad_norm": 2.7296113243028284, "learning_rate": 3.7238832112013734e-06, "loss": 1.023, "step": 3272 }, { "epoch": 0.34422285616627446, "grad_norm": 2.2215641054092155, "learning_rate": 3.723155202615153e-06, "loss": 1.0077, "step": 3273 }, { "epoch": 0.3443280266081218, "grad_norm": 2.627762150099357, "learning_rate": 3.7224270576376243e-06, "loss": 1.0031, "step": 3274 }, { "epoch": 0.3444331970499691, "grad_norm": 2.368570833568216, "learning_rate": 3.7216987763499835e-06, "loss": 1.0204, "step": 3275 }, { "epoch": 0.34453836749181643, "grad_norm": 2.1971410365900406, "learning_rate": 3.7209703588334384e-06, "loss": 1.0001, "step": 3276 }, { "epoch": 0.34464353793366376, "grad_norm": 3.176673998132595, "learning_rate": 3.7202418051692134e-06, "loss": 1.0113, "step": 3277 }, { "epoch": 0.3447487083755111, "grad_norm": 2.304560441480544, "learning_rate": 3.719513115438548e-06, "loss": 1.0286, "step": 3278 }, { "epoch": 0.3448538788173584, "grad_norm": 2.3874420483075878, "learning_rate": 3.7187842897226965e-06, "loss": 1.0086, "step": 3279 }, { "epoch": 0.3449590492592057, "grad_norm": 2.6394476203678696, "learning_rate": 3.7180553281029295e-06, "loss": 0.9717, "step": 3280 }, { "epoch": 0.345064219701053, "grad_norm": 2.4270461247707034, "learning_rate": 3.7173262306605307e-06, "loss": 1.0398, "step": 3281 }, { "epoch": 0.3451693901429003, "grad_norm": 3.626716130228977, "learning_rate": 3.7165969974768005e-06, "loss": 1.0027, "step": 3282 }, { "epoch": 0.34527456058474765, "grad_norm": 3.345320093587124, "learning_rate": 3.715867628633055e-06, "loss": 0.9806, "step": 3283 }, { "epoch": 0.34537973102659497, "grad_norm": 2.4384035873967025, "learning_rate": 3.7151381242106232e-06, "loss": 0.9817, "step": 3284 }, { "epoch": 0.3454849014684423, "grad_norm": 2.6159320854024397, "learning_rate": 3.7144084842908506e-06, "loss": 0.9761, "step": 3285 }, { "epoch": 0.3455900719102896, "grad_norm": 2.8847114804108696, "learning_rate": 3.7136787089550986e-06, "loss": 0.9864, "step": 3286 }, { "epoch": 0.34569524235213694, "grad_norm": 3.029486595806022, "learning_rate": 3.7129487982847422e-06, "loss": 1.0003, "step": 3287 }, { "epoch": 0.34580041279398427, "grad_norm": 3.109537564490848, "learning_rate": 3.7122187523611724e-06, "loss": 1.009, "step": 3288 }, { "epoch": 0.3459055832358316, "grad_norm": 2.9156800483209326, "learning_rate": 3.711488571265795e-06, "loss": 0.9898, "step": 3289 }, { "epoch": 0.34601075367767886, "grad_norm": 2.711869560964191, "learning_rate": 3.71075825508003e-06, "loss": 0.9939, "step": 3290 }, { "epoch": 0.3461159241195262, "grad_norm": 1.4908550438638197, "learning_rate": 3.7100278038853157e-06, "loss": 1.0175, "step": 3291 }, { "epoch": 0.3462210945613735, "grad_norm": 2.7237754282190534, "learning_rate": 3.7092972177630998e-06, "loss": 0.999, "step": 3292 }, { "epoch": 0.34632626500322083, "grad_norm": 2.1709243952269577, "learning_rate": 3.708566496794851e-06, "loss": 0.9716, "step": 3293 }, { "epoch": 0.34643143544506816, "grad_norm": 2.316753104081801, "learning_rate": 3.7078356410620484e-06, "loss": 1.0317, "step": 3294 }, { "epoch": 0.3465366058869155, "grad_norm": 2.3542621040463567, "learning_rate": 3.7071046506461893e-06, "loss": 1.0029, "step": 3295 }, { "epoch": 0.3466417763287628, "grad_norm": 1.9708389723051476, "learning_rate": 3.7063735256287854e-06, "loss": 0.9978, "step": 3296 }, { "epoch": 0.34674694677061013, "grad_norm": 2.8033237545972414, "learning_rate": 3.705642266091361e-06, "loss": 0.9875, "step": 3297 }, { "epoch": 0.34685211721245746, "grad_norm": 2.6769139841372196, "learning_rate": 3.7049108721154586e-06, "loss": 1.0103, "step": 3298 }, { "epoch": 0.3469572876543048, "grad_norm": 2.2403444086333053, "learning_rate": 3.7041793437826336e-06, "loss": 1.0481, "step": 3299 }, { "epoch": 0.34706245809615205, "grad_norm": 2.260213376943109, "learning_rate": 3.703447681174458e-06, "loss": 1.0359, "step": 3300 }, { "epoch": 0.3471676285379994, "grad_norm": 2.367948574095755, "learning_rate": 3.7027158843725164e-06, "loss": 1.0121, "step": 3301 }, { "epoch": 0.3472727989798467, "grad_norm": 2.250568883640813, "learning_rate": 3.7019839534584113e-06, "loss": 0.9927, "step": 3302 }, { "epoch": 0.347377969421694, "grad_norm": 3.1053336133122396, "learning_rate": 3.7012518885137572e-06, "loss": 1.0122, "step": 3303 }, { "epoch": 0.34748313986354135, "grad_norm": 2.1013260166293732, "learning_rate": 3.7005196896201867e-06, "loss": 1.0169, "step": 3304 }, { "epoch": 0.34758831030538867, "grad_norm": 1.959095033611609, "learning_rate": 3.6997873568593446e-06, "loss": 1.0015, "step": 3305 }, { "epoch": 0.347693480747236, "grad_norm": 2.511723319973884, "learning_rate": 3.699054890312892e-06, "loss": 0.9941, "step": 3306 }, { "epoch": 0.3477986511890833, "grad_norm": 2.5395710231112902, "learning_rate": 3.6983222900625047e-06, "loss": 0.9469, "step": 3307 }, { "epoch": 0.34790382163093064, "grad_norm": 2.7171955825527783, "learning_rate": 3.697589556189873e-06, "loss": 1.0311, "step": 3308 }, { "epoch": 0.34800899207277797, "grad_norm": 2.5690913311688997, "learning_rate": 3.6968566887767027e-06, "loss": 1.0443, "step": 3309 }, { "epoch": 0.3481141625146253, "grad_norm": 3.0893677787025147, "learning_rate": 3.6961236879047135e-06, "loss": 1.0376, "step": 3310 }, { "epoch": 0.34821933295647256, "grad_norm": 2.3846973615171243, "learning_rate": 3.695390553655642e-06, "loss": 1.0051, "step": 3311 }, { "epoch": 0.3483245033983199, "grad_norm": 2.6749794579639006, "learning_rate": 3.6946572861112373e-06, "loss": 0.9813, "step": 3312 }, { "epoch": 0.3484296738401672, "grad_norm": 2.07377234986436, "learning_rate": 3.6939238853532657e-06, "loss": 0.9667, "step": 3313 }, { "epoch": 0.34853484428201453, "grad_norm": 3.060207599086186, "learning_rate": 3.693190351463505e-06, "loss": 1.0142, "step": 3314 }, { "epoch": 0.34864001472386186, "grad_norm": 2.936425505722273, "learning_rate": 3.6924566845237508e-06, "loss": 0.9837, "step": 3315 }, { "epoch": 0.3487451851657092, "grad_norm": 3.587810211037129, "learning_rate": 3.691722884615814e-06, "loss": 1.0268, "step": 3316 }, { "epoch": 0.3488503556075565, "grad_norm": 2.1046353489321077, "learning_rate": 3.690988951821517e-06, "loss": 1.0083, "step": 3317 }, { "epoch": 0.34895552604940383, "grad_norm": 2.5495061254512805, "learning_rate": 3.6902548862227007e-06, "loss": 1.0373, "step": 3318 }, { "epoch": 0.34906069649125115, "grad_norm": 3.116414932564101, "learning_rate": 3.689520687901218e-06, "loss": 1.039, "step": 3319 }, { "epoch": 0.3491658669330985, "grad_norm": 2.0740724861509197, "learning_rate": 3.6887863569389388e-06, "loss": 1.0328, "step": 3320 }, { "epoch": 0.34927103737494575, "grad_norm": 3.0783194643210865, "learning_rate": 3.688051893417745e-06, "loss": 1.0286, "step": 3321 }, { "epoch": 0.34937620781679307, "grad_norm": 2.66641944778877, "learning_rate": 3.687317297419536e-06, "loss": 1.037, "step": 3322 }, { "epoch": 0.3494813782586404, "grad_norm": 2.7985552282879804, "learning_rate": 3.6865825690262256e-06, "loss": 1.0158, "step": 3323 }, { "epoch": 0.3495865487004877, "grad_norm": 2.3794909564668623, "learning_rate": 3.6858477083197403e-06, "loss": 1.0267, "step": 3324 }, { "epoch": 0.34969171914233504, "grad_norm": 2.6094670978251115, "learning_rate": 3.6851127153820243e-06, "loss": 1.0293, "step": 3325 }, { "epoch": 0.34979688958418237, "grad_norm": 3.154815924297272, "learning_rate": 3.684377590295034e-06, "loss": 0.9958, "step": 3326 }, { "epoch": 0.3499020600260297, "grad_norm": 2.383540391351354, "learning_rate": 3.6836423331407416e-06, "loss": 1.0109, "step": 3327 }, { "epoch": 0.350007230467877, "grad_norm": 1.9040090397223854, "learning_rate": 3.6829069440011343e-06, "loss": 0.9934, "step": 3328 }, { "epoch": 0.35011240090972434, "grad_norm": 2.779759189993991, "learning_rate": 3.682171422958214e-06, "loss": 1.007, "step": 3329 }, { "epoch": 0.35021757135157167, "grad_norm": 3.2401364529589824, "learning_rate": 3.681435770093996e-06, "loss": 1.0305, "step": 3330 }, { "epoch": 0.35032274179341893, "grad_norm": 3.727910090979757, "learning_rate": 3.6806999854905117e-06, "loss": 1.0082, "step": 3331 }, { "epoch": 0.35042791223526626, "grad_norm": 2.5069421827992153, "learning_rate": 3.6799640692298076e-06, "loss": 0.9588, "step": 3332 }, { "epoch": 0.3505330826771136, "grad_norm": 3.2086550817958877, "learning_rate": 3.6792280213939433e-06, "loss": 1.0142, "step": 3333 }, { "epoch": 0.3506382531189609, "grad_norm": 1.736538961113674, "learning_rate": 3.6784918420649952e-06, "loss": 0.9666, "step": 3334 }, { "epoch": 0.35074342356080823, "grad_norm": 1.8790958449036552, "learning_rate": 3.6777555313250505e-06, "loss": 1.0285, "step": 3335 }, { "epoch": 0.35084859400265556, "grad_norm": 2.3400912078637677, "learning_rate": 3.6770190892562154e-06, "loss": 1.0094, "step": 3336 }, { "epoch": 0.3509537644445029, "grad_norm": 1.9690870053528573, "learning_rate": 3.676282515940608e-06, "loss": 0.9816, "step": 3337 }, { "epoch": 0.3510589348863502, "grad_norm": 2.517883459106263, "learning_rate": 3.675545811460362e-06, "loss": 1.0008, "step": 3338 }, { "epoch": 0.35116410532819753, "grad_norm": 4.054324173962527, "learning_rate": 3.6748089758976267e-06, "loss": 0.9958, "step": 3339 }, { "epoch": 0.35126927577004485, "grad_norm": 3.008203369375549, "learning_rate": 3.6740720093345646e-06, "loss": 0.9959, "step": 3340 }, { "epoch": 0.3513744462118921, "grad_norm": 2.2054094909667543, "learning_rate": 3.6733349118533524e-06, "loss": 0.9362, "step": 3341 }, { "epoch": 0.35147961665373945, "grad_norm": 2.147723777459706, "learning_rate": 3.6725976835361832e-06, "loss": 1.0227, "step": 3342 }, { "epoch": 0.35158478709558677, "grad_norm": 3.4189072148500466, "learning_rate": 3.6718603244652634e-06, "loss": 1.0129, "step": 3343 }, { "epoch": 0.3516899575374341, "grad_norm": 2.39516829951863, "learning_rate": 3.671122834722814e-06, "loss": 1.0158, "step": 3344 }, { "epoch": 0.3517951279792814, "grad_norm": 2.9471635187278786, "learning_rate": 3.67038521439107e-06, "loss": 0.9988, "step": 3345 }, { "epoch": 0.35190029842112874, "grad_norm": 2.7956147749271136, "learning_rate": 3.669647463552284e-06, "loss": 1.0219, "step": 3346 }, { "epoch": 0.35200546886297607, "grad_norm": 2.549496541093747, "learning_rate": 3.6689095822887188e-06, "loss": 1.0342, "step": 3347 }, { "epoch": 0.3521106393048234, "grad_norm": 3.3421799590829755, "learning_rate": 3.6681715706826555e-06, "loss": 0.9693, "step": 3348 }, { "epoch": 0.3522158097466707, "grad_norm": 2.7877765286056575, "learning_rate": 3.6674334288163872e-06, "loss": 0.9698, "step": 3349 }, { "epoch": 0.35232098018851804, "grad_norm": 2.850956387184573, "learning_rate": 3.666695156772222e-06, "loss": 1.0254, "step": 3350 }, { "epoch": 0.3524261506303653, "grad_norm": 2.5846160246925156, "learning_rate": 3.6659567546324843e-06, "loss": 0.9714, "step": 3351 }, { "epoch": 0.35253132107221263, "grad_norm": 3.0980328608661667, "learning_rate": 3.6652182224795108e-06, "loss": 1.0372, "step": 3352 }, { "epoch": 0.35263649151405996, "grad_norm": 2.2612646017522002, "learning_rate": 3.6644795603956535e-06, "loss": 0.978, "step": 3353 }, { "epoch": 0.3527416619559073, "grad_norm": 2.549186265914262, "learning_rate": 3.663740768463279e-06, "loss": 1.0212, "step": 3354 }, { "epoch": 0.3528468323977546, "grad_norm": 2.747876314122542, "learning_rate": 3.663001846764769e-06, "loss": 1.0214, "step": 3355 }, { "epoch": 0.35295200283960193, "grad_norm": 2.235824760868568, "learning_rate": 3.6622627953825187e-06, "loss": 0.9917, "step": 3356 }, { "epoch": 0.35305717328144925, "grad_norm": 2.4010747810798603, "learning_rate": 3.661523614398938e-06, "loss": 0.9649, "step": 3357 }, { "epoch": 0.3531623437232966, "grad_norm": 2.828908496537335, "learning_rate": 3.6607843038964515e-06, "loss": 0.9981, "step": 3358 }, { "epoch": 0.3532675141651439, "grad_norm": 2.555400473058865, "learning_rate": 3.660044863957497e-06, "loss": 1.0143, "step": 3359 }, { "epoch": 0.3533726846069912, "grad_norm": 2.617313449912603, "learning_rate": 3.6593052946645293e-06, "loss": 1.0176, "step": 3360 }, { "epoch": 0.3534778550488385, "grad_norm": 1.6229208590309265, "learning_rate": 3.6585655961000144e-06, "loss": 0.9889, "step": 3361 }, { "epoch": 0.3535830254906858, "grad_norm": 2.6242669270631156, "learning_rate": 3.6578257683464363e-06, "loss": 0.9977, "step": 3362 }, { "epoch": 0.35368819593253314, "grad_norm": 2.730302244599849, "learning_rate": 3.6570858114862905e-06, "loss": 0.9535, "step": 3363 }, { "epoch": 0.35379336637438047, "grad_norm": 2.5049239813451427, "learning_rate": 3.656345725602089e-06, "loss": 1.0363, "step": 3364 }, { "epoch": 0.3538985368162278, "grad_norm": 2.3239114473104454, "learning_rate": 3.6556055107763553e-06, "loss": 1.003, "step": 3365 }, { "epoch": 0.3540037072580751, "grad_norm": 3.122736859914737, "learning_rate": 3.6548651670916302e-06, "loss": 1.013, "step": 3366 }, { "epoch": 0.35410887769992244, "grad_norm": 3.046846196486878, "learning_rate": 3.654124694630468e-06, "loss": 1.0142, "step": 3367 }, { "epoch": 0.35421404814176977, "grad_norm": 2.636989697235664, "learning_rate": 3.653384093475436e-06, "loss": 1.0422, "step": 3368 }, { "epoch": 0.3543192185836171, "grad_norm": 2.61843008906023, "learning_rate": 3.6526433637091193e-06, "loss": 1.037, "step": 3369 }, { "epoch": 0.3544243890254644, "grad_norm": 1.915867272619924, "learning_rate": 3.651902505414112e-06, "loss": 0.9925, "step": 3370 }, { "epoch": 0.35452955946731174, "grad_norm": 2.9271823410792797, "learning_rate": 3.651161518673028e-06, "loss": 1.0368, "step": 3371 }, { "epoch": 0.354634729909159, "grad_norm": 2.409589901528896, "learning_rate": 3.6504204035684915e-06, "loss": 1.0037, "step": 3372 }, { "epoch": 0.35473990035100633, "grad_norm": 2.1387568533332395, "learning_rate": 3.649679160183144e-06, "loss": 1.0186, "step": 3373 }, { "epoch": 0.35484507079285366, "grad_norm": 2.4112770744675314, "learning_rate": 3.6489377885996385e-06, "loss": 1.0667, "step": 3374 }, { "epoch": 0.354950241234701, "grad_norm": 2.4610952866669096, "learning_rate": 3.648196288900644e-06, "loss": 1.0212, "step": 3375 }, { "epoch": 0.3550554116765483, "grad_norm": 3.23300202577758, "learning_rate": 3.6474546611688446e-06, "loss": 1.0027, "step": 3376 }, { "epoch": 0.35516058211839563, "grad_norm": 2.121250895820615, "learning_rate": 3.646712905486936e-06, "loss": 1.0263, "step": 3377 }, { "epoch": 0.35526575256024295, "grad_norm": 3.2579471158022195, "learning_rate": 3.6459710219376317e-06, "loss": 1.019, "step": 3378 }, { "epoch": 0.3553709230020903, "grad_norm": 2.283154927593308, "learning_rate": 3.645229010603655e-06, "loss": 1.0052, "step": 3379 }, { "epoch": 0.3554760934439376, "grad_norm": 2.844999630621272, "learning_rate": 3.6444868715677475e-06, "loss": 0.9872, "step": 3380 }, { "epoch": 0.3555812638857849, "grad_norm": 2.110778090946952, "learning_rate": 3.6437446049126636e-06, "loss": 0.9785, "step": 3381 }, { "epoch": 0.3556864343276322, "grad_norm": 3.104483713885235, "learning_rate": 3.6430022107211705e-06, "loss": 1.058, "step": 3382 }, { "epoch": 0.3557916047694795, "grad_norm": 3.698736872503185, "learning_rate": 3.642259689076052e-06, "loss": 0.9923, "step": 3383 }, { "epoch": 0.35589677521132684, "grad_norm": 2.6468434275616657, "learning_rate": 3.641517040060105e-06, "loss": 1.0115, "step": 3384 }, { "epoch": 0.35600194565317417, "grad_norm": 3.429273915761346, "learning_rate": 3.6407742637561407e-06, "loss": 0.9963, "step": 3385 }, { "epoch": 0.3561071160950215, "grad_norm": 2.2796812206143393, "learning_rate": 3.6400313602469835e-06, "loss": 0.9957, "step": 3386 }, { "epoch": 0.3562122865368688, "grad_norm": 2.2684651957658435, "learning_rate": 3.639288329615474e-06, "loss": 0.9733, "step": 3387 }, { "epoch": 0.35631745697871614, "grad_norm": 2.7538582807610092, "learning_rate": 3.6385451719444655e-06, "loss": 0.9873, "step": 3388 }, { "epoch": 0.35642262742056346, "grad_norm": 2.1667916415130244, "learning_rate": 3.6378018873168254e-06, "loss": 0.9706, "step": 3389 }, { "epoch": 0.3565277978624108, "grad_norm": 2.6101805735697092, "learning_rate": 3.6370584758154366e-06, "loss": 1.0247, "step": 3390 }, { "epoch": 0.3566329683042581, "grad_norm": 2.6339328347622066, "learning_rate": 3.6363149375231937e-06, "loss": 1.0401, "step": 3391 }, { "epoch": 0.3567381387461054, "grad_norm": 2.3753106980243843, "learning_rate": 3.6355712725230093e-06, "loss": 0.9831, "step": 3392 }, { "epoch": 0.3568433091879527, "grad_norm": 4.0920704573771305, "learning_rate": 3.6348274808978063e-06, "loss": 1.0566, "step": 3393 }, { "epoch": 0.35694847962980003, "grad_norm": 2.1861120277976256, "learning_rate": 3.634083562730523e-06, "loss": 1.0138, "step": 3394 }, { "epoch": 0.35705365007164735, "grad_norm": 2.4463244703652367, "learning_rate": 3.633339518104113e-06, "loss": 0.9825, "step": 3395 }, { "epoch": 0.3571588205134947, "grad_norm": 2.2656990738852016, "learning_rate": 3.632595347101543e-06, "loss": 1.0078, "step": 3396 }, { "epoch": 0.357263990955342, "grad_norm": 2.8104237008735944, "learning_rate": 3.631851049805793e-06, "loss": 1.0039, "step": 3397 }, { "epoch": 0.3573691613971893, "grad_norm": 1.6073481837060561, "learning_rate": 3.6311066262998585e-06, "loss": 0.9729, "step": 3398 }, { "epoch": 0.35747433183903665, "grad_norm": 2.1246171429605836, "learning_rate": 3.630362076666748e-06, "loss": 0.9902, "step": 3399 }, { "epoch": 0.357579502280884, "grad_norm": 2.0248681289481048, "learning_rate": 3.6296174009894856e-06, "loss": 1.0255, "step": 3400 }, { "epoch": 0.3576846727227313, "grad_norm": 2.938773915640707, "learning_rate": 3.628872599351108e-06, "loss": 1.0085, "step": 3401 }, { "epoch": 0.35778984316457857, "grad_norm": 2.8664540166582944, "learning_rate": 3.628127671834665e-06, "loss": 1.0408, "step": 3402 }, { "epoch": 0.3578950136064259, "grad_norm": 1.626968288052293, "learning_rate": 3.627382618523224e-06, "loss": 0.9455, "step": 3403 }, { "epoch": 0.3580001840482732, "grad_norm": 2.748931967001064, "learning_rate": 3.626637439499864e-06, "loss": 1.0007, "step": 3404 }, { "epoch": 0.35810535449012054, "grad_norm": 2.474980683402302, "learning_rate": 3.6258921348476754e-06, "loss": 0.9864, "step": 3405 }, { "epoch": 0.35821052493196787, "grad_norm": 2.7019560736927595, "learning_rate": 3.625146704649769e-06, "loss": 0.9493, "step": 3406 }, { "epoch": 0.3583156953738152, "grad_norm": 2.901227686551035, "learning_rate": 3.6244011489892645e-06, "loss": 1.0309, "step": 3407 }, { "epoch": 0.3584208658156625, "grad_norm": 2.4436082587640886, "learning_rate": 3.623655467949297e-06, "loss": 1.0221, "step": 3408 }, { "epoch": 0.35852603625750984, "grad_norm": 3.134715394017839, "learning_rate": 3.6229096616130154e-06, "loss": 0.9888, "step": 3409 }, { "epoch": 0.35863120669935716, "grad_norm": 2.396056272037577, "learning_rate": 3.6221637300635844e-06, "loss": 1.056, "step": 3410 }, { "epoch": 0.3587363771412045, "grad_norm": 2.0197687516513647, "learning_rate": 3.6214176733841792e-06, "loss": 0.9827, "step": 3411 }, { "epoch": 0.35884154758305176, "grad_norm": 2.934753315186612, "learning_rate": 3.6206714916579925e-06, "loss": 0.9909, "step": 3412 }, { "epoch": 0.3589467180248991, "grad_norm": 2.136669283224677, "learning_rate": 3.619925184968229e-06, "loss": 1.0157, "step": 3413 }, { "epoch": 0.3590518884667464, "grad_norm": 2.5314749644598344, "learning_rate": 3.6191787533981075e-06, "loss": 1.0389, "step": 3414 }, { "epoch": 0.35915705890859373, "grad_norm": 2.4095375951453004, "learning_rate": 3.618432197030861e-06, "loss": 1.0118, "step": 3415 }, { "epoch": 0.35926222935044105, "grad_norm": 2.325532442349783, "learning_rate": 3.617685515949736e-06, "loss": 1.0057, "step": 3416 }, { "epoch": 0.3593673997922884, "grad_norm": 2.4488118992083354, "learning_rate": 3.6169387102379935e-06, "loss": 1.0158, "step": 3417 }, { "epoch": 0.3594725702341357, "grad_norm": 2.624600631200174, "learning_rate": 3.6161917799789076e-06, "loss": 1.0063, "step": 3418 }, { "epoch": 0.359577740675983, "grad_norm": 2.643328190762798, "learning_rate": 3.615444725255768e-06, "loss": 0.9978, "step": 3419 }, { "epoch": 0.35968291111783035, "grad_norm": 2.407743326176636, "learning_rate": 3.6146975461518765e-06, "loss": 0.981, "step": 3420 }, { "epoch": 0.3597880815596777, "grad_norm": 2.311733214584104, "learning_rate": 3.613950242750549e-06, "loss": 0.9686, "step": 3421 }, { "epoch": 0.35989325200152494, "grad_norm": 2.1921054999722123, "learning_rate": 3.6132028151351163e-06, "loss": 0.9877, "step": 3422 }, { "epoch": 0.35999842244337227, "grad_norm": 1.9973124149153578, "learning_rate": 3.6124552633889217e-06, "loss": 0.9562, "step": 3423 }, { "epoch": 0.3601035928852196, "grad_norm": 2.2987560515883567, "learning_rate": 3.6117075875953233e-06, "loss": 1.0459, "step": 3424 }, { "epoch": 0.3602087633270669, "grad_norm": 3.20659343785868, "learning_rate": 3.610959787837693e-06, "loss": 1.0328, "step": 3425 }, { "epoch": 0.36031393376891424, "grad_norm": 2.7744645057080355, "learning_rate": 3.6102118641994166e-06, "loss": 0.9981, "step": 3426 }, { "epoch": 0.36041910421076157, "grad_norm": 1.4642559942958495, "learning_rate": 3.6094638167638924e-06, "loss": 0.9947, "step": 3427 }, { "epoch": 0.3605242746526089, "grad_norm": 2.386402726019697, "learning_rate": 3.608715645614534e-06, "loss": 1.0195, "step": 3428 }, { "epoch": 0.3606294450944562, "grad_norm": 2.6634628273386465, "learning_rate": 3.607967350834769e-06, "loss": 1.0473, "step": 3429 }, { "epoch": 0.36073461553630354, "grad_norm": 3.495355937233298, "learning_rate": 3.6072189325080364e-06, "loss": 1.0102, "step": 3430 }, { "epoch": 0.36083978597815086, "grad_norm": 1.9319582065248462, "learning_rate": 3.6064703907177923e-06, "loss": 0.9659, "step": 3431 }, { "epoch": 0.3609449564199982, "grad_norm": 2.3818832688536973, "learning_rate": 3.6057217255475034e-06, "loss": 0.9997, "step": 3432 }, { "epoch": 0.36105012686184546, "grad_norm": 1.7493814880171161, "learning_rate": 3.6049729370806534e-06, "loss": 1.0365, "step": 3433 }, { "epoch": 0.3611552973036928, "grad_norm": 2.8422167028984644, "learning_rate": 3.6042240254007367e-06, "loss": 0.9969, "step": 3434 }, { "epoch": 0.3612604677455401, "grad_norm": 2.27792880127726, "learning_rate": 3.6034749905912637e-06, "loss": 1.0197, "step": 3435 }, { "epoch": 0.36136563818738743, "grad_norm": 3.38034146624978, "learning_rate": 3.6027258327357573e-06, "loss": 1.0358, "step": 3436 }, { "epoch": 0.36147080862923475, "grad_norm": 1.915172769306197, "learning_rate": 3.6019765519177536e-06, "loss": 1.0184, "step": 3437 }, { "epoch": 0.3615759790710821, "grad_norm": 3.7012827312157732, "learning_rate": 3.6012271482208043e-06, "loss": 1.0315, "step": 3438 }, { "epoch": 0.3616811495129294, "grad_norm": 2.8348305338817665, "learning_rate": 3.600477621728473e-06, "loss": 1.0226, "step": 3439 }, { "epoch": 0.3617863199547767, "grad_norm": 2.226936028972617, "learning_rate": 3.5997279725243382e-06, "loss": 0.9864, "step": 3440 }, { "epoch": 0.36189149039662405, "grad_norm": 2.8560998684935988, "learning_rate": 3.5989782006919915e-06, "loss": 1.0336, "step": 3441 }, { "epoch": 0.3619966608384714, "grad_norm": 2.229618061905727, "learning_rate": 3.5982283063150388e-06, "loss": 0.9575, "step": 3442 }, { "epoch": 0.36210183128031864, "grad_norm": 1.5501631952100456, "learning_rate": 3.5974782894770983e-06, "loss": 1.0172, "step": 3443 }, { "epoch": 0.36220700172216597, "grad_norm": 2.803536569294912, "learning_rate": 3.5967281502618035e-06, "loss": 1.0584, "step": 3444 }, { "epoch": 0.3623121721640133, "grad_norm": 2.1481647342373957, "learning_rate": 3.5959778887527995e-06, "loss": 1.0145, "step": 3445 }, { "epoch": 0.3624173426058606, "grad_norm": 3.0517742861625896, "learning_rate": 3.595227505033747e-06, "loss": 1.0183, "step": 3446 }, { "epoch": 0.36252251304770794, "grad_norm": 2.6479450779631217, "learning_rate": 3.5944769991883197e-06, "loss": 0.9737, "step": 3447 }, { "epoch": 0.36262768348955526, "grad_norm": 2.5993294962929605, "learning_rate": 3.5937263713002046e-06, "loss": 1.0112, "step": 3448 }, { "epoch": 0.3627328539314026, "grad_norm": 2.2975185450286353, "learning_rate": 3.5929756214531035e-06, "loss": 1.0724, "step": 3449 }, { "epoch": 0.3628380243732499, "grad_norm": 3.128724604849356, "learning_rate": 3.59222474973073e-06, "loss": 1.0058, "step": 3450 }, { "epoch": 0.36294319481509724, "grad_norm": 2.551432018630201, "learning_rate": 3.5914737562168116e-06, "loss": 1.0239, "step": 3451 }, { "epoch": 0.36304836525694456, "grad_norm": 2.505173837642558, "learning_rate": 3.590722640995091e-06, "loss": 0.9775, "step": 3452 }, { "epoch": 0.36315353569879183, "grad_norm": 2.5187594676149434, "learning_rate": 3.589971404149323e-06, "loss": 1.0362, "step": 3453 }, { "epoch": 0.36325870614063915, "grad_norm": 2.239747984102128, "learning_rate": 3.589220045763276e-06, "loss": 1.0336, "step": 3454 }, { "epoch": 0.3633638765824865, "grad_norm": 2.9744557369992184, "learning_rate": 3.588468565920732e-06, "loss": 0.9874, "step": 3455 }, { "epoch": 0.3634690470243338, "grad_norm": 2.0034051449703902, "learning_rate": 3.5877169647054875e-06, "loss": 0.9759, "step": 3456 }, { "epoch": 0.3635742174661811, "grad_norm": 2.4849493412176065, "learning_rate": 3.586965242201353e-06, "loss": 1.0216, "step": 3457 }, { "epoch": 0.36367938790802845, "grad_norm": 2.4957735320651775, "learning_rate": 3.586213398492149e-06, "loss": 1.0352, "step": 3458 }, { "epoch": 0.3637845583498758, "grad_norm": 2.966574153623298, "learning_rate": 3.585461433661714e-06, "loss": 1.0324, "step": 3459 }, { "epoch": 0.3638897287917231, "grad_norm": 2.0761534835735658, "learning_rate": 3.5847093477938955e-06, "loss": 0.9981, "step": 3460 }, { "epoch": 0.3639948992335704, "grad_norm": 2.400801379499366, "learning_rate": 3.5839571409725593e-06, "loss": 1.0048, "step": 3461 }, { "epoch": 0.36410006967541775, "grad_norm": 2.871894855158759, "learning_rate": 3.583204813281581e-06, "loss": 1.0233, "step": 3462 }, { "epoch": 0.364205240117265, "grad_norm": 2.7896234095062895, "learning_rate": 3.582452364804852e-06, "loss": 1.0171, "step": 3463 }, { "epoch": 0.36431041055911234, "grad_norm": 2.1993954405877942, "learning_rate": 3.581699795626275e-06, "loss": 0.9991, "step": 3464 }, { "epoch": 0.36441558100095967, "grad_norm": 2.054066351626218, "learning_rate": 3.580947105829769e-06, "loss": 1.0175, "step": 3465 }, { "epoch": 0.364520751442807, "grad_norm": 3.0539518525223213, "learning_rate": 3.580194295499263e-06, "loss": 1.0232, "step": 3466 }, { "epoch": 0.3646259218846543, "grad_norm": 3.168788256078409, "learning_rate": 3.5794413647187008e-06, "loss": 0.9885, "step": 3467 }, { "epoch": 0.36473109232650164, "grad_norm": 1.775472584199268, "learning_rate": 3.578688313572042e-06, "loss": 0.9917, "step": 3468 }, { "epoch": 0.36483626276834896, "grad_norm": 2.6600939684935625, "learning_rate": 3.577935142143256e-06, "loss": 0.9925, "step": 3469 }, { "epoch": 0.3649414332101963, "grad_norm": 3.0526490958871295, "learning_rate": 3.577181850516329e-06, "loss": 1.029, "step": 3470 }, { "epoch": 0.3650466036520436, "grad_norm": 2.5112352942954383, "learning_rate": 3.576428438775257e-06, "loss": 1.0038, "step": 3471 }, { "epoch": 0.36515177409389094, "grad_norm": 2.4210733566582645, "learning_rate": 3.575674907004052e-06, "loss": 1.0058, "step": 3472 }, { "epoch": 0.3652569445357382, "grad_norm": 2.513245854367, "learning_rate": 3.5749212552867397e-06, "loss": 1.0608, "step": 3473 }, { "epoch": 0.36536211497758553, "grad_norm": 2.7039176655465473, "learning_rate": 3.5741674837073563e-06, "loss": 0.9913, "step": 3474 }, { "epoch": 0.36546728541943285, "grad_norm": 3.0641705314770693, "learning_rate": 3.5734135923499548e-06, "loss": 1.0289, "step": 3475 }, { "epoch": 0.3655724558612802, "grad_norm": 2.7553733431065064, "learning_rate": 3.572659581298598e-06, "loss": 0.9869, "step": 3476 }, { "epoch": 0.3656776263031275, "grad_norm": 2.876816353464465, "learning_rate": 3.571905450637366e-06, "loss": 1.0163, "step": 3477 }, { "epoch": 0.3657827967449748, "grad_norm": 2.7493296725215712, "learning_rate": 3.571151200450349e-06, "loss": 1.0349, "step": 3478 }, { "epoch": 0.36588796718682215, "grad_norm": 2.7429718758626693, "learning_rate": 3.5703968308216523e-06, "loss": 0.9933, "step": 3479 }, { "epoch": 0.3659931376286695, "grad_norm": 2.7233182700143024, "learning_rate": 3.569642341835394e-06, "loss": 0.9801, "step": 3480 }, { "epoch": 0.3660983080705168, "grad_norm": 2.898665070653661, "learning_rate": 3.5688877335757055e-06, "loss": 1.0327, "step": 3481 }, { "epoch": 0.3662034785123641, "grad_norm": 3.3379320870265663, "learning_rate": 3.5681330061267317e-06, "loss": 1.0193, "step": 3482 }, { "epoch": 0.36630864895421145, "grad_norm": 2.6749109290262223, "learning_rate": 3.5673781595726286e-06, "loss": 0.9996, "step": 3483 }, { "epoch": 0.3664138193960587, "grad_norm": 2.316180353529294, "learning_rate": 3.5666231939975702e-06, "loss": 1.044, "step": 3484 }, { "epoch": 0.36651898983790604, "grad_norm": 2.343425285023339, "learning_rate": 3.5658681094857394e-06, "loss": 0.9644, "step": 3485 }, { "epoch": 0.36662416027975336, "grad_norm": 2.8549983640033703, "learning_rate": 3.5651129061213345e-06, "loss": 1.0054, "step": 3486 }, { "epoch": 0.3667293307216007, "grad_norm": 2.8622892093311907, "learning_rate": 3.5643575839885667e-06, "loss": 1.0235, "step": 3487 }, { "epoch": 0.366834501163448, "grad_norm": 2.9948754783989253, "learning_rate": 3.5636021431716604e-06, "loss": 1.0314, "step": 3488 }, { "epoch": 0.36693967160529534, "grad_norm": 2.1124215741074, "learning_rate": 3.5628465837548526e-06, "loss": 1.0036, "step": 3489 }, { "epoch": 0.36704484204714266, "grad_norm": 2.985214300844437, "learning_rate": 3.5620909058223933e-06, "loss": 1.0203, "step": 3490 }, { "epoch": 0.36715001248899, "grad_norm": 3.111175604817724, "learning_rate": 3.5613351094585484e-06, "loss": 1.0222, "step": 3491 }, { "epoch": 0.3672551829308373, "grad_norm": 2.3456559320452195, "learning_rate": 3.5605791947475934e-06, "loss": 0.9925, "step": 3492 }, { "epoch": 0.36736035337268463, "grad_norm": 2.1181973524224675, "learning_rate": 3.55982316177382e-06, "loss": 0.992, "step": 3493 }, { "epoch": 0.3674655238145319, "grad_norm": 3.0413234842482844, "learning_rate": 3.5590670106215307e-06, "loss": 1.0221, "step": 3494 }, { "epoch": 0.3675706942563792, "grad_norm": 2.564889170052048, "learning_rate": 3.5583107413750427e-06, "loss": 1.0259, "step": 3495 }, { "epoch": 0.36767586469822655, "grad_norm": 2.626728709218182, "learning_rate": 3.5575543541186853e-06, "loss": 1.0114, "step": 3496 }, { "epoch": 0.3677810351400739, "grad_norm": 2.1294990688555417, "learning_rate": 3.5567978489368026e-06, "loss": 1.01, "step": 3497 }, { "epoch": 0.3678862055819212, "grad_norm": 2.502867155421716, "learning_rate": 3.55604122591375e-06, "loss": 1.0494, "step": 3498 }, { "epoch": 0.3679913760237685, "grad_norm": 3.537928483150429, "learning_rate": 3.5552844851338973e-06, "loss": 1.0098, "step": 3499 }, { "epoch": 0.36809654646561585, "grad_norm": 2.1233082823635665, "learning_rate": 3.5545276266816265e-06, "loss": 1.0037, "step": 3500 }, { "epoch": 0.3682017169074632, "grad_norm": 2.065621137457995, "learning_rate": 3.5537706506413338e-06, "loss": 1.0426, "step": 3501 }, { "epoch": 0.3683068873493105, "grad_norm": 2.382029858431779, "learning_rate": 3.553013557097428e-06, "loss": 1.0496, "step": 3502 }, { "epoch": 0.3684120577911578, "grad_norm": 1.6297673370173709, "learning_rate": 3.552256346134329e-06, "loss": 1.012, "step": 3503 }, { "epoch": 0.3685172282330051, "grad_norm": 2.575845849912579, "learning_rate": 3.5514990178364746e-06, "loss": 0.9622, "step": 3504 }, { "epoch": 0.3686223986748524, "grad_norm": 1.8413351349315177, "learning_rate": 3.550741572288311e-06, "loss": 1.0271, "step": 3505 }, { "epoch": 0.36872756911669974, "grad_norm": 2.2981599308221745, "learning_rate": 3.5499840095742987e-06, "loss": 1.0653, "step": 3506 }, { "epoch": 0.36883273955854706, "grad_norm": 2.770027195890931, "learning_rate": 3.549226329778914e-06, "loss": 1.0416, "step": 3507 }, { "epoch": 0.3689379100003944, "grad_norm": 2.172783977718339, "learning_rate": 3.5484685329866424e-06, "loss": 1.0382, "step": 3508 }, { "epoch": 0.3690430804422417, "grad_norm": 2.7336060159802953, "learning_rate": 3.547710619281985e-06, "loss": 1.0135, "step": 3509 }, { "epoch": 0.36914825088408904, "grad_norm": 3.5054838300829463, "learning_rate": 3.546952588749454e-06, "loss": 0.9847, "step": 3510 }, { "epoch": 0.36925342132593636, "grad_norm": 3.127625095566459, "learning_rate": 3.5461944414735766e-06, "loss": 0.9888, "step": 3511 }, { "epoch": 0.3693585917677837, "grad_norm": 2.4488246566949448, "learning_rate": 3.545436177538892e-06, "loss": 0.9815, "step": 3512 }, { "epoch": 0.369463762209631, "grad_norm": 3.6498915209813, "learning_rate": 3.544677797029952e-06, "loss": 0.998, "step": 3513 }, { "epoch": 0.3695689326514783, "grad_norm": 2.8541317738893306, "learning_rate": 3.5439193000313226e-06, "loss": 1.0012, "step": 3514 }, { "epoch": 0.3696741030933256, "grad_norm": 2.912304761031327, "learning_rate": 3.5431606866275812e-06, "loss": 0.984, "step": 3515 }, { "epoch": 0.3697792735351729, "grad_norm": 2.2203764999621467, "learning_rate": 3.542401956903321e-06, "loss": 1.0176, "step": 3516 }, { "epoch": 0.36988444397702025, "grad_norm": 2.439171891410255, "learning_rate": 3.5416431109431437e-06, "loss": 1.0556, "step": 3517 }, { "epoch": 0.3699896144188676, "grad_norm": 2.797287931857089, "learning_rate": 3.540884148831668e-06, "loss": 1.0811, "step": 3518 }, { "epoch": 0.3700947848607149, "grad_norm": 2.6586762875266614, "learning_rate": 3.540125070653524e-06, "loss": 1.0006, "step": 3519 }, { "epoch": 0.3701999553025622, "grad_norm": 2.9224263695935817, "learning_rate": 3.5393658764933546e-06, "loss": 1.0327, "step": 3520 }, { "epoch": 0.37030512574440955, "grad_norm": 2.7522305202993405, "learning_rate": 3.5386065664358164e-06, "loss": 1.0175, "step": 3521 }, { "epoch": 0.37041029618625687, "grad_norm": 2.195682252666581, "learning_rate": 3.5378471405655768e-06, "loss": 0.958, "step": 3522 }, { "epoch": 0.3705154666281042, "grad_norm": 2.4183425267496985, "learning_rate": 3.5370875989673198e-06, "loss": 1.0164, "step": 3523 }, { "epoch": 0.37062063706995146, "grad_norm": 2.315338386367098, "learning_rate": 3.536327941725739e-06, "loss": 1.0022, "step": 3524 }, { "epoch": 0.3707258075117988, "grad_norm": 2.3589678245581815, "learning_rate": 3.5355681689255417e-06, "loss": 0.9943, "step": 3525 }, { "epoch": 0.3708309779536461, "grad_norm": 2.575779344588469, "learning_rate": 3.5348082806514496e-06, "loss": 1.0326, "step": 3526 }, { "epoch": 0.37093614839549344, "grad_norm": 2.172676292007147, "learning_rate": 3.534048276988195e-06, "loss": 0.9872, "step": 3527 }, { "epoch": 0.37104131883734076, "grad_norm": 2.676488772279958, "learning_rate": 3.5332881580205246e-06, "loss": 0.9993, "step": 3528 }, { "epoch": 0.3711464892791881, "grad_norm": 2.137926314108314, "learning_rate": 3.5325279238331977e-06, "loss": 1.0049, "step": 3529 }, { "epoch": 0.3712516597210354, "grad_norm": 2.7103603439452075, "learning_rate": 3.531767574510987e-06, "loss": 0.957, "step": 3530 }, { "epoch": 0.37135683016288273, "grad_norm": 2.558627297098295, "learning_rate": 3.5310071101386766e-06, "loss": 1.0162, "step": 3531 }, { "epoch": 0.37146200060473006, "grad_norm": 2.610302442944242, "learning_rate": 3.530246530801064e-06, "loss": 0.9467, "step": 3532 }, { "epoch": 0.3715671710465774, "grad_norm": 2.5992181909547543, "learning_rate": 3.52948583658296e-06, "loss": 0.997, "step": 3533 }, { "epoch": 0.37167234148842465, "grad_norm": 2.943011406158968, "learning_rate": 3.528725027569188e-06, "loss": 1.0061, "step": 3534 }, { "epoch": 0.371777511930272, "grad_norm": 2.0335971995059103, "learning_rate": 3.527964103844583e-06, "loss": 1.0035, "step": 3535 }, { "epoch": 0.3718826823721193, "grad_norm": 2.290860416398897, "learning_rate": 3.5272030654939967e-06, "loss": 1.0089, "step": 3536 }, { "epoch": 0.3719878528139666, "grad_norm": 3.705983857104791, "learning_rate": 3.526441912602288e-06, "loss": 1.0453, "step": 3537 }, { "epoch": 0.37209302325581395, "grad_norm": 2.910250579235628, "learning_rate": 3.525680645254333e-06, "loss": 1.0259, "step": 3538 }, { "epoch": 0.3721981936976613, "grad_norm": 2.7958380455087366, "learning_rate": 3.5249192635350178e-06, "loss": 1.0077, "step": 3539 }, { "epoch": 0.3723033641395086, "grad_norm": 2.415766053951578, "learning_rate": 3.5241577675292426e-06, "loss": 0.9892, "step": 3540 }, { "epoch": 0.3724085345813559, "grad_norm": 3.1343561189937725, "learning_rate": 3.5233961573219203e-06, "loss": 1.0475, "step": 3541 }, { "epoch": 0.37251370502320325, "grad_norm": 2.476762825422412, "learning_rate": 3.5226344329979767e-06, "loss": 1.01, "step": 3542 }, { "epoch": 0.37261887546505057, "grad_norm": 2.4464917162110815, "learning_rate": 3.52187259464235e-06, "loss": 1.0814, "step": 3543 }, { "epoch": 0.3727240459068979, "grad_norm": 2.54651260509731, "learning_rate": 3.521110642339991e-06, "loss": 1.0308, "step": 3544 }, { "epoch": 0.37282921634874516, "grad_norm": 2.757074308896569, "learning_rate": 3.5203485761758627e-06, "loss": 0.9986, "step": 3545 }, { "epoch": 0.3729343867905925, "grad_norm": 2.2476503257651643, "learning_rate": 3.519586396234942e-06, "loss": 1.045, "step": 3546 }, { "epoch": 0.3730395572324398, "grad_norm": 3.4160260399141134, "learning_rate": 3.518824102602217e-06, "loss": 0.9736, "step": 3547 }, { "epoch": 0.37314472767428714, "grad_norm": 2.3235809456017242, "learning_rate": 3.5180616953626905e-06, "loss": 1.026, "step": 3548 }, { "epoch": 0.37324989811613446, "grad_norm": 3.1866772084593977, "learning_rate": 3.5172991746013764e-06, "loss": 1.0147, "step": 3549 }, { "epoch": 0.3733550685579818, "grad_norm": 1.7405784529022383, "learning_rate": 3.516536540403302e-06, "loss": 0.9932, "step": 3550 }, { "epoch": 0.3734602389998291, "grad_norm": 2.4307045279664665, "learning_rate": 3.5157737928535065e-06, "loss": 1.0486, "step": 3551 }, { "epoch": 0.37356540944167643, "grad_norm": 2.4827334628389925, "learning_rate": 3.5150109320370425e-06, "loss": 1.0143, "step": 3552 }, { "epoch": 0.37367057988352376, "grad_norm": 2.17954336422211, "learning_rate": 3.5142479580389744e-06, "loss": 0.9822, "step": 3553 }, { "epoch": 0.3737757503253711, "grad_norm": 2.009056281408665, "learning_rate": 3.5134848709443793e-06, "loss": 1.0011, "step": 3554 }, { "epoch": 0.37388092076721835, "grad_norm": 2.70716892667249, "learning_rate": 3.5127216708383495e-06, "loss": 1.0292, "step": 3555 }, { "epoch": 0.3739860912090657, "grad_norm": 1.9705536861580673, "learning_rate": 3.5119583578059845e-06, "loss": 1.005, "step": 3556 }, { "epoch": 0.374091261650913, "grad_norm": 2.6162266436594415, "learning_rate": 3.5111949319324027e-06, "loss": 1.0156, "step": 3557 }, { "epoch": 0.3741964320927603, "grad_norm": 2.360521949456891, "learning_rate": 3.51043139330273e-06, "loss": 0.9847, "step": 3558 }, { "epoch": 0.37430160253460765, "grad_norm": 3.006787570352432, "learning_rate": 3.5096677420021092e-06, "loss": 1.0322, "step": 3559 }, { "epoch": 0.37440677297645497, "grad_norm": 2.2412182060746964, "learning_rate": 3.508903978115691e-06, "loss": 0.986, "step": 3560 }, { "epoch": 0.3745119434183023, "grad_norm": 2.3336931601024395, "learning_rate": 3.508140101728641e-06, "loss": 1.0246, "step": 3561 }, { "epoch": 0.3746171138601496, "grad_norm": 2.562229225549696, "learning_rate": 3.5073761129261393e-06, "loss": 0.9975, "step": 3562 }, { "epoch": 0.37472228430199694, "grad_norm": 3.1758702644776156, "learning_rate": 3.5066120117933743e-06, "loss": 1.02, "step": 3563 }, { "epoch": 0.37482745474384427, "grad_norm": 2.228109854729782, "learning_rate": 3.505847798415551e-06, "loss": 1.0244, "step": 3564 }, { "epoch": 0.37493262518569154, "grad_norm": 2.355915255145524, "learning_rate": 3.505083472877884e-06, "loss": 1.0427, "step": 3565 }, { "epoch": 0.37503779562753886, "grad_norm": 2.71234930662769, "learning_rate": 3.5043190352656026e-06, "loss": 0.9927, "step": 3566 }, { "epoch": 0.3751429660693862, "grad_norm": 3.125287938693249, "learning_rate": 3.503554485663947e-06, "loss": 0.9746, "step": 3567 }, { "epoch": 0.3752481365112335, "grad_norm": 2.832305455594945, "learning_rate": 3.50278982415817e-06, "loss": 0.9868, "step": 3568 }, { "epoch": 0.37535330695308083, "grad_norm": 2.16624778915707, "learning_rate": 3.502025050833538e-06, "loss": 0.9823, "step": 3569 }, { "epoch": 0.37545847739492816, "grad_norm": 2.6723466436176517, "learning_rate": 3.5012601657753283e-06, "loss": 0.9912, "step": 3570 }, { "epoch": 0.3755636478367755, "grad_norm": 2.894125932929896, "learning_rate": 3.500495169068832e-06, "loss": 0.9625, "step": 3571 }, { "epoch": 0.3756688182786228, "grad_norm": 2.349145898282107, "learning_rate": 3.499730060799352e-06, "loss": 0.9749, "step": 3572 }, { "epoch": 0.37577398872047013, "grad_norm": 2.2715747784546636, "learning_rate": 3.4989648410522053e-06, "loss": 1.0067, "step": 3573 }, { "epoch": 0.37587915916231746, "grad_norm": 2.7664541979733435, "learning_rate": 3.498199509912718e-06, "loss": 1.0479, "step": 3574 }, { "epoch": 0.3759843296041647, "grad_norm": 2.120229567329133, "learning_rate": 3.497434067466231e-06, "loss": 1.0106, "step": 3575 }, { "epoch": 0.37608950004601205, "grad_norm": 2.1692031969688608, "learning_rate": 3.4966685137980972e-06, "loss": 0.9755, "step": 3576 }, { "epoch": 0.3761946704878594, "grad_norm": 2.304539050607103, "learning_rate": 3.4959028489936815e-06, "loss": 1.0251, "step": 3577 }, { "epoch": 0.3762998409297067, "grad_norm": 3.3999675880797784, "learning_rate": 3.4951370731383615e-06, "loss": 0.9857, "step": 3578 }, { "epoch": 0.376405011371554, "grad_norm": 2.254087700165053, "learning_rate": 3.494371186317528e-06, "loss": 0.9828, "step": 3579 }, { "epoch": 0.37651018181340135, "grad_norm": 2.8458404135362048, "learning_rate": 3.4936051886165825e-06, "loss": 1.0431, "step": 3580 }, { "epoch": 0.37661535225524867, "grad_norm": 2.4716575338002214, "learning_rate": 3.4928390801209395e-06, "loss": 0.9775, "step": 3581 }, { "epoch": 0.376720522697096, "grad_norm": 2.6453228157698714, "learning_rate": 3.492072860916027e-06, "loss": 1.0048, "step": 3582 }, { "epoch": 0.3768256931389433, "grad_norm": 2.7675242977400454, "learning_rate": 3.4913065310872834e-06, "loss": 1.0323, "step": 3583 }, { "epoch": 0.37693086358079064, "grad_norm": 2.204262494467231, "learning_rate": 3.4905400907201604e-06, "loss": 1.0081, "step": 3584 }, { "epoch": 0.3770360340226379, "grad_norm": 2.9513125690747883, "learning_rate": 3.489773539900123e-06, "loss": 0.9778, "step": 3585 }, { "epoch": 0.37714120446448524, "grad_norm": 2.363516897078722, "learning_rate": 3.4890068787126475e-06, "loss": 1.0189, "step": 3586 }, { "epoch": 0.37724637490633256, "grad_norm": 3.122013375469763, "learning_rate": 3.4882401072432215e-06, "loss": 1.0105, "step": 3587 }, { "epoch": 0.3773515453481799, "grad_norm": 2.36989045728494, "learning_rate": 3.487473225577347e-06, "loss": 0.9898, "step": 3588 }, { "epoch": 0.3774567157900272, "grad_norm": 2.9849863757199784, "learning_rate": 3.486706233800537e-06, "loss": 1.0172, "step": 3589 }, { "epoch": 0.37756188623187453, "grad_norm": 2.6450922737910116, "learning_rate": 3.4859391319983165e-06, "loss": 1.0169, "step": 3590 }, { "epoch": 0.37766705667372186, "grad_norm": 2.521241818346483, "learning_rate": 3.485171920256224e-06, "loss": 1.0099, "step": 3591 }, { "epoch": 0.3777722271155692, "grad_norm": 3.926184947651807, "learning_rate": 3.484404598659809e-06, "loss": 1.026, "step": 3592 }, { "epoch": 0.3778773975574165, "grad_norm": 2.5873477697553944, "learning_rate": 3.483637167294634e-06, "loss": 0.9857, "step": 3593 }, { "epoch": 0.37798256799926383, "grad_norm": 2.890617758160231, "learning_rate": 3.4828696262462743e-06, "loss": 1.0289, "step": 3594 }, { "epoch": 0.3780877384411111, "grad_norm": 2.7950755400661755, "learning_rate": 3.482101975600316e-06, "loss": 0.9954, "step": 3595 }, { "epoch": 0.3781929088829584, "grad_norm": 1.9102337369098177, "learning_rate": 3.481334215442358e-06, "loss": 0.9304, "step": 3596 }, { "epoch": 0.37829807932480575, "grad_norm": 3.195058034843074, "learning_rate": 3.4805663458580113e-06, "loss": 1.0091, "step": 3597 }, { "epoch": 0.37840324976665307, "grad_norm": 2.1115956503169757, "learning_rate": 3.4797983669329004e-06, "loss": 1.0348, "step": 3598 }, { "epoch": 0.3785084202085004, "grad_norm": 2.3416744434406813, "learning_rate": 3.47903027875266e-06, "loss": 0.9704, "step": 3599 }, { "epoch": 0.3786135906503477, "grad_norm": 2.363772195609984, "learning_rate": 3.4782620814029376e-06, "loss": 0.9948, "step": 3600 }, { "epoch": 0.37871876109219504, "grad_norm": 1.9628721511439282, "learning_rate": 3.4774937749693947e-06, "loss": 1.0062, "step": 3601 }, { "epoch": 0.37882393153404237, "grad_norm": 2.426600466574066, "learning_rate": 3.4767253595377015e-06, "loss": 0.9645, "step": 3602 }, { "epoch": 0.3789291019758897, "grad_norm": 2.6977870839090645, "learning_rate": 3.4759568351935446e-06, "loss": 1.0291, "step": 3603 }, { "epoch": 0.379034272417737, "grad_norm": 2.676605696492157, "learning_rate": 3.4751882020226174e-06, "loss": 1.0354, "step": 3604 }, { "epoch": 0.37913944285958434, "grad_norm": 3.363753270722036, "learning_rate": 3.4744194601106314e-06, "loss": 0.9662, "step": 3605 }, { "epoch": 0.3792446133014316, "grad_norm": 2.063579478740677, "learning_rate": 3.4736506095433053e-06, "loss": 1.0097, "step": 3606 }, { "epoch": 0.37934978374327893, "grad_norm": 1.9293491925383035, "learning_rate": 3.472881650406373e-06, "loss": 0.9955, "step": 3607 }, { "epoch": 0.37945495418512626, "grad_norm": 2.0019157816680124, "learning_rate": 3.472112582785579e-06, "loss": 0.958, "step": 3608 }, { "epoch": 0.3795601246269736, "grad_norm": 1.9340739804799547, "learning_rate": 3.4713434067666803e-06, "loss": 0.9936, "step": 3609 }, { "epoch": 0.3796652950688209, "grad_norm": 2.621910541531949, "learning_rate": 3.4705741224354463e-06, "loss": 1.0269, "step": 3610 }, { "epoch": 0.37977046551066823, "grad_norm": 2.319507440485628, "learning_rate": 3.4698047298776578e-06, "loss": 0.9785, "step": 3611 }, { "epoch": 0.37987563595251556, "grad_norm": 3.5152668337700885, "learning_rate": 3.4690352291791084e-06, "loss": 1.0375, "step": 3612 }, { "epoch": 0.3799808063943629, "grad_norm": 2.56130563777068, "learning_rate": 3.4682656204256033e-06, "loss": 0.9976, "step": 3613 }, { "epoch": 0.3800859768362102, "grad_norm": 3.0184814971313294, "learning_rate": 3.4674959037029593e-06, "loss": 1.028, "step": 3614 }, { "epoch": 0.38019114727805753, "grad_norm": 2.846303670335661, "learning_rate": 3.4667260790970065e-06, "loss": 0.9842, "step": 3615 }, { "epoch": 0.3802963177199048, "grad_norm": 2.4611491100975558, "learning_rate": 3.465956146693586e-06, "loss": 0.9622, "step": 3616 }, { "epoch": 0.3804014881617521, "grad_norm": 2.615213958589618, "learning_rate": 3.4651861065785515e-06, "loss": 1.0227, "step": 3617 }, { "epoch": 0.38050665860359945, "grad_norm": 2.541915515993435, "learning_rate": 3.464415958837768e-06, "loss": 1.0021, "step": 3618 }, { "epoch": 0.38061182904544677, "grad_norm": 2.3551759643491184, "learning_rate": 3.4636457035571135e-06, "loss": 0.9849, "step": 3619 }, { "epoch": 0.3807169994872941, "grad_norm": 3.3489736874070846, "learning_rate": 3.462875340822477e-06, "loss": 0.9757, "step": 3620 }, { "epoch": 0.3808221699291414, "grad_norm": 3.2681188585536973, "learning_rate": 3.4621048707197607e-06, "loss": 1.0341, "step": 3621 }, { "epoch": 0.38092734037098874, "grad_norm": 3.4147560699832744, "learning_rate": 3.461334293334877e-06, "loss": 1.0442, "step": 3622 }, { "epoch": 0.38103251081283607, "grad_norm": 2.2145079608120346, "learning_rate": 3.4605636087537514e-06, "loss": 0.998, "step": 3623 }, { "epoch": 0.3811376812546834, "grad_norm": 1.8033358102527874, "learning_rate": 3.4597928170623217e-06, "loss": 1.0069, "step": 3624 }, { "epoch": 0.3812428516965307, "grad_norm": 2.7695898589555465, "learning_rate": 3.459021918346537e-06, "loss": 1.018, "step": 3625 }, { "epoch": 0.381348022138378, "grad_norm": 3.306734754623489, "learning_rate": 3.458250912692359e-06, "loss": 1.006, "step": 3626 }, { "epoch": 0.3814531925802253, "grad_norm": 2.986143628061839, "learning_rate": 3.4574798001857598e-06, "loss": 1.0543, "step": 3627 }, { "epoch": 0.38155836302207263, "grad_norm": 2.3585732168057922, "learning_rate": 3.4567085809127247e-06, "loss": 1.0696, "step": 3628 }, { "epoch": 0.38166353346391996, "grad_norm": 2.876800068207621, "learning_rate": 3.4559372549592513e-06, "loss": 1.0084, "step": 3629 }, { "epoch": 0.3817687039057673, "grad_norm": 2.7444522756355534, "learning_rate": 3.455165822411347e-06, "loss": 1.0238, "step": 3630 }, { "epoch": 0.3818738743476146, "grad_norm": 2.73044610583874, "learning_rate": 3.4543942833550347e-06, "loss": 0.982, "step": 3631 }, { "epoch": 0.38197904478946193, "grad_norm": 3.4323322120847712, "learning_rate": 3.453622637876346e-06, "loss": 1.0404, "step": 3632 }, { "epoch": 0.38208421523130925, "grad_norm": 3.64343859536563, "learning_rate": 3.4528508860613243e-06, "loss": 1.0047, "step": 3633 }, { "epoch": 0.3821893856731566, "grad_norm": 2.3123411149550885, "learning_rate": 3.452079027996027e-06, "loss": 0.9852, "step": 3634 }, { "epoch": 0.3822945561150039, "grad_norm": 2.2206426400316, "learning_rate": 3.451307063766522e-06, "loss": 1.0439, "step": 3635 }, { "epoch": 0.38239972655685117, "grad_norm": 2.0863520447051855, "learning_rate": 3.45053499345889e-06, "loss": 0.9948, "step": 3636 }, { "epoch": 0.3825048969986985, "grad_norm": 1.7414750885086039, "learning_rate": 3.449762817159221e-06, "loss": 1.0387, "step": 3637 }, { "epoch": 0.3826100674405458, "grad_norm": 2.8068429720399295, "learning_rate": 3.4489905349536207e-06, "loss": 1.0214, "step": 3638 }, { "epoch": 0.38271523788239314, "grad_norm": 2.3872404074955216, "learning_rate": 3.448218146928204e-06, "loss": 1.0203, "step": 3639 }, { "epoch": 0.38282040832424047, "grad_norm": 3.3031263883141846, "learning_rate": 3.4474456531690976e-06, "loss": 1.0072, "step": 3640 }, { "epoch": 0.3829255787660878, "grad_norm": 2.6702786020242044, "learning_rate": 3.4466730537624406e-06, "loss": 1.0556, "step": 3641 }, { "epoch": 0.3830307492079351, "grad_norm": 2.2459257036074343, "learning_rate": 3.4459003487943842e-06, "loss": 0.9824, "step": 3642 }, { "epoch": 0.38313591964978244, "grad_norm": 2.809514281952401, "learning_rate": 3.4451275383510905e-06, "loss": 1.0374, "step": 3643 }, { "epoch": 0.38324109009162977, "grad_norm": 2.6516762564393455, "learning_rate": 3.444354622518735e-06, "loss": 0.9732, "step": 3644 }, { "epoch": 0.3833462605334771, "grad_norm": 2.7733996194687043, "learning_rate": 3.443581601383503e-06, "loss": 1.007, "step": 3645 }, { "epoch": 0.38345143097532436, "grad_norm": 2.7799328910992718, "learning_rate": 3.4428084750315925e-06, "loss": 1.0295, "step": 3646 }, { "epoch": 0.3835566014171717, "grad_norm": 2.195837771998775, "learning_rate": 3.442035243549213e-06, "loss": 1.0024, "step": 3647 }, { "epoch": 0.383661771859019, "grad_norm": 1.9838499795852829, "learning_rate": 3.441261907022585e-06, "loss": 0.9941, "step": 3648 }, { "epoch": 0.38376694230086633, "grad_norm": 2.8357895335106758, "learning_rate": 3.4404884655379433e-06, "loss": 1.0466, "step": 3649 }, { "epoch": 0.38387211274271366, "grad_norm": 3.490737509449956, "learning_rate": 3.4397149191815317e-06, "loss": 0.9822, "step": 3650 }, { "epoch": 0.383977283184561, "grad_norm": 3.0020658384708194, "learning_rate": 3.438941268039606e-06, "loss": 1.0228, "step": 3651 }, { "epoch": 0.3840824536264083, "grad_norm": 2.580567442677589, "learning_rate": 3.438167512198436e-06, "loss": 0.9814, "step": 3652 }, { "epoch": 0.38418762406825563, "grad_norm": 2.6240587564325732, "learning_rate": 3.4373936517442996e-06, "loss": 0.9582, "step": 3653 }, { "epoch": 0.38429279451010295, "grad_norm": 2.80704824521018, "learning_rate": 3.43661968676349e-06, "loss": 1.0803, "step": 3654 }, { "epoch": 0.3843979649519503, "grad_norm": 2.019265289979809, "learning_rate": 3.4358456173423084e-06, "loss": 0.9665, "step": 3655 }, { "epoch": 0.38450313539379755, "grad_norm": 2.130869954578095, "learning_rate": 3.4350714435670706e-06, "loss": 0.9747, "step": 3656 }, { "epoch": 0.38460830583564487, "grad_norm": 2.6188509617557267, "learning_rate": 3.434297165524103e-06, "loss": 0.9966, "step": 3657 }, { "epoch": 0.3847134762774922, "grad_norm": 3.1899105918775748, "learning_rate": 3.433522783299744e-06, "loss": 1.0068, "step": 3658 }, { "epoch": 0.3848186467193395, "grad_norm": 2.6244822778152224, "learning_rate": 3.432748296980343e-06, "loss": 0.998, "step": 3659 }, { "epoch": 0.38492381716118684, "grad_norm": 2.1576133938956885, "learning_rate": 3.4319737066522603e-06, "loss": 1.0098, "step": 3660 }, { "epoch": 0.38502898760303417, "grad_norm": 2.218275955657075, "learning_rate": 3.43119901240187e-06, "loss": 0.9752, "step": 3661 }, { "epoch": 0.3851341580448815, "grad_norm": 3.06330157636448, "learning_rate": 3.430424214315556e-06, "loss": 0.9907, "step": 3662 }, { "epoch": 0.3852393284867288, "grad_norm": 2.559143144501293, "learning_rate": 3.429649312479714e-06, "loss": 1.0291, "step": 3663 }, { "epoch": 0.38534449892857614, "grad_norm": 2.7644506663996316, "learning_rate": 3.4288743069807516e-06, "loss": 0.9873, "step": 3664 }, { "epoch": 0.38544966937042346, "grad_norm": 2.511298257225812, "learning_rate": 3.4280991979050892e-06, "loss": 1.0193, "step": 3665 }, { "epoch": 0.3855548398122708, "grad_norm": 3.51758390169175, "learning_rate": 3.427323985339156e-06, "loss": 1.0199, "step": 3666 }, { "epoch": 0.38566001025411806, "grad_norm": 2.8645279133183448, "learning_rate": 3.4265486693693944e-06, "loss": 0.9971, "step": 3667 }, { "epoch": 0.3857651806959654, "grad_norm": 2.9018074235287155, "learning_rate": 3.4257732500822592e-06, "loss": 0.9952, "step": 3668 }, { "epoch": 0.3858703511378127, "grad_norm": 2.0588163183524357, "learning_rate": 3.4249977275642147e-06, "loss": 0.9869, "step": 3669 }, { "epoch": 0.38597552157966003, "grad_norm": 2.686052380256393, "learning_rate": 3.424222101901738e-06, "loss": 1.0185, "step": 3670 }, { "epoch": 0.38608069202150735, "grad_norm": 2.3591003789868448, "learning_rate": 3.423446373181317e-06, "loss": 1.018, "step": 3671 }, { "epoch": 0.3861858624633547, "grad_norm": 4.273636944885451, "learning_rate": 3.4226705414894517e-06, "loss": 1.0388, "step": 3672 }, { "epoch": 0.386291032905202, "grad_norm": 2.9970837835983386, "learning_rate": 3.4218946069126534e-06, "loss": 1.0069, "step": 3673 }, { "epoch": 0.3863962033470493, "grad_norm": 1.7215807328411599, "learning_rate": 3.4211185695374454e-06, "loss": 0.9614, "step": 3674 }, { "epoch": 0.38650137378889665, "grad_norm": 3.4161522944359737, "learning_rate": 3.4203424294503617e-06, "loss": 1.0311, "step": 3675 }, { "epoch": 0.386606544230744, "grad_norm": 2.734529352859378, "learning_rate": 3.419566186737947e-06, "loss": 0.9555, "step": 3676 }, { "epoch": 0.38671171467259124, "grad_norm": 2.091680566056881, "learning_rate": 3.4187898414867594e-06, "loss": 0.998, "step": 3677 }, { "epoch": 0.38681688511443857, "grad_norm": 2.5504380693881066, "learning_rate": 3.418013393783367e-06, "loss": 1.0147, "step": 3678 }, { "epoch": 0.3869220555562859, "grad_norm": 2.4427536161465766, "learning_rate": 3.4172368437143495e-06, "loss": 1.0142, "step": 3679 }, { "epoch": 0.3870272259981332, "grad_norm": 1.9842607417290865, "learning_rate": 3.4164601913662985e-06, "loss": 1.0323, "step": 3680 }, { "epoch": 0.38713239643998054, "grad_norm": 2.65586599370129, "learning_rate": 3.4156834368258175e-06, "loss": 1.0053, "step": 3681 }, { "epoch": 0.38723756688182787, "grad_norm": 2.0732316500738306, "learning_rate": 3.4149065801795196e-06, "loss": 0.9952, "step": 3682 }, { "epoch": 0.3873427373236752, "grad_norm": 2.7781109716188843, "learning_rate": 3.4141296215140307e-06, "loss": 1.0386, "step": 3683 }, { "epoch": 0.3874479077655225, "grad_norm": 2.0689240430312554, "learning_rate": 3.4133525609159883e-06, "loss": 0.9956, "step": 3684 }, { "epoch": 0.38755307820736984, "grad_norm": 1.9357441976440448, "learning_rate": 3.4125753984720392e-06, "loss": 1.0061, "step": 3685 }, { "epoch": 0.38765824864921716, "grad_norm": 3.5270821137244472, "learning_rate": 3.411798134268845e-06, "loss": 0.9688, "step": 3686 }, { "epoch": 0.38776341909106443, "grad_norm": 2.7246597051655637, "learning_rate": 3.4110207683930754e-06, "loss": 1.018, "step": 3687 }, { "epoch": 0.38786858953291176, "grad_norm": 2.84204082065096, "learning_rate": 3.410243300931413e-06, "loss": 1.0036, "step": 3688 }, { "epoch": 0.3879737599747591, "grad_norm": 2.8073515518648535, "learning_rate": 3.409465731970551e-06, "loss": 0.987, "step": 3689 }, { "epoch": 0.3880789304166064, "grad_norm": 2.203385890249724, "learning_rate": 3.408688061597196e-06, "loss": 1.0379, "step": 3690 }, { "epoch": 0.38818410085845373, "grad_norm": 1.9853510788665791, "learning_rate": 3.4079102898980633e-06, "loss": 1.008, "step": 3691 }, { "epoch": 0.38828927130030105, "grad_norm": 2.0269843653629502, "learning_rate": 3.407132416959879e-06, "loss": 0.9997, "step": 3692 }, { "epoch": 0.3883944417421484, "grad_norm": 2.513094917455823, "learning_rate": 3.406354442869384e-06, "loss": 1.033, "step": 3693 }, { "epoch": 0.3884996121839957, "grad_norm": 2.1492929808384917, "learning_rate": 3.405576367713328e-06, "loss": 0.9705, "step": 3694 }, { "epoch": 0.388604782625843, "grad_norm": 2.4692888812089064, "learning_rate": 3.404798191578472e-06, "loss": 1.0106, "step": 3695 }, { "epoch": 0.38870995306769035, "grad_norm": 3.2356374351548935, "learning_rate": 3.4040199145515882e-06, "loss": 0.9841, "step": 3696 }, { "epoch": 0.3888151235095376, "grad_norm": 2.6289580492215268, "learning_rate": 3.4032415367194628e-06, "loss": 1.0185, "step": 3697 }, { "epoch": 0.38892029395138494, "grad_norm": 2.66254807289422, "learning_rate": 3.4024630581688895e-06, "loss": 1.022, "step": 3698 }, { "epoch": 0.38902546439323227, "grad_norm": 2.312866252842055, "learning_rate": 3.4016844789866733e-06, "loss": 1.0001, "step": 3699 }, { "epoch": 0.3891306348350796, "grad_norm": 2.748756895399873, "learning_rate": 3.400905799259634e-06, "loss": 1.0213, "step": 3700 }, { "epoch": 0.3892358052769269, "grad_norm": 2.422215122838384, "learning_rate": 3.4001270190745988e-06, "loss": 0.9726, "step": 3701 }, { "epoch": 0.38934097571877424, "grad_norm": 2.3324336660284413, "learning_rate": 3.3993481385184097e-06, "loss": 1.0022, "step": 3702 }, { "epoch": 0.38944614616062156, "grad_norm": 3.3738941893509407, "learning_rate": 3.398569157677916e-06, "loss": 1.0698, "step": 3703 }, { "epoch": 0.3895513166024689, "grad_norm": 1.9996879578710953, "learning_rate": 3.3977900766399807e-06, "loss": 1.0164, "step": 3704 }, { "epoch": 0.3896564870443162, "grad_norm": 3.7223647380800693, "learning_rate": 3.3970108954914782e-06, "loss": 1.0151, "step": 3705 }, { "epoch": 0.38976165748616354, "grad_norm": 2.9087827912362934, "learning_rate": 3.3962316143192923e-06, "loss": 1.0174, "step": 3706 }, { "epoch": 0.3898668279280108, "grad_norm": 2.222505824431483, "learning_rate": 3.3954522332103197e-06, "loss": 1.0393, "step": 3707 }, { "epoch": 0.38997199836985813, "grad_norm": 2.110921600030713, "learning_rate": 3.394672752251466e-06, "loss": 0.9682, "step": 3708 }, { "epoch": 0.39007716881170545, "grad_norm": 2.6718880567786507, "learning_rate": 3.3938931715296515e-06, "loss": 0.9888, "step": 3709 }, { "epoch": 0.3901823392535528, "grad_norm": 3.1737106251642637, "learning_rate": 3.393113491131803e-06, "loss": 0.9869, "step": 3710 }, { "epoch": 0.3902875096954001, "grad_norm": 3.316157096089004, "learning_rate": 3.3923337111448635e-06, "loss": 1.0071, "step": 3711 }, { "epoch": 0.3903926801372474, "grad_norm": 2.393156968914557, "learning_rate": 3.391553831655783e-06, "loss": 1.0382, "step": 3712 }, { "epoch": 0.39049785057909475, "grad_norm": 2.8352293224211986, "learning_rate": 3.3907738527515243e-06, "loss": 0.9929, "step": 3713 }, { "epoch": 0.3906030210209421, "grad_norm": 2.950692846867964, "learning_rate": 3.3899937745190616e-06, "loss": 1.0297, "step": 3714 }, { "epoch": 0.3907081914627894, "grad_norm": 1.9201546485115777, "learning_rate": 3.3892135970453787e-06, "loss": 0.9889, "step": 3715 }, { "epoch": 0.3908133619046367, "grad_norm": 3.0185805923153746, "learning_rate": 3.3884333204174722e-06, "loss": 1.0277, "step": 3716 }, { "epoch": 0.390918532346484, "grad_norm": 2.574076886352291, "learning_rate": 3.387652944722349e-06, "loss": 1.0126, "step": 3717 }, { "epoch": 0.3910237027883313, "grad_norm": 2.9898536035964702, "learning_rate": 3.3868724700470273e-06, "loss": 1.0308, "step": 3718 }, { "epoch": 0.39112887323017864, "grad_norm": 3.170586149546781, "learning_rate": 3.3860918964785354e-06, "loss": 1.012, "step": 3719 }, { "epoch": 0.39123404367202597, "grad_norm": 2.162024803561226, "learning_rate": 3.3853112241039136e-06, "loss": 1.0032, "step": 3720 }, { "epoch": 0.3913392141138733, "grad_norm": 2.800456326423131, "learning_rate": 3.3845304530102137e-06, "loss": 0.9699, "step": 3721 }, { "epoch": 0.3914443845557206, "grad_norm": 2.230103160597217, "learning_rate": 3.3837495832844958e-06, "loss": 0.9907, "step": 3722 }, { "epoch": 0.39154955499756794, "grad_norm": 1.9475989647841914, "learning_rate": 3.382968615013835e-06, "loss": 0.9762, "step": 3723 }, { "epoch": 0.39165472543941526, "grad_norm": 1.690195126900537, "learning_rate": 3.382187548285314e-06, "loss": 0.9697, "step": 3724 }, { "epoch": 0.3917598958812626, "grad_norm": 2.043759641357441, "learning_rate": 3.3814063831860282e-06, "loss": 0.9833, "step": 3725 }, { "epoch": 0.3918650663231099, "grad_norm": 2.798655205453248, "learning_rate": 3.3806251198030843e-06, "loss": 0.9836, "step": 3726 }, { "epoch": 0.39197023676495724, "grad_norm": 2.8030545959030553, "learning_rate": 3.3798437582235984e-06, "loss": 0.9983, "step": 3727 }, { "epoch": 0.3920754072068045, "grad_norm": 1.88892717549588, "learning_rate": 3.3790622985346982e-06, "loss": 0.9806, "step": 3728 }, { "epoch": 0.39218057764865183, "grad_norm": 2.9555789052928256, "learning_rate": 3.378280740823523e-06, "loss": 1.0005, "step": 3729 }, { "epoch": 0.39228574809049915, "grad_norm": 2.999318912879433, "learning_rate": 3.377499085177223e-06, "loss": 1.037, "step": 3730 }, { "epoch": 0.3923909185323465, "grad_norm": 3.187290454242505, "learning_rate": 3.3767173316829577e-06, "loss": 1.0223, "step": 3731 }, { "epoch": 0.3924960889741938, "grad_norm": 2.739342019449803, "learning_rate": 3.3759354804279e-06, "loss": 1.0258, "step": 3732 }, { "epoch": 0.3926012594160411, "grad_norm": 2.358011734083977, "learning_rate": 3.375153531499231e-06, "loss": 1.0051, "step": 3733 }, { "epoch": 0.39270642985788845, "grad_norm": 2.4014349663864345, "learning_rate": 3.3743714849841457e-06, "loss": 1.0244, "step": 3734 }, { "epoch": 0.3928116002997358, "grad_norm": 2.621545965673851, "learning_rate": 3.373589340969847e-06, "loss": 0.9887, "step": 3735 }, { "epoch": 0.3929167707415831, "grad_norm": 1.4862634418648064, "learning_rate": 3.37280709954355e-06, "loss": 1.0241, "step": 3736 }, { "epoch": 0.3930219411834304, "grad_norm": 2.3418718121471893, "learning_rate": 3.3720247607924816e-06, "loss": 1.0129, "step": 3737 }, { "epoch": 0.3931271116252777, "grad_norm": 2.029673077254618, "learning_rate": 3.3712423248038785e-06, "loss": 1.0308, "step": 3738 }, { "epoch": 0.393232282067125, "grad_norm": 2.15890181900642, "learning_rate": 3.370459791664988e-06, "loss": 0.9973, "step": 3739 }, { "epoch": 0.39333745250897234, "grad_norm": 2.790195503492097, "learning_rate": 3.369677161463068e-06, "loss": 1.0607, "step": 3740 }, { "epoch": 0.39344262295081966, "grad_norm": 2.3702375741582253, "learning_rate": 3.3688944342853904e-06, "loss": 0.9909, "step": 3741 }, { "epoch": 0.393547793392667, "grad_norm": 2.6321034738325975, "learning_rate": 3.368111610219232e-06, "loss": 0.9887, "step": 3742 }, { "epoch": 0.3936529638345143, "grad_norm": 3.016851750333276, "learning_rate": 3.367328689351886e-06, "loss": 1.0107, "step": 3743 }, { "epoch": 0.39375813427636164, "grad_norm": 2.2979782597568774, "learning_rate": 3.3665456717706522e-06, "loss": 1.0155, "step": 3744 }, { "epoch": 0.39386330471820896, "grad_norm": 2.695466999857748, "learning_rate": 3.3657625575628462e-06, "loss": 1.0072, "step": 3745 }, { "epoch": 0.3939684751600563, "grad_norm": 1.8645001661672012, "learning_rate": 3.3649793468157887e-06, "loss": 0.99, "step": 3746 }, { "epoch": 0.3940736456019036, "grad_norm": 2.324980227855501, "learning_rate": 3.364196039616815e-06, "loss": 0.9843, "step": 3747 }, { "epoch": 0.3941788160437509, "grad_norm": 3.100195028994209, "learning_rate": 3.3634126360532694e-06, "loss": 0.9901, "step": 3748 }, { "epoch": 0.3942839864855982, "grad_norm": 2.3643331155710534, "learning_rate": 3.3626291362125075e-06, "loss": 1.0829, "step": 3749 }, { "epoch": 0.39438915692744553, "grad_norm": 2.2726875865978444, "learning_rate": 3.3618455401818963e-06, "loss": 0.9908, "step": 3750 }, { "epoch": 0.39449432736929285, "grad_norm": 2.259480175273652, "learning_rate": 3.3610618480488115e-06, "loss": 0.9894, "step": 3751 }, { "epoch": 0.3945994978111402, "grad_norm": 2.6256746329463154, "learning_rate": 3.3602780599006425e-06, "loss": 1.0088, "step": 3752 }, { "epoch": 0.3947046682529875, "grad_norm": 2.8544786484384534, "learning_rate": 3.359494175824787e-06, "loss": 1.0236, "step": 3753 }, { "epoch": 0.3948098386948348, "grad_norm": 2.055256613594115, "learning_rate": 3.358710195908653e-06, "loss": 1.0129, "step": 3754 }, { "epoch": 0.39491500913668215, "grad_norm": 2.6762903150895165, "learning_rate": 3.3579261202396624e-06, "loss": 1.0084, "step": 3755 }, { "epoch": 0.3950201795785295, "grad_norm": 3.105067449951284, "learning_rate": 3.357141948905244e-06, "loss": 1.0396, "step": 3756 }, { "epoch": 0.3951253500203768, "grad_norm": 2.7357680717640736, "learning_rate": 3.356357681992841e-06, "loss": 1.0104, "step": 3757 }, { "epoch": 0.39523052046222407, "grad_norm": 2.5272957675398615, "learning_rate": 3.3555733195899034e-06, "loss": 0.9918, "step": 3758 }, { "epoch": 0.3953356909040714, "grad_norm": 2.2839849999981823, "learning_rate": 3.354788861783894e-06, "loss": 0.9403, "step": 3759 }, { "epoch": 0.3954408613459187, "grad_norm": 2.68537039553363, "learning_rate": 3.3540043086622865e-06, "loss": 1.0048, "step": 3760 }, { "epoch": 0.39554603178776604, "grad_norm": 1.95762032240909, "learning_rate": 3.3532196603125646e-06, "loss": 1.0022, "step": 3761 }, { "epoch": 0.39565120222961336, "grad_norm": 2.4783053417448278, "learning_rate": 3.3524349168222224e-06, "loss": 1.0038, "step": 3762 }, { "epoch": 0.3957563726714607, "grad_norm": 2.2138346751080373, "learning_rate": 3.3516500782787653e-06, "loss": 1.0094, "step": 3763 }, { "epoch": 0.395861543113308, "grad_norm": 2.936093102972425, "learning_rate": 3.3508651447697083e-06, "loss": 1.0291, "step": 3764 }, { "epoch": 0.39596671355515534, "grad_norm": 2.971080492228416, "learning_rate": 3.350080116382578e-06, "loss": 1.0148, "step": 3765 }, { "epoch": 0.39607188399700266, "grad_norm": 2.4766119317435167, "learning_rate": 3.3492949932049115e-06, "loss": 0.9808, "step": 3766 }, { "epoch": 0.39617705443885, "grad_norm": 2.0419716907450707, "learning_rate": 3.3485097753242546e-06, "loss": 0.9947, "step": 3767 }, { "epoch": 0.39628222488069725, "grad_norm": 2.6590218339263787, "learning_rate": 3.3477244628281667e-06, "loss": 0.9918, "step": 3768 }, { "epoch": 0.3963873953225446, "grad_norm": 2.711175294896815, "learning_rate": 3.3469390558042163e-06, "loss": 0.9712, "step": 3769 }, { "epoch": 0.3964925657643919, "grad_norm": 3.5550676117965385, "learning_rate": 3.346153554339982e-06, "loss": 1.0413, "step": 3770 }, { "epoch": 0.3965977362062392, "grad_norm": 2.9614092689255473, "learning_rate": 3.3453679585230532e-06, "loss": 1.0076, "step": 3771 }, { "epoch": 0.39670290664808655, "grad_norm": 2.3025973292096507, "learning_rate": 3.3445822684410295e-06, "loss": 0.9737, "step": 3772 }, { "epoch": 0.3968080770899339, "grad_norm": 2.2173191627458158, "learning_rate": 3.3437964841815226e-06, "loss": 0.9836, "step": 3773 }, { "epoch": 0.3969132475317812, "grad_norm": 2.3309333133092243, "learning_rate": 3.3430106058321517e-06, "loss": 1.0039, "step": 3774 }, { "epoch": 0.3970184179736285, "grad_norm": 1.9871063924389158, "learning_rate": 3.3422246334805504e-06, "loss": 0.9684, "step": 3775 }, { "epoch": 0.39712358841547585, "grad_norm": 2.4572887046393204, "learning_rate": 3.34143856721436e-06, "loss": 0.9899, "step": 3776 }, { "epoch": 0.39722875885732317, "grad_norm": 2.365568699436195, "learning_rate": 3.3406524071212327e-06, "loss": 0.9614, "step": 3777 }, { "epoch": 0.39733392929917044, "grad_norm": 1.7818280646816138, "learning_rate": 3.3398661532888314e-06, "loss": 0.9849, "step": 3778 }, { "epoch": 0.39743909974101777, "grad_norm": 1.9046459086271046, "learning_rate": 3.339079805804829e-06, "loss": 1.0143, "step": 3779 }, { "epoch": 0.3975442701828651, "grad_norm": 2.3309655746278333, "learning_rate": 3.3382933647569115e-06, "loss": 0.9559, "step": 3780 }, { "epoch": 0.3976494406247124, "grad_norm": 1.9564505802602077, "learning_rate": 3.337506830232771e-06, "loss": 1.0231, "step": 3781 }, { "epoch": 0.39775461106655974, "grad_norm": 2.474905869601421, "learning_rate": 3.3367202023201128e-06, "loss": 1.0038, "step": 3782 }, { "epoch": 0.39785978150840706, "grad_norm": 2.598347095424969, "learning_rate": 3.3359334811066524e-06, "loss": 1.0089, "step": 3783 }, { "epoch": 0.3979649519502544, "grad_norm": 2.164211187048747, "learning_rate": 3.3351466666801147e-06, "loss": 1.0008, "step": 3784 }, { "epoch": 0.3980701223921017, "grad_norm": 2.7808448041427845, "learning_rate": 3.3343597591282366e-06, "loss": 1.0307, "step": 3785 }, { "epoch": 0.39817529283394903, "grad_norm": 3.473039231593379, "learning_rate": 3.3335727585387636e-06, "loss": 1.0376, "step": 3786 }, { "epoch": 0.39828046327579636, "grad_norm": 2.8034840356520485, "learning_rate": 3.332785664999453e-06, "loss": 1.0261, "step": 3787 }, { "epoch": 0.3983856337176437, "grad_norm": 2.7072024059878474, "learning_rate": 3.3319984785980703e-06, "loss": 1.0098, "step": 3788 }, { "epoch": 0.39849080415949095, "grad_norm": 2.4114719388071872, "learning_rate": 3.331211199422395e-06, "loss": 1.0064, "step": 3789 }, { "epoch": 0.3985959746013383, "grad_norm": 1.7540733706136926, "learning_rate": 3.330423827560213e-06, "loss": 0.9798, "step": 3790 }, { "epoch": 0.3987011450431856, "grad_norm": 2.370248082622637, "learning_rate": 3.3296363630993245e-06, "loss": 0.999, "step": 3791 }, { "epoch": 0.3988063154850329, "grad_norm": 2.51665004694854, "learning_rate": 3.3288488061275365e-06, "loss": 0.9833, "step": 3792 }, { "epoch": 0.39891148592688025, "grad_norm": 2.5790260976139705, "learning_rate": 3.328061156732667e-06, "loss": 1.0166, "step": 3793 }, { "epoch": 0.3990166563687276, "grad_norm": 2.090788901444674, "learning_rate": 3.3272734150025467e-06, "loss": 0.9734, "step": 3794 }, { "epoch": 0.3991218268105749, "grad_norm": 2.0061905903896005, "learning_rate": 3.3264855810250137e-06, "loss": 1.0343, "step": 3795 }, { "epoch": 0.3992269972524222, "grad_norm": 2.829412278323214, "learning_rate": 3.3256976548879183e-06, "loss": 0.9787, "step": 3796 }, { "epoch": 0.39933216769426955, "grad_norm": 2.2031781208991723, "learning_rate": 3.3249096366791196e-06, "loss": 0.984, "step": 3797 }, { "epoch": 0.39943733813611687, "grad_norm": 2.6789806053409944, "learning_rate": 3.324121526486489e-06, "loss": 1.0294, "step": 3798 }, { "epoch": 0.39954250857796414, "grad_norm": 2.2888330035394273, "learning_rate": 3.323333324397906e-06, "loss": 1.0565, "step": 3799 }, { "epoch": 0.39964767901981146, "grad_norm": 2.247074905242807, "learning_rate": 3.3225450305012614e-06, "loss": 1.012, "step": 3800 }, { "epoch": 0.3997528494616588, "grad_norm": 2.694527096396108, "learning_rate": 3.321756644884456e-06, "loss": 1.0569, "step": 3801 }, { "epoch": 0.3998580199035061, "grad_norm": 1.7511580004306093, "learning_rate": 3.3209681676354012e-06, "loss": 1.018, "step": 3802 }, { "epoch": 0.39996319034535344, "grad_norm": 2.8140466000278463, "learning_rate": 3.3201795988420184e-06, "loss": 1.0134, "step": 3803 }, { "epoch": 0.40006836078720076, "grad_norm": 2.892670193426442, "learning_rate": 3.3193909385922385e-06, "loss": 0.9829, "step": 3804 }, { "epoch": 0.4001735312290481, "grad_norm": 2.0932756519682356, "learning_rate": 3.3186021869740036e-06, "loss": 1.0221, "step": 3805 }, { "epoch": 0.4002787016708954, "grad_norm": 2.690319929330406, "learning_rate": 3.3178133440752664e-06, "loss": 1.0595, "step": 3806 }, { "epoch": 0.40038387211274273, "grad_norm": 3.1784389270535365, "learning_rate": 3.317024409983988e-06, "loss": 1.0084, "step": 3807 }, { "epoch": 0.40048904255459006, "grad_norm": 1.8851550338498042, "learning_rate": 3.3162353847881414e-06, "loss": 0.9487, "step": 3808 }, { "epoch": 0.4005942129964373, "grad_norm": 2.5639704456014574, "learning_rate": 3.3154462685757077e-06, "loss": 0.9514, "step": 3809 }, { "epoch": 0.40069938343828465, "grad_norm": 2.3978339213837856, "learning_rate": 3.3146570614346814e-06, "loss": 1.0111, "step": 3810 }, { "epoch": 0.400804553880132, "grad_norm": 2.6853925911053285, "learning_rate": 3.3138677634530637e-06, "loss": 1.0216, "step": 3811 }, { "epoch": 0.4009097243219793, "grad_norm": 2.138313165527399, "learning_rate": 3.313078374718868e-06, "loss": 0.9739, "step": 3812 }, { "epoch": 0.4010148947638266, "grad_norm": 2.1570278204299753, "learning_rate": 3.3122888953201176e-06, "loss": 0.9708, "step": 3813 }, { "epoch": 0.40112006520567395, "grad_norm": 2.128469182048962, "learning_rate": 3.3114993253448457e-06, "loss": 1.0079, "step": 3814 }, { "epoch": 0.40122523564752127, "grad_norm": 3.0955012345320623, "learning_rate": 3.3107096648810945e-06, "loss": 1.0154, "step": 3815 }, { "epoch": 0.4013304060893686, "grad_norm": 3.5600261230012413, "learning_rate": 3.309919914016918e-06, "loss": 1.0091, "step": 3816 }, { "epoch": 0.4014355765312159, "grad_norm": 3.6965619154449274, "learning_rate": 3.309130072840379e-06, "loss": 0.9899, "step": 3817 }, { "epoch": 0.40154074697306325, "grad_norm": 3.1497857281534882, "learning_rate": 3.3083401414395516e-06, "loss": 1.0138, "step": 3818 }, { "epoch": 0.4016459174149105, "grad_norm": 2.752987084802028, "learning_rate": 3.3075501199025194e-06, "loss": 1.0456, "step": 3819 }, { "epoch": 0.40175108785675784, "grad_norm": 2.3240624321572456, "learning_rate": 3.3067600083173752e-06, "loss": 0.963, "step": 3820 }, { "epoch": 0.40185625829860516, "grad_norm": 3.583739472871665, "learning_rate": 3.3059698067722236e-06, "loss": 0.9924, "step": 3821 }, { "epoch": 0.4019614287404525, "grad_norm": 2.1565141093715754, "learning_rate": 3.305179515355177e-06, "loss": 1.0152, "step": 3822 }, { "epoch": 0.4020665991822998, "grad_norm": 2.697943981821852, "learning_rate": 3.304389134154359e-06, "loss": 1.0333, "step": 3823 }, { "epoch": 0.40217176962414714, "grad_norm": 2.7143533256140215, "learning_rate": 3.303598663257904e-06, "loss": 0.9832, "step": 3824 }, { "epoch": 0.40227694006599446, "grad_norm": 2.750860124589298, "learning_rate": 3.3028081027539562e-06, "loss": 1.0203, "step": 3825 }, { "epoch": 0.4023821105078418, "grad_norm": 3.058694203710453, "learning_rate": 3.302017452730668e-06, "loss": 0.9606, "step": 3826 }, { "epoch": 0.4024872809496891, "grad_norm": 2.6222751926965073, "learning_rate": 3.301226713276203e-06, "loss": 1.0546, "step": 3827 }, { "epoch": 0.40259245139153643, "grad_norm": 3.5103197855371935, "learning_rate": 3.300435884478737e-06, "loss": 1.0299, "step": 3828 }, { "epoch": 0.4026976218333837, "grad_norm": 2.8240018305917975, "learning_rate": 3.2996449664264506e-06, "loss": 1.0326, "step": 3829 }, { "epoch": 0.402802792275231, "grad_norm": 2.5409628619006646, "learning_rate": 3.298853959207539e-06, "loss": 1.0357, "step": 3830 }, { "epoch": 0.40290796271707835, "grad_norm": 2.6396149602766235, "learning_rate": 3.298062862910205e-06, "loss": 1.0238, "step": 3831 }, { "epoch": 0.4030131331589257, "grad_norm": 2.2641809884701716, "learning_rate": 3.297271677622662e-06, "loss": 1.0312, "step": 3832 }, { "epoch": 0.403118303600773, "grad_norm": 2.1404633046302535, "learning_rate": 3.2964804034331344e-06, "loss": 1.0002, "step": 3833 }, { "epoch": 0.4032234740426203, "grad_norm": 3.02584736223, "learning_rate": 3.2956890404298537e-06, "loss": 1.0608, "step": 3834 }, { "epoch": 0.40332864448446765, "grad_norm": 1.916867136991986, "learning_rate": 3.2948975887010643e-06, "loss": 0.9875, "step": 3835 }, { "epoch": 0.40343381492631497, "grad_norm": 3.057690418288368, "learning_rate": 3.2941060483350183e-06, "loss": 0.9777, "step": 3836 }, { "epoch": 0.4035389853681623, "grad_norm": 2.0359787957493336, "learning_rate": 3.29331441941998e-06, "loss": 0.9759, "step": 3837 }, { "epoch": 0.4036441558100096, "grad_norm": 2.565513639029297, "learning_rate": 3.292522702044221e-06, "loss": 1.0005, "step": 3838 }, { "epoch": 0.4037493262518569, "grad_norm": 2.6810471066425743, "learning_rate": 3.2917308962960232e-06, "loss": 1.0066, "step": 3839 }, { "epoch": 0.4038544966937042, "grad_norm": 3.16105856831009, "learning_rate": 3.2909390022636813e-06, "loss": 1.0143, "step": 3840 }, { "epoch": 0.40395966713555154, "grad_norm": 2.9822084656388923, "learning_rate": 3.2901470200354953e-06, "loss": 0.998, "step": 3841 }, { "epoch": 0.40406483757739886, "grad_norm": 2.19021483535333, "learning_rate": 3.289354949699779e-06, "loss": 0.9732, "step": 3842 }, { "epoch": 0.4041700080192462, "grad_norm": 3.090611395109951, "learning_rate": 3.288562791344854e-06, "loss": 1.0176, "step": 3843 }, { "epoch": 0.4042751784610935, "grad_norm": 2.468177042950385, "learning_rate": 3.2877705450590525e-06, "loss": 1.04, "step": 3844 }, { "epoch": 0.40438034890294083, "grad_norm": 3.2507866817267086, "learning_rate": 3.2869782109307147e-06, "loss": 1.024, "step": 3845 }, { "epoch": 0.40448551934478816, "grad_norm": 2.749363551983819, "learning_rate": 3.2861857890481928e-06, "loss": 1.0086, "step": 3846 }, { "epoch": 0.4045906897866355, "grad_norm": 3.3223260711708282, "learning_rate": 3.2853932794998487e-06, "loss": 1.026, "step": 3847 }, { "epoch": 0.4046958602284828, "grad_norm": 2.0959836207172917, "learning_rate": 3.2846006823740527e-06, "loss": 0.963, "step": 3848 }, { "epoch": 0.40480103067033013, "grad_norm": 2.601661822818262, "learning_rate": 3.283807997759186e-06, "loss": 1.0286, "step": 3849 }, { "epoch": 0.4049062011121774, "grad_norm": 2.223265802293675, "learning_rate": 3.283015225743638e-06, "loss": 0.9474, "step": 3850 }, { "epoch": 0.4050113715540247, "grad_norm": 3.994077202727601, "learning_rate": 3.2822223664158103e-06, "loss": 0.9945, "step": 3851 }, { "epoch": 0.40511654199587205, "grad_norm": 2.035477588921751, "learning_rate": 3.281429419864112e-06, "loss": 0.9734, "step": 3852 }, { "epoch": 0.4052217124377194, "grad_norm": 3.15506782103542, "learning_rate": 3.2806363861769626e-06, "loss": 1.0234, "step": 3853 }, { "epoch": 0.4053268828795667, "grad_norm": 3.0102187470478428, "learning_rate": 3.2798432654427925e-06, "loss": 0.9851, "step": 3854 }, { "epoch": 0.405432053321414, "grad_norm": 1.8770708121120634, "learning_rate": 3.2790500577500393e-06, "loss": 0.9907, "step": 3855 }, { "epoch": 0.40553722376326135, "grad_norm": 2.6017298144724914, "learning_rate": 3.2782567631871536e-06, "loss": 1.0101, "step": 3856 }, { "epoch": 0.40564239420510867, "grad_norm": 2.3770058244751833, "learning_rate": 3.2774633818425932e-06, "loss": 1.0182, "step": 3857 }, { "epoch": 0.405747564646956, "grad_norm": 2.2714004871527003, "learning_rate": 3.2766699138048265e-06, "loss": 0.9867, "step": 3858 }, { "epoch": 0.4058527350888033, "grad_norm": 2.0842568889627793, "learning_rate": 3.27587635916233e-06, "loss": 0.95, "step": 3859 }, { "epoch": 0.4059579055306506, "grad_norm": 2.0840650081817884, "learning_rate": 3.2750827180035927e-06, "loss": 1.0121, "step": 3860 }, { "epoch": 0.4060630759724979, "grad_norm": 1.9672860296600982, "learning_rate": 3.2742889904171114e-06, "loss": 1.0028, "step": 3861 }, { "epoch": 0.40616824641434524, "grad_norm": 2.885167297028863, "learning_rate": 3.2734951764913926e-06, "loss": 0.9963, "step": 3862 }, { "epoch": 0.40627341685619256, "grad_norm": 2.704006997564469, "learning_rate": 3.2727012763149535e-06, "loss": 1.0243, "step": 3863 }, { "epoch": 0.4063785872980399, "grad_norm": 2.3809414848304398, "learning_rate": 3.2719072899763186e-06, "loss": 0.9912, "step": 3864 }, { "epoch": 0.4064837577398872, "grad_norm": 2.6369184204411287, "learning_rate": 3.271113217564025e-06, "loss": 0.9984, "step": 3865 }, { "epoch": 0.40658892818173453, "grad_norm": 2.8295484338886734, "learning_rate": 3.2703190591666174e-06, "loss": 1.0434, "step": 3866 }, { "epoch": 0.40669409862358186, "grad_norm": 2.925767740990065, "learning_rate": 3.2695248148726513e-06, "loss": 1.032, "step": 3867 }, { "epoch": 0.4067992690654292, "grad_norm": 2.6452717628361406, "learning_rate": 3.268730484770689e-06, "loss": 1.0057, "step": 3868 }, { "epoch": 0.4069044395072765, "grad_norm": 2.0361868641093652, "learning_rate": 3.2679360689493067e-06, "loss": 0.9866, "step": 3869 }, { "epoch": 0.4070096099491238, "grad_norm": 2.8896506691583648, "learning_rate": 3.2671415674970874e-06, "loss": 0.9859, "step": 3870 }, { "epoch": 0.4071147803909711, "grad_norm": 2.3143761737562305, "learning_rate": 3.266346980502624e-06, "loss": 0.9912, "step": 3871 }, { "epoch": 0.4072199508328184, "grad_norm": 2.821772166631868, "learning_rate": 3.2655523080545188e-06, "loss": 1.0074, "step": 3872 }, { "epoch": 0.40732512127466575, "grad_norm": 2.539562382624043, "learning_rate": 3.264757550241384e-06, "loss": 1.0218, "step": 3873 }, { "epoch": 0.40743029171651307, "grad_norm": 2.401844779407129, "learning_rate": 3.263962707151842e-06, "loss": 0.9949, "step": 3874 }, { "epoch": 0.4075354621583604, "grad_norm": 1.6438140292650527, "learning_rate": 3.263167778874523e-06, "loss": 0.9944, "step": 3875 }, { "epoch": 0.4076406326002077, "grad_norm": 2.1162759607114006, "learning_rate": 3.2623727654980686e-06, "loss": 1.0216, "step": 3876 }, { "epoch": 0.40774580304205504, "grad_norm": 2.4748412890168354, "learning_rate": 3.2615776671111284e-06, "loss": 0.9946, "step": 3877 }, { "epoch": 0.40785097348390237, "grad_norm": 2.4536684362236123, "learning_rate": 3.2607824838023616e-06, "loss": 1.0266, "step": 3878 }, { "epoch": 0.4079561439257497, "grad_norm": 2.62038083169509, "learning_rate": 3.259987215660439e-06, "loss": 1.0633, "step": 3879 }, { "epoch": 0.40806131436759696, "grad_norm": 3.030423450158621, "learning_rate": 3.259191862774037e-06, "loss": 1.0133, "step": 3880 }, { "epoch": 0.4081664848094443, "grad_norm": 2.629007143942859, "learning_rate": 3.2583964252318457e-06, "loss": 0.986, "step": 3881 }, { "epoch": 0.4082716552512916, "grad_norm": 2.7588881309590634, "learning_rate": 3.25760090312256e-06, "loss": 0.9893, "step": 3882 }, { "epoch": 0.40837682569313893, "grad_norm": 1.8943250545350006, "learning_rate": 3.25680529653489e-06, "loss": 1.0228, "step": 3883 }, { "epoch": 0.40848199613498626, "grad_norm": 2.1333974399767763, "learning_rate": 3.2560096055575495e-06, "loss": 1.0113, "step": 3884 }, { "epoch": 0.4085871665768336, "grad_norm": 2.963505372102945, "learning_rate": 3.2552138302792652e-06, "loss": 0.9818, "step": 3885 }, { "epoch": 0.4086923370186809, "grad_norm": 1.9028453643067, "learning_rate": 3.254417970788772e-06, "loss": 1.0293, "step": 3886 }, { "epoch": 0.40879750746052823, "grad_norm": 2.733815135749297, "learning_rate": 3.2536220271748154e-06, "loss": 1.0211, "step": 3887 }, { "epoch": 0.40890267790237556, "grad_norm": 2.7610883994558195, "learning_rate": 3.252825999526148e-06, "loss": 1.0258, "step": 3888 }, { "epoch": 0.4090078483442229, "grad_norm": 1.5656271357264868, "learning_rate": 3.252029887931533e-06, "loss": 0.9811, "step": 3889 }, { "epoch": 0.40911301878607015, "grad_norm": 1.9443105818052695, "learning_rate": 3.251233692479744e-06, "loss": 0.9607, "step": 3890 }, { "epoch": 0.4092181892279175, "grad_norm": 2.4504615742047497, "learning_rate": 3.2504374132595617e-06, "loss": 0.9724, "step": 3891 }, { "epoch": 0.4093233596697648, "grad_norm": 2.209862220160882, "learning_rate": 3.249641050359779e-06, "loss": 1.0212, "step": 3892 }, { "epoch": 0.4094285301116121, "grad_norm": 3.0205649466760898, "learning_rate": 3.2488446038691962e-06, "loss": 1.0367, "step": 3893 }, { "epoch": 0.40953370055345945, "grad_norm": 2.6987146362653918, "learning_rate": 3.2480480738766222e-06, "loss": 1.0034, "step": 3894 }, { "epoch": 0.40963887099530677, "grad_norm": 2.147797158916421, "learning_rate": 3.247251460470877e-06, "loss": 0.9863, "step": 3895 }, { "epoch": 0.4097440414371541, "grad_norm": 2.7248827223178025, "learning_rate": 3.246454763740789e-06, "loss": 1.0234, "step": 3896 }, { "epoch": 0.4098492118790014, "grad_norm": 2.221702170406418, "learning_rate": 3.2456579837751964e-06, "loss": 1.0635, "step": 3897 }, { "epoch": 0.40995438232084874, "grad_norm": 2.3438438321838726, "learning_rate": 3.2448611206629456e-06, "loss": 0.9872, "step": 3898 }, { "epoch": 0.41005955276269607, "grad_norm": 1.8808604121161838, "learning_rate": 3.2440641744928942e-06, "loss": 0.9978, "step": 3899 }, { "epoch": 0.41016472320454334, "grad_norm": 2.5185303976659172, "learning_rate": 3.2432671453539074e-06, "loss": 1.0102, "step": 3900 }, { "epoch": 0.41026989364639066, "grad_norm": 3.0639940895334576, "learning_rate": 3.242470033334859e-06, "loss": 1.0326, "step": 3901 }, { "epoch": 0.410375064088238, "grad_norm": 2.5153081413305505, "learning_rate": 3.2416728385246354e-06, "loss": 0.9968, "step": 3902 }, { "epoch": 0.4104802345300853, "grad_norm": 2.3345795279528536, "learning_rate": 3.2408755610121277e-06, "loss": 1.0283, "step": 3903 }, { "epoch": 0.41058540497193263, "grad_norm": 2.797650533754245, "learning_rate": 3.2400782008862402e-06, "loss": 1.0079, "step": 3904 }, { "epoch": 0.41069057541377996, "grad_norm": 1.9715704104207485, "learning_rate": 3.2392807582358838e-06, "loss": 1.0157, "step": 3905 }, { "epoch": 0.4107957458556273, "grad_norm": 2.3447329388547105, "learning_rate": 3.2384832331499804e-06, "loss": 1.039, "step": 3906 }, { "epoch": 0.4109009162974746, "grad_norm": 2.7086394522324007, "learning_rate": 3.2376856257174596e-06, "loss": 1.0044, "step": 3907 }, { "epoch": 0.41100608673932193, "grad_norm": 2.2145844261459966, "learning_rate": 3.236887936027261e-06, "loss": 1.0186, "step": 3908 }, { "epoch": 0.41111125718116925, "grad_norm": 2.144833660377574, "learning_rate": 3.2360901641683335e-06, "loss": 0.9982, "step": 3909 }, { "epoch": 0.4112164276230166, "grad_norm": 3.0103082487238346, "learning_rate": 3.2352923102296336e-06, "loss": 0.981, "step": 3910 }, { "epoch": 0.41132159806486385, "grad_norm": 3.4747483035986786, "learning_rate": 3.23449437430013e-06, "loss": 1.0245, "step": 3911 }, { "epoch": 0.41142676850671117, "grad_norm": 3.1896133858883626, "learning_rate": 3.233696356468797e-06, "loss": 0.9647, "step": 3912 }, { "epoch": 0.4115319389485585, "grad_norm": 2.630457285599166, "learning_rate": 3.2328982568246213e-06, "loss": 1.0004, "step": 3913 }, { "epoch": 0.4116371093904058, "grad_norm": 2.523456894015238, "learning_rate": 3.2321000754565972e-06, "loss": 1.0134, "step": 3914 }, { "epoch": 0.41174227983225314, "grad_norm": 2.1853258787221623, "learning_rate": 3.2313018124537273e-06, "loss": 0.9849, "step": 3915 }, { "epoch": 0.41184745027410047, "grad_norm": 2.207439047305829, "learning_rate": 3.230503467905024e-06, "loss": 0.9982, "step": 3916 }, { "epoch": 0.4119526207159478, "grad_norm": 2.0123219656783427, "learning_rate": 3.229705041899509e-06, "loss": 0.9751, "step": 3917 }, { "epoch": 0.4120577911577951, "grad_norm": 2.4532617794552687, "learning_rate": 3.228906534526214e-06, "loss": 1.0182, "step": 3918 }, { "epoch": 0.41216296159964244, "grad_norm": 3.1167065793191897, "learning_rate": 3.228107945874177e-06, "loss": 0.9758, "step": 3919 }, { "epoch": 0.41226813204148977, "grad_norm": 2.01605320694452, "learning_rate": 3.2273092760324488e-06, "loss": 1.0439, "step": 3920 }, { "epoch": 0.41237330248333703, "grad_norm": 2.0503930931336134, "learning_rate": 3.2265105250900857e-06, "loss": 1.0149, "step": 3921 }, { "epoch": 0.41247847292518436, "grad_norm": 2.535946058767984, "learning_rate": 3.225711693136156e-06, "loss": 1.0071, "step": 3922 }, { "epoch": 0.4125836433670317, "grad_norm": 3.0494247439611684, "learning_rate": 3.224912780259736e-06, "loss": 1.0153, "step": 3923 }, { "epoch": 0.412688813808879, "grad_norm": 1.9610941081321251, "learning_rate": 3.2241137865499073e-06, "loss": 1.0368, "step": 3924 }, { "epoch": 0.41279398425072633, "grad_norm": 2.187054374833785, "learning_rate": 3.2233147120957674e-06, "loss": 0.9997, "step": 3925 }, { "epoch": 0.41289915469257366, "grad_norm": 2.032475210558614, "learning_rate": 3.2225155569864187e-06, "loss": 0.9925, "step": 3926 }, { "epoch": 0.413004325134421, "grad_norm": 2.778151381349566, "learning_rate": 3.221716321310972e-06, "loss": 0.9881, "step": 3927 }, { "epoch": 0.4131094955762683, "grad_norm": 2.84516595462591, "learning_rate": 3.2209170051585486e-06, "loss": 1.0279, "step": 3928 }, { "epoch": 0.41321466601811563, "grad_norm": 2.706803603589453, "learning_rate": 3.2201176086182796e-06, "loss": 1.0087, "step": 3929 }, { "epoch": 0.41331983645996295, "grad_norm": 2.3853452577702137, "learning_rate": 3.2193181317793034e-06, "loss": 0.9627, "step": 3930 }, { "epoch": 0.4134250069018102, "grad_norm": 2.8455662869523275, "learning_rate": 3.2185185747307675e-06, "loss": 1.0292, "step": 3931 }, { "epoch": 0.41353017734365755, "grad_norm": 2.861868010646851, "learning_rate": 3.217718937561829e-06, "loss": 1.0368, "step": 3932 }, { "epoch": 0.41363534778550487, "grad_norm": 2.665200169651637, "learning_rate": 3.2169192203616525e-06, "loss": 1.0477, "step": 3933 }, { "epoch": 0.4137405182273522, "grad_norm": 2.7937901564994334, "learning_rate": 3.2161194232194144e-06, "loss": 0.9551, "step": 3934 }, { "epoch": 0.4138456886691995, "grad_norm": 2.488699321040164, "learning_rate": 3.2153195462242968e-06, "loss": 1.0131, "step": 3935 }, { "epoch": 0.41395085911104684, "grad_norm": 1.861341029966568, "learning_rate": 3.2145195894654947e-06, "loss": 1.0209, "step": 3936 }, { "epoch": 0.41405602955289417, "grad_norm": 2.814340851769455, "learning_rate": 3.2137195530322067e-06, "loss": 1.0021, "step": 3937 }, { "epoch": 0.4141611999947415, "grad_norm": 3.4877888465072187, "learning_rate": 3.2129194370136447e-06, "loss": 0.9719, "step": 3938 }, { "epoch": 0.4142663704365888, "grad_norm": 1.8405494233280453, "learning_rate": 3.2121192414990273e-06, "loss": 0.9498, "step": 3939 }, { "epoch": 0.41437154087843614, "grad_norm": 2.2875747831185893, "learning_rate": 3.2113189665775812e-06, "loss": 1.0207, "step": 3940 }, { "epoch": 0.4144767113202834, "grad_norm": 1.7886487845468368, "learning_rate": 3.2105186123385457e-06, "loss": 0.9669, "step": 3941 }, { "epoch": 0.41458188176213073, "grad_norm": 3.5285401524389775, "learning_rate": 3.209718178871165e-06, "loss": 1.0066, "step": 3942 }, { "epoch": 0.41468705220397806, "grad_norm": 2.8080439416391902, "learning_rate": 3.2089176662646946e-06, "loss": 1.0376, "step": 3943 }, { "epoch": 0.4147922226458254, "grad_norm": 2.8063762324719694, "learning_rate": 3.2081170746083972e-06, "loss": 0.9974, "step": 3944 }, { "epoch": 0.4148973930876727, "grad_norm": 2.7926830931363478, "learning_rate": 3.2073164039915457e-06, "loss": 1.0089, "step": 3945 }, { "epoch": 0.41500256352952003, "grad_norm": 3.0491045661913265, "learning_rate": 3.20651565450342e-06, "loss": 0.97, "step": 3946 }, { "epoch": 0.41510773397136735, "grad_norm": 3.0426107615622855, "learning_rate": 3.20571482623331e-06, "loss": 1.0066, "step": 3947 }, { "epoch": 0.4152129044132147, "grad_norm": 2.396107694194826, "learning_rate": 3.2049139192705146e-06, "loss": 1.0248, "step": 3948 }, { "epoch": 0.415318074855062, "grad_norm": 2.149480243498675, "learning_rate": 3.2041129337043415e-06, "loss": 1.0269, "step": 3949 }, { "epoch": 0.4154232452969093, "grad_norm": 2.619422758873159, "learning_rate": 3.203311869624107e-06, "loss": 1.0147, "step": 3950 }, { "epoch": 0.4155284157387566, "grad_norm": 2.575801899112643, "learning_rate": 3.202510727119135e-06, "loss": 1.0062, "step": 3951 }, { "epoch": 0.4156335861806039, "grad_norm": 2.2109556260255743, "learning_rate": 3.20170950627876e-06, "loss": 0.9984, "step": 3952 }, { "epoch": 0.41573875662245124, "grad_norm": 2.664759138194936, "learning_rate": 3.2009082071923243e-06, "loss": 0.9412, "step": 3953 }, { "epoch": 0.41584392706429857, "grad_norm": 3.5647874059605105, "learning_rate": 3.2001068299491777e-06, "loss": 1.0679, "step": 3954 }, { "epoch": 0.4159490975061459, "grad_norm": 1.9762308009149232, "learning_rate": 3.1993053746386817e-06, "loss": 0.9786, "step": 3955 }, { "epoch": 0.4160542679479932, "grad_norm": 2.685694396514915, "learning_rate": 3.1985038413502035e-06, "loss": 0.9744, "step": 3956 }, { "epoch": 0.41615943838984054, "grad_norm": 1.788013679923769, "learning_rate": 3.197702230173121e-06, "loss": 0.9708, "step": 3957 }, { "epoch": 0.41626460883168787, "grad_norm": 2.7969444556997947, "learning_rate": 3.19690054119682e-06, "loss": 1.0122, "step": 3958 }, { "epoch": 0.4163697792735352, "grad_norm": 3.247388808518861, "learning_rate": 3.1960987745106954e-06, "loss": 1.023, "step": 3959 }, { "epoch": 0.4164749497153825, "grad_norm": 2.891195304951478, "learning_rate": 3.195296930204149e-06, "loss": 1.0265, "step": 3960 }, { "epoch": 0.4165801201572298, "grad_norm": 2.3527631916312184, "learning_rate": 3.1944950083665943e-06, "loss": 0.9563, "step": 3961 }, { "epoch": 0.4166852905990771, "grad_norm": 2.1596860621844423, "learning_rate": 3.1936930090874506e-06, "loss": 0.9826, "step": 3962 }, { "epoch": 0.41679046104092443, "grad_norm": 2.6617240165685665, "learning_rate": 3.192890932456148e-06, "loss": 1.0048, "step": 3963 }, { "epoch": 0.41689563148277176, "grad_norm": 2.092563955479962, "learning_rate": 3.1920887785621233e-06, "loss": 1.0019, "step": 3964 }, { "epoch": 0.4170008019246191, "grad_norm": 2.2736516404112326, "learning_rate": 3.1912865474948242e-06, "loss": 1.0307, "step": 3965 }, { "epoch": 0.4171059723664664, "grad_norm": 3.3937785745994526, "learning_rate": 3.190484239343705e-06, "loss": 1.0387, "step": 3966 }, { "epoch": 0.41721114280831373, "grad_norm": 3.086439637922143, "learning_rate": 3.1896818541982288e-06, "loss": 0.9966, "step": 3967 }, { "epoch": 0.41731631325016105, "grad_norm": 2.161092398681452, "learning_rate": 3.188879392147869e-06, "loss": 0.9979, "step": 3968 }, { "epoch": 0.4174214836920084, "grad_norm": 2.732897747501538, "learning_rate": 3.188076853282105e-06, "loss": 0.9888, "step": 3969 }, { "epoch": 0.4175266541338557, "grad_norm": 2.3755042681989584, "learning_rate": 3.187274237690426e-06, "loss": 0.9807, "step": 3970 }, { "epoch": 0.417631824575703, "grad_norm": 3.236466063215594, "learning_rate": 3.1864715454623312e-06, "loss": 0.9789, "step": 3971 }, { "epoch": 0.4177369950175503, "grad_norm": 1.9463819496777226, "learning_rate": 3.1856687766873262e-06, "loss": 0.9797, "step": 3972 }, { "epoch": 0.4178421654593976, "grad_norm": 2.2662145233490234, "learning_rate": 3.184865931454926e-06, "loss": 1.0288, "step": 3973 }, { "epoch": 0.41794733590124494, "grad_norm": 1.866662882697477, "learning_rate": 3.184063009854655e-06, "loss": 1.0038, "step": 3974 }, { "epoch": 0.41805250634309227, "grad_norm": 2.650269702092644, "learning_rate": 3.183260011976044e-06, "loss": 1.0019, "step": 3975 }, { "epoch": 0.4181576767849396, "grad_norm": 2.8276642060293913, "learning_rate": 3.1824569379086334e-06, "loss": 1.0599, "step": 3976 }, { "epoch": 0.4182628472267869, "grad_norm": 3.5709056704281057, "learning_rate": 3.1816537877419735e-06, "loss": 1.0059, "step": 3977 }, { "epoch": 0.41836801766863424, "grad_norm": 2.071198284904167, "learning_rate": 3.180850561565621e-06, "loss": 1.0026, "step": 3978 }, { "epoch": 0.41847318811048156, "grad_norm": 2.9106218215179527, "learning_rate": 3.180047259469141e-06, "loss": 1.0052, "step": 3979 }, { "epoch": 0.4185783585523289, "grad_norm": 3.6515550019028513, "learning_rate": 3.1792438815421094e-06, "loss": 1.0022, "step": 3980 }, { "epoch": 0.4186835289941762, "grad_norm": 2.730798220509796, "learning_rate": 3.178440427874109e-06, "loss": 1.0587, "step": 3981 }, { "epoch": 0.4187886994360235, "grad_norm": 1.887706779651145, "learning_rate": 3.1776368985547296e-06, "loss": 0.9977, "step": 3982 }, { "epoch": 0.4188938698778708, "grad_norm": 3.609708212124221, "learning_rate": 3.176833293673572e-06, "loss": 1.001, "step": 3983 }, { "epoch": 0.41899904031971813, "grad_norm": 2.8850263626073764, "learning_rate": 3.1760296133202444e-06, "loss": 1.0177, "step": 3984 }, { "epoch": 0.41910421076156545, "grad_norm": 2.131234221005343, "learning_rate": 3.175225857584364e-06, "loss": 1.0055, "step": 3985 }, { "epoch": 0.4192093812034128, "grad_norm": 2.991084252493002, "learning_rate": 3.174422026555554e-06, "loss": 1.0366, "step": 3986 }, { "epoch": 0.4193145516452601, "grad_norm": 2.6654269600235274, "learning_rate": 3.17361812032345e-06, "loss": 0.9736, "step": 3987 }, { "epoch": 0.4194197220871074, "grad_norm": 2.6426415681720643, "learning_rate": 3.172814138977692e-06, "loss": 0.9885, "step": 3988 }, { "epoch": 0.41952489252895475, "grad_norm": 2.684767657076028, "learning_rate": 3.172010082607932e-06, "loss": 0.9982, "step": 3989 }, { "epoch": 0.4196300629708021, "grad_norm": 2.891495817722396, "learning_rate": 3.1712059513038264e-06, "loss": 1.016, "step": 3990 }, { "epoch": 0.4197352334126494, "grad_norm": 1.9233695211634947, "learning_rate": 3.1704017451550435e-06, "loss": 0.9776, "step": 3991 }, { "epoch": 0.41984040385449667, "grad_norm": 2.8276668356522165, "learning_rate": 3.169597464251258e-06, "loss": 0.958, "step": 3992 }, { "epoch": 0.419945574296344, "grad_norm": 2.2819287728424387, "learning_rate": 3.1687931086821534e-06, "loss": 0.9661, "step": 3993 }, { "epoch": 0.4200507447381913, "grad_norm": 1.9335515420518428, "learning_rate": 3.1679886785374227e-06, "loss": 1.0086, "step": 3994 }, { "epoch": 0.42015591518003864, "grad_norm": 2.703224110233739, "learning_rate": 3.1671841739067645e-06, "loss": 0.987, "step": 3995 }, { "epoch": 0.42026108562188597, "grad_norm": 2.366911505986237, "learning_rate": 3.1663795948798883e-06, "loss": 1.0131, "step": 3996 }, { "epoch": 0.4203662560637333, "grad_norm": 2.1981340850481317, "learning_rate": 3.1655749415465098e-06, "loss": 1.0267, "step": 3997 }, { "epoch": 0.4204714265055806, "grad_norm": 2.2878927859138662, "learning_rate": 3.164770213996356e-06, "loss": 0.9874, "step": 3998 }, { "epoch": 0.42057659694742794, "grad_norm": 2.797600970455762, "learning_rate": 3.1639654123191586e-06, "loss": 1.0073, "step": 3999 }, { "epoch": 0.42068176738927526, "grad_norm": 2.587687977989116, "learning_rate": 3.1631605366046604e-06, "loss": 0.9942, "step": 4000 }, { "epoch": 0.4207869378311226, "grad_norm": 2.8519043232197503, "learning_rate": 3.1623555869426105e-06, "loss": 1.0209, "step": 4001 }, { "epoch": 0.42089210827296986, "grad_norm": 2.7590859333924436, "learning_rate": 3.161550563422768e-06, "loss": 1.0017, "step": 4002 }, { "epoch": 0.4209972787148172, "grad_norm": 2.422336890671471, "learning_rate": 3.160745466134898e-06, "loss": 1.0184, "step": 4003 }, { "epoch": 0.4211024491566645, "grad_norm": 2.632748801794489, "learning_rate": 3.1599402951687745e-06, "loss": 1.0328, "step": 4004 }, { "epoch": 0.42120761959851183, "grad_norm": 2.6624952216264592, "learning_rate": 3.1591350506141834e-06, "loss": 0.9996, "step": 4005 }, { "epoch": 0.42131279004035915, "grad_norm": 2.181289439115408, "learning_rate": 3.1583297325609117e-06, "loss": 0.9675, "step": 4006 }, { "epoch": 0.4214179604822065, "grad_norm": 1.9476495215981124, "learning_rate": 3.1575243410987627e-06, "loss": 0.9822, "step": 4007 }, { "epoch": 0.4215231309240538, "grad_norm": 2.363361592022842, "learning_rate": 3.1567188763175417e-06, "loss": 0.9819, "step": 4008 }, { "epoch": 0.4216283013659011, "grad_norm": 2.8584886252924653, "learning_rate": 3.155913338307064e-06, "loss": 1.0167, "step": 4009 }, { "epoch": 0.42173347180774845, "grad_norm": 2.1134205793143463, "learning_rate": 3.1551077271571533e-06, "loss": 1.0171, "step": 4010 }, { "epoch": 0.4218386422495958, "grad_norm": 3.1621683092136545, "learning_rate": 3.154302042957642e-06, "loss": 1.0072, "step": 4011 }, { "epoch": 0.42194381269144304, "grad_norm": 3.3120782048978805, "learning_rate": 3.153496285798371e-06, "loss": 0.9971, "step": 4012 }, { "epoch": 0.42204898313329037, "grad_norm": 2.0326302294261644, "learning_rate": 3.152690455769186e-06, "loss": 1.0114, "step": 4013 }, { "epoch": 0.4221541535751377, "grad_norm": 2.0910034446035146, "learning_rate": 3.1518845529599464e-06, "loss": 0.995, "step": 4014 }, { "epoch": 0.422259324016985, "grad_norm": 2.7438097015848326, "learning_rate": 3.1510785774605148e-06, "loss": 1.0412, "step": 4015 }, { "epoch": 0.42236449445883234, "grad_norm": 1.9719647600817969, "learning_rate": 3.1502725293607635e-06, "loss": 0.9841, "step": 4016 }, { "epoch": 0.42246966490067966, "grad_norm": 2.5438613247357154, "learning_rate": 3.1494664087505743e-06, "loss": 1.002, "step": 4017 }, { "epoch": 0.422574835342527, "grad_norm": 2.3918717555442424, "learning_rate": 3.1486602157198347e-06, "loss": 0.986, "step": 4018 }, { "epoch": 0.4226800057843743, "grad_norm": 2.6686455761422594, "learning_rate": 3.1478539503584427e-06, "loss": 0.9643, "step": 4019 }, { "epoch": 0.42278517622622164, "grad_norm": 2.966460155715894, "learning_rate": 3.147047612756302e-06, "loss": 1.0212, "step": 4020 }, { "epoch": 0.42289034666806896, "grad_norm": 1.8811689133408926, "learning_rate": 3.1462412030033264e-06, "loss": 1.0067, "step": 4021 }, { "epoch": 0.42299551710991623, "grad_norm": 2.425415315035811, "learning_rate": 3.1454347211894364e-06, "loss": 1.031, "step": 4022 }, { "epoch": 0.42310068755176355, "grad_norm": 2.0840405211523074, "learning_rate": 3.144628167404561e-06, "loss": 0.98, "step": 4023 }, { "epoch": 0.4232058579936109, "grad_norm": 2.1726194511797052, "learning_rate": 3.1438215417386377e-06, "loss": 0.9629, "step": 4024 }, { "epoch": 0.4233110284354582, "grad_norm": 2.3856593069057714, "learning_rate": 3.1430148442816112e-06, "loss": 0.9939, "step": 4025 }, { "epoch": 0.4234161988773055, "grad_norm": 3.3977729251669184, "learning_rate": 3.1422080751234342e-06, "loss": 1.0636, "step": 4026 }, { "epoch": 0.42352136931915285, "grad_norm": 1.9288928690265361, "learning_rate": 3.1414012343540667e-06, "loss": 0.9848, "step": 4027 }, { "epoch": 0.4236265397610002, "grad_norm": 1.9840004899690646, "learning_rate": 3.1405943220634805e-06, "loss": 0.9988, "step": 4028 }, { "epoch": 0.4237317102028475, "grad_norm": 2.442923625799874, "learning_rate": 3.1397873383416503e-06, "loss": 1.0168, "step": 4029 }, { "epoch": 0.4238368806446948, "grad_norm": 2.739607689137626, "learning_rate": 3.138980283278562e-06, "loss": 1.0001, "step": 4030 }, { "epoch": 0.42394205108654215, "grad_norm": 1.8732837933056634, "learning_rate": 3.1381731569642093e-06, "loss": 1.0149, "step": 4031 }, { "epoch": 0.4240472215283895, "grad_norm": 2.1861276788817694, "learning_rate": 3.137365959488591e-06, "loss": 1.0425, "step": 4032 }, { "epoch": 0.42415239197023674, "grad_norm": 2.227889990859692, "learning_rate": 3.1365586909417177e-06, "loss": 1.0049, "step": 4033 }, { "epoch": 0.42425756241208407, "grad_norm": 2.401704867671759, "learning_rate": 3.1357513514136044e-06, "loss": 1.0011, "step": 4034 }, { "epoch": 0.4243627328539314, "grad_norm": 2.572582774921797, "learning_rate": 3.1349439409942778e-06, "loss": 1.0292, "step": 4035 }, { "epoch": 0.4244679032957787, "grad_norm": 2.464307334674958, "learning_rate": 3.1341364597737684e-06, "loss": 1.0012, "step": 4036 }, { "epoch": 0.42457307373762604, "grad_norm": 2.1963762256824078, "learning_rate": 3.133328907842118e-06, "loss": 0.9715, "step": 4037 }, { "epoch": 0.42467824417947336, "grad_norm": 3.0073856142249267, "learning_rate": 3.132521285289375e-06, "loss": 0.9854, "step": 4038 }, { "epoch": 0.4247834146213207, "grad_norm": 2.7319358749557625, "learning_rate": 3.1317135922055954e-06, "loss": 1.0202, "step": 4039 }, { "epoch": 0.424888585063168, "grad_norm": 1.7350134734136733, "learning_rate": 3.130905828680842e-06, "loss": 0.9956, "step": 4040 }, { "epoch": 0.42499375550501534, "grad_norm": 2.3777384845962257, "learning_rate": 3.1300979948051874e-06, "loss": 1.0045, "step": 4041 }, { "epoch": 0.42509892594686266, "grad_norm": 2.601237373112988, "learning_rate": 3.1292900906687123e-06, "loss": 0.9874, "step": 4042 }, { "epoch": 0.42520409638870993, "grad_norm": 2.2851187750948054, "learning_rate": 3.128482116361503e-06, "loss": 0.9632, "step": 4043 }, { "epoch": 0.42530926683055725, "grad_norm": 1.7435113684957382, "learning_rate": 3.1276740719736565e-06, "loss": 1.0032, "step": 4044 }, { "epoch": 0.4254144372724046, "grad_norm": 2.623135138337169, "learning_rate": 3.126865957595274e-06, "loss": 1.0326, "step": 4045 }, { "epoch": 0.4255196077142519, "grad_norm": 3.0108763917408683, "learning_rate": 3.126057773316469e-06, "loss": 0.9891, "step": 4046 }, { "epoch": 0.4256247781560992, "grad_norm": 2.580885149154926, "learning_rate": 3.1252495192273575e-06, "loss": 1.0105, "step": 4047 }, { "epoch": 0.42572994859794655, "grad_norm": 2.1650235060129694, "learning_rate": 3.1244411954180677e-06, "loss": 1.033, "step": 4048 }, { "epoch": 0.4258351190397939, "grad_norm": 2.756079053013681, "learning_rate": 3.1236328019787338e-06, "loss": 0.9757, "step": 4049 }, { "epoch": 0.4259402894816412, "grad_norm": 2.9476755695727452, "learning_rate": 3.1228243389994976e-06, "loss": 0.9734, "step": 4050 }, { "epoch": 0.4260454599234885, "grad_norm": 1.816970545742394, "learning_rate": 3.1220158065705104e-06, "loss": 0.9716, "step": 4051 }, { "epoch": 0.42615063036533585, "grad_norm": 2.1446993232730724, "learning_rate": 3.1212072047819276e-06, "loss": 0.9774, "step": 4052 }, { "epoch": 0.4262558008071831, "grad_norm": 3.315132448487326, "learning_rate": 3.1203985337239174e-06, "loss": 1.0018, "step": 4053 }, { "epoch": 0.42636097124903044, "grad_norm": 2.3941591823008643, "learning_rate": 3.1195897934866504e-06, "loss": 0.9595, "step": 4054 }, { "epoch": 0.42646614169087776, "grad_norm": 2.0757761997376627, "learning_rate": 3.118780984160307e-06, "loss": 0.9928, "step": 4055 }, { "epoch": 0.4265713121327251, "grad_norm": 1.8275624288227292, "learning_rate": 3.1179721058350787e-06, "loss": 1.0441, "step": 4056 }, { "epoch": 0.4266764825745724, "grad_norm": 2.5156642086205534, "learning_rate": 3.117163158601159e-06, "loss": 1.0065, "step": 4057 }, { "epoch": 0.42678165301641974, "grad_norm": 2.851734935278207, "learning_rate": 3.1163541425487535e-06, "loss": 0.9887, "step": 4058 }, { "epoch": 0.42688682345826706, "grad_norm": 2.4798006522437617, "learning_rate": 3.1155450577680725e-06, "loss": 1.0047, "step": 4059 }, { "epoch": 0.4269919939001144, "grad_norm": 1.5625163069318968, "learning_rate": 3.1147359043493365e-06, "loss": 0.96, "step": 4060 }, { "epoch": 0.4270971643419617, "grad_norm": 2.125796790580701, "learning_rate": 3.113926682382771e-06, "loss": 0.9713, "step": 4061 }, { "epoch": 0.42720233478380903, "grad_norm": 2.7077037498810754, "learning_rate": 3.113117391958612e-06, "loss": 0.995, "step": 4062 }, { "epoch": 0.4273075052256563, "grad_norm": 2.611706521027208, "learning_rate": 3.1123080331671015e-06, "loss": 1.0046, "step": 4063 }, { "epoch": 0.4274126756675036, "grad_norm": 1.909431132438331, "learning_rate": 3.111498606098487e-06, "loss": 1.0073, "step": 4064 }, { "epoch": 0.42751784610935095, "grad_norm": 2.7980952079855665, "learning_rate": 3.110689110843029e-06, "loss": 1.0122, "step": 4065 }, { "epoch": 0.4276230165511983, "grad_norm": 2.1381354002981037, "learning_rate": 3.10987954749099e-06, "loss": 0.9777, "step": 4066 }, { "epoch": 0.4277281869930456, "grad_norm": 3.062933496654773, "learning_rate": 3.1090699161326442e-06, "loss": 1.0339, "step": 4067 }, { "epoch": 0.4278333574348929, "grad_norm": 2.555658076041788, "learning_rate": 3.108260216858272e-06, "loss": 1.0222, "step": 4068 }, { "epoch": 0.42793852787674025, "grad_norm": 2.924322622102227, "learning_rate": 3.10745044975816e-06, "loss": 0.9848, "step": 4069 }, { "epoch": 0.4280436983185876, "grad_norm": 2.7919031185411525, "learning_rate": 3.1066406149226046e-06, "loss": 1.026, "step": 4070 }, { "epoch": 0.4281488687604349, "grad_norm": 2.3674479918799087, "learning_rate": 3.105830712441907e-06, "loss": 1.0281, "step": 4071 }, { "epoch": 0.4282540392022822, "grad_norm": 2.258912241147774, "learning_rate": 3.1050207424063793e-06, "loss": 0.9928, "step": 4072 }, { "epoch": 0.4283592096441295, "grad_norm": 2.831893269330568, "learning_rate": 3.1042107049063385e-06, "loss": 1.011, "step": 4073 }, { "epoch": 0.4284643800859768, "grad_norm": 2.9136787478525115, "learning_rate": 3.103400600032111e-06, "loss": 0.9885, "step": 4074 }, { "epoch": 0.42856955052782414, "grad_norm": 2.5104840959586308, "learning_rate": 3.1025904278740286e-06, "loss": 1.0101, "step": 4075 }, { "epoch": 0.42867472096967146, "grad_norm": 2.831638306335408, "learning_rate": 3.1017801885224332e-06, "loss": 0.9507, "step": 4076 }, { "epoch": 0.4287798914115188, "grad_norm": 2.2190444522869814, "learning_rate": 3.1009698820676714e-06, "loss": 0.9653, "step": 4077 }, { "epoch": 0.4288850618533661, "grad_norm": 3.176813249530208, "learning_rate": 3.100159508600099e-06, "loss": 0.9871, "step": 4078 }, { "epoch": 0.42899023229521344, "grad_norm": 3.3715464732818154, "learning_rate": 3.0993490682100797e-06, "loss": 1.0664, "step": 4079 }, { "epoch": 0.42909540273706076, "grad_norm": 2.187534155940015, "learning_rate": 3.0985385609879832e-06, "loss": 1.0389, "step": 4080 }, { "epoch": 0.4292005731789081, "grad_norm": 1.6249601238371671, "learning_rate": 3.097727987024187e-06, "loss": 1.0206, "step": 4081 }, { "epoch": 0.4293057436207554, "grad_norm": 2.028832037009087, "learning_rate": 3.096917346409078e-06, "loss": 1.0082, "step": 4082 }, { "epoch": 0.42941091406260273, "grad_norm": 2.5626702289369585, "learning_rate": 3.0961066392330475e-06, "loss": 1.0013, "step": 4083 }, { "epoch": 0.42951608450445, "grad_norm": 2.6483344911125952, "learning_rate": 3.0952958655864957e-06, "loss": 0.9299, "step": 4084 }, { "epoch": 0.4296212549462973, "grad_norm": 2.240406701709471, "learning_rate": 3.0944850255598307e-06, "loss": 1.0073, "step": 4085 }, { "epoch": 0.42972642538814465, "grad_norm": 3.2212875181907155, "learning_rate": 3.0936741192434673e-06, "loss": 1.0506, "step": 4086 }, { "epoch": 0.429831595829992, "grad_norm": 2.8050004911994466, "learning_rate": 3.0928631467278275e-06, "loss": 1.045, "step": 4087 }, { "epoch": 0.4299367662718393, "grad_norm": 2.3637943800552037, "learning_rate": 3.0920521081033418e-06, "loss": 1.0016, "step": 4088 }, { "epoch": 0.4300419367136866, "grad_norm": 2.4022684297375942, "learning_rate": 3.091241003460446e-06, "loss": 1.0081, "step": 4089 }, { "epoch": 0.43014710715553395, "grad_norm": 2.6213335210562643, "learning_rate": 3.0904298328895865e-06, "loss": 1.0066, "step": 4090 }, { "epoch": 0.43025227759738127, "grad_norm": 2.405980637873229, "learning_rate": 3.089618596481213e-06, "loss": 0.9793, "step": 4091 }, { "epoch": 0.4303574480392286, "grad_norm": 2.452115188274156, "learning_rate": 3.088807294325786e-06, "loss": 0.93, "step": 4092 }, { "epoch": 0.4304626184810759, "grad_norm": 2.0884019140145065, "learning_rate": 3.0879959265137722e-06, "loss": 0.9783, "step": 4093 }, { "epoch": 0.4305677889229232, "grad_norm": 2.9202655391972714, "learning_rate": 3.0871844931356437e-06, "loss": 0.9959, "step": 4094 }, { "epoch": 0.4306729593647705, "grad_norm": 2.0360566333873136, "learning_rate": 3.0863729942818835e-06, "loss": 0.985, "step": 4095 }, { "epoch": 0.43077812980661784, "grad_norm": 2.203833572789431, "learning_rate": 3.08556143004298e-06, "loss": 1.0127, "step": 4096 }, { "epoch": 0.43088330024846516, "grad_norm": 3.060914354625758, "learning_rate": 3.0847498005094277e-06, "loss": 1.0552, "step": 4097 }, { "epoch": 0.4309884706903125, "grad_norm": 2.127677108709688, "learning_rate": 3.0839381057717295e-06, "loss": 1.027, "step": 4098 }, { "epoch": 0.4310936411321598, "grad_norm": 2.185558039100322, "learning_rate": 3.083126345920397e-06, "loss": 0.9747, "step": 4099 }, { "epoch": 0.43119881157400713, "grad_norm": 2.502125904858561, "learning_rate": 3.082314521045947e-06, "loss": 0.9718, "step": 4100 }, { "epoch": 0.43130398201585446, "grad_norm": 2.249386844041367, "learning_rate": 3.081502631238904e-06, "loss": 1.0284, "step": 4101 }, { "epoch": 0.4314091524577018, "grad_norm": 2.510215362561653, "learning_rate": 3.080690676589801e-06, "loss": 1.0115, "step": 4102 }, { "epoch": 0.4315143228995491, "grad_norm": 2.540632786506748, "learning_rate": 3.0798786571891754e-06, "loss": 1.0009, "step": 4103 }, { "epoch": 0.4316194933413964, "grad_norm": 3.268212533873219, "learning_rate": 3.0790665731275764e-06, "loss": 1.0034, "step": 4104 }, { "epoch": 0.4317246637832437, "grad_norm": 2.2631828866412564, "learning_rate": 3.0782544244955546e-06, "loss": 0.986, "step": 4105 }, { "epoch": 0.431829834225091, "grad_norm": 2.8101017497715945, "learning_rate": 3.077442211383674e-06, "loss": 1.0227, "step": 4106 }, { "epoch": 0.43193500466693835, "grad_norm": 2.590129436859189, "learning_rate": 3.0766299338825003e-06, "loss": 0.9566, "step": 4107 }, { "epoch": 0.4320401751087857, "grad_norm": 2.6443948940324655, "learning_rate": 3.0758175920826096e-06, "loss": 0.9694, "step": 4108 }, { "epoch": 0.432145345550633, "grad_norm": 3.141552378052775, "learning_rate": 3.075005186074584e-06, "loss": 1.0031, "step": 4109 }, { "epoch": 0.4322505159924803, "grad_norm": 3.8498143175202366, "learning_rate": 3.0741927159490133e-06, "loss": 1.042, "step": 4110 }, { "epoch": 0.43235568643432765, "grad_norm": 1.586336922468649, "learning_rate": 3.073380181796495e-06, "loss": 0.9673, "step": 4111 }, { "epoch": 0.43246085687617497, "grad_norm": 3.3046763352664676, "learning_rate": 3.072567583707632e-06, "loss": 0.9643, "step": 4112 }, { "epoch": 0.4325660273180223, "grad_norm": 3.225434065889587, "learning_rate": 3.071754921773035e-06, "loss": 1.0441, "step": 4113 }, { "epoch": 0.43267119775986956, "grad_norm": 2.4386638186184277, "learning_rate": 3.070942196083323e-06, "loss": 0.9961, "step": 4114 }, { "epoch": 0.4327763682017169, "grad_norm": 2.2998360002188307, "learning_rate": 3.070129406729121e-06, "loss": 1.0012, "step": 4115 }, { "epoch": 0.4328815386435642, "grad_norm": 2.2206282124156593, "learning_rate": 3.0693165538010617e-06, "loss": 1.001, "step": 4116 }, { "epoch": 0.43298670908541154, "grad_norm": 2.616939405371355, "learning_rate": 3.0685036373897833e-06, "loss": 0.9975, "step": 4117 }, { "epoch": 0.43309187952725886, "grad_norm": 2.1392510552663477, "learning_rate": 3.0676906575859335e-06, "loss": 1.0204, "step": 4118 }, { "epoch": 0.4331970499691062, "grad_norm": 2.4770512512838523, "learning_rate": 3.0668776144801653e-06, "loss": 1.0116, "step": 4119 }, { "epoch": 0.4333022204109535, "grad_norm": 2.7619726953432227, "learning_rate": 3.0660645081631396e-06, "loss": 1.0338, "step": 4120 }, { "epoch": 0.43340739085280083, "grad_norm": 3.6075982772582718, "learning_rate": 3.0652513387255227e-06, "loss": 0.9864, "step": 4121 }, { "epoch": 0.43351256129464816, "grad_norm": 2.0025698189454344, "learning_rate": 3.064438106257992e-06, "loss": 1.0167, "step": 4122 }, { "epoch": 0.4336177317364955, "grad_norm": 2.4436854317627286, "learning_rate": 3.063624810851227e-06, "loss": 1.0045, "step": 4123 }, { "epoch": 0.43372290217834275, "grad_norm": 2.485396787287376, "learning_rate": 3.0628114525959175e-06, "loss": 1.001, "step": 4124 }, { "epoch": 0.4338280726201901, "grad_norm": 3.1025874629215706, "learning_rate": 3.0619980315827585e-06, "loss": 1.0026, "step": 4125 }, { "epoch": 0.4339332430620374, "grad_norm": 3.037225307324419, "learning_rate": 3.061184547902454e-06, "loss": 0.9949, "step": 4126 }, { "epoch": 0.4340384135038847, "grad_norm": 2.8257071700615133, "learning_rate": 3.060371001645713e-06, "loss": 1.0168, "step": 4127 }, { "epoch": 0.43414358394573205, "grad_norm": 1.4769811886831412, "learning_rate": 3.0595573929032513e-06, "loss": 0.9953, "step": 4128 }, { "epoch": 0.43424875438757937, "grad_norm": 2.3056756386496047, "learning_rate": 3.0587437217657937e-06, "loss": 0.9979, "step": 4129 }, { "epoch": 0.4343539248294267, "grad_norm": 2.530987884033079, "learning_rate": 3.057929988324071e-06, "loss": 1.0052, "step": 4130 }, { "epoch": 0.434459095271274, "grad_norm": 2.669533462808313, "learning_rate": 3.0571161926688204e-06, "loss": 1.0023, "step": 4131 }, { "epoch": 0.43456426571312134, "grad_norm": 2.0814334987484417, "learning_rate": 3.056302334890786e-06, "loss": 0.9791, "step": 4132 }, { "epoch": 0.43466943615496867, "grad_norm": 2.8696601264581654, "learning_rate": 3.0554884150807208e-06, "loss": 1.0174, "step": 4133 }, { "epoch": 0.43477460659681594, "grad_norm": 2.2470344072413533, "learning_rate": 3.0546744333293815e-06, "loss": 0.974, "step": 4134 }, { "epoch": 0.43487977703866326, "grad_norm": 2.6293475640576642, "learning_rate": 3.053860389727534e-06, "loss": 1.0178, "step": 4135 }, { "epoch": 0.4349849474805106, "grad_norm": 2.023015252677748, "learning_rate": 3.0530462843659504e-06, "loss": 0.991, "step": 4136 }, { "epoch": 0.4350901179223579, "grad_norm": 3.028146066432831, "learning_rate": 3.0522321173354095e-06, "loss": 1.0114, "step": 4137 }, { "epoch": 0.43519528836420523, "grad_norm": 2.198855179319069, "learning_rate": 3.0514178887266986e-06, "loss": 1.0585, "step": 4138 }, { "epoch": 0.43530045880605256, "grad_norm": 1.9696831755080155, "learning_rate": 3.0506035986306095e-06, "loss": 1.0022, "step": 4139 }, { "epoch": 0.4354056292478999, "grad_norm": 3.374841364809197, "learning_rate": 3.0497892471379415e-06, "loss": 1.0142, "step": 4140 }, { "epoch": 0.4355107996897472, "grad_norm": 1.8557258077383334, "learning_rate": 3.0489748343395013e-06, "loss": 0.9776, "step": 4141 }, { "epoch": 0.43561597013159453, "grad_norm": 2.47059315827509, "learning_rate": 3.0481603603261023e-06, "loss": 1.0122, "step": 4142 }, { "epoch": 0.43572114057344186, "grad_norm": 2.7641667162449974, "learning_rate": 3.0473458251885658e-06, "loss": 0.9913, "step": 4143 }, { "epoch": 0.4358263110152892, "grad_norm": 1.910330796679931, "learning_rate": 3.0465312290177166e-06, "loss": 1.0036, "step": 4144 }, { "epoch": 0.43593148145713645, "grad_norm": 2.118224274596519, "learning_rate": 3.0457165719043906e-06, "loss": 1.0305, "step": 4145 }, { "epoch": 0.4360366518989838, "grad_norm": 3.610473332887411, "learning_rate": 3.0449018539394274e-06, "loss": 1.0083, "step": 4146 }, { "epoch": 0.4361418223408311, "grad_norm": 2.3957683382072847, "learning_rate": 3.044087075213675e-06, "loss": 0.9799, "step": 4147 }, { "epoch": 0.4362469927826784, "grad_norm": 2.6043783680389865, "learning_rate": 3.0432722358179873e-06, "loss": 1.0203, "step": 4148 }, { "epoch": 0.43635216322452575, "grad_norm": 2.0797439186751654, "learning_rate": 3.0424573358432243e-06, "loss": 0.9899, "step": 4149 }, { "epoch": 0.43645733366637307, "grad_norm": 2.5250166716374918, "learning_rate": 3.041642375380254e-06, "loss": 1.0332, "step": 4150 }, { "epoch": 0.4365625041082204, "grad_norm": 2.4929052295492213, "learning_rate": 3.0408273545199517e-06, "loss": 1.0466, "step": 4151 }, { "epoch": 0.4366676745500677, "grad_norm": 1.9109696456025282, "learning_rate": 3.0400122733531984e-06, "loss": 0.9933, "step": 4152 }, { "epoch": 0.43677284499191504, "grad_norm": 1.9084755489133232, "learning_rate": 3.039197131970881e-06, "loss": 0.986, "step": 4153 }, { "epoch": 0.43687801543376237, "grad_norm": 2.669040697159479, "learning_rate": 3.0383819304638953e-06, "loss": 1.0244, "step": 4154 }, { "epoch": 0.43698318587560964, "grad_norm": 2.3914620652281022, "learning_rate": 3.0375666689231424e-06, "loss": 1.0093, "step": 4155 }, { "epoch": 0.43708835631745696, "grad_norm": 3.002326215365401, "learning_rate": 3.0367513474395293e-06, "loss": 1.0092, "step": 4156 }, { "epoch": 0.4371935267593043, "grad_norm": 2.611694901531462, "learning_rate": 3.035935966103972e-06, "loss": 0.9976, "step": 4157 }, { "epoch": 0.4372986972011516, "grad_norm": 3.2906065240695264, "learning_rate": 3.0351205250073897e-06, "loss": 1.0235, "step": 4158 }, { "epoch": 0.43740386764299893, "grad_norm": 2.7111569861223654, "learning_rate": 3.034305024240713e-06, "loss": 0.9998, "step": 4159 }, { "epoch": 0.43750903808484626, "grad_norm": 2.8104041489630553, "learning_rate": 3.0334894638948753e-06, "loss": 0.9441, "step": 4160 }, { "epoch": 0.4376142085266936, "grad_norm": 2.088309040465824, "learning_rate": 3.0326738440608176e-06, "loss": 1.0236, "step": 4161 }, { "epoch": 0.4377193789685409, "grad_norm": 2.4797799924774955, "learning_rate": 3.031858164829489e-06, "loss": 0.9917, "step": 4162 }, { "epoch": 0.43782454941038823, "grad_norm": 2.2176787604079258, "learning_rate": 3.0310424262918437e-06, "loss": 0.9509, "step": 4163 }, { "epoch": 0.43792971985223555, "grad_norm": 2.2461741633185994, "learning_rate": 3.030226628538843e-06, "loss": 0.9834, "step": 4164 }, { "epoch": 0.4380348902940828, "grad_norm": 3.2683363657888775, "learning_rate": 3.0294107716614522e-06, "loss": 0.9626, "step": 4165 }, { "epoch": 0.43814006073593015, "grad_norm": 2.101521859677799, "learning_rate": 3.0285948557506497e-06, "loss": 1.0173, "step": 4166 }, { "epoch": 0.4382452311777775, "grad_norm": 2.5072346713043774, "learning_rate": 3.027778880897413e-06, "loss": 0.9717, "step": 4167 }, { "epoch": 0.4383504016196248, "grad_norm": 2.886458825011365, "learning_rate": 3.026962847192732e-06, "loss": 0.9941, "step": 4168 }, { "epoch": 0.4384555720614721, "grad_norm": 2.250039487973679, "learning_rate": 3.0261467547276e-06, "loss": 0.999, "step": 4169 }, { "epoch": 0.43856074250331945, "grad_norm": 3.166565115166139, "learning_rate": 3.0253306035930173e-06, "loss": 0.9735, "step": 4170 }, { "epoch": 0.43866591294516677, "grad_norm": 2.798487567270618, "learning_rate": 3.024514393879992e-06, "loss": 1.0038, "step": 4171 }, { "epoch": 0.4387710833870141, "grad_norm": 2.7668838586778435, "learning_rate": 3.0236981256795367e-06, "loss": 1.0429, "step": 4172 }, { "epoch": 0.4388762538288614, "grad_norm": 2.3144035959674554, "learning_rate": 3.022881799082672e-06, "loss": 0.9873, "step": 4173 }, { "epoch": 0.43898142427070874, "grad_norm": 2.492637324980567, "learning_rate": 3.022065414180425e-06, "loss": 0.994, "step": 4174 }, { "epoch": 0.439086594712556, "grad_norm": 2.551661508974668, "learning_rate": 3.021248971063829e-06, "loss": 1.0159, "step": 4175 }, { "epoch": 0.43919176515440334, "grad_norm": 2.0455759141104584, "learning_rate": 3.0204324698239236e-06, "loss": 1.0531, "step": 4176 }, { "epoch": 0.43929693559625066, "grad_norm": 2.798083383983236, "learning_rate": 3.019615910551755e-06, "loss": 0.9885, "step": 4177 }, { "epoch": 0.439402106038098, "grad_norm": 1.9753015864782129, "learning_rate": 3.0187992933383754e-06, "loss": 0.981, "step": 4178 }, { "epoch": 0.4395072764799453, "grad_norm": 2.0935177190552077, "learning_rate": 3.017982618274844e-06, "loss": 0.9449, "step": 4179 }, { "epoch": 0.43961244692179263, "grad_norm": 2.3446418568178315, "learning_rate": 3.0171658854522274e-06, "loss": 1.0141, "step": 4180 }, { "epoch": 0.43971761736363996, "grad_norm": 1.3397303520976527, "learning_rate": 3.016349094961597e-06, "loss": 1.0155, "step": 4181 }, { "epoch": 0.4398227878054873, "grad_norm": 2.58181226066027, "learning_rate": 3.015532246894031e-06, "loss": 1.0046, "step": 4182 }, { "epoch": 0.4399279582473346, "grad_norm": 2.4829085003143923, "learning_rate": 3.0147153413406154e-06, "loss": 1.0414, "step": 4183 }, { "epoch": 0.44003312868918193, "grad_norm": 3.42576868411267, "learning_rate": 3.013898378392441e-06, "loss": 1.0368, "step": 4184 }, { "epoch": 0.4401382991310292, "grad_norm": 2.252207757095223, "learning_rate": 3.0130813581406044e-06, "loss": 1.0187, "step": 4185 }, { "epoch": 0.4402434695728765, "grad_norm": 2.5342374508667707, "learning_rate": 3.0122642806762114e-06, "loss": 0.9934, "step": 4186 }, { "epoch": 0.44034864001472385, "grad_norm": 2.8671681025292246, "learning_rate": 3.0114471460903714e-06, "loss": 0.9852, "step": 4187 }, { "epoch": 0.44045381045657117, "grad_norm": 2.5693510866729388, "learning_rate": 3.0106299544742013e-06, "loss": 1.0354, "step": 4188 }, { "epoch": 0.4405589808984185, "grad_norm": 3.364495917648472, "learning_rate": 3.0098127059188247e-06, "loss": 1.0339, "step": 4189 }, { "epoch": 0.4406641513402658, "grad_norm": 2.7990251477607146, "learning_rate": 3.0089954005153706e-06, "loss": 1.0116, "step": 4190 }, { "epoch": 0.44076932178211314, "grad_norm": 1.7727662574506695, "learning_rate": 3.0081780383549763e-06, "loss": 1.012, "step": 4191 }, { "epoch": 0.44087449222396047, "grad_norm": 3.2222272812715245, "learning_rate": 3.0073606195287825e-06, "loss": 0.9633, "step": 4192 }, { "epoch": 0.4409796626658078, "grad_norm": 3.6290145756150722, "learning_rate": 3.0065431441279386e-06, "loss": 1.0499, "step": 4193 }, { "epoch": 0.4410848331076551, "grad_norm": 3.1982957510410515, "learning_rate": 3.005725612243599e-06, "loss": 0.9487, "step": 4194 }, { "epoch": 0.4411900035495024, "grad_norm": 2.8434896830214123, "learning_rate": 3.0049080239669243e-06, "loss": 1.0009, "step": 4195 }, { "epoch": 0.4412951739913497, "grad_norm": 2.523909980989135, "learning_rate": 3.0040903793890834e-06, "loss": 1.0032, "step": 4196 }, { "epoch": 0.44140034443319703, "grad_norm": 2.523114131380918, "learning_rate": 3.003272678601249e-06, "loss": 1.0288, "step": 4197 }, { "epoch": 0.44150551487504436, "grad_norm": 2.8908811657988247, "learning_rate": 3.002454921694602e-06, "loss": 1.0062, "step": 4198 }, { "epoch": 0.4416106853168917, "grad_norm": 2.7991620994148376, "learning_rate": 3.0016371087603274e-06, "loss": 1.0093, "step": 4199 }, { "epoch": 0.441715855758739, "grad_norm": 2.6659000123176773, "learning_rate": 3.0008192398896185e-06, "loss": 1.0206, "step": 4200 }, { "epoch": 0.44182102620058633, "grad_norm": 2.2057443650750863, "learning_rate": 3.0000013151736747e-06, "loss": 1.0263, "step": 4201 }, { "epoch": 0.44192619664243366, "grad_norm": 2.7169424176961896, "learning_rate": 2.999183334703699e-06, "loss": 1.0041, "step": 4202 }, { "epoch": 0.442031367084281, "grad_norm": 2.950358292631194, "learning_rate": 2.9983652985709037e-06, "loss": 0.9955, "step": 4203 }, { "epoch": 0.4421365375261283, "grad_norm": 2.806264087299609, "learning_rate": 2.9975472068665063e-06, "loss": 1.0283, "step": 4204 }, { "epoch": 0.44224170796797563, "grad_norm": 2.711842000947828, "learning_rate": 2.9967290596817308e-06, "loss": 1.0311, "step": 4205 }, { "epoch": 0.4423468784098229, "grad_norm": 2.7709733812366664, "learning_rate": 2.9959108571078056e-06, "loss": 0.9947, "step": 4206 }, { "epoch": 0.4424520488516702, "grad_norm": 3.2885009923549515, "learning_rate": 2.995092599235968e-06, "loss": 1.0179, "step": 4207 }, { "epoch": 0.44255721929351755, "grad_norm": 2.955161655367558, "learning_rate": 2.994274286157459e-06, "loss": 0.9709, "step": 4208 }, { "epoch": 0.44266238973536487, "grad_norm": 2.953888729382429, "learning_rate": 2.9934559179635282e-06, "loss": 1.0252, "step": 4209 }, { "epoch": 0.4427675601772122, "grad_norm": 3.003042556969862, "learning_rate": 2.992637494745429e-06, "loss": 0.9944, "step": 4210 }, { "epoch": 0.4428727306190595, "grad_norm": 2.7657547018738637, "learning_rate": 2.9918190165944217e-06, "loss": 0.9603, "step": 4211 }, { "epoch": 0.44297790106090684, "grad_norm": 2.0161297281440613, "learning_rate": 2.991000483601774e-06, "loss": 1.0033, "step": 4212 }, { "epoch": 0.44308307150275417, "grad_norm": 2.023154902922774, "learning_rate": 2.9901818958587587e-06, "loss": 0.9801, "step": 4213 }, { "epoch": 0.4431882419446015, "grad_norm": 1.8645927902550326, "learning_rate": 2.9893632534566534e-06, "loss": 1.0054, "step": 4214 }, { "epoch": 0.4432934123864488, "grad_norm": 3.5301225857507417, "learning_rate": 2.9885445564867438e-06, "loss": 0.9769, "step": 4215 }, { "epoch": 0.4433985828282961, "grad_norm": 2.8515371337141744, "learning_rate": 2.9877258050403214e-06, "loss": 0.9968, "step": 4216 }, { "epoch": 0.4435037532701434, "grad_norm": 2.404740251256232, "learning_rate": 2.9869069992086825e-06, "loss": 0.9942, "step": 4217 }, { "epoch": 0.44360892371199073, "grad_norm": 2.046523753796662, "learning_rate": 2.9860881390831303e-06, "loss": 1.0065, "step": 4218 }, { "epoch": 0.44371409415383806, "grad_norm": 3.103804612920373, "learning_rate": 2.985269224754975e-06, "loss": 1.061, "step": 4219 }, { "epoch": 0.4438192645956854, "grad_norm": 2.221775249113264, "learning_rate": 2.9844502563155324e-06, "loss": 0.9729, "step": 4220 }, { "epoch": 0.4439244350375327, "grad_norm": 2.36487818709531, "learning_rate": 2.9836312338561223e-06, "loss": 1.0165, "step": 4221 }, { "epoch": 0.44402960547938003, "grad_norm": 2.5320619109662053, "learning_rate": 2.9828121574680717e-06, "loss": 1.0447, "step": 4222 }, { "epoch": 0.44413477592122735, "grad_norm": 2.2988633433443515, "learning_rate": 2.9819930272427162e-06, "loss": 0.9668, "step": 4223 }, { "epoch": 0.4442399463630747, "grad_norm": 2.9595824678582545, "learning_rate": 2.981173843271393e-06, "loss": 1.015, "step": 4224 }, { "epoch": 0.444345116804922, "grad_norm": 2.950867108730787, "learning_rate": 2.9803546056454487e-06, "loss": 1.0077, "step": 4225 }, { "epoch": 0.44445028724676927, "grad_norm": 1.6132738036228578, "learning_rate": 2.9795353144562344e-06, "loss": 0.9943, "step": 4226 }, { "epoch": 0.4445554576886166, "grad_norm": 2.7813805140120333, "learning_rate": 2.978715969795108e-06, "loss": 0.9776, "step": 4227 }, { "epoch": 0.4446606281304639, "grad_norm": 2.562516330449887, "learning_rate": 2.9778965717534314e-06, "loss": 0.9551, "step": 4228 }, { "epoch": 0.44476579857231124, "grad_norm": 3.1737333083955583, "learning_rate": 2.9770771204225744e-06, "loss": 0.9925, "step": 4229 }, { "epoch": 0.44487096901415857, "grad_norm": 1.8507674535018093, "learning_rate": 2.9762576158939127e-06, "loss": 1.0234, "step": 4230 }, { "epoch": 0.4449761394560059, "grad_norm": 2.6261455818000408, "learning_rate": 2.975438058258827e-06, "loss": 1.0195, "step": 4231 }, { "epoch": 0.4450813098978532, "grad_norm": 1.8245806335822785, "learning_rate": 2.974618447608705e-06, "loss": 0.9718, "step": 4232 }, { "epoch": 0.44518648033970054, "grad_norm": 2.2479753999337815, "learning_rate": 2.9737987840349393e-06, "loss": 0.9852, "step": 4233 }, { "epoch": 0.44529165078154787, "grad_norm": 2.35768351945659, "learning_rate": 2.9729790676289276e-06, "loss": 1.0047, "step": 4234 }, { "epoch": 0.4453968212233952, "grad_norm": 1.6696931548669964, "learning_rate": 2.972159298482076e-06, "loss": 0.9698, "step": 4235 }, { "epoch": 0.44550199166524246, "grad_norm": 2.0882289069459548, "learning_rate": 2.971339476685795e-06, "loss": 0.9712, "step": 4236 }, { "epoch": 0.4456071621070898, "grad_norm": 2.093444735376746, "learning_rate": 2.9705196023315007e-06, "loss": 1.0109, "step": 4237 }, { "epoch": 0.4457123325489371, "grad_norm": 2.5021177606762377, "learning_rate": 2.9696996755106155e-06, "loss": 1.0178, "step": 4238 }, { "epoch": 0.44581750299078443, "grad_norm": 2.334943268674044, "learning_rate": 2.968879696314568e-06, "loss": 0.9909, "step": 4239 }, { "epoch": 0.44592267343263176, "grad_norm": 2.3017506240577448, "learning_rate": 2.968059664834792e-06, "loss": 1.023, "step": 4240 }, { "epoch": 0.4460278438744791, "grad_norm": 2.769841880116001, "learning_rate": 2.967239581162727e-06, "loss": 0.9995, "step": 4241 }, { "epoch": 0.4461330143163264, "grad_norm": 3.7992104532494775, "learning_rate": 2.96641944538982e-06, "loss": 1.0083, "step": 4242 }, { "epoch": 0.44623818475817373, "grad_norm": 2.1347949459797393, "learning_rate": 2.9655992576075198e-06, "loss": 0.9633, "step": 4243 }, { "epoch": 0.44634335520002105, "grad_norm": 2.951928502125178, "learning_rate": 2.964779017907287e-06, "loss": 0.9934, "step": 4244 }, { "epoch": 0.4464485256418684, "grad_norm": 2.263878251484875, "learning_rate": 2.9639587263805824e-06, "loss": 1.0103, "step": 4245 }, { "epoch": 0.44655369608371565, "grad_norm": 3.2110964246131064, "learning_rate": 2.963138383118876e-06, "loss": 0.9281, "step": 4246 }, { "epoch": 0.44665886652556297, "grad_norm": 2.7206342576353966, "learning_rate": 2.962317988213642e-06, "loss": 1.0208, "step": 4247 }, { "epoch": 0.4467640369674103, "grad_norm": 2.569325435677048, "learning_rate": 2.961497541756361e-06, "loss": 0.9949, "step": 4248 }, { "epoch": 0.4468692074092576, "grad_norm": 2.0106169690636726, "learning_rate": 2.960677043838519e-06, "loss": 1.0168, "step": 4249 }, { "epoch": 0.44697437785110494, "grad_norm": 3.047440201325554, "learning_rate": 2.959856494551608e-06, "loss": 1.0501, "step": 4250 }, { "epoch": 0.44707954829295227, "grad_norm": 2.9897469916545525, "learning_rate": 2.9590358939871255e-06, "loss": 1.0254, "step": 4251 }, { "epoch": 0.4471847187347996, "grad_norm": 2.0389005489963674, "learning_rate": 2.9582152422365745e-06, "loss": 0.9759, "step": 4252 }, { "epoch": 0.4472898891766469, "grad_norm": 2.791018390371004, "learning_rate": 2.957394539391465e-06, "loss": 1.0193, "step": 4253 }, { "epoch": 0.44739505961849424, "grad_norm": 2.3959514096484473, "learning_rate": 2.956573785543311e-06, "loss": 1.0402, "step": 4254 }, { "epoch": 0.44750023006034156, "grad_norm": 2.952041521825401, "learning_rate": 2.955752980783633e-06, "loss": 1.0327, "step": 4255 }, { "epoch": 0.44760540050218883, "grad_norm": 2.834122618770755, "learning_rate": 2.9549321252039577e-06, "loss": 1.0195, "step": 4256 }, { "epoch": 0.44771057094403616, "grad_norm": 2.6232922045756832, "learning_rate": 2.954111218895816e-06, "loss": 0.9946, "step": 4257 }, { "epoch": 0.4478157413858835, "grad_norm": 2.890337682768083, "learning_rate": 2.9532902619507465e-06, "loss": 1.0015, "step": 4258 }, { "epoch": 0.4479209118277308, "grad_norm": 2.0462008188888023, "learning_rate": 2.95246925446029e-06, "loss": 1.0015, "step": 4259 }, { "epoch": 0.44802608226957813, "grad_norm": 2.3153362347848634, "learning_rate": 2.951648196515998e-06, "loss": 1.0159, "step": 4260 }, { "epoch": 0.44813125271142545, "grad_norm": 3.294187755137225, "learning_rate": 2.9508270882094227e-06, "loss": 1.0014, "step": 4261 }, { "epoch": 0.4482364231532728, "grad_norm": 2.8942766762795453, "learning_rate": 2.9500059296321254e-06, "loss": 1.015, "step": 4262 }, { "epoch": 0.4483415935951201, "grad_norm": 2.8362356385644976, "learning_rate": 2.9491847208756713e-06, "loss": 1.0182, "step": 4263 }, { "epoch": 0.4484467640369674, "grad_norm": 2.016987936853375, "learning_rate": 2.9483634620316314e-06, "loss": 0.9975, "step": 4264 }, { "epoch": 0.44855193447881475, "grad_norm": 2.3257680119464226, "learning_rate": 2.947542153191583e-06, "loss": 0.9592, "step": 4265 }, { "epoch": 0.4486571049206621, "grad_norm": 2.7528064005499218, "learning_rate": 2.946720794447106e-06, "loss": 1.0271, "step": 4266 }, { "epoch": 0.44876227536250934, "grad_norm": 2.4897714526055252, "learning_rate": 2.945899385889792e-06, "loss": 0.9719, "step": 4267 }, { "epoch": 0.44886744580435667, "grad_norm": 2.8675188349876675, "learning_rate": 2.9450779276112313e-06, "loss": 1.0455, "step": 4268 }, { "epoch": 0.448972616246204, "grad_norm": 2.63772057013864, "learning_rate": 2.944256419703025e-06, "loss": 1.0008, "step": 4269 }, { "epoch": 0.4490777866880513, "grad_norm": 2.767101795516205, "learning_rate": 2.9434348622567773e-06, "loss": 1.0132, "step": 4270 }, { "epoch": 0.44918295712989864, "grad_norm": 2.8503438719512353, "learning_rate": 2.942613255364097e-06, "loss": 1.0182, "step": 4271 }, { "epoch": 0.44928812757174597, "grad_norm": 3.264311463794955, "learning_rate": 2.941791599116601e-06, "loss": 0.9823, "step": 4272 }, { "epoch": 0.4493932980135933, "grad_norm": 1.9470160801562246, "learning_rate": 2.9409698936059083e-06, "loss": 1.0072, "step": 4273 }, { "epoch": 0.4494984684554406, "grad_norm": 2.578131298488729, "learning_rate": 2.940148138923648e-06, "loss": 0.9903, "step": 4274 }, { "epoch": 0.44960363889728794, "grad_norm": 2.5479584159562036, "learning_rate": 2.9393263351614503e-06, "loss": 1.0249, "step": 4275 }, { "epoch": 0.44970880933913526, "grad_norm": 2.3256148001185672, "learning_rate": 2.9385044824109544e-06, "loss": 1.0325, "step": 4276 }, { "epoch": 0.44981397978098253, "grad_norm": 3.1496173601363933, "learning_rate": 2.9376825807638016e-06, "loss": 1.0063, "step": 4277 }, { "epoch": 0.44991915022282986, "grad_norm": 2.851586489375221, "learning_rate": 2.936860630311642e-06, "loss": 1.029, "step": 4278 }, { "epoch": 0.4500243206646772, "grad_norm": 3.088679797466413, "learning_rate": 2.9360386311461276e-06, "loss": 1.0058, "step": 4279 }, { "epoch": 0.4501294911065245, "grad_norm": 2.054451946303782, "learning_rate": 2.9352165833589188e-06, "loss": 0.9734, "step": 4280 }, { "epoch": 0.45023466154837183, "grad_norm": 2.718591132670405, "learning_rate": 2.9343944870416798e-06, "loss": 1.0024, "step": 4281 }, { "epoch": 0.45033983199021915, "grad_norm": 2.746230280528559, "learning_rate": 2.9335723422860807e-06, "loss": 0.9915, "step": 4282 }, { "epoch": 0.4504450024320665, "grad_norm": 2.537502797059733, "learning_rate": 2.9327501491837977e-06, "loss": 0.9917, "step": 4283 }, { "epoch": 0.4505501728739138, "grad_norm": 3.1284325108868782, "learning_rate": 2.931927907826511e-06, "loss": 1.0431, "step": 4284 }, { "epoch": 0.4506553433157611, "grad_norm": 1.812931921725533, "learning_rate": 2.9311056183059085e-06, "loss": 0.9778, "step": 4285 }, { "epoch": 0.45076051375760845, "grad_norm": 2.851522179977043, "learning_rate": 2.93028328071368e-06, "loss": 0.9874, "step": 4286 }, { "epoch": 0.4508656841994557, "grad_norm": 2.766819002757065, "learning_rate": 2.9294608951415225e-06, "loss": 0.9944, "step": 4287 }, { "epoch": 0.45097085464130304, "grad_norm": 3.163681281030067, "learning_rate": 2.9286384616811397e-06, "loss": 1.031, "step": 4288 }, { "epoch": 0.45107602508315037, "grad_norm": 2.485916557061216, "learning_rate": 2.927815980424238e-06, "loss": 1.0086, "step": 4289 }, { "epoch": 0.4511811955249977, "grad_norm": 2.109206916955106, "learning_rate": 2.926993451462532e-06, "loss": 0.9913, "step": 4290 }, { "epoch": 0.451286365966845, "grad_norm": 2.457120848888426, "learning_rate": 2.926170874887738e-06, "loss": 0.9919, "step": 4291 }, { "epoch": 0.45139153640869234, "grad_norm": 2.303663013614732, "learning_rate": 2.925348250791582e-06, "loss": 0.9718, "step": 4292 }, { "epoch": 0.45149670685053966, "grad_norm": 3.1484358472259335, "learning_rate": 2.924525579265791e-06, "loss": 0.9857, "step": 4293 }, { "epoch": 0.451601877292387, "grad_norm": 3.189808105502437, "learning_rate": 2.9237028604021008e-06, "loss": 1.0218, "step": 4294 }, { "epoch": 0.4517070477342343, "grad_norm": 2.7759170958312116, "learning_rate": 2.9228800942922497e-06, "loss": 1.0121, "step": 4295 }, { "epoch": 0.45181221817608164, "grad_norm": 2.1847673720857, "learning_rate": 2.922057281027983e-06, "loss": 1.0119, "step": 4296 }, { "epoch": 0.4519173886179289, "grad_norm": 2.2941536309406096, "learning_rate": 2.921234420701051e-06, "loss": 0.9933, "step": 4297 }, { "epoch": 0.45202255905977623, "grad_norm": 1.7883355539808496, "learning_rate": 2.9204115134032086e-06, "loss": 0.9594, "step": 4298 }, { "epoch": 0.45212772950162355, "grad_norm": 2.9394681120602506, "learning_rate": 2.9195885592262167e-06, "loss": 1.0014, "step": 4299 }, { "epoch": 0.4522328999434709, "grad_norm": 3.0533456324247195, "learning_rate": 2.9187655582618413e-06, "loss": 0.9617, "step": 4300 }, { "epoch": 0.4523380703853182, "grad_norm": 1.6982849296696754, "learning_rate": 2.9179425106018532e-06, "loss": 0.9429, "step": 4301 }, { "epoch": 0.4524432408271655, "grad_norm": 3.1396740609931446, "learning_rate": 2.9171194163380277e-06, "loss": 1.0107, "step": 4302 }, { "epoch": 0.45254841126901285, "grad_norm": 2.811681718875989, "learning_rate": 2.916296275562147e-06, "loss": 1.0572, "step": 4303 }, { "epoch": 0.4526535817108602, "grad_norm": 3.2640718105886735, "learning_rate": 2.9154730883659988e-06, "loss": 1.025, "step": 4304 }, { "epoch": 0.4527587521527075, "grad_norm": 2.69588504505022, "learning_rate": 2.9146498548413725e-06, "loss": 0.9978, "step": 4305 }, { "epoch": 0.4528639225945548, "grad_norm": 2.2342874790888843, "learning_rate": 2.913826575080067e-06, "loss": 0.9304, "step": 4306 }, { "epoch": 0.4529690930364021, "grad_norm": 3.0565709108799037, "learning_rate": 2.9130032491738837e-06, "loss": 1.0373, "step": 4307 }, { "epoch": 0.4530742634782494, "grad_norm": 2.853116287639515, "learning_rate": 2.9121798772146293e-06, "loss": 1.0201, "step": 4308 }, { "epoch": 0.45317943392009674, "grad_norm": 2.3744681471107607, "learning_rate": 2.911356459294117e-06, "loss": 0.9437, "step": 4309 }, { "epoch": 0.45328460436194407, "grad_norm": 2.164104788250051, "learning_rate": 2.910532995504163e-06, "loss": 0.9791, "step": 4310 }, { "epoch": 0.4533897748037914, "grad_norm": 1.6203464986260847, "learning_rate": 2.9097094859365926e-06, "loss": 0.9675, "step": 4311 }, { "epoch": 0.4534949452456387, "grad_norm": 2.9303086296052934, "learning_rate": 2.90888593068323e-06, "loss": 1.0381, "step": 4312 }, { "epoch": 0.45360011568748604, "grad_norm": 2.0626838173774376, "learning_rate": 2.908062329835911e-06, "loss": 0.9685, "step": 4313 }, { "epoch": 0.45370528612933336, "grad_norm": 2.2697295104611115, "learning_rate": 2.9072386834864723e-06, "loss": 0.9784, "step": 4314 }, { "epoch": 0.4538104565711807, "grad_norm": 3.0376685513758077, "learning_rate": 2.9064149917267565e-06, "loss": 1.0353, "step": 4315 }, { "epoch": 0.453915627013028, "grad_norm": 2.657090557042862, "learning_rate": 2.905591254648612e-06, "loss": 0.9427, "step": 4316 }, { "epoch": 0.4540207974548753, "grad_norm": 2.3929879751726872, "learning_rate": 2.904767472343892e-06, "loss": 1.0099, "step": 4317 }, { "epoch": 0.4541259678967226, "grad_norm": 2.695393831318368, "learning_rate": 2.9039436449044543e-06, "loss": 1.0132, "step": 4318 }, { "epoch": 0.45423113833856993, "grad_norm": 2.1469397224850417, "learning_rate": 2.903119772422162e-06, "loss": 0.9917, "step": 4319 }, { "epoch": 0.45433630878041725, "grad_norm": 1.9890341493391972, "learning_rate": 2.902295854988884e-06, "loss": 1.0114, "step": 4320 }, { "epoch": 0.4544414792222646, "grad_norm": 2.6430862936585617, "learning_rate": 2.901471892696493e-06, "loss": 1.0101, "step": 4321 }, { "epoch": 0.4545466496641119, "grad_norm": 2.0147983041213253, "learning_rate": 2.900647885636867e-06, "loss": 1.0124, "step": 4322 }, { "epoch": 0.4546518201059592, "grad_norm": 2.1685192867151057, "learning_rate": 2.899823833901889e-06, "loss": 1.0, "step": 4323 }, { "epoch": 0.45475699054780655, "grad_norm": 2.332868281748152, "learning_rate": 2.8989997375834485e-06, "loss": 1.0148, "step": 4324 }, { "epoch": 0.4548621609896539, "grad_norm": 1.3686107234017948, "learning_rate": 2.8981755967734377e-06, "loss": 0.9342, "step": 4325 }, { "epoch": 0.4549673314315012, "grad_norm": 2.087757270716191, "learning_rate": 2.8973514115637534e-06, "loss": 0.9817, "step": 4326 }, { "epoch": 0.4550725018733485, "grad_norm": 2.271350804832439, "learning_rate": 2.8965271820463016e-06, "loss": 1.0087, "step": 4327 }, { "epoch": 0.4551776723151958, "grad_norm": 1.643573599294807, "learning_rate": 2.895702908312987e-06, "loss": 1.0125, "step": 4328 }, { "epoch": 0.4552828427570431, "grad_norm": 2.554616326554526, "learning_rate": 2.8948785904557262e-06, "loss": 0.9899, "step": 4329 }, { "epoch": 0.45538801319889044, "grad_norm": 2.91621589953988, "learning_rate": 2.8940542285664337e-06, "loss": 1.0189, "step": 4330 }, { "epoch": 0.45549318364073776, "grad_norm": 1.9347991717626811, "learning_rate": 2.8932298227370335e-06, "loss": 0.966, "step": 4331 }, { "epoch": 0.4555983540825851, "grad_norm": 3.5268488608131996, "learning_rate": 2.8924053730594536e-06, "loss": 0.9827, "step": 4332 }, { "epoch": 0.4557035245244324, "grad_norm": 3.3032634100912515, "learning_rate": 2.8915808796256268e-06, "loss": 1.0037, "step": 4333 }, { "epoch": 0.45580869496627974, "grad_norm": 2.594478528315312, "learning_rate": 2.89075634252749e-06, "loss": 0.9854, "step": 4334 }, { "epoch": 0.45591386540812706, "grad_norm": 2.142538714749134, "learning_rate": 2.8899317618569843e-06, "loss": 1.0015, "step": 4335 }, { "epoch": 0.4560190358499744, "grad_norm": 2.336631372632609, "learning_rate": 2.889107137706059e-06, "loss": 1.0257, "step": 4336 }, { "epoch": 0.4561242062918217, "grad_norm": 3.5495497144192676, "learning_rate": 2.8882824701666657e-06, "loss": 1.0268, "step": 4337 }, { "epoch": 0.456229376733669, "grad_norm": 1.8932999054002906, "learning_rate": 2.88745775933076e-06, "loss": 0.9928, "step": 4338 }, { "epoch": 0.4563345471755163, "grad_norm": 3.0148596426527643, "learning_rate": 2.8866330052903042e-06, "loss": 1.024, "step": 4339 }, { "epoch": 0.4564397176173636, "grad_norm": 2.0856993515410323, "learning_rate": 2.885808208137265e-06, "loss": 0.9969, "step": 4340 }, { "epoch": 0.45654488805921095, "grad_norm": 2.095948181530454, "learning_rate": 2.8849833679636137e-06, "loss": 0.9824, "step": 4341 }, { "epoch": 0.4566500585010583, "grad_norm": 2.5170281229756384, "learning_rate": 2.8841584848613254e-06, "loss": 1.002, "step": 4342 }, { "epoch": 0.4567552289429056, "grad_norm": 2.1378769455831304, "learning_rate": 2.883333558922383e-06, "loss": 1.0271, "step": 4343 }, { "epoch": 0.4568603993847529, "grad_norm": 2.3821344983844495, "learning_rate": 2.88250859023877e-06, "loss": 0.9948, "step": 4344 }, { "epoch": 0.45696556982660025, "grad_norm": 2.5487726722527357, "learning_rate": 2.8816835789024783e-06, "loss": 1.0075, "step": 4345 }, { "epoch": 0.4570707402684476, "grad_norm": 3.0550451214956023, "learning_rate": 2.880858525005502e-06, "loss": 1.0063, "step": 4346 }, { "epoch": 0.4571759107102949, "grad_norm": 2.798668732759541, "learning_rate": 2.880033428639842e-06, "loss": 0.9896, "step": 4347 }, { "epoch": 0.45728108115214217, "grad_norm": 2.980102984068875, "learning_rate": 2.8792082898975028e-06, "loss": 1.03, "step": 4348 }, { "epoch": 0.4573862515939895, "grad_norm": 1.9173203998180592, "learning_rate": 2.8783831088704923e-06, "loss": 1.001, "step": 4349 }, { "epoch": 0.4574914220358368, "grad_norm": 2.2818375318772843, "learning_rate": 2.877557885650827e-06, "loss": 1.0082, "step": 4350 }, { "epoch": 0.45759659247768414, "grad_norm": 2.3754880401750844, "learning_rate": 2.876732620330524e-06, "loss": 1.0056, "step": 4351 }, { "epoch": 0.45770176291953146, "grad_norm": 2.6543669930498495, "learning_rate": 2.8759073130016073e-06, "loss": 0.9431, "step": 4352 }, { "epoch": 0.4578069333613788, "grad_norm": 2.6939766890582453, "learning_rate": 2.8750819637561045e-06, "loss": 0.9748, "step": 4353 }, { "epoch": 0.4579121038032261, "grad_norm": 1.9175919955114296, "learning_rate": 2.87425657268605e-06, "loss": 1.0176, "step": 4354 }, { "epoch": 0.45801727424507344, "grad_norm": 2.439681288815302, "learning_rate": 2.873431139883479e-06, "loss": 0.9385, "step": 4355 }, { "epoch": 0.45812244468692076, "grad_norm": 2.8212284142415505, "learning_rate": 2.872605665440436e-06, "loss": 0.9857, "step": 4356 }, { "epoch": 0.4582276151287681, "grad_norm": 2.1470640317341196, "learning_rate": 2.8717801494489673e-06, "loss": 0.9626, "step": 4357 }, { "epoch": 0.45833278557061535, "grad_norm": 2.2492255805625514, "learning_rate": 2.8709545920011233e-06, "loss": 1.0123, "step": 4358 }, { "epoch": 0.4584379560124627, "grad_norm": 2.0002199257891906, "learning_rate": 2.8701289931889602e-06, "loss": 1.0467, "step": 4359 }, { "epoch": 0.45854312645431, "grad_norm": 2.981959655065841, "learning_rate": 2.8693033531045395e-06, "loss": 0.953, "step": 4360 }, { "epoch": 0.4586482968961573, "grad_norm": 3.851394505625027, "learning_rate": 2.868477671839926e-06, "loss": 1.0175, "step": 4361 }, { "epoch": 0.45875346733800465, "grad_norm": 2.31415083896268, "learning_rate": 2.867651949487189e-06, "loss": 1.0186, "step": 4362 }, { "epoch": 0.458858637779852, "grad_norm": 3.8867757736496027, "learning_rate": 2.8668261861384045e-06, "loss": 1.0274, "step": 4363 }, { "epoch": 0.4589638082216993, "grad_norm": 2.1989997123351555, "learning_rate": 2.8660003818856506e-06, "loss": 0.9769, "step": 4364 }, { "epoch": 0.4590689786635466, "grad_norm": 2.0392697344733546, "learning_rate": 2.865174536821011e-06, "loss": 0.9493, "step": 4365 }, { "epoch": 0.45917414910539395, "grad_norm": 2.4063204089586097, "learning_rate": 2.864348651036574e-06, "loss": 0.9577, "step": 4366 }, { "epoch": 0.45927931954724127, "grad_norm": 2.285261233874211, "learning_rate": 2.8635227246244306e-06, "loss": 0.9948, "step": 4367 }, { "epoch": 0.45938448998908854, "grad_norm": 2.2549083660343383, "learning_rate": 2.8626967576766808e-06, "loss": 1.024, "step": 4368 }, { "epoch": 0.45948966043093586, "grad_norm": 2.408292400780605, "learning_rate": 2.8618707502854243e-06, "loss": 0.9919, "step": 4369 }, { "epoch": 0.4595948308727832, "grad_norm": 2.147791593115681, "learning_rate": 2.8610447025427685e-06, "loss": 0.9478, "step": 4370 }, { "epoch": 0.4597000013146305, "grad_norm": 2.0763303161056292, "learning_rate": 2.8602186145408235e-06, "loss": 1.0489, "step": 4371 }, { "epoch": 0.45980517175647784, "grad_norm": 2.849591364090468, "learning_rate": 2.859392486371705e-06, "loss": 0.989, "step": 4372 }, { "epoch": 0.45991034219832516, "grad_norm": 2.003930986515399, "learning_rate": 2.858566318127532e-06, "loss": 0.9761, "step": 4373 }, { "epoch": 0.4600155126401725, "grad_norm": 1.9945701603167976, "learning_rate": 2.8577401099004285e-06, "loss": 1.0204, "step": 4374 }, { "epoch": 0.4601206830820198, "grad_norm": 3.075579516233079, "learning_rate": 2.856913861782525e-06, "loss": 1.0004, "step": 4375 }, { "epoch": 0.46022585352386713, "grad_norm": 2.351283254441647, "learning_rate": 2.856087573865952e-06, "loss": 1.0144, "step": 4376 }, { "epoch": 0.46033102396571446, "grad_norm": 3.339158339951501, "learning_rate": 2.8552612462428497e-06, "loss": 1.006, "step": 4377 }, { "epoch": 0.4604361944075617, "grad_norm": 2.309435332828068, "learning_rate": 2.854434879005357e-06, "loss": 1.0221, "step": 4378 }, { "epoch": 0.46054136484940905, "grad_norm": 3.2211925972738333, "learning_rate": 2.853608472245624e-06, "loss": 1.0109, "step": 4379 }, { "epoch": 0.4606465352912564, "grad_norm": 2.1892482332044327, "learning_rate": 2.8527820260557986e-06, "loss": 1.0088, "step": 4380 }, { "epoch": 0.4607517057331037, "grad_norm": 2.410757032252132, "learning_rate": 2.851955540528036e-06, "loss": 0.9959, "step": 4381 }, { "epoch": 0.460856876174951, "grad_norm": 2.359864321372521, "learning_rate": 2.8511290157544976e-06, "loss": 1.0216, "step": 4382 }, { "epoch": 0.46096204661679835, "grad_norm": 2.1948898074829355, "learning_rate": 2.8503024518273455e-06, "loss": 0.989, "step": 4383 }, { "epoch": 0.4610672170586457, "grad_norm": 2.4426656861817997, "learning_rate": 2.849475848838749e-06, "loss": 0.9701, "step": 4384 }, { "epoch": 0.461172387500493, "grad_norm": 2.5319847009537946, "learning_rate": 2.84864920688088e-06, "loss": 1.0233, "step": 4385 }, { "epoch": 0.4612775579423403, "grad_norm": 2.133328823397027, "learning_rate": 2.847822526045917e-06, "loss": 0.9656, "step": 4386 }, { "epoch": 0.46138272838418765, "grad_norm": 2.639337179377181, "learning_rate": 2.8469958064260405e-06, "loss": 1.0025, "step": 4387 }, { "epoch": 0.46148789882603497, "grad_norm": 2.0901303479901516, "learning_rate": 2.846169048113435e-06, "loss": 0.9886, "step": 4388 }, { "epoch": 0.46159306926788224, "grad_norm": 2.7143229057209957, "learning_rate": 2.8453422512002925e-06, "loss": 1.0069, "step": 4389 }, { "epoch": 0.46169823970972956, "grad_norm": 2.2128312082996158, "learning_rate": 2.844515415778806e-06, "loss": 1.0177, "step": 4390 }, { "epoch": 0.4618034101515769, "grad_norm": 2.1207444779689135, "learning_rate": 2.843688541941174e-06, "loss": 1.0304, "step": 4391 }, { "epoch": 0.4619085805934242, "grad_norm": 2.6321023824250025, "learning_rate": 2.8428616297795998e-06, "loss": 0.9876, "step": 4392 }, { "epoch": 0.46201375103527154, "grad_norm": 2.5462159065092242, "learning_rate": 2.84203467938629e-06, "loss": 1.0227, "step": 4393 }, { "epoch": 0.46211892147711886, "grad_norm": 3.258389125088761, "learning_rate": 2.8412076908534574e-06, "loss": 0.983, "step": 4394 }, { "epoch": 0.4622240919189662, "grad_norm": 2.3212982515302603, "learning_rate": 2.840380664273316e-06, "loss": 0.9358, "step": 4395 }, { "epoch": 0.4623292623608135, "grad_norm": 2.4763075667324657, "learning_rate": 2.839553599738087e-06, "loss": 1.0202, "step": 4396 }, { "epoch": 0.46243443280266083, "grad_norm": 2.0484739974347534, "learning_rate": 2.838726497339993e-06, "loss": 1.0134, "step": 4397 }, { "epoch": 0.46253960324450816, "grad_norm": 1.9402963476386015, "learning_rate": 2.8378993571712638e-06, "loss": 0.9729, "step": 4398 }, { "epoch": 0.4626447736863554, "grad_norm": 1.8355127361037, "learning_rate": 2.8370721793241314e-06, "loss": 1.0101, "step": 4399 }, { "epoch": 0.46274994412820275, "grad_norm": 2.994356739022344, "learning_rate": 2.8362449638908324e-06, "loss": 1.0008, "step": 4400 }, { "epoch": 0.4628551145700501, "grad_norm": 2.910116749399226, "learning_rate": 2.8354177109636076e-06, "loss": 0.994, "step": 4401 }, { "epoch": 0.4629602850118974, "grad_norm": 2.223042107881437, "learning_rate": 2.834590420634703e-06, "loss": 1.016, "step": 4402 }, { "epoch": 0.4630654554537447, "grad_norm": 2.6324446425874073, "learning_rate": 2.8337630929963672e-06, "loss": 1.0376, "step": 4403 }, { "epoch": 0.46317062589559205, "grad_norm": 2.8252122929662153, "learning_rate": 2.832935728140853e-06, "loss": 0.9432, "step": 4404 }, { "epoch": 0.46327579633743937, "grad_norm": 2.0038921955516105, "learning_rate": 2.83210832616042e-06, "loss": 1.0019, "step": 4405 }, { "epoch": 0.4633809667792867, "grad_norm": 2.549003738454214, "learning_rate": 2.8312808871473275e-06, "loss": 1.0029, "step": 4406 }, { "epoch": 0.463486137221134, "grad_norm": 2.2351716118016474, "learning_rate": 2.830453411193843e-06, "loss": 0.9699, "step": 4407 }, { "epoch": 0.46359130766298134, "grad_norm": 2.8680061904201173, "learning_rate": 2.829625898392237e-06, "loss": 1.0212, "step": 4408 }, { "epoch": 0.4636964781048286, "grad_norm": 2.0089360810313908, "learning_rate": 2.828798348834782e-06, "loss": 1.0066, "step": 4409 }, { "epoch": 0.46380164854667594, "grad_norm": 2.127532937133814, "learning_rate": 2.827970762613757e-06, "loss": 0.9945, "step": 4410 }, { "epoch": 0.46390681898852326, "grad_norm": 2.089558020765553, "learning_rate": 2.827143139821444e-06, "loss": 1.0153, "step": 4411 }, { "epoch": 0.4640119894303706, "grad_norm": 3.252402836184958, "learning_rate": 2.82631548055013e-06, "loss": 0.9513, "step": 4412 }, { "epoch": 0.4641171598722179, "grad_norm": 2.135962227586247, "learning_rate": 2.8254877848921036e-06, "loss": 0.9607, "step": 4413 }, { "epoch": 0.46422233031406523, "grad_norm": 2.41667417366253, "learning_rate": 2.824660052939662e-06, "loss": 0.9908, "step": 4414 }, { "epoch": 0.46432750075591256, "grad_norm": 2.380362760812115, "learning_rate": 2.823832284785102e-06, "loss": 0.9779, "step": 4415 }, { "epoch": 0.4644326711977599, "grad_norm": 2.0345880558081944, "learning_rate": 2.8230044805207275e-06, "loss": 0.9734, "step": 4416 }, { "epoch": 0.4645378416396072, "grad_norm": 2.3167139827947776, "learning_rate": 2.8221766402388436e-06, "loss": 0.96, "step": 4417 }, { "epoch": 0.46464301208145453, "grad_norm": 2.3878293198539224, "learning_rate": 2.8213487640317615e-06, "loss": 0.97, "step": 4418 }, { "epoch": 0.4647481825233018, "grad_norm": 2.1973785313368426, "learning_rate": 2.820520851991796e-06, "loss": 0.9816, "step": 4419 }, { "epoch": 0.4648533529651491, "grad_norm": 3.0125702231483262, "learning_rate": 2.8196929042112652e-06, "loss": 1.0055, "step": 4420 }, { "epoch": 0.46495852340699645, "grad_norm": 2.3918412139996, "learning_rate": 2.8188649207824925e-06, "loss": 0.9632, "step": 4421 }, { "epoch": 0.4650636938488438, "grad_norm": 1.9278006868749584, "learning_rate": 2.8180369017978037e-06, "loss": 0.9645, "step": 4422 }, { "epoch": 0.4651688642906911, "grad_norm": 2.85997981754498, "learning_rate": 2.8172088473495306e-06, "loss": 0.9861, "step": 4423 }, { "epoch": 0.4652740347325384, "grad_norm": 2.4986220629370344, "learning_rate": 2.816380757530006e-06, "loss": 0.957, "step": 4424 }, { "epoch": 0.46537920517438575, "grad_norm": 2.3157951259747316, "learning_rate": 2.8155526324315704e-06, "loss": 0.9893, "step": 4425 }, { "epoch": 0.46548437561623307, "grad_norm": 2.9956280856636384, "learning_rate": 2.814724472146564e-06, "loss": 1.0325, "step": 4426 }, { "epoch": 0.4655895460580804, "grad_norm": 2.7269340487252345, "learning_rate": 2.813896276767334e-06, "loss": 0.9351, "step": 4427 }, { "epoch": 0.4656947164999277, "grad_norm": 2.4606920746541014, "learning_rate": 2.8130680463862315e-06, "loss": 1.0123, "step": 4428 }, { "epoch": 0.465799886941775, "grad_norm": 2.848249960105425, "learning_rate": 2.8122397810956086e-06, "loss": 0.9924, "step": 4429 }, { "epoch": 0.4659050573836223, "grad_norm": 2.8446842951304783, "learning_rate": 2.811411480987825e-06, "loss": 1.0287, "step": 4430 }, { "epoch": 0.46601022782546964, "grad_norm": 2.393650778819736, "learning_rate": 2.810583146155243e-06, "loss": 1.0055, "step": 4431 }, { "epoch": 0.46611539826731696, "grad_norm": 3.0125679338832065, "learning_rate": 2.8097547766902273e-06, "loss": 0.9402, "step": 4432 }, { "epoch": 0.4662205687091643, "grad_norm": 2.4891599650912886, "learning_rate": 2.8089263726851474e-06, "loss": 0.9905, "step": 4433 }, { "epoch": 0.4663257391510116, "grad_norm": 2.6495533894705616, "learning_rate": 2.8080979342323765e-06, "loss": 0.9873, "step": 4434 }, { "epoch": 0.46643090959285893, "grad_norm": 2.5985892014306375, "learning_rate": 2.8072694614242935e-06, "loss": 1.0312, "step": 4435 }, { "epoch": 0.46653608003470626, "grad_norm": 2.636695159781973, "learning_rate": 2.8064409543532776e-06, "loss": 1.0396, "step": 4436 }, { "epoch": 0.4666412504765536, "grad_norm": 3.244510424950399, "learning_rate": 2.805612413111716e-06, "loss": 0.9766, "step": 4437 }, { "epoch": 0.4667464209184009, "grad_norm": 2.2447570199099154, "learning_rate": 2.8047838377919952e-06, "loss": 0.9901, "step": 4438 }, { "epoch": 0.4668515913602482, "grad_norm": 2.5976977350088415, "learning_rate": 2.8039552284865094e-06, "loss": 0.9973, "step": 4439 }, { "epoch": 0.4669567618020955, "grad_norm": 3.2454932031526944, "learning_rate": 2.8031265852876537e-06, "loss": 1.0331, "step": 4440 }, { "epoch": 0.4670619322439428, "grad_norm": 2.523586811336105, "learning_rate": 2.8022979082878297e-06, "loss": 0.9869, "step": 4441 }, { "epoch": 0.46716710268579015, "grad_norm": 2.302211346608485, "learning_rate": 2.801469197579441e-06, "loss": 1.0341, "step": 4442 }, { "epoch": 0.46727227312763747, "grad_norm": 2.348259230268178, "learning_rate": 2.800640453254894e-06, "loss": 0.9683, "step": 4443 }, { "epoch": 0.4673774435694848, "grad_norm": 2.366754132665982, "learning_rate": 2.799811675406601e-06, "loss": 0.9587, "step": 4444 }, { "epoch": 0.4674826140113321, "grad_norm": 2.075592911455706, "learning_rate": 2.7989828641269778e-06, "loss": 1.0085, "step": 4445 }, { "epoch": 0.46758778445317944, "grad_norm": 2.6326964601948877, "learning_rate": 2.798154019508443e-06, "loss": 0.9919, "step": 4446 }, { "epoch": 0.46769295489502677, "grad_norm": 2.8302081829382835, "learning_rate": 2.7973251416434176e-06, "loss": 1.0534, "step": 4447 }, { "epoch": 0.4677981253368741, "grad_norm": 2.639359868066465, "learning_rate": 2.7964962306243305e-06, "loss": 1.0037, "step": 4448 }, { "epoch": 0.4679032957787214, "grad_norm": 2.292040628888014, "learning_rate": 2.7956672865436095e-06, "loss": 1.0244, "step": 4449 }, { "epoch": 0.4680084662205687, "grad_norm": 3.4389539874184334, "learning_rate": 2.794838309493689e-06, "loss": 1.0291, "step": 4450 }, { "epoch": 0.468113636662416, "grad_norm": 4.035449967348666, "learning_rate": 2.7940092995670075e-06, "loss": 1.0501, "step": 4451 }, { "epoch": 0.46821880710426333, "grad_norm": 2.7831049279874653, "learning_rate": 2.7931802568560053e-06, "loss": 1.0194, "step": 4452 }, { "epoch": 0.46832397754611066, "grad_norm": 1.8206417464439855, "learning_rate": 2.792351181453126e-06, "loss": 0.9495, "step": 4453 }, { "epoch": 0.468429147987958, "grad_norm": 1.9541921059032041, "learning_rate": 2.791522073450819e-06, "loss": 1.0107, "step": 4454 }, { "epoch": 0.4685343184298053, "grad_norm": 2.8892598367431086, "learning_rate": 2.790692932941537e-06, "loss": 1.0612, "step": 4455 }, { "epoch": 0.46863948887165263, "grad_norm": 3.171614211998949, "learning_rate": 2.7898637600177343e-06, "loss": 1.0538, "step": 4456 }, { "epoch": 0.46874465931349996, "grad_norm": 2.424092647751607, "learning_rate": 2.7890345547718707e-06, "loss": 1.0326, "step": 4457 }, { "epoch": 0.4688498297553473, "grad_norm": 1.8602198107845023, "learning_rate": 2.7882053172964085e-06, "loss": 0.9663, "step": 4458 }, { "epoch": 0.4689550001971946, "grad_norm": 2.3246688695244226, "learning_rate": 2.787376047683815e-06, "loss": 0.9459, "step": 4459 }, { "epoch": 0.4690601706390419, "grad_norm": 2.762737851787094, "learning_rate": 2.78654674602656e-06, "loss": 1.0057, "step": 4460 }, { "epoch": 0.4691653410808892, "grad_norm": 2.3224208280852427, "learning_rate": 2.7857174124171166e-06, "loss": 0.988, "step": 4461 }, { "epoch": 0.4692705115227365, "grad_norm": 1.6891392684017292, "learning_rate": 2.7848880469479623e-06, "loss": 1.0095, "step": 4462 }, { "epoch": 0.46937568196458385, "grad_norm": 2.3788041616512814, "learning_rate": 2.7840586497115767e-06, "loss": 1.0111, "step": 4463 }, { "epoch": 0.46948085240643117, "grad_norm": 2.5464866689627996, "learning_rate": 2.783229220800446e-06, "loss": 0.9748, "step": 4464 }, { "epoch": 0.4695860228482785, "grad_norm": 2.6905461256581114, "learning_rate": 2.7823997603070573e-06, "loss": 1.0209, "step": 4465 }, { "epoch": 0.4696911932901258, "grad_norm": 2.767917863313156, "learning_rate": 2.7815702683239002e-06, "loss": 1.0259, "step": 4466 }, { "epoch": 0.46979636373197314, "grad_norm": 2.347477742569025, "learning_rate": 2.7807407449434726e-06, "loss": 0.9851, "step": 4467 }, { "epoch": 0.46990153417382047, "grad_norm": 1.90764637954435, "learning_rate": 2.7799111902582697e-06, "loss": 1.0211, "step": 4468 }, { "epoch": 0.4700067046156678, "grad_norm": 2.0075620650917667, "learning_rate": 2.7790816043607953e-06, "loss": 0.987, "step": 4469 }, { "epoch": 0.47011187505751506, "grad_norm": 2.7682956389063973, "learning_rate": 2.7782519873435538e-06, "loss": 0.9998, "step": 4470 }, { "epoch": 0.4702170454993624, "grad_norm": 2.292023839054441, "learning_rate": 2.7774223392990544e-06, "loss": 1.0166, "step": 4471 }, { "epoch": 0.4703222159412097, "grad_norm": 3.2215154014236353, "learning_rate": 2.776592660319809e-06, "loss": 1.0095, "step": 4472 }, { "epoch": 0.47042738638305703, "grad_norm": 2.719209058008712, "learning_rate": 2.775762950498333e-06, "loss": 0.9651, "step": 4473 }, { "epoch": 0.47053255682490436, "grad_norm": 3.210513336735668, "learning_rate": 2.7749332099271466e-06, "loss": 1.0112, "step": 4474 }, { "epoch": 0.4706377272667517, "grad_norm": 2.6300858424014715, "learning_rate": 2.7741034386987707e-06, "loss": 1.0085, "step": 4475 }, { "epoch": 0.470742897708599, "grad_norm": 3.14495333627856, "learning_rate": 2.7732736369057326e-06, "loss": 0.9815, "step": 4476 }, { "epoch": 0.47084806815044633, "grad_norm": 2.5526520052973973, "learning_rate": 2.772443804640561e-06, "loss": 0.9963, "step": 4477 }, { "epoch": 0.47095323859229365, "grad_norm": 1.9617055562355878, "learning_rate": 2.771613941995789e-06, "loss": 1.0017, "step": 4478 }, { "epoch": 0.471058409034141, "grad_norm": 2.688578495945201, "learning_rate": 2.7707840490639517e-06, "loss": 0.9938, "step": 4479 }, { "epoch": 0.47116357947598825, "grad_norm": 2.4345239857502046, "learning_rate": 2.76995412593759e-06, "loss": 0.996, "step": 4480 }, { "epoch": 0.47126874991783557, "grad_norm": 3.6250029270038393, "learning_rate": 2.769124172709246e-06, "loss": 0.9917, "step": 4481 }, { "epoch": 0.4713739203596829, "grad_norm": 2.2005515373005156, "learning_rate": 2.7682941894714664e-06, "loss": 0.9691, "step": 4482 }, { "epoch": 0.4714790908015302, "grad_norm": 2.3295932635087224, "learning_rate": 2.7674641763168003e-06, "loss": 1.017, "step": 4483 }, { "epoch": 0.47158426124337754, "grad_norm": 3.0234424271551417, "learning_rate": 2.7666341333378005e-06, "loss": 0.9685, "step": 4484 }, { "epoch": 0.47168943168522487, "grad_norm": 2.486438862191606, "learning_rate": 2.7658040606270244e-06, "loss": 0.998, "step": 4485 }, { "epoch": 0.4717946021270722, "grad_norm": 2.675116684244302, "learning_rate": 2.7649739582770297e-06, "loss": 1.0076, "step": 4486 }, { "epoch": 0.4718997725689195, "grad_norm": 2.7072141444203908, "learning_rate": 2.7641438263803804e-06, "loss": 1.0033, "step": 4487 }, { "epoch": 0.47200494301076684, "grad_norm": 2.5861797482973516, "learning_rate": 2.7633136650296434e-06, "loss": 0.9744, "step": 4488 }, { "epoch": 0.47211011345261417, "grad_norm": 2.224720554082226, "learning_rate": 2.762483474317387e-06, "loss": 1.0217, "step": 4489 }, { "epoch": 0.47221528389446144, "grad_norm": 2.245262509992629, "learning_rate": 2.7616532543361834e-06, "loss": 0.9912, "step": 4490 }, { "epoch": 0.47232045433630876, "grad_norm": 2.7604293363117023, "learning_rate": 2.7608230051786094e-06, "loss": 0.9939, "step": 4491 }, { "epoch": 0.4724256247781561, "grad_norm": 2.4111390221536224, "learning_rate": 2.7599927269372453e-06, "loss": 0.9487, "step": 4492 }, { "epoch": 0.4725307952200034, "grad_norm": 1.891935956225179, "learning_rate": 2.759162419704671e-06, "loss": 0.9603, "step": 4493 }, { "epoch": 0.47263596566185073, "grad_norm": 1.8123928584801083, "learning_rate": 2.7583320835734743e-06, "loss": 0.9951, "step": 4494 }, { "epoch": 0.47274113610369806, "grad_norm": 2.3993983584972343, "learning_rate": 2.757501718636244e-06, "loss": 0.9939, "step": 4495 }, { "epoch": 0.4728463065455454, "grad_norm": 2.0288832292698746, "learning_rate": 2.7566713249855715e-06, "loss": 1.0161, "step": 4496 }, { "epoch": 0.4729514769873927, "grad_norm": 2.323445513583333, "learning_rate": 2.7558409027140525e-06, "loss": 0.9886, "step": 4497 }, { "epoch": 0.47305664742924003, "grad_norm": 2.68445355500332, "learning_rate": 2.7550104519142846e-06, "loss": 0.9494, "step": 4498 }, { "epoch": 0.47316181787108735, "grad_norm": 2.640136937856992, "learning_rate": 2.754179972678871e-06, "loss": 1.0562, "step": 4499 }, { "epoch": 0.4732669883129346, "grad_norm": 3.324792657639246, "learning_rate": 2.753349465100415e-06, "loss": 0.9979, "step": 4500 }, { "epoch": 0.47337215875478195, "grad_norm": 2.810264675470383, "learning_rate": 2.7525189292715264e-06, "loss": 1.0266, "step": 4501 }, { "epoch": 0.47347732919662927, "grad_norm": 2.003647833018789, "learning_rate": 2.751688365284816e-06, "loss": 1.0359, "step": 4502 }, { "epoch": 0.4735824996384766, "grad_norm": 2.7806320532702697, "learning_rate": 2.7508577732328975e-06, "loss": 1.0402, "step": 4503 }, { "epoch": 0.4736876700803239, "grad_norm": 1.713725247330321, "learning_rate": 2.750027153208388e-06, "loss": 1.0154, "step": 4504 }, { "epoch": 0.47379284052217124, "grad_norm": 2.8013510184260864, "learning_rate": 2.7491965053039084e-06, "loss": 1.026, "step": 4505 }, { "epoch": 0.47389801096401857, "grad_norm": 2.7346431525668917, "learning_rate": 2.7483658296120828e-06, "loss": 1.0076, "step": 4506 }, { "epoch": 0.4740031814058659, "grad_norm": 2.805919154601316, "learning_rate": 2.747535126225538e-06, "loss": 0.9873, "step": 4507 }, { "epoch": 0.4741083518477132, "grad_norm": 2.950564613008157, "learning_rate": 2.746704395236904e-06, "loss": 1.0062, "step": 4508 }, { "epoch": 0.47421352228956054, "grad_norm": 2.751482814652587, "learning_rate": 2.745873636738813e-06, "loss": 0.9653, "step": 4509 }, { "epoch": 0.47431869273140786, "grad_norm": 2.5665028667710517, "learning_rate": 2.7450428508239024e-06, "loss": 0.9883, "step": 4510 }, { "epoch": 0.47442386317325513, "grad_norm": 3.3938183374193205, "learning_rate": 2.7442120375848096e-06, "loss": 0.9968, "step": 4511 }, { "epoch": 0.47452903361510246, "grad_norm": 2.1396244469160774, "learning_rate": 2.7433811971141772e-06, "loss": 0.964, "step": 4512 }, { "epoch": 0.4746342040569498, "grad_norm": 2.34919376628943, "learning_rate": 2.742550329504651e-06, "loss": 0.9981, "step": 4513 }, { "epoch": 0.4747393744987971, "grad_norm": 2.9534207402307864, "learning_rate": 2.7417194348488786e-06, "loss": 0.9802, "step": 4514 }, { "epoch": 0.47484454494064443, "grad_norm": 3.164085329529094, "learning_rate": 2.7408885132395117e-06, "loss": 1.0135, "step": 4515 }, { "epoch": 0.47494971538249176, "grad_norm": 2.2533801639022406, "learning_rate": 2.7400575647692046e-06, "loss": 0.9697, "step": 4516 }, { "epoch": 0.4750548858243391, "grad_norm": 2.362830237698888, "learning_rate": 2.7392265895306142e-06, "loss": 1.0129, "step": 4517 }, { "epoch": 0.4751600562661864, "grad_norm": 3.088808161086836, "learning_rate": 2.7383955876164004e-06, "loss": 0.9999, "step": 4518 }, { "epoch": 0.47526522670803373, "grad_norm": 3.1253457323179767, "learning_rate": 2.737564559119227e-06, "loss": 1.0031, "step": 4519 }, { "epoch": 0.47537039714988105, "grad_norm": 2.366491108309749, "learning_rate": 2.7367335041317593e-06, "loss": 1.0039, "step": 4520 }, { "epoch": 0.4754755675917283, "grad_norm": 2.451828923555723, "learning_rate": 2.7359024227466668e-06, "loss": 0.9815, "step": 4521 }, { "epoch": 0.47558073803357565, "grad_norm": 2.0943618962936945, "learning_rate": 2.735071315056622e-06, "loss": 0.985, "step": 4522 }, { "epoch": 0.47568590847542297, "grad_norm": 2.978757177852028, "learning_rate": 2.734240181154299e-06, "loss": 1.0017, "step": 4523 }, { "epoch": 0.4757910789172703, "grad_norm": 2.880549752555551, "learning_rate": 2.733409021132377e-06, "loss": 0.9694, "step": 4524 }, { "epoch": 0.4758962493591176, "grad_norm": 1.9750573431598037, "learning_rate": 2.7325778350835353e-06, "loss": 0.9964, "step": 4525 }, { "epoch": 0.47600141980096494, "grad_norm": 2.672025240947568, "learning_rate": 2.7317466231004584e-06, "loss": 0.9814, "step": 4526 }, { "epoch": 0.47610659024281227, "grad_norm": 2.397919630397351, "learning_rate": 2.7309153852758335e-06, "loss": 1.0414, "step": 4527 }, { "epoch": 0.4762117606846596, "grad_norm": 3.589638718925268, "learning_rate": 2.730084121702348e-06, "loss": 0.9689, "step": 4528 }, { "epoch": 0.4763169311265069, "grad_norm": 2.383301359474508, "learning_rate": 2.7292528324726963e-06, "loss": 0.9526, "step": 4529 }, { "epoch": 0.47642210156835424, "grad_norm": 1.7141669642552233, "learning_rate": 2.7284215176795724e-06, "loss": 0.9945, "step": 4530 }, { "epoch": 0.4765272720102015, "grad_norm": 2.2151545678563083, "learning_rate": 2.7275901774156753e-06, "loss": 1.0156, "step": 4531 }, { "epoch": 0.47663244245204883, "grad_norm": 2.362263151552294, "learning_rate": 2.726758811773706e-06, "loss": 0.9864, "step": 4532 }, { "epoch": 0.47673761289389616, "grad_norm": 1.719772591675931, "learning_rate": 2.725927420846367e-06, "loss": 0.9876, "step": 4533 }, { "epoch": 0.4768427833357435, "grad_norm": 2.803143954819945, "learning_rate": 2.7250960047263658e-06, "loss": 1.0204, "step": 4534 }, { "epoch": 0.4769479537775908, "grad_norm": 1.9487171453914527, "learning_rate": 2.7242645635064107e-06, "loss": 0.9932, "step": 4535 }, { "epoch": 0.47705312421943813, "grad_norm": 2.208339642159797, "learning_rate": 2.7234330972792157e-06, "loss": 0.919, "step": 4536 }, { "epoch": 0.47715829466128545, "grad_norm": 2.2756389370959207, "learning_rate": 2.7226016061374934e-06, "loss": 1.0314, "step": 4537 }, { "epoch": 0.4772634651031328, "grad_norm": 3.099273899280179, "learning_rate": 2.7217700901739637e-06, "loss": 1.0085, "step": 4538 }, { "epoch": 0.4773686355449801, "grad_norm": 2.6724241504223354, "learning_rate": 2.720938549481346e-06, "loss": 0.9733, "step": 4539 }, { "epoch": 0.4774738059868274, "grad_norm": 2.502055605870524, "learning_rate": 2.720106984152364e-06, "loss": 0.9623, "step": 4540 }, { "epoch": 0.4775789764286747, "grad_norm": 2.0664420087785245, "learning_rate": 2.719275394279743e-06, "loss": 1.0268, "step": 4541 }, { "epoch": 0.477684146870522, "grad_norm": 1.8769297433106866, "learning_rate": 2.718443779956212e-06, "loss": 1.0, "step": 4542 }, { "epoch": 0.47778931731236934, "grad_norm": 2.458314650760072, "learning_rate": 2.7176121412745026e-06, "loss": 1.0313, "step": 4543 }, { "epoch": 0.47789448775421667, "grad_norm": 2.630804381758795, "learning_rate": 2.7167804783273495e-06, "loss": 0.9763, "step": 4544 }, { "epoch": 0.477999658196064, "grad_norm": 2.9835995375477418, "learning_rate": 2.715948791207489e-06, "loss": 0.9961, "step": 4545 }, { "epoch": 0.4781048286379113, "grad_norm": 2.5063558471282446, "learning_rate": 2.7151170800076603e-06, "loss": 1.0074, "step": 4546 }, { "epoch": 0.47820999907975864, "grad_norm": 2.827999520063029, "learning_rate": 2.714285344820606e-06, "loss": 0.9995, "step": 4547 }, { "epoch": 0.47831516952160597, "grad_norm": 2.8368404828358442, "learning_rate": 2.7134535857390714e-06, "loss": 0.9858, "step": 4548 }, { "epoch": 0.4784203399634533, "grad_norm": 2.807519023318031, "learning_rate": 2.7126218028558037e-06, "loss": 0.9945, "step": 4549 }, { "epoch": 0.4785255104053006, "grad_norm": 2.3253069567766764, "learning_rate": 2.711789996263554e-06, "loss": 0.9841, "step": 4550 }, { "epoch": 0.4786306808471479, "grad_norm": 2.400222728107831, "learning_rate": 2.7109581660550733e-06, "loss": 0.992, "step": 4551 }, { "epoch": 0.4787358512889952, "grad_norm": 1.8434853526832968, "learning_rate": 2.710126312323119e-06, "loss": 0.9798, "step": 4552 }, { "epoch": 0.47884102173084253, "grad_norm": 2.339371298345462, "learning_rate": 2.7092944351604482e-06, "loss": 0.9666, "step": 4553 }, { "epoch": 0.47894619217268986, "grad_norm": 2.0644879283853754, "learning_rate": 2.7084625346598232e-06, "loss": 0.9738, "step": 4554 }, { "epoch": 0.4790513626145372, "grad_norm": 3.0372309396395987, "learning_rate": 2.707630610914005e-06, "loss": 0.9611, "step": 4555 }, { "epoch": 0.4791565330563845, "grad_norm": 2.7578247233652973, "learning_rate": 2.706798664015761e-06, "loss": 1.0389, "step": 4556 }, { "epoch": 0.47926170349823183, "grad_norm": 3.0623439608759, "learning_rate": 2.70596669405786e-06, "loss": 1.0041, "step": 4557 }, { "epoch": 0.47936687394007915, "grad_norm": 2.4941925421930433, "learning_rate": 2.7051347011330716e-06, "loss": 0.9829, "step": 4558 }, { "epoch": 0.4794720443819265, "grad_norm": 3.0490408838472103, "learning_rate": 2.704302685334171e-06, "loss": 1.0366, "step": 4559 }, { "epoch": 0.4795772148237738, "grad_norm": 2.3813045311909553, "learning_rate": 2.7034706467539335e-06, "loss": 0.9829, "step": 4560 }, { "epoch": 0.47968238526562107, "grad_norm": 2.845538555861447, "learning_rate": 2.702638585485139e-06, "loss": 0.9978, "step": 4561 }, { "epoch": 0.4797875557074684, "grad_norm": 3.2438971299974955, "learning_rate": 2.701806501620568e-06, "loss": 1.0011, "step": 4562 }, { "epoch": 0.4798927261493157, "grad_norm": 2.3425851673049585, "learning_rate": 2.700974395253004e-06, "loss": 1.0052, "step": 4563 }, { "epoch": 0.47999789659116304, "grad_norm": 2.8884679088128324, "learning_rate": 2.7001422664752338e-06, "loss": 0.969, "step": 4564 }, { "epoch": 0.48010306703301037, "grad_norm": 2.7955695336255615, "learning_rate": 2.699310115380046e-06, "loss": 1.0197, "step": 4565 }, { "epoch": 0.4802082374748577, "grad_norm": 3.190960632752079, "learning_rate": 2.6984779420602324e-06, "loss": 1.0072, "step": 4566 }, { "epoch": 0.480313407916705, "grad_norm": 2.5976595875210506, "learning_rate": 2.697645746608586e-06, "loss": 0.9822, "step": 4567 }, { "epoch": 0.48041857835855234, "grad_norm": 2.0169697804572664, "learning_rate": 2.6968135291179036e-06, "loss": 0.9938, "step": 4568 }, { "epoch": 0.48052374880039966, "grad_norm": 2.9736885221988794, "learning_rate": 2.6959812896809843e-06, "loss": 1.0062, "step": 4569 }, { "epoch": 0.480628919242247, "grad_norm": 1.4651386680966676, "learning_rate": 2.6951490283906285e-06, "loss": 0.9801, "step": 4570 }, { "epoch": 0.4807340896840943, "grad_norm": 2.0786679973928197, "learning_rate": 2.6943167453396397e-06, "loss": 1.0099, "step": 4571 }, { "epoch": 0.4808392601259416, "grad_norm": 2.3231766743941784, "learning_rate": 2.6934844406208243e-06, "loss": 0.9628, "step": 4572 }, { "epoch": 0.4809444305677889, "grad_norm": 2.358885296144915, "learning_rate": 2.6926521143269914e-06, "loss": 1.016, "step": 4573 }, { "epoch": 0.48104960100963623, "grad_norm": 2.4699479211845197, "learning_rate": 2.6918197665509506e-06, "loss": 0.9918, "step": 4574 }, { "epoch": 0.48115477145148355, "grad_norm": 2.3154126074282173, "learning_rate": 2.690987397385516e-06, "loss": 1.0321, "step": 4575 }, { "epoch": 0.4812599418933309, "grad_norm": 2.6809121807728493, "learning_rate": 2.690155006923503e-06, "loss": 1.0223, "step": 4576 }, { "epoch": 0.4813651123351782, "grad_norm": 2.8409825087081644, "learning_rate": 2.6893225952577294e-06, "loss": 1.0256, "step": 4577 }, { "epoch": 0.4814702827770255, "grad_norm": 2.235796045311807, "learning_rate": 2.688490162481015e-06, "loss": 1.0431, "step": 4578 }, { "epoch": 0.48157545321887285, "grad_norm": 2.3738201066374747, "learning_rate": 2.6876577086861844e-06, "loss": 1.0046, "step": 4579 }, { "epoch": 0.4816806236607202, "grad_norm": 2.2521790012523413, "learning_rate": 2.686825233966061e-06, "loss": 1.0065, "step": 4580 }, { "epoch": 0.4817857941025675, "grad_norm": 2.680999712329763, "learning_rate": 2.6859927384134727e-06, "loss": 0.9777, "step": 4581 }, { "epoch": 0.48189096454441477, "grad_norm": 2.4999906862595678, "learning_rate": 2.685160222121249e-06, "loss": 1.0089, "step": 4582 }, { "epoch": 0.4819961349862621, "grad_norm": 1.5800457189959451, "learning_rate": 2.6843276851822233e-06, "loss": 0.9998, "step": 4583 }, { "epoch": 0.4821013054281094, "grad_norm": 2.6596489372561725, "learning_rate": 2.6834951276892273e-06, "loss": 0.9915, "step": 4584 }, { "epoch": 0.48220647586995674, "grad_norm": 2.701721201218792, "learning_rate": 2.6826625497351e-06, "loss": 0.9542, "step": 4585 }, { "epoch": 0.48231164631180407, "grad_norm": 2.755401995048588, "learning_rate": 2.681829951412679e-06, "loss": 1.0366, "step": 4586 }, { "epoch": 0.4824168167536514, "grad_norm": 2.6480098597209576, "learning_rate": 2.6809973328148058e-06, "loss": 0.9671, "step": 4587 }, { "epoch": 0.4825219871954987, "grad_norm": 3.376948552074513, "learning_rate": 2.6801646940343245e-06, "loss": 1.0068, "step": 4588 }, { "epoch": 0.48262715763734604, "grad_norm": 2.6687667258804013, "learning_rate": 2.67933203516408e-06, "loss": 0.9519, "step": 4589 }, { "epoch": 0.48273232807919336, "grad_norm": 1.648325858154635, "learning_rate": 2.6784993562969207e-06, "loss": 1.0098, "step": 4590 }, { "epoch": 0.4828374985210407, "grad_norm": 2.22475778779079, "learning_rate": 2.6776666575256965e-06, "loss": 1.0006, "step": 4591 }, { "epoch": 0.48294266896288796, "grad_norm": 2.3818188632021093, "learning_rate": 2.676833938943259e-06, "loss": 1.0092, "step": 4592 }, { "epoch": 0.4830478394047353, "grad_norm": 2.408034896184432, "learning_rate": 2.6760012006424647e-06, "loss": 0.998, "step": 4593 }, { "epoch": 0.4831530098465826, "grad_norm": 1.8677903443913986, "learning_rate": 2.6751684427161684e-06, "loss": 0.9938, "step": 4594 }, { "epoch": 0.48325818028842993, "grad_norm": 3.2959095169625856, "learning_rate": 2.6743356652572304e-06, "loss": 0.9984, "step": 4595 }, { "epoch": 0.48336335073027725, "grad_norm": 3.1404363237116604, "learning_rate": 2.673502868358512e-06, "loss": 1.0293, "step": 4596 }, { "epoch": 0.4834685211721246, "grad_norm": 2.5720636875148197, "learning_rate": 2.6726700521128757e-06, "loss": 1.0043, "step": 4597 }, { "epoch": 0.4835736916139719, "grad_norm": 2.363912157861966, "learning_rate": 2.671837216613187e-06, "loss": 0.974, "step": 4598 }, { "epoch": 0.4836788620558192, "grad_norm": 2.618997975520033, "learning_rate": 2.6710043619523128e-06, "loss": 1.0115, "step": 4599 }, { "epoch": 0.48378403249766655, "grad_norm": 2.2993164157541632, "learning_rate": 2.6701714882231256e-06, "loss": 0.9936, "step": 4600 }, { "epoch": 0.4838892029395139, "grad_norm": 2.7590745627058864, "learning_rate": 2.669338595518494e-06, "loss": 0.9784, "step": 4601 }, { "epoch": 0.48399437338136114, "grad_norm": 2.7995862197909447, "learning_rate": 2.6685056839312944e-06, "loss": 0.976, "step": 4602 }, { "epoch": 0.48409954382320847, "grad_norm": 2.571103128838185, "learning_rate": 2.667672753554402e-06, "loss": 0.9439, "step": 4603 }, { "epoch": 0.4842047142650558, "grad_norm": 1.8408434670986793, "learning_rate": 2.6668398044806946e-06, "loss": 0.9928, "step": 4604 }, { "epoch": 0.4843098847069031, "grad_norm": 2.135680773980519, "learning_rate": 2.666006836803054e-06, "loss": 1.0166, "step": 4605 }, { "epoch": 0.48441505514875044, "grad_norm": 2.2802759432661186, "learning_rate": 2.6651738506143603e-06, "loss": 1.0261, "step": 4606 }, { "epoch": 0.48452022559059776, "grad_norm": 2.220392952522567, "learning_rate": 2.6643408460075e-06, "loss": 1.0148, "step": 4607 }, { "epoch": 0.4846253960324451, "grad_norm": 2.5358133229575097, "learning_rate": 2.663507823075358e-06, "loss": 0.9698, "step": 4608 }, { "epoch": 0.4847305664742924, "grad_norm": 2.375319857666318, "learning_rate": 2.662674781910824e-06, "loss": 0.965, "step": 4609 }, { "epoch": 0.48483573691613974, "grad_norm": 2.213027783818853, "learning_rate": 2.6618417226067877e-06, "loss": 1.0248, "step": 4610 }, { "epoch": 0.48494090735798706, "grad_norm": 2.148489365280874, "learning_rate": 2.6610086452561423e-06, "loss": 0.9983, "step": 4611 }, { "epoch": 0.48504607779983433, "grad_norm": 2.5409516705940627, "learning_rate": 2.6601755499517826e-06, "loss": 0.9994, "step": 4612 }, { "epoch": 0.48515124824168165, "grad_norm": 2.363278403171672, "learning_rate": 2.6593424367866042e-06, "loss": 1.0018, "step": 4613 }, { "epoch": 0.485256418683529, "grad_norm": 2.1133831998111945, "learning_rate": 2.658509305853507e-06, "loss": 1.0488, "step": 4614 }, { "epoch": 0.4853615891253763, "grad_norm": 2.42236880112027, "learning_rate": 2.6576761572453903e-06, "loss": 0.9917, "step": 4615 }, { "epoch": 0.4854667595672236, "grad_norm": 2.772025975679734, "learning_rate": 2.6568429910551574e-06, "loss": 1.026, "step": 4616 }, { "epoch": 0.48557193000907095, "grad_norm": 2.5143222232504487, "learning_rate": 2.6560098073757122e-06, "loss": 0.9878, "step": 4617 }, { "epoch": 0.4856771004509183, "grad_norm": 2.073400372306121, "learning_rate": 2.6551766062999624e-06, "loss": 0.9997, "step": 4618 }, { "epoch": 0.4857822708927656, "grad_norm": 1.9864972586645486, "learning_rate": 2.6543433879208147e-06, "loss": 1.0091, "step": 4619 }, { "epoch": 0.4858874413346129, "grad_norm": 2.751681846656289, "learning_rate": 2.6535101523311806e-06, "loss": 0.9822, "step": 4620 }, { "epoch": 0.48599261177646025, "grad_norm": 2.313188102376116, "learning_rate": 2.6526768996239725e-06, "loss": 0.9828, "step": 4621 }, { "epoch": 0.4860977822183075, "grad_norm": 1.8303821979796089, "learning_rate": 2.651843629892103e-06, "loss": 1.0083, "step": 4622 }, { "epoch": 0.48620295266015484, "grad_norm": 1.6838185182278091, "learning_rate": 2.65101034322849e-06, "loss": 1.0225, "step": 4623 }, { "epoch": 0.48630812310200217, "grad_norm": 2.3240841743762193, "learning_rate": 2.6501770397260503e-06, "loss": 0.9677, "step": 4624 }, { "epoch": 0.4864132935438495, "grad_norm": 2.0762876706178477, "learning_rate": 2.6493437194777038e-06, "loss": 0.9767, "step": 4625 }, { "epoch": 0.4865184639856968, "grad_norm": 2.7261027597048373, "learning_rate": 2.648510382576373e-06, "loss": 1.0191, "step": 4626 }, { "epoch": 0.48662363442754414, "grad_norm": 2.5618337156491235, "learning_rate": 2.647677029114981e-06, "loss": 0.9701, "step": 4627 }, { "epoch": 0.48672880486939146, "grad_norm": 2.703008948964744, "learning_rate": 2.6468436591864534e-06, "loss": 1.0215, "step": 4628 }, { "epoch": 0.4868339753112388, "grad_norm": 2.0399542166728675, "learning_rate": 2.6460102728837156e-06, "loss": 1.0245, "step": 4629 }, { "epoch": 0.4869391457530861, "grad_norm": 1.561628488644531, "learning_rate": 2.6451768702996987e-06, "loss": 0.9581, "step": 4630 }, { "epoch": 0.48704431619493344, "grad_norm": 1.8892703105994535, "learning_rate": 2.644343451527333e-06, "loss": 0.9982, "step": 4631 }, { "epoch": 0.48714948663678076, "grad_norm": 2.6663357784839676, "learning_rate": 2.6435100166595516e-06, "loss": 0.9515, "step": 4632 }, { "epoch": 0.48725465707862803, "grad_norm": 2.71454659482501, "learning_rate": 2.6426765657892883e-06, "loss": 1.0505, "step": 4633 }, { "epoch": 0.48735982752047535, "grad_norm": 2.4346295714541246, "learning_rate": 2.64184309900948e-06, "loss": 0.9909, "step": 4634 }, { "epoch": 0.4874649979623227, "grad_norm": 2.884081836500342, "learning_rate": 2.641009616413064e-06, "loss": 1.0068, "step": 4635 }, { "epoch": 0.48757016840417, "grad_norm": 3.1422402377931924, "learning_rate": 2.6401761180929798e-06, "loss": 1.0288, "step": 4636 }, { "epoch": 0.4876753388460173, "grad_norm": 2.524963864157816, "learning_rate": 2.63934260414217e-06, "loss": 1.0381, "step": 4637 }, { "epoch": 0.48778050928786465, "grad_norm": 2.009876109861969, "learning_rate": 2.638509074653577e-06, "loss": 0.9859, "step": 4638 }, { "epoch": 0.487885679729712, "grad_norm": 2.0009976732169847, "learning_rate": 2.637675529720147e-06, "loss": 0.9932, "step": 4639 }, { "epoch": 0.4879908501715593, "grad_norm": 2.0651436252017388, "learning_rate": 2.6368419694348248e-06, "loss": 1.0164, "step": 4640 }, { "epoch": 0.4880960206134066, "grad_norm": 2.498095061638486, "learning_rate": 2.6360083938905612e-06, "loss": 1.0162, "step": 4641 }, { "epoch": 0.48820119105525395, "grad_norm": 2.305734098685978, "learning_rate": 2.635174803180305e-06, "loss": 1.0235, "step": 4642 }, { "epoch": 0.4883063614971012, "grad_norm": 1.9716369355731187, "learning_rate": 2.6343411973970075e-06, "loss": 0.9755, "step": 4643 }, { "epoch": 0.48841153193894854, "grad_norm": 2.453378278184514, "learning_rate": 2.633507576633623e-06, "loss": 1.0, "step": 4644 }, { "epoch": 0.48851670238079586, "grad_norm": 2.868869437829872, "learning_rate": 2.632673940983106e-06, "loss": 1.0469, "step": 4645 }, { "epoch": 0.4886218728226432, "grad_norm": 3.2665412006936227, "learning_rate": 2.631840290538415e-06, "loss": 0.998, "step": 4646 }, { "epoch": 0.4887270432644905, "grad_norm": 2.280933184203751, "learning_rate": 2.6310066253925067e-06, "loss": 0.9999, "step": 4647 }, { "epoch": 0.48883221370633784, "grad_norm": 2.389682732235324, "learning_rate": 2.6301729456383425e-06, "loss": 0.9938, "step": 4648 }, { "epoch": 0.48893738414818516, "grad_norm": 2.257361752513528, "learning_rate": 2.629339251368884e-06, "loss": 1.0198, "step": 4649 }, { "epoch": 0.4890425545900325, "grad_norm": 3.1433460780930336, "learning_rate": 2.6285055426770935e-06, "loss": 1.022, "step": 4650 }, { "epoch": 0.4891477250318798, "grad_norm": 2.1429693200460904, "learning_rate": 2.627671819655937e-06, "loss": 0.999, "step": 4651 }, { "epoch": 0.48925289547372713, "grad_norm": 2.3878543730972206, "learning_rate": 2.62683808239838e-06, "loss": 1.0023, "step": 4652 }, { "epoch": 0.4893580659155744, "grad_norm": 2.7691239385645683, "learning_rate": 2.6260043309973925e-06, "loss": 0.9745, "step": 4653 }, { "epoch": 0.4894632363574217, "grad_norm": 2.499584360819442, "learning_rate": 2.625170565545943e-06, "loss": 1.0039, "step": 4654 }, { "epoch": 0.48956840679926905, "grad_norm": 2.3633614479755547, "learning_rate": 2.624336786137003e-06, "loss": 0.9862, "step": 4655 }, { "epoch": 0.4896735772411164, "grad_norm": 1.7313247618731031, "learning_rate": 2.6235029928635457e-06, "loss": 1.0106, "step": 4656 }, { "epoch": 0.4897787476829637, "grad_norm": 2.176190189031968, "learning_rate": 2.6226691858185454e-06, "loss": 0.9691, "step": 4657 }, { "epoch": 0.489883918124811, "grad_norm": 3.0580118331682637, "learning_rate": 2.621835365094978e-06, "loss": 1.0148, "step": 4658 }, { "epoch": 0.48998908856665835, "grad_norm": 2.3238272613013864, "learning_rate": 2.6210015307858207e-06, "loss": 0.9906, "step": 4659 }, { "epoch": 0.4900942590085057, "grad_norm": 2.6038315918171047, "learning_rate": 2.620167682984052e-06, "loss": 0.982, "step": 4660 }, { "epoch": 0.490199429450353, "grad_norm": 3.173624125525723, "learning_rate": 2.6193338217826536e-06, "loss": 1.0282, "step": 4661 }, { "epoch": 0.4903045998922003, "grad_norm": 2.4485387128876868, "learning_rate": 2.6184999472746076e-06, "loss": 1.019, "step": 4662 }, { "epoch": 0.4904097703340476, "grad_norm": 2.2273055407932096, "learning_rate": 2.6176660595528967e-06, "loss": 1.0285, "step": 4663 }, { "epoch": 0.4905149407758949, "grad_norm": 2.3699531308839004, "learning_rate": 2.616832158710506e-06, "loss": 0.988, "step": 4664 }, { "epoch": 0.49062011121774224, "grad_norm": 2.5297910043499283, "learning_rate": 2.6159982448404227e-06, "loss": 1.0217, "step": 4665 }, { "epoch": 0.49072528165958956, "grad_norm": 2.29822026488786, "learning_rate": 2.615164318035633e-06, "loss": 0.9885, "step": 4666 }, { "epoch": 0.4908304521014369, "grad_norm": 2.3420596691918796, "learning_rate": 2.6143303783891278e-06, "loss": 1.0152, "step": 4667 }, { "epoch": 0.4909356225432842, "grad_norm": 2.6066387189345885, "learning_rate": 2.6134964259938967e-06, "loss": 1.0215, "step": 4668 }, { "epoch": 0.49104079298513154, "grad_norm": 2.579302269115743, "learning_rate": 2.6126624609429325e-06, "loss": 0.9935, "step": 4669 }, { "epoch": 0.49114596342697886, "grad_norm": 3.1504338937696814, "learning_rate": 2.611828483329229e-06, "loss": 0.9786, "step": 4670 }, { "epoch": 0.4912511338688262, "grad_norm": 2.3543913083014365, "learning_rate": 2.6109944932457813e-06, "loss": 0.9647, "step": 4671 }, { "epoch": 0.4913563043106735, "grad_norm": 2.586644471709751, "learning_rate": 2.610160490785584e-06, "loss": 1.0352, "step": 4672 }, { "epoch": 0.4914614747525208, "grad_norm": 2.135602362463421, "learning_rate": 2.6093264760416377e-06, "loss": 1.0012, "step": 4673 }, { "epoch": 0.4915666451943681, "grad_norm": 2.8951974224499795, "learning_rate": 2.6084924491069397e-06, "loss": 0.989, "step": 4674 }, { "epoch": 0.4916718156362154, "grad_norm": 2.670407042084373, "learning_rate": 2.60765841007449e-06, "loss": 0.9966, "step": 4675 }, { "epoch": 0.49177698607806275, "grad_norm": 2.5070143385941073, "learning_rate": 2.606824359037292e-06, "loss": 0.9903, "step": 4676 }, { "epoch": 0.4918821565199101, "grad_norm": 3.1780468314666246, "learning_rate": 2.605990296088348e-06, "loss": 0.9961, "step": 4677 }, { "epoch": 0.4919873269617574, "grad_norm": 3.0212424807578233, "learning_rate": 2.6051562213206633e-06, "loss": 1.0063, "step": 4678 }, { "epoch": 0.4920924974036047, "grad_norm": 2.1752773833274732, "learning_rate": 2.604322134827242e-06, "loss": 0.9692, "step": 4679 }, { "epoch": 0.49219766784545205, "grad_norm": 2.101331063752568, "learning_rate": 2.603488036701093e-06, "loss": 0.9853, "step": 4680 }, { "epoch": 0.49230283828729937, "grad_norm": 2.7504154040985385, "learning_rate": 2.6026539270352234e-06, "loss": 0.977, "step": 4681 }, { "epoch": 0.4924080087291467, "grad_norm": 2.283095151367684, "learning_rate": 2.6018198059226433e-06, "loss": 0.9963, "step": 4682 }, { "epoch": 0.492513179170994, "grad_norm": 3.6836502509688196, "learning_rate": 2.600985673456364e-06, "loss": 1.0127, "step": 4683 }, { "epoch": 0.4926183496128413, "grad_norm": 1.7196183166216403, "learning_rate": 2.6001515297293982e-06, "loss": 0.96, "step": 4684 }, { "epoch": 0.4927235200546886, "grad_norm": 2.0308139618206105, "learning_rate": 2.599317374834759e-06, "loss": 0.9981, "step": 4685 }, { "epoch": 0.49282869049653594, "grad_norm": 2.7245202798321766, "learning_rate": 2.59848320886546e-06, "loss": 1.0274, "step": 4686 }, { "epoch": 0.49293386093838326, "grad_norm": 2.2582233947427786, "learning_rate": 2.597649031914519e-06, "loss": 1.031, "step": 4687 }, { "epoch": 0.4930390313802306, "grad_norm": 2.875499760161925, "learning_rate": 2.5968148440749526e-06, "loss": 1.0076, "step": 4688 }, { "epoch": 0.4931442018220779, "grad_norm": 2.3873631179045076, "learning_rate": 2.595980645439778e-06, "loss": 1.0003, "step": 4689 }, { "epoch": 0.49324937226392523, "grad_norm": 1.832107108457019, "learning_rate": 2.595146436102016e-06, "loss": 0.9479, "step": 4690 }, { "epoch": 0.49335454270577256, "grad_norm": 2.1390781829339893, "learning_rate": 2.5943122161546874e-06, "loss": 0.9757, "step": 4691 }, { "epoch": 0.4934597131476199, "grad_norm": 2.678011051923976, "learning_rate": 2.593477985690815e-06, "loss": 1.0185, "step": 4692 }, { "epoch": 0.4935648835894672, "grad_norm": 2.655131659501105, "learning_rate": 2.59264374480342e-06, "loss": 0.9632, "step": 4693 }, { "epoch": 0.4936700540313145, "grad_norm": 2.387020654145177, "learning_rate": 2.5918094935855275e-06, "loss": 0.9935, "step": 4694 }, { "epoch": 0.4937752244731618, "grad_norm": 2.484922013212365, "learning_rate": 2.5909752321301633e-06, "loss": 0.9588, "step": 4695 }, { "epoch": 0.4938803949150091, "grad_norm": 3.105536483019693, "learning_rate": 2.590140960530355e-06, "loss": 0.9992, "step": 4696 }, { "epoch": 0.49398556535685645, "grad_norm": 2.4101649578596445, "learning_rate": 2.589306678879129e-06, "loss": 1.039, "step": 4697 }, { "epoch": 0.4940907357987038, "grad_norm": 2.383606369665489, "learning_rate": 2.5884723872695138e-06, "loss": 1.0335, "step": 4698 }, { "epoch": 0.4941959062405511, "grad_norm": 2.5503747168717874, "learning_rate": 2.587638085794541e-06, "loss": 1.0034, "step": 4699 }, { "epoch": 0.4943010766823984, "grad_norm": 2.0251324980808287, "learning_rate": 2.5868037745472408e-06, "loss": 0.9547, "step": 4700 }, { "epoch": 0.49440624712424575, "grad_norm": 2.2254913031928596, "learning_rate": 2.5859694536206455e-06, "loss": 1.018, "step": 4701 }, { "epoch": 0.49451141756609307, "grad_norm": 1.7896906696794816, "learning_rate": 2.5851351231077876e-06, "loss": 0.9859, "step": 4702 }, { "epoch": 0.4946165880079404, "grad_norm": 2.6959960617755168, "learning_rate": 2.5843007831017024e-06, "loss": 1.0311, "step": 4703 }, { "epoch": 0.49472175844978766, "grad_norm": 3.497101772230584, "learning_rate": 2.583466433695425e-06, "loss": 0.98, "step": 4704 }, { "epoch": 0.494826928891635, "grad_norm": 2.220969128258192, "learning_rate": 2.5826320749819917e-06, "loss": 1.0059, "step": 4705 }, { "epoch": 0.4949320993334823, "grad_norm": 2.477231282570078, "learning_rate": 2.5817977070544408e-06, "loss": 1.0255, "step": 4706 }, { "epoch": 0.49503726977532964, "grad_norm": 2.43917889374586, "learning_rate": 2.5809633300058095e-06, "loss": 1.0001, "step": 4707 }, { "epoch": 0.49514244021717696, "grad_norm": 3.203459546979018, "learning_rate": 2.580128943929139e-06, "loss": 0.9777, "step": 4708 }, { "epoch": 0.4952476106590243, "grad_norm": 2.072545796219728, "learning_rate": 2.579294548917467e-06, "loss": 0.9962, "step": 4709 }, { "epoch": 0.4953527811008716, "grad_norm": 2.5943743382810465, "learning_rate": 2.578460145063838e-06, "loss": 0.9941, "step": 4710 }, { "epoch": 0.49545795154271893, "grad_norm": 1.9553371503993335, "learning_rate": 2.577625732461293e-06, "loss": 0.9708, "step": 4711 }, { "epoch": 0.49556312198456626, "grad_norm": 2.5527175195599345, "learning_rate": 2.576791311202876e-06, "loss": 1.0314, "step": 4712 }, { "epoch": 0.4956682924264136, "grad_norm": 2.7301034938935858, "learning_rate": 2.575956881381631e-06, "loss": 1.0063, "step": 4713 }, { "epoch": 0.49577346286826085, "grad_norm": 2.3128243301857165, "learning_rate": 2.575122443090604e-06, "loss": 1.0161, "step": 4714 }, { "epoch": 0.4958786333101082, "grad_norm": 2.1154594017025357, "learning_rate": 2.574287996422841e-06, "loss": 1.0245, "step": 4715 }, { "epoch": 0.4959838037519555, "grad_norm": 2.2840660853868533, "learning_rate": 2.573453541471389e-06, "loss": 1.0142, "step": 4716 }, { "epoch": 0.4960889741938028, "grad_norm": 2.5091203779297104, "learning_rate": 2.572619078329297e-06, "loss": 0.9737, "step": 4717 }, { "epoch": 0.49619414463565015, "grad_norm": 2.004908508620825, "learning_rate": 2.571784607089613e-06, "loss": 0.9905, "step": 4718 }, { "epoch": 0.49629931507749747, "grad_norm": 2.525170667494257, "learning_rate": 2.570950127845388e-06, "loss": 0.9913, "step": 4719 }, { "epoch": 0.4964044855193448, "grad_norm": 2.3933251418961414, "learning_rate": 2.5701156406896726e-06, "loss": 1.0001, "step": 4720 }, { "epoch": 0.4965096559611921, "grad_norm": 2.7182770035408166, "learning_rate": 2.5692811457155186e-06, "loss": 0.9676, "step": 4721 }, { "epoch": 0.49661482640303944, "grad_norm": 2.8066431382335386, "learning_rate": 2.5684466430159794e-06, "loss": 0.9872, "step": 4722 }, { "epoch": 0.49671999684488677, "grad_norm": 2.255340942233427, "learning_rate": 2.5676121326841063e-06, "loss": 0.9676, "step": 4723 }, { "epoch": 0.49682516728673404, "grad_norm": 2.0906594545737267, "learning_rate": 2.566777614812956e-06, "loss": 0.9905, "step": 4724 }, { "epoch": 0.49693033772858136, "grad_norm": 3.0950706282178184, "learning_rate": 2.5659430894955827e-06, "loss": 1.0, "step": 4725 }, { "epoch": 0.4970355081704287, "grad_norm": 2.7642796610888065, "learning_rate": 2.5651085568250426e-06, "loss": 0.9555, "step": 4726 }, { "epoch": 0.497140678612276, "grad_norm": 2.584998095593244, "learning_rate": 2.564274016894393e-06, "loss": 0.987, "step": 4727 }, { "epoch": 0.49724584905412333, "grad_norm": 2.688374289525621, "learning_rate": 2.5634394697966915e-06, "loss": 0.9615, "step": 4728 }, { "epoch": 0.49735101949597066, "grad_norm": 2.9732686786827056, "learning_rate": 2.562604915624996e-06, "loss": 0.9954, "step": 4729 }, { "epoch": 0.497456189937818, "grad_norm": 2.6909679945625635, "learning_rate": 2.5617703544723653e-06, "loss": 0.9819, "step": 4730 }, { "epoch": 0.4975613603796653, "grad_norm": 2.397135959093128, "learning_rate": 2.5609357864318614e-06, "loss": 1.0235, "step": 4731 }, { "epoch": 0.49766653082151263, "grad_norm": 2.7889907201615185, "learning_rate": 2.5601012115965425e-06, "loss": 1.0192, "step": 4732 }, { "epoch": 0.49777170126335996, "grad_norm": 2.4282752934234417, "learning_rate": 2.559266630059473e-06, "loss": 1.0139, "step": 4733 }, { "epoch": 0.4978768717052072, "grad_norm": 2.8902978095243874, "learning_rate": 2.5584320419137127e-06, "loss": 1.0143, "step": 4734 }, { "epoch": 0.49798204214705455, "grad_norm": 3.286409074951358, "learning_rate": 2.5575974472523273e-06, "loss": 1.0451, "step": 4735 }, { "epoch": 0.4980872125889019, "grad_norm": 2.009314598243189, "learning_rate": 2.556762846168378e-06, "loss": 0.9562, "step": 4736 }, { "epoch": 0.4981923830307492, "grad_norm": 1.832895791185105, "learning_rate": 2.5559282387549305e-06, "loss": 1.0244, "step": 4737 }, { "epoch": 0.4982975534725965, "grad_norm": 2.1324340102401798, "learning_rate": 2.5550936251050503e-06, "loss": 1.0173, "step": 4738 }, { "epoch": 0.49840272391444385, "grad_norm": 2.608030569296082, "learning_rate": 2.5542590053118022e-06, "loss": 0.986, "step": 4739 }, { "epoch": 0.49850789435629117, "grad_norm": 2.7802760127585473, "learning_rate": 2.553424379468254e-06, "loss": 1.0234, "step": 4740 }, { "epoch": 0.4986130647981385, "grad_norm": 2.7173683170964082, "learning_rate": 2.5525897476674722e-06, "loss": 1.0201, "step": 4741 }, { "epoch": 0.4987182352399858, "grad_norm": 2.67038694261878, "learning_rate": 2.5517551100025257e-06, "loss": 1.004, "step": 4742 }, { "epoch": 0.49882340568183314, "grad_norm": 2.9249794319362192, "learning_rate": 2.550920466566483e-06, "loss": 1.011, "step": 4743 }, { "epoch": 0.49892857612368047, "grad_norm": 2.6543517747008405, "learning_rate": 2.5500858174524105e-06, "loss": 0.9933, "step": 4744 }, { "epoch": 0.49903374656552774, "grad_norm": 2.3829909949524537, "learning_rate": 2.5492511627533816e-06, "loss": 1.0197, "step": 4745 }, { "epoch": 0.49913891700737506, "grad_norm": 2.850982046315611, "learning_rate": 2.548416502562465e-06, "loss": 0.9597, "step": 4746 }, { "epoch": 0.4992440874492224, "grad_norm": 2.1710344138345365, "learning_rate": 2.5475818369727328e-06, "loss": 0.9962, "step": 4747 }, { "epoch": 0.4993492578910697, "grad_norm": 2.3265309831025265, "learning_rate": 2.5467471660772557e-06, "loss": 1.0114, "step": 4748 }, { "epoch": 0.49945442833291703, "grad_norm": 2.701708578526532, "learning_rate": 2.545912489969107e-06, "loss": 1.0285, "step": 4749 }, { "epoch": 0.49955959877476436, "grad_norm": 2.650784964191637, "learning_rate": 2.5450778087413588e-06, "loss": 0.9867, "step": 4750 }, { "epoch": 0.4996647692166117, "grad_norm": 2.5087138546199355, "learning_rate": 2.5442431224870847e-06, "loss": 1.0158, "step": 4751 }, { "epoch": 0.499769939658459, "grad_norm": 2.995804939311976, "learning_rate": 2.5434084312993582e-06, "loss": 1.0099, "step": 4752 }, { "epoch": 0.49987511010030633, "grad_norm": 2.6588509171871553, "learning_rate": 2.542573735271255e-06, "loss": 1.0047, "step": 4753 }, { "epoch": 0.49998028054215365, "grad_norm": 1.8191787178519514, "learning_rate": 2.5417390344958494e-06, "loss": 1.0107, "step": 4754 }, { "epoch": 0.5000854509840009, "grad_norm": 1.8754132743779173, "learning_rate": 2.5409043290662173e-06, "loss": 0.988, "step": 4755 }, { "epoch": 0.5001906214258482, "grad_norm": 2.631056408985051, "learning_rate": 2.5400696190754347e-06, "loss": 1.0357, "step": 4756 }, { "epoch": 0.5002957918676956, "grad_norm": 2.032133232558368, "learning_rate": 2.5392349046165783e-06, "loss": 0.9912, "step": 4757 }, { "epoch": 0.5004009623095429, "grad_norm": 2.0586532168086187, "learning_rate": 2.5384001857827256e-06, "loss": 0.9694, "step": 4758 }, { "epoch": 0.5005061327513902, "grad_norm": 2.644307665365883, "learning_rate": 2.5375654626669537e-06, "loss": 0.9973, "step": 4759 }, { "epoch": 0.5006113031932375, "grad_norm": 1.773857858460592, "learning_rate": 2.5367307353623404e-06, "loss": 0.9653, "step": 4760 }, { "epoch": 0.5007164736350849, "grad_norm": 2.3144669620091585, "learning_rate": 2.5358960039619653e-06, "loss": 0.9818, "step": 4761 }, { "epoch": 0.5008216440769322, "grad_norm": 3.4633187371850513, "learning_rate": 2.535061268558906e-06, "loss": 1.0327, "step": 4762 }, { "epoch": 0.5009268145187795, "grad_norm": 1.7464824159525518, "learning_rate": 2.5342265292462437e-06, "loss": 0.9835, "step": 4763 }, { "epoch": 0.5010319849606268, "grad_norm": 2.5356175046255336, "learning_rate": 2.5333917861170576e-06, "loss": 0.9872, "step": 4764 }, { "epoch": 0.5011371554024742, "grad_norm": 2.4355449363964503, "learning_rate": 2.5325570392644282e-06, "loss": 1.0068, "step": 4765 }, { "epoch": 0.5012423258443215, "grad_norm": 2.415671397242718, "learning_rate": 2.531722288781436e-06, "loss": 0.9525, "step": 4766 }, { "epoch": 0.5013474962861688, "grad_norm": 3.179290978551529, "learning_rate": 2.5308875347611613e-06, "loss": 1.0196, "step": 4767 }, { "epoch": 0.5014526667280161, "grad_norm": 2.0301483199514765, "learning_rate": 2.530052777296687e-06, "loss": 1.0312, "step": 4768 }, { "epoch": 0.5015578371698635, "grad_norm": 1.7988911344950873, "learning_rate": 2.5292180164810944e-06, "loss": 0.9735, "step": 4769 }, { "epoch": 0.5016630076117107, "grad_norm": 2.91048308505893, "learning_rate": 2.5283832524074657e-06, "loss": 1.0213, "step": 4770 }, { "epoch": 0.501768178053558, "grad_norm": 2.4229460616322136, "learning_rate": 2.5275484851688848e-06, "loss": 0.9999, "step": 4771 }, { "epoch": 0.5018733484954053, "grad_norm": 2.4418059020235003, "learning_rate": 2.5267137148584335e-06, "loss": 0.9837, "step": 4772 }, { "epoch": 0.5019785189372526, "grad_norm": 2.3825700805364, "learning_rate": 2.5258789415691947e-06, "loss": 1.039, "step": 4773 }, { "epoch": 0.5020836893791, "grad_norm": 2.5170794669290286, "learning_rate": 2.5250441653942536e-06, "loss": 0.9986, "step": 4774 }, { "epoch": 0.5021888598209473, "grad_norm": 2.894554710829124, "learning_rate": 2.5242093864266936e-06, "loss": 0.9848, "step": 4775 }, { "epoch": 0.5022940302627946, "grad_norm": 2.3297677080374064, "learning_rate": 2.5233746047595984e-06, "loss": 0.9898, "step": 4776 }, { "epoch": 0.502399200704642, "grad_norm": 2.7131245724405604, "learning_rate": 2.5225398204860534e-06, "loss": 1.0009, "step": 4777 }, { "epoch": 0.5025043711464893, "grad_norm": 2.264411534263541, "learning_rate": 2.5217050336991434e-06, "loss": 0.9784, "step": 4778 }, { "epoch": 0.5026095415883366, "grad_norm": 2.2425268597601833, "learning_rate": 2.520870244491954e-06, "loss": 0.9914, "step": 4779 }, { "epoch": 0.5027147120301839, "grad_norm": 1.5512671176052935, "learning_rate": 2.5200354529575693e-06, "loss": 0.9783, "step": 4780 }, { "epoch": 0.5028198824720312, "grad_norm": 2.4180849734923515, "learning_rate": 2.5192006591890767e-06, "loss": 1.0115, "step": 4781 }, { "epoch": 0.5029250529138786, "grad_norm": 2.0161358166931813, "learning_rate": 2.5183658632795614e-06, "loss": 1.011, "step": 4782 }, { "epoch": 0.5030302233557259, "grad_norm": 2.9226670044367222, "learning_rate": 2.5175310653221092e-06, "loss": 0.9739, "step": 4783 }, { "epoch": 0.5031353937975732, "grad_norm": 2.421297281935595, "learning_rate": 2.5166962654098075e-06, "loss": 1.0089, "step": 4784 }, { "epoch": 0.5032405642394205, "grad_norm": 1.820279687289599, "learning_rate": 2.515861463635742e-06, "loss": 1.0076, "step": 4785 }, { "epoch": 0.5033457346812679, "grad_norm": 2.13363221480922, "learning_rate": 2.515026660093e-06, "loss": 1.0352, "step": 4786 }, { "epoch": 0.5034509051231152, "grad_norm": 2.2192281908305596, "learning_rate": 2.51419185487467e-06, "loss": 0.9947, "step": 4787 }, { "epoch": 0.5035560755649625, "grad_norm": 2.6940645632774833, "learning_rate": 2.5133570480738367e-06, "loss": 0.9647, "step": 4788 }, { "epoch": 0.5036612460068098, "grad_norm": 2.92639520094335, "learning_rate": 2.5125222397835893e-06, "loss": 0.9956, "step": 4789 }, { "epoch": 0.503766416448657, "grad_norm": 1.796576628883353, "learning_rate": 2.511687430097014e-06, "loss": 0.9822, "step": 4790 }, { "epoch": 0.5038715868905044, "grad_norm": 2.66563998878901, "learning_rate": 2.5108526191072e-06, "loss": 0.9973, "step": 4791 }, { "epoch": 0.5039767573323517, "grad_norm": 1.8895115699475404, "learning_rate": 2.5100178069072347e-06, "loss": 0.9552, "step": 4792 }, { "epoch": 0.504081927774199, "grad_norm": 3.798229950568707, "learning_rate": 2.5091829935902063e-06, "loss": 0.9752, "step": 4793 }, { "epoch": 0.5041870982160463, "grad_norm": 2.8809636374230667, "learning_rate": 2.508348179249203e-06, "loss": 0.983, "step": 4794 }, { "epoch": 0.5042922686578937, "grad_norm": 2.5746810183718436, "learning_rate": 2.5075133639773126e-06, "loss": 0.9668, "step": 4795 }, { "epoch": 0.504397439099741, "grad_norm": 1.8118205407198673, "learning_rate": 2.506678547867623e-06, "loss": 0.999, "step": 4796 }, { "epoch": 0.5045026095415883, "grad_norm": 1.5885999010984382, "learning_rate": 2.5058437310132244e-06, "loss": 0.9747, "step": 4797 }, { "epoch": 0.5046077799834356, "grad_norm": 2.1825556187323114, "learning_rate": 2.5050089135072044e-06, "loss": 0.9754, "step": 4798 }, { "epoch": 0.504712950425283, "grad_norm": 2.1535118451086257, "learning_rate": 2.5041740954426508e-06, "loss": 1.0055, "step": 4799 }, { "epoch": 0.5048181208671303, "grad_norm": 2.1050204876713177, "learning_rate": 2.5033392769126543e-06, "loss": 1.0276, "step": 4800 }, { "epoch": 0.5049232913089776, "grad_norm": 2.139583763580259, "learning_rate": 2.5025044580103012e-06, "loss": 1.0135, "step": 4801 }, { "epoch": 0.5050284617508249, "grad_norm": 2.1492851865676337, "learning_rate": 2.5016696388286827e-06, "loss": 1.014, "step": 4802 }, { "epoch": 0.5051336321926723, "grad_norm": 2.1397490291747014, "learning_rate": 2.5008348194608855e-06, "loss": 0.991, "step": 4803 }, { "epoch": 0.5052388026345196, "grad_norm": 1.9983819330325248, "learning_rate": 2.5e-06, "loss": 1.009, "step": 4804 }, { "epoch": 0.5053439730763669, "grad_norm": 2.5956563261204013, "learning_rate": 2.499165180539115e-06, "loss": 0.9449, "step": 4805 }, { "epoch": 0.5054491435182142, "grad_norm": 2.1548335567068073, "learning_rate": 2.498330361171318e-06, "loss": 0.9516, "step": 4806 }, { "epoch": 0.5055543139600616, "grad_norm": 2.73508525509217, "learning_rate": 2.497495541989699e-06, "loss": 0.9602, "step": 4807 }, { "epoch": 0.5056594844019089, "grad_norm": 2.3580994201959253, "learning_rate": 2.496660723087347e-06, "loss": 0.9723, "step": 4808 }, { "epoch": 0.5057646548437562, "grad_norm": 3.0042626219250823, "learning_rate": 2.49582590455735e-06, "loss": 0.9715, "step": 4809 }, { "epoch": 0.5058698252856035, "grad_norm": 2.3224530834303185, "learning_rate": 2.494991086492797e-06, "loss": 1.0099, "step": 4810 }, { "epoch": 0.5059749957274507, "grad_norm": 2.5954016653855962, "learning_rate": 2.4941562689867755e-06, "loss": 1.0442, "step": 4811 }, { "epoch": 0.5060801661692981, "grad_norm": 2.561011878419228, "learning_rate": 2.493321452132377e-06, "loss": 0.9306, "step": 4812 }, { "epoch": 0.5061853366111454, "grad_norm": 2.6220115240229416, "learning_rate": 2.4924866360226883e-06, "loss": 0.9854, "step": 4813 }, { "epoch": 0.5062905070529927, "grad_norm": 2.529146248465547, "learning_rate": 2.4916518207507977e-06, "loss": 1.0399, "step": 4814 }, { "epoch": 0.50639567749484, "grad_norm": 2.308532895773023, "learning_rate": 2.490817006409794e-06, "loss": 0.974, "step": 4815 }, { "epoch": 0.5065008479366874, "grad_norm": 2.8751837068490085, "learning_rate": 2.4899821930927653e-06, "loss": 0.9367, "step": 4816 }, { "epoch": 0.5066060183785347, "grad_norm": 1.8788646946917793, "learning_rate": 2.4891473808928006e-06, "loss": 0.9854, "step": 4817 }, { "epoch": 0.506711188820382, "grad_norm": 2.1679354845155387, "learning_rate": 2.488312569902987e-06, "loss": 0.9615, "step": 4818 }, { "epoch": 0.5068163592622293, "grad_norm": 2.3508798049934403, "learning_rate": 2.487477760216412e-06, "loss": 1.0156, "step": 4819 }, { "epoch": 0.5069215297040767, "grad_norm": 1.7607835727712602, "learning_rate": 2.4866429519261646e-06, "loss": 0.994, "step": 4820 }, { "epoch": 0.507026700145924, "grad_norm": 3.0523263238936593, "learning_rate": 2.4858081451253306e-06, "loss": 0.975, "step": 4821 }, { "epoch": 0.5071318705877713, "grad_norm": 2.3526180310210454, "learning_rate": 2.4849733399070002e-06, "loss": 0.9872, "step": 4822 }, { "epoch": 0.5072370410296186, "grad_norm": 2.263463303823174, "learning_rate": 2.484138536364259e-06, "loss": 0.9537, "step": 4823 }, { "epoch": 0.507342211471466, "grad_norm": 2.1671169893440982, "learning_rate": 2.4833037345901937e-06, "loss": 0.9872, "step": 4824 }, { "epoch": 0.5074473819133133, "grad_norm": 2.73902112314044, "learning_rate": 2.482468934677891e-06, "loss": 1.0494, "step": 4825 }, { "epoch": 0.5075525523551606, "grad_norm": 2.6521521906637107, "learning_rate": 2.481634136720439e-06, "loss": 0.9943, "step": 4826 }, { "epoch": 0.5076577227970079, "grad_norm": 3.004342981120889, "learning_rate": 2.4807993408109237e-06, "loss": 0.9704, "step": 4827 }, { "epoch": 0.5077628932388553, "grad_norm": 3.205741832679773, "learning_rate": 2.479964547042431e-06, "loss": 0.9874, "step": 4828 }, { "epoch": 0.5078680636807026, "grad_norm": 3.0608467513431106, "learning_rate": 2.479129755508047e-06, "loss": 1.0236, "step": 4829 }, { "epoch": 0.5079732341225499, "grad_norm": 2.2772013134452402, "learning_rate": 2.478294966300857e-06, "loss": 0.993, "step": 4830 }, { "epoch": 0.5080784045643971, "grad_norm": 2.143379541365547, "learning_rate": 2.477460179513947e-06, "loss": 0.9831, "step": 4831 }, { "epoch": 0.5081835750062444, "grad_norm": 2.086617872315913, "learning_rate": 2.4766253952404024e-06, "loss": 0.9932, "step": 4832 }, { "epoch": 0.5082887454480918, "grad_norm": 2.6508833715078386, "learning_rate": 2.4757906135733077e-06, "loss": 1.0029, "step": 4833 }, { "epoch": 0.5083939158899391, "grad_norm": 2.8727794559719513, "learning_rate": 2.4749558346057464e-06, "loss": 1.0128, "step": 4834 }, { "epoch": 0.5084990863317864, "grad_norm": 2.1203097915270797, "learning_rate": 2.4741210584308053e-06, "loss": 0.9912, "step": 4835 }, { "epoch": 0.5086042567736337, "grad_norm": 2.6936248414531594, "learning_rate": 2.4732862851415674e-06, "loss": 0.9887, "step": 4836 }, { "epoch": 0.5087094272154811, "grad_norm": 2.388064075424561, "learning_rate": 2.472451514831116e-06, "loss": 0.9818, "step": 4837 }, { "epoch": 0.5088145976573284, "grad_norm": 2.4879710422290318, "learning_rate": 2.4716167475925356e-06, "loss": 1.0028, "step": 4838 }, { "epoch": 0.5089197680991757, "grad_norm": 2.35770735993766, "learning_rate": 2.470781983518906e-06, "loss": 1.0071, "step": 4839 }, { "epoch": 0.509024938541023, "grad_norm": 2.0072570921567605, "learning_rate": 2.4699472227033137e-06, "loss": 1.0177, "step": 4840 }, { "epoch": 0.5091301089828704, "grad_norm": 2.926954873115335, "learning_rate": 2.46911246523884e-06, "loss": 0.9914, "step": 4841 }, { "epoch": 0.5092352794247177, "grad_norm": 1.8985162910533802, "learning_rate": 2.4682777112185657e-06, "loss": 0.96, "step": 4842 }, { "epoch": 0.509340449866565, "grad_norm": 2.961749775540693, "learning_rate": 2.467442960735573e-06, "loss": 1.011, "step": 4843 }, { "epoch": 0.5094456203084123, "grad_norm": 2.828236340352394, "learning_rate": 2.466608213882943e-06, "loss": 1.0186, "step": 4844 }, { "epoch": 0.5095507907502597, "grad_norm": 2.933167988889152, "learning_rate": 2.4657734707537567e-06, "loss": 0.997, "step": 4845 }, { "epoch": 0.509655961192107, "grad_norm": 2.4675557778050945, "learning_rate": 2.464938731441095e-06, "loss": 0.947, "step": 4846 }, { "epoch": 0.5097611316339543, "grad_norm": 2.084400197522081, "learning_rate": 2.464103996038036e-06, "loss": 0.9936, "step": 4847 }, { "epoch": 0.5098663020758016, "grad_norm": 2.5000064897079946, "learning_rate": 2.46326926463766e-06, "loss": 0.9836, "step": 4848 }, { "epoch": 0.509971472517649, "grad_norm": 2.4556021731452953, "learning_rate": 2.462434537333047e-06, "loss": 0.9781, "step": 4849 }, { "epoch": 0.5100766429594963, "grad_norm": 3.10178764217554, "learning_rate": 2.461599814217275e-06, "loss": 0.9933, "step": 4850 }, { "epoch": 0.5101818134013435, "grad_norm": 2.50622015636115, "learning_rate": 2.4607650953834225e-06, "loss": 1.0053, "step": 4851 }, { "epoch": 0.5102869838431908, "grad_norm": 2.9146926707614864, "learning_rate": 2.459930380924566e-06, "loss": 1.0261, "step": 4852 }, { "epoch": 0.5103921542850381, "grad_norm": 2.2712260753133715, "learning_rate": 2.459095670933783e-06, "loss": 1.0111, "step": 4853 }, { "epoch": 0.5104973247268855, "grad_norm": 2.0427151120190445, "learning_rate": 2.458260965504151e-06, "loss": 1.0178, "step": 4854 }, { "epoch": 0.5106024951687328, "grad_norm": 2.615519242206928, "learning_rate": 2.457426264728746e-06, "loss": 1.0209, "step": 4855 }, { "epoch": 0.5107076656105801, "grad_norm": 2.8016701953883216, "learning_rate": 2.4565915687006426e-06, "loss": 1.0498, "step": 4856 }, { "epoch": 0.5108128360524274, "grad_norm": 1.909826636778149, "learning_rate": 2.4557568775129157e-06, "loss": 0.9668, "step": 4857 }, { "epoch": 0.5109180064942748, "grad_norm": 2.0558742173386637, "learning_rate": 2.454922191258642e-06, "loss": 0.9879, "step": 4858 }, { "epoch": 0.5110231769361221, "grad_norm": 2.0257159398608646, "learning_rate": 2.454087510030894e-06, "loss": 0.9859, "step": 4859 }, { "epoch": 0.5111283473779694, "grad_norm": 1.7754121266644842, "learning_rate": 2.453252833922745e-06, "loss": 0.9699, "step": 4860 }, { "epoch": 0.5112335178198167, "grad_norm": 2.4413398534310375, "learning_rate": 2.4524181630272685e-06, "loss": 1.0299, "step": 4861 }, { "epoch": 0.5113386882616641, "grad_norm": 2.715928878533507, "learning_rate": 2.451583497437535e-06, "loss": 0.9741, "step": 4862 }, { "epoch": 0.5114438587035114, "grad_norm": 2.3218910000415365, "learning_rate": 2.450748837246619e-06, "loss": 1.0065, "step": 4863 }, { "epoch": 0.5115490291453587, "grad_norm": 2.2782925199881214, "learning_rate": 2.44991418254759e-06, "loss": 0.9806, "step": 4864 }, { "epoch": 0.511654199587206, "grad_norm": 2.7149913688207135, "learning_rate": 2.449079533433519e-06, "loss": 1.0209, "step": 4865 }, { "epoch": 0.5117593700290534, "grad_norm": 2.3390153312282185, "learning_rate": 2.448244889997475e-06, "loss": 0.999, "step": 4866 }, { "epoch": 0.5118645404709007, "grad_norm": 3.0557721293095157, "learning_rate": 2.447410252332528e-06, "loss": 1.0012, "step": 4867 }, { "epoch": 0.511969710912748, "grad_norm": 2.866535350728344, "learning_rate": 2.4465756205317466e-06, "loss": 0.992, "step": 4868 }, { "epoch": 0.5120748813545953, "grad_norm": 2.9450377971992023, "learning_rate": 2.4457409946881986e-06, "loss": 1.0557, "step": 4869 }, { "epoch": 0.5121800517964427, "grad_norm": 2.786445259165392, "learning_rate": 2.444906374894951e-06, "loss": 1.0568, "step": 4870 }, { "epoch": 0.51228522223829, "grad_norm": 1.9248634237188782, "learning_rate": 2.4440717612450695e-06, "loss": 0.9872, "step": 4871 }, { "epoch": 0.5123903926801372, "grad_norm": 3.2932080298333166, "learning_rate": 2.4432371538316226e-06, "loss": 0.9767, "step": 4872 }, { "epoch": 0.5124955631219845, "grad_norm": 2.5355754300367486, "learning_rate": 2.4424025527476735e-06, "loss": 1.0017, "step": 4873 }, { "epoch": 0.5126007335638318, "grad_norm": 2.598457779940784, "learning_rate": 2.441567958086288e-06, "loss": 0.9825, "step": 4874 }, { "epoch": 0.5127059040056792, "grad_norm": 2.252477222253449, "learning_rate": 2.4407333699405285e-06, "loss": 1.0299, "step": 4875 }, { "epoch": 0.5128110744475265, "grad_norm": 1.61574628720382, "learning_rate": 2.4398987884034574e-06, "loss": 0.9719, "step": 4876 }, { "epoch": 0.5129162448893738, "grad_norm": 2.9461045046867795, "learning_rate": 2.43906421356814e-06, "loss": 1.0177, "step": 4877 }, { "epoch": 0.5130214153312211, "grad_norm": 2.6086241827887715, "learning_rate": 2.4382296455276355e-06, "loss": 0.9679, "step": 4878 }, { "epoch": 0.5131265857730685, "grad_norm": 2.469431110622542, "learning_rate": 2.4373950843750053e-06, "loss": 0.9654, "step": 4879 }, { "epoch": 0.5132317562149158, "grad_norm": 2.4965960753098253, "learning_rate": 2.4365605302033094e-06, "loss": 0.9802, "step": 4880 }, { "epoch": 0.5133369266567631, "grad_norm": 2.0319790709927505, "learning_rate": 2.435725983105608e-06, "loss": 0.9885, "step": 4881 }, { "epoch": 0.5134420970986104, "grad_norm": 2.4000275308260544, "learning_rate": 2.4348914431749578e-06, "loss": 0.9208, "step": 4882 }, { "epoch": 0.5135472675404578, "grad_norm": 2.317952586045972, "learning_rate": 2.434056910504418e-06, "loss": 0.9975, "step": 4883 }, { "epoch": 0.5136524379823051, "grad_norm": 2.2760790757419778, "learning_rate": 2.4332223851870453e-06, "loss": 1.0071, "step": 4884 }, { "epoch": 0.5137576084241524, "grad_norm": 2.4027564608452723, "learning_rate": 2.4323878673158937e-06, "loss": 1.0331, "step": 4885 }, { "epoch": 0.5138627788659997, "grad_norm": 1.8639452403440218, "learning_rate": 2.431553356984022e-06, "loss": 0.9873, "step": 4886 }, { "epoch": 0.5139679493078471, "grad_norm": 2.5864359775236245, "learning_rate": 2.430718854284482e-06, "loss": 0.9993, "step": 4887 }, { "epoch": 0.5140731197496944, "grad_norm": 1.8024207909165022, "learning_rate": 2.429884359310328e-06, "loss": 1.0033, "step": 4888 }, { "epoch": 0.5141782901915417, "grad_norm": 2.4114408144418538, "learning_rate": 2.4290498721546123e-06, "loss": 1.0138, "step": 4889 }, { "epoch": 0.514283460633389, "grad_norm": 2.8587403555199553, "learning_rate": 2.4282153929103874e-06, "loss": 0.9866, "step": 4890 }, { "epoch": 0.5143886310752364, "grad_norm": 2.2763471355900706, "learning_rate": 2.427380921670704e-06, "loss": 0.9761, "step": 4891 }, { "epoch": 0.5144938015170836, "grad_norm": 2.7630996690467073, "learning_rate": 2.426546458528612e-06, "loss": 1.0034, "step": 4892 }, { "epoch": 0.5145989719589309, "grad_norm": 2.5503825990043034, "learning_rate": 2.42571200357716e-06, "loss": 0.9842, "step": 4893 }, { "epoch": 0.5147041424007782, "grad_norm": 1.707781885008851, "learning_rate": 2.4248775569093968e-06, "loss": 0.9673, "step": 4894 }, { "epoch": 0.5148093128426255, "grad_norm": 2.372412499450598, "learning_rate": 2.4240431186183695e-06, "loss": 1.0197, "step": 4895 }, { "epoch": 0.5149144832844729, "grad_norm": 2.6726883190728916, "learning_rate": 2.423208688797125e-06, "loss": 0.9981, "step": 4896 }, { "epoch": 0.5150196537263202, "grad_norm": 2.672102772646679, "learning_rate": 2.422374267538708e-06, "loss": 0.9785, "step": 4897 }, { "epoch": 0.5151248241681675, "grad_norm": 3.3567918073562586, "learning_rate": 2.4215398549361632e-06, "loss": 0.9869, "step": 4898 }, { "epoch": 0.5152299946100148, "grad_norm": 3.2146007931473184, "learning_rate": 2.420705451082533e-06, "loss": 0.9755, "step": 4899 }, { "epoch": 0.5153351650518622, "grad_norm": 2.8601027840979563, "learning_rate": 2.4198710560708623e-06, "loss": 1.0102, "step": 4900 }, { "epoch": 0.5154403354937095, "grad_norm": 2.49782936985743, "learning_rate": 2.419036669994191e-06, "loss": 1.0102, "step": 4901 }, { "epoch": 0.5155455059355568, "grad_norm": 2.666513374876871, "learning_rate": 2.41820229294556e-06, "loss": 0.9846, "step": 4902 }, { "epoch": 0.5156506763774041, "grad_norm": 2.1553652633705274, "learning_rate": 2.4173679250180083e-06, "loss": 0.9994, "step": 4903 }, { "epoch": 0.5157558468192515, "grad_norm": 1.9176350923655092, "learning_rate": 2.4165335663045753e-06, "loss": 0.9929, "step": 4904 }, { "epoch": 0.5158610172610988, "grad_norm": 2.8881593227917377, "learning_rate": 2.4156992168982985e-06, "loss": 1.0123, "step": 4905 }, { "epoch": 0.5159661877029461, "grad_norm": 1.7026322357589312, "learning_rate": 2.4148648768922133e-06, "loss": 0.9522, "step": 4906 }, { "epoch": 0.5160713581447934, "grad_norm": 1.9182156402806023, "learning_rate": 2.4140305463793557e-06, "loss": 0.9721, "step": 4907 }, { "epoch": 0.5161765285866408, "grad_norm": 2.630435818921885, "learning_rate": 2.4131962254527592e-06, "loss": 0.9998, "step": 4908 }, { "epoch": 0.5162816990284881, "grad_norm": 2.639541492793874, "learning_rate": 2.41236191420546e-06, "loss": 1.0166, "step": 4909 }, { "epoch": 0.5163868694703354, "grad_norm": 2.193440397829881, "learning_rate": 2.4115276127304866e-06, "loss": 1.0148, "step": 4910 }, { "epoch": 0.5164920399121827, "grad_norm": 1.9956942722452817, "learning_rate": 2.4106933211208723e-06, "loss": 0.9871, "step": 4911 }, { "epoch": 0.51659721035403, "grad_norm": 2.841990398589798, "learning_rate": 2.4098590394696452e-06, "loss": 1.01, "step": 4912 }, { "epoch": 0.5167023807958773, "grad_norm": 2.4733117430723324, "learning_rate": 2.4090247678698362e-06, "loss": 1.0348, "step": 4913 }, { "epoch": 0.5168075512377246, "grad_norm": 1.852569961733493, "learning_rate": 2.408190506414473e-06, "loss": 0.9695, "step": 4914 }, { "epoch": 0.5169127216795719, "grad_norm": 2.4532174848303683, "learning_rate": 2.4073562551965814e-06, "loss": 0.9901, "step": 4915 }, { "epoch": 0.5170178921214192, "grad_norm": 2.5251790955216697, "learning_rate": 2.4065220143091863e-06, "loss": 1.0092, "step": 4916 }, { "epoch": 0.5171230625632666, "grad_norm": 2.76475785288874, "learning_rate": 2.405687783845313e-06, "loss": 0.9948, "step": 4917 }, { "epoch": 0.5172282330051139, "grad_norm": 2.7685770717204776, "learning_rate": 2.4048535638979844e-06, "loss": 0.9939, "step": 4918 }, { "epoch": 0.5173334034469612, "grad_norm": 2.2946067444852036, "learning_rate": 2.4040193545602232e-06, "loss": 0.9687, "step": 4919 }, { "epoch": 0.5174385738888085, "grad_norm": 2.645304674423841, "learning_rate": 2.403185155925049e-06, "loss": 1.0173, "step": 4920 }, { "epoch": 0.5175437443306559, "grad_norm": 2.629339889738927, "learning_rate": 2.4023509680854822e-06, "loss": 0.9829, "step": 4921 }, { "epoch": 0.5176489147725032, "grad_norm": 2.1954075878052035, "learning_rate": 2.40151679113454e-06, "loss": 0.9908, "step": 4922 }, { "epoch": 0.5177540852143505, "grad_norm": 2.095867879804772, "learning_rate": 2.400682625165242e-06, "loss": 0.9969, "step": 4923 }, { "epoch": 0.5178592556561978, "grad_norm": 1.9455070815665647, "learning_rate": 2.399848470270602e-06, "loss": 0.9971, "step": 4924 }, { "epoch": 0.5179644260980452, "grad_norm": 2.507117651664635, "learning_rate": 2.399014326543637e-06, "loss": 1.0045, "step": 4925 }, { "epoch": 0.5180695965398925, "grad_norm": 2.4607100953155503, "learning_rate": 2.398180194077357e-06, "loss": 1.0235, "step": 4926 }, { "epoch": 0.5181747669817398, "grad_norm": 2.777674225709361, "learning_rate": 2.3973460729647775e-06, "loss": 1.0138, "step": 4927 }, { "epoch": 0.5182799374235871, "grad_norm": 2.313723654298759, "learning_rate": 2.396511963298908e-06, "loss": 0.9688, "step": 4928 }, { "epoch": 0.5183851078654345, "grad_norm": 2.3190874101733803, "learning_rate": 2.395677865172759e-06, "loss": 1.0122, "step": 4929 }, { "epoch": 0.5184902783072818, "grad_norm": 2.7640107446846773, "learning_rate": 2.394843778679338e-06, "loss": 0.9832, "step": 4930 }, { "epoch": 0.5185954487491291, "grad_norm": 2.4081953739998534, "learning_rate": 2.3940097039116523e-06, "loss": 1.014, "step": 4931 }, { "epoch": 0.5187006191909764, "grad_norm": 1.923457778269713, "learning_rate": 2.3931756409627084e-06, "loss": 0.9874, "step": 4932 }, { "epoch": 0.5188057896328236, "grad_norm": 1.8553693404082456, "learning_rate": 2.3923415899255105e-06, "loss": 0.9887, "step": 4933 }, { "epoch": 0.518910960074671, "grad_norm": 2.7194260650322684, "learning_rate": 2.3915075508930615e-06, "loss": 0.9387, "step": 4934 }, { "epoch": 0.5190161305165183, "grad_norm": 2.652242421666457, "learning_rate": 2.3906735239583623e-06, "loss": 1.0244, "step": 4935 }, { "epoch": 0.5191213009583656, "grad_norm": 2.549188854335063, "learning_rate": 2.389839509214416e-06, "loss": 1.0246, "step": 4936 }, { "epoch": 0.5192264714002129, "grad_norm": 2.2665938281306155, "learning_rate": 2.3890055067542195e-06, "loss": 1.0106, "step": 4937 }, { "epoch": 0.5193316418420603, "grad_norm": 2.8241998887282795, "learning_rate": 2.3881715166707716e-06, "loss": 1.0155, "step": 4938 }, { "epoch": 0.5194368122839076, "grad_norm": 2.152806090447936, "learning_rate": 2.387337539057068e-06, "loss": 1.0189, "step": 4939 }, { "epoch": 0.5195419827257549, "grad_norm": 2.172323988446627, "learning_rate": 2.3865035740061037e-06, "loss": 1.0695, "step": 4940 }, { "epoch": 0.5196471531676022, "grad_norm": 2.728428245649283, "learning_rate": 2.385669621610873e-06, "loss": 1.0112, "step": 4941 }, { "epoch": 0.5197523236094496, "grad_norm": 2.124385885202202, "learning_rate": 2.384835681964368e-06, "loss": 1.0248, "step": 4942 }, { "epoch": 0.5198574940512969, "grad_norm": 2.313862649372938, "learning_rate": 2.3840017551595785e-06, "loss": 0.9969, "step": 4943 }, { "epoch": 0.5199626644931442, "grad_norm": 2.4524723823538954, "learning_rate": 2.3831678412894947e-06, "loss": 0.9844, "step": 4944 }, { "epoch": 0.5200678349349915, "grad_norm": 2.5132178605505486, "learning_rate": 2.3823339404471037e-06, "loss": 1.04, "step": 4945 }, { "epoch": 0.5201730053768389, "grad_norm": 2.4402520311081197, "learning_rate": 2.3815000527253933e-06, "loss": 0.9824, "step": 4946 }, { "epoch": 0.5202781758186862, "grad_norm": 2.1382688241450247, "learning_rate": 2.380666178217347e-06, "loss": 1.0087, "step": 4947 }, { "epoch": 0.5203833462605335, "grad_norm": 2.538021983793176, "learning_rate": 2.3798323170159487e-06, "loss": 0.9869, "step": 4948 }, { "epoch": 0.5204885167023808, "grad_norm": 1.851342803348307, "learning_rate": 2.3789984692141798e-06, "loss": 0.9851, "step": 4949 }, { "epoch": 0.5205936871442282, "grad_norm": 2.176853442535673, "learning_rate": 2.3781646349050227e-06, "loss": 0.9821, "step": 4950 }, { "epoch": 0.5206988575860755, "grad_norm": 2.3979243648827984, "learning_rate": 2.377330814181455e-06, "loss": 0.9397, "step": 4951 }, { "epoch": 0.5208040280279228, "grad_norm": 2.0633810413062212, "learning_rate": 2.3764970071364548e-06, "loss": 1.0225, "step": 4952 }, { "epoch": 0.52090919846977, "grad_norm": 1.7807728183386664, "learning_rate": 2.3756632138629977e-06, "loss": 0.9851, "step": 4953 }, { "epoch": 0.5210143689116173, "grad_norm": 2.2949369278058382, "learning_rate": 2.3748294344540575e-06, "loss": 0.9848, "step": 4954 }, { "epoch": 0.5211195393534647, "grad_norm": 2.8758134936809965, "learning_rate": 2.373995669002608e-06, "loss": 1.0296, "step": 4955 }, { "epoch": 0.521224709795312, "grad_norm": 2.7047106011767563, "learning_rate": 2.37316191760162e-06, "loss": 1.0089, "step": 4956 }, { "epoch": 0.5213298802371593, "grad_norm": 2.4083545951327463, "learning_rate": 2.3723281803440642e-06, "loss": 0.9828, "step": 4957 }, { "epoch": 0.5214350506790066, "grad_norm": 2.3483276709030703, "learning_rate": 2.371494457322907e-06, "loss": 0.9683, "step": 4958 }, { "epoch": 0.521540221120854, "grad_norm": 2.6165413672922004, "learning_rate": 2.3706607486311166e-06, "loss": 0.9807, "step": 4959 }, { "epoch": 0.5216453915627013, "grad_norm": 2.7199298338176434, "learning_rate": 2.3698270543616583e-06, "loss": 0.9758, "step": 4960 }, { "epoch": 0.5217505620045486, "grad_norm": 1.6020078122627697, "learning_rate": 2.368993374607494e-06, "loss": 0.993, "step": 4961 }, { "epoch": 0.5218557324463959, "grad_norm": 3.0119525509546476, "learning_rate": 2.3681597094615863e-06, "loss": 1.04, "step": 4962 }, { "epoch": 0.5219609028882433, "grad_norm": 2.8336389633739376, "learning_rate": 2.367326059016894e-06, "loss": 0.9782, "step": 4963 }, { "epoch": 0.5220660733300906, "grad_norm": 3.302155303408267, "learning_rate": 2.3664924233663776e-06, "loss": 0.9895, "step": 4964 }, { "epoch": 0.5221712437719379, "grad_norm": 2.0928625598039674, "learning_rate": 2.3656588026029938e-06, "loss": 1.042, "step": 4965 }, { "epoch": 0.5222764142137852, "grad_norm": 2.5158469098657816, "learning_rate": 2.3648251968196964e-06, "loss": 0.9834, "step": 4966 }, { "epoch": 0.5223815846556326, "grad_norm": 2.8574951074085377, "learning_rate": 2.36399160610944e-06, "loss": 0.9998, "step": 4967 }, { "epoch": 0.5224867550974799, "grad_norm": 1.7337887982119702, "learning_rate": 2.3631580305651756e-06, "loss": 1.0026, "step": 4968 }, { "epoch": 0.5225919255393272, "grad_norm": 3.308910933603135, "learning_rate": 2.362324470279854e-06, "loss": 0.9672, "step": 4969 }, { "epoch": 0.5226970959811745, "grad_norm": 2.6333972565437556, "learning_rate": 2.3614909253464235e-06, "loss": 0.9774, "step": 4970 }, { "epoch": 0.5228022664230219, "grad_norm": 2.09659946546361, "learning_rate": 2.360657395857831e-06, "loss": 0.9807, "step": 4971 }, { "epoch": 0.5229074368648692, "grad_norm": 2.6391317053576806, "learning_rate": 2.3598238819070206e-06, "loss": 1.0443, "step": 4972 }, { "epoch": 0.5230126073067164, "grad_norm": 2.242550715788799, "learning_rate": 2.358990383586937e-06, "loss": 0.966, "step": 4973 }, { "epoch": 0.5231177777485637, "grad_norm": 3.108913453931425, "learning_rate": 2.3581569009905204e-06, "loss": 0.9724, "step": 4974 }, { "epoch": 0.523222948190411, "grad_norm": 1.9901033912663888, "learning_rate": 2.357323434210712e-06, "loss": 0.9695, "step": 4975 }, { "epoch": 0.5233281186322584, "grad_norm": 2.168744011583421, "learning_rate": 2.3564899833404496e-06, "loss": 0.9924, "step": 4976 }, { "epoch": 0.5234332890741057, "grad_norm": 2.549473243082255, "learning_rate": 2.355656548472667e-06, "loss": 1.0382, "step": 4977 }, { "epoch": 0.523538459515953, "grad_norm": 2.5199612478036957, "learning_rate": 2.3548231297003017e-06, "loss": 1.0082, "step": 4978 }, { "epoch": 0.5236436299578003, "grad_norm": 2.3189840650193148, "learning_rate": 2.3539897271162853e-06, "loss": 0.9743, "step": 4979 }, { "epoch": 0.5237488003996477, "grad_norm": 1.8363174051887219, "learning_rate": 2.3531563408135482e-06, "loss": 1.014, "step": 4980 }, { "epoch": 0.523853970841495, "grad_norm": 2.136662771433713, "learning_rate": 2.3523229708850194e-06, "loss": 1.0037, "step": 4981 }, { "epoch": 0.5239591412833423, "grad_norm": 2.5855613260565393, "learning_rate": 2.351489617423627e-06, "loss": 0.9987, "step": 4982 }, { "epoch": 0.5240643117251896, "grad_norm": 2.3432603718739737, "learning_rate": 2.3506562805222966e-06, "loss": 0.9765, "step": 4983 }, { "epoch": 0.524169482167037, "grad_norm": 2.6466370290433874, "learning_rate": 2.3498229602739506e-06, "loss": 0.99, "step": 4984 }, { "epoch": 0.5242746526088843, "grad_norm": 3.0276801019190107, "learning_rate": 2.348989656771511e-06, "loss": 0.9581, "step": 4985 }, { "epoch": 0.5243798230507316, "grad_norm": 3.1030135072125486, "learning_rate": 2.348156370107897e-06, "loss": 0.9684, "step": 4986 }, { "epoch": 0.5244849934925789, "grad_norm": 1.6705531610760058, "learning_rate": 2.3473231003760283e-06, "loss": 1.0041, "step": 4987 }, { "epoch": 0.5245901639344263, "grad_norm": 3.195727549016848, "learning_rate": 2.3464898476688198e-06, "loss": 1.0116, "step": 4988 }, { "epoch": 0.5246953343762736, "grad_norm": 2.4173618786228532, "learning_rate": 2.345656612079186e-06, "loss": 0.9629, "step": 4989 }, { "epoch": 0.5248005048181209, "grad_norm": 2.6428711784981602, "learning_rate": 2.3448233937000385e-06, "loss": 1.0233, "step": 4990 }, { "epoch": 0.5249056752599682, "grad_norm": 1.912207664947132, "learning_rate": 2.343990192624288e-06, "loss": 0.9782, "step": 4991 }, { "epoch": 0.5250108457018156, "grad_norm": 2.2091019995514523, "learning_rate": 2.3431570089448434e-06, "loss": 0.9875, "step": 4992 }, { "epoch": 0.5251160161436629, "grad_norm": 2.7977308048194285, "learning_rate": 2.3423238427546105e-06, "loss": 1.0392, "step": 4993 }, { "epoch": 0.5252211865855101, "grad_norm": 2.639342545737909, "learning_rate": 2.341490694146494e-06, "loss": 1.0452, "step": 4994 }, { "epoch": 0.5253263570273574, "grad_norm": 2.2851218048922695, "learning_rate": 2.3406575632133953e-06, "loss": 0.9841, "step": 4995 }, { "epoch": 0.5254315274692047, "grad_norm": 1.9739459077968367, "learning_rate": 2.339824450048218e-06, "loss": 0.9819, "step": 4996 }, { "epoch": 0.5255366979110521, "grad_norm": 2.0317174932305333, "learning_rate": 2.3389913547438586e-06, "loss": 1.0313, "step": 4997 }, { "epoch": 0.5256418683528994, "grad_norm": 2.7337990453530154, "learning_rate": 2.338158277393213e-06, "loss": 1.0307, "step": 4998 }, { "epoch": 0.5257470387947467, "grad_norm": 2.665374217415374, "learning_rate": 2.337325218089177e-06, "loss": 0.9793, "step": 4999 }, { "epoch": 0.525852209236594, "grad_norm": 2.167783407256349, "learning_rate": 2.3364921769246423e-06, "loss": 0.9815, "step": 5000 }, { "epoch": 0.5259573796784414, "grad_norm": 1.9578480148487534, "learning_rate": 2.335659153992501e-06, "loss": 0.9851, "step": 5001 }, { "epoch": 0.5260625501202887, "grad_norm": 1.8750731661000946, "learning_rate": 2.33482614938564e-06, "loss": 1.0107, "step": 5002 }, { "epoch": 0.526167720562136, "grad_norm": 2.7268491605591705, "learning_rate": 2.3339931631969473e-06, "loss": 1.0255, "step": 5003 }, { "epoch": 0.5262728910039833, "grad_norm": 2.3624254794486292, "learning_rate": 2.333160195519306e-06, "loss": 1.0034, "step": 5004 }, { "epoch": 0.5263780614458307, "grad_norm": 2.0278498795912956, "learning_rate": 2.3323272464455987e-06, "loss": 0.9947, "step": 5005 }, { "epoch": 0.526483231887678, "grad_norm": 2.2289155760280592, "learning_rate": 2.331494316068706e-06, "loss": 0.9749, "step": 5006 }, { "epoch": 0.5265884023295253, "grad_norm": 2.4790681587132335, "learning_rate": 2.3306614044815066e-06, "loss": 0.9922, "step": 5007 }, { "epoch": 0.5266935727713726, "grad_norm": 2.253254009909184, "learning_rate": 2.329828511776876e-06, "loss": 0.9891, "step": 5008 }, { "epoch": 0.52679874321322, "grad_norm": 2.4459741279261102, "learning_rate": 2.328995638047687e-06, "loss": 1.0001, "step": 5009 }, { "epoch": 0.5269039136550673, "grad_norm": 2.9185918204182313, "learning_rate": 2.328162783386814e-06, "loss": 0.9973, "step": 5010 }, { "epoch": 0.5270090840969146, "grad_norm": 2.0661644888279582, "learning_rate": 2.327329947887125e-06, "loss": 1.0438, "step": 5011 }, { "epoch": 0.5271142545387619, "grad_norm": 1.9485234437575496, "learning_rate": 2.3264971316414893e-06, "loss": 1.0256, "step": 5012 }, { "epoch": 0.5272194249806093, "grad_norm": 2.178434629119489, "learning_rate": 2.325664334742771e-06, "loss": 1.0014, "step": 5013 }, { "epoch": 0.5273245954224565, "grad_norm": 2.0382608544824095, "learning_rate": 2.3248315572838316e-06, "loss": 0.9811, "step": 5014 }, { "epoch": 0.5274297658643038, "grad_norm": 2.2529667292304634, "learning_rate": 2.323998799357536e-06, "loss": 0.9599, "step": 5015 }, { "epoch": 0.5275349363061511, "grad_norm": 2.1875904622022673, "learning_rate": 2.3231660610567415e-06, "loss": 0.9548, "step": 5016 }, { "epoch": 0.5276401067479984, "grad_norm": 2.294988151424705, "learning_rate": 2.322333342474305e-06, "loss": 1.0398, "step": 5017 }, { "epoch": 0.5277452771898458, "grad_norm": 2.593613259374422, "learning_rate": 2.32150064370308e-06, "loss": 0.9981, "step": 5018 }, { "epoch": 0.5278504476316931, "grad_norm": 2.4843528327017097, "learning_rate": 2.3206679648359206e-06, "loss": 0.9869, "step": 5019 }, { "epoch": 0.5279556180735404, "grad_norm": 2.368580138964457, "learning_rate": 2.3198353059656763e-06, "loss": 0.942, "step": 5020 }, { "epoch": 0.5280607885153877, "grad_norm": 3.1046397593748103, "learning_rate": 2.319002667185195e-06, "loss": 0.9921, "step": 5021 }, { "epoch": 0.5281659589572351, "grad_norm": 2.386039450298401, "learning_rate": 2.318170048587322e-06, "loss": 0.9925, "step": 5022 }, { "epoch": 0.5282711293990824, "grad_norm": 2.1962062400668736, "learning_rate": 2.3173374502649006e-06, "loss": 1.0175, "step": 5023 }, { "epoch": 0.5283762998409297, "grad_norm": 1.897357930598202, "learning_rate": 2.316504872310773e-06, "loss": 1.015, "step": 5024 }, { "epoch": 0.528481470282777, "grad_norm": 2.699356451977988, "learning_rate": 2.315672314817778e-06, "loss": 1.0146, "step": 5025 }, { "epoch": 0.5285866407246244, "grad_norm": 2.291400597221481, "learning_rate": 2.3148397778787514e-06, "loss": 1.003, "step": 5026 }, { "epoch": 0.5286918111664717, "grad_norm": 1.81066716415222, "learning_rate": 2.3140072615865273e-06, "loss": 0.9671, "step": 5027 }, { "epoch": 0.528796981608319, "grad_norm": 2.428779670922376, "learning_rate": 2.3131747660339396e-06, "loss": 1.0034, "step": 5028 }, { "epoch": 0.5289021520501663, "grad_norm": 2.2304748543670074, "learning_rate": 2.3123422913138165e-06, "loss": 0.9739, "step": 5029 }, { "epoch": 0.5290073224920137, "grad_norm": 2.107958029699162, "learning_rate": 2.3115098375189854e-06, "loss": 1.0084, "step": 5030 }, { "epoch": 0.529112492933861, "grad_norm": 3.179847886514185, "learning_rate": 2.310677404742272e-06, "loss": 1.0061, "step": 5031 }, { "epoch": 0.5292176633757083, "grad_norm": 1.720669464426383, "learning_rate": 2.309844993076498e-06, "loss": 0.9939, "step": 5032 }, { "epoch": 0.5293228338175556, "grad_norm": 2.00459599458911, "learning_rate": 2.3090126026144847e-06, "loss": 0.9521, "step": 5033 }, { "epoch": 0.5294280042594028, "grad_norm": 2.3013300137074255, "learning_rate": 2.3081802334490502e-06, "loss": 0.9883, "step": 5034 }, { "epoch": 0.5295331747012502, "grad_norm": 2.3045482143367697, "learning_rate": 2.30734788567301e-06, "loss": 0.9919, "step": 5035 }, { "epoch": 0.5296383451430975, "grad_norm": 1.67169847842081, "learning_rate": 2.3065155593791756e-06, "loss": 0.9767, "step": 5036 }, { "epoch": 0.5297435155849448, "grad_norm": 2.894233184600396, "learning_rate": 2.3056832546603607e-06, "loss": 1.0191, "step": 5037 }, { "epoch": 0.5298486860267921, "grad_norm": 2.011314434528289, "learning_rate": 2.3048509716093723e-06, "loss": 0.9874, "step": 5038 }, { "epoch": 0.5299538564686395, "grad_norm": 2.2683865903528293, "learning_rate": 2.3040187103190165e-06, "loss": 0.9704, "step": 5039 }, { "epoch": 0.5300590269104868, "grad_norm": 2.092218000336117, "learning_rate": 2.303186470882097e-06, "loss": 0.9837, "step": 5040 }, { "epoch": 0.5301641973523341, "grad_norm": 2.5135197465867765, "learning_rate": 2.302354253391414e-06, "loss": 0.9526, "step": 5041 }, { "epoch": 0.5302693677941814, "grad_norm": 2.572619453041509, "learning_rate": 2.301522057939768e-06, "loss": 0.9711, "step": 5042 }, { "epoch": 0.5303745382360288, "grad_norm": 2.9257982055535323, "learning_rate": 2.3006898846199544e-06, "loss": 1.0119, "step": 5043 }, { "epoch": 0.5304797086778761, "grad_norm": 2.7098826216208995, "learning_rate": 2.299857733524767e-06, "loss": 1.0066, "step": 5044 }, { "epoch": 0.5305848791197234, "grad_norm": 2.727684751730271, "learning_rate": 2.299025604746997e-06, "loss": 0.9946, "step": 5045 }, { "epoch": 0.5306900495615707, "grad_norm": 2.3317306284613957, "learning_rate": 2.2981934983794324e-06, "loss": 0.9265, "step": 5046 }, { "epoch": 0.5307952200034181, "grad_norm": 2.5826246634345753, "learning_rate": 2.2973614145148616e-06, "loss": 1.0235, "step": 5047 }, { "epoch": 0.5309003904452654, "grad_norm": 2.433635645176636, "learning_rate": 2.2965293532460673e-06, "loss": 0.9693, "step": 5048 }, { "epoch": 0.5310055608871127, "grad_norm": 2.259538998753554, "learning_rate": 2.2956973146658303e-06, "loss": 0.9975, "step": 5049 }, { "epoch": 0.53111073132896, "grad_norm": 2.3054114447327847, "learning_rate": 2.294865298866929e-06, "loss": 0.991, "step": 5050 }, { "epoch": 0.5312159017708074, "grad_norm": 3.420364728806349, "learning_rate": 2.294033305942141e-06, "loss": 1.0031, "step": 5051 }, { "epoch": 0.5313210722126547, "grad_norm": 2.8657151853369234, "learning_rate": 2.2932013359842398e-06, "loss": 0.9782, "step": 5052 }, { "epoch": 0.531426242654502, "grad_norm": 3.1777989573404217, "learning_rate": 2.292369389085996e-06, "loss": 0.9521, "step": 5053 }, { "epoch": 0.5315314130963493, "grad_norm": 2.635587308253445, "learning_rate": 2.291537465340178e-06, "loss": 0.9735, "step": 5054 }, { "epoch": 0.5316365835381965, "grad_norm": 2.7400667145766846, "learning_rate": 2.2907055648395517e-06, "loss": 1.0065, "step": 5055 }, { "epoch": 0.5317417539800439, "grad_norm": 2.1418010167978383, "learning_rate": 2.2898736876768816e-06, "loss": 1.0043, "step": 5056 }, { "epoch": 0.5318469244218912, "grad_norm": 2.4204236378757296, "learning_rate": 2.289041833944927e-06, "loss": 0.9886, "step": 5057 }, { "epoch": 0.5319520948637385, "grad_norm": 2.2258054408956847, "learning_rate": 2.2882100037364472e-06, "loss": 0.9754, "step": 5058 }, { "epoch": 0.5320572653055858, "grad_norm": 1.698663415933468, "learning_rate": 2.2873781971441963e-06, "loss": 1.0052, "step": 5059 }, { "epoch": 0.5321624357474332, "grad_norm": 2.39347765845993, "learning_rate": 2.2865464142609286e-06, "loss": 1.0169, "step": 5060 }, { "epoch": 0.5322676061892805, "grad_norm": 2.1009995798487497, "learning_rate": 2.2857146551793943e-06, "loss": 1.0106, "step": 5061 }, { "epoch": 0.5323727766311278, "grad_norm": 2.695049651098317, "learning_rate": 2.2848829199923405e-06, "loss": 0.9646, "step": 5062 }, { "epoch": 0.5324779470729751, "grad_norm": 2.092568062548765, "learning_rate": 2.2840512087925127e-06, "loss": 0.988, "step": 5063 }, { "epoch": 0.5325831175148225, "grad_norm": 2.183783767757615, "learning_rate": 2.283219521672651e-06, "loss": 0.9897, "step": 5064 }, { "epoch": 0.5326882879566698, "grad_norm": 1.8926321090318932, "learning_rate": 2.282387858725498e-06, "loss": 0.999, "step": 5065 }, { "epoch": 0.5327934583985171, "grad_norm": 2.3234643524419205, "learning_rate": 2.281556220043789e-06, "loss": 0.9972, "step": 5066 }, { "epoch": 0.5328986288403644, "grad_norm": 2.311574355596325, "learning_rate": 2.280724605720258e-06, "loss": 1.0388, "step": 5067 }, { "epoch": 0.5330037992822118, "grad_norm": 2.6794835776991985, "learning_rate": 2.2798930158476375e-06, "loss": 1.0157, "step": 5068 }, { "epoch": 0.5331089697240591, "grad_norm": 2.314690603027226, "learning_rate": 2.279061450518655e-06, "loss": 1.0114, "step": 5069 }, { "epoch": 0.5332141401659064, "grad_norm": 2.199288704139533, "learning_rate": 2.278229909826037e-06, "loss": 0.9784, "step": 5070 }, { "epoch": 0.5333193106077537, "grad_norm": 1.817939098985619, "learning_rate": 2.2773983938625074e-06, "loss": 0.9549, "step": 5071 }, { "epoch": 0.533424481049601, "grad_norm": 2.0687055899360303, "learning_rate": 2.276566902720786e-06, "loss": 0.9939, "step": 5072 }, { "epoch": 0.5335296514914484, "grad_norm": 2.9464457058239257, "learning_rate": 2.2757354364935893e-06, "loss": 0.9716, "step": 5073 }, { "epoch": 0.5336348219332957, "grad_norm": 1.5594113007408739, "learning_rate": 2.274903995273635e-06, "loss": 0.9705, "step": 5074 }, { "epoch": 0.5337399923751429, "grad_norm": 2.6090779806826836, "learning_rate": 2.2740725791536337e-06, "loss": 0.9881, "step": 5075 }, { "epoch": 0.5338451628169902, "grad_norm": 1.8377197278317314, "learning_rate": 2.2732411882262946e-06, "loss": 0.9562, "step": 5076 }, { "epoch": 0.5339503332588376, "grad_norm": 2.7435287336866505, "learning_rate": 2.272409822584325e-06, "loss": 1.0032, "step": 5077 }, { "epoch": 0.5340555037006849, "grad_norm": 2.212795732909546, "learning_rate": 2.2715784823204275e-06, "loss": 0.9326, "step": 5078 }, { "epoch": 0.5341606741425322, "grad_norm": 2.3267493730330373, "learning_rate": 2.270747167527304e-06, "loss": 1.0236, "step": 5079 }, { "epoch": 0.5342658445843795, "grad_norm": 2.622033615860494, "learning_rate": 2.2699158782976527e-06, "loss": 0.9834, "step": 5080 }, { "epoch": 0.5343710150262269, "grad_norm": 2.210083057529942, "learning_rate": 2.269084614724168e-06, "loss": 1.0015, "step": 5081 }, { "epoch": 0.5344761854680742, "grad_norm": 2.8063796227677877, "learning_rate": 2.268253376899542e-06, "loss": 0.9861, "step": 5082 }, { "epoch": 0.5345813559099215, "grad_norm": 2.54873266219812, "learning_rate": 2.267422164916465e-06, "loss": 0.9637, "step": 5083 }, { "epoch": 0.5346865263517688, "grad_norm": 2.9683253595659123, "learning_rate": 2.2665909788676236e-06, "loss": 1.0272, "step": 5084 }, { "epoch": 0.5347916967936162, "grad_norm": 2.795406678292719, "learning_rate": 2.2657598188457015e-06, "loss": 0.9601, "step": 5085 }, { "epoch": 0.5348968672354635, "grad_norm": 2.423988503624797, "learning_rate": 2.264928684943379e-06, "loss": 1.016, "step": 5086 }, { "epoch": 0.5350020376773108, "grad_norm": 3.0389490068520244, "learning_rate": 2.264097577253333e-06, "loss": 0.98, "step": 5087 }, { "epoch": 0.5351072081191581, "grad_norm": 2.591628315768945, "learning_rate": 2.263266495868241e-06, "loss": 0.9451, "step": 5088 }, { "epoch": 0.5352123785610055, "grad_norm": 2.85809426107325, "learning_rate": 2.262435440880774e-06, "loss": 1.0456, "step": 5089 }, { "epoch": 0.5353175490028528, "grad_norm": 2.4229876711875367, "learning_rate": 2.2616044123836005e-06, "loss": 0.962, "step": 5090 }, { "epoch": 0.5354227194447001, "grad_norm": 2.272935313170715, "learning_rate": 2.2607734104693866e-06, "loss": 1.0093, "step": 5091 }, { "epoch": 0.5355278898865474, "grad_norm": 2.9194015986993644, "learning_rate": 2.2599424352307958e-06, "loss": 0.9677, "step": 5092 }, { "epoch": 0.5356330603283948, "grad_norm": 2.5574503210340755, "learning_rate": 2.2591114867604887e-06, "loss": 0.9673, "step": 5093 }, { "epoch": 0.5357382307702421, "grad_norm": 1.81531777330057, "learning_rate": 2.258280565151122e-06, "loss": 1.005, "step": 5094 }, { "epoch": 0.5358434012120893, "grad_norm": 2.8458384104973278, "learning_rate": 2.25744967049535e-06, "loss": 0.9489, "step": 5095 }, { "epoch": 0.5359485716539366, "grad_norm": 2.1465556016022735, "learning_rate": 2.2566188028858228e-06, "loss": 0.995, "step": 5096 }, { "epoch": 0.5360537420957839, "grad_norm": 2.3813284857921637, "learning_rate": 2.2557879624151912e-06, "loss": 1.0476, "step": 5097 }, { "epoch": 0.5361589125376313, "grad_norm": 2.2720674366303735, "learning_rate": 2.2549571491760985e-06, "loss": 0.9967, "step": 5098 }, { "epoch": 0.5362640829794786, "grad_norm": 2.1256470660057305, "learning_rate": 2.254126363261188e-06, "loss": 1.0248, "step": 5099 }, { "epoch": 0.5363692534213259, "grad_norm": 1.430140563761567, "learning_rate": 2.2532956047630973e-06, "loss": 0.9834, "step": 5100 }, { "epoch": 0.5364744238631732, "grad_norm": 2.2091020729494324, "learning_rate": 2.252464873774462e-06, "loss": 0.9964, "step": 5101 }, { "epoch": 0.5365795943050206, "grad_norm": 3.05315371393753, "learning_rate": 2.2516341703879176e-06, "loss": 1.0133, "step": 5102 }, { "epoch": 0.5366847647468679, "grad_norm": 2.67327232115545, "learning_rate": 2.2508034946960924e-06, "loss": 1.0006, "step": 5103 }, { "epoch": 0.5367899351887152, "grad_norm": 2.990185755256598, "learning_rate": 2.2499728467916133e-06, "loss": 0.9785, "step": 5104 }, { "epoch": 0.5368951056305625, "grad_norm": 2.391793174882368, "learning_rate": 2.249142226767104e-06, "loss": 0.9347, "step": 5105 }, { "epoch": 0.5370002760724099, "grad_norm": 2.5608418817555014, "learning_rate": 2.248311634715185e-06, "loss": 1.0156, "step": 5106 }, { "epoch": 0.5371054465142572, "grad_norm": 2.8618225744834067, "learning_rate": 2.247481070728474e-06, "loss": 1.029, "step": 5107 }, { "epoch": 0.5372106169561045, "grad_norm": 1.5332374205582922, "learning_rate": 2.2466505348995854e-06, "loss": 0.9641, "step": 5108 }, { "epoch": 0.5373157873979518, "grad_norm": 2.445318078146189, "learning_rate": 2.24582002732113e-06, "loss": 1.0037, "step": 5109 }, { "epoch": 0.5374209578397992, "grad_norm": 1.948245870338096, "learning_rate": 2.244989548085716e-06, "loss": 0.9739, "step": 5110 }, { "epoch": 0.5375261282816465, "grad_norm": 1.9108232074625904, "learning_rate": 2.2441590972859484e-06, "loss": 1.0196, "step": 5111 }, { "epoch": 0.5376312987234938, "grad_norm": 2.239416100740601, "learning_rate": 2.2433286750144293e-06, "loss": 0.9678, "step": 5112 }, { "epoch": 0.5377364691653411, "grad_norm": 2.205078947975664, "learning_rate": 2.2424982813637567e-06, "loss": 0.9857, "step": 5113 }, { "epoch": 0.5378416396071884, "grad_norm": 2.253513657680878, "learning_rate": 2.241667916426526e-06, "loss": 0.9517, "step": 5114 }, { "epoch": 0.5379468100490358, "grad_norm": 3.06969161503822, "learning_rate": 2.240837580295329e-06, "loss": 1.0048, "step": 5115 }, { "epoch": 0.538051980490883, "grad_norm": 2.435849474370094, "learning_rate": 2.2400072730627556e-06, "loss": 0.9674, "step": 5116 }, { "epoch": 0.5381571509327303, "grad_norm": 2.5677719895294433, "learning_rate": 2.239176994821391e-06, "loss": 0.9729, "step": 5117 }, { "epoch": 0.5382623213745776, "grad_norm": 2.258970213664192, "learning_rate": 2.2383467456638175e-06, "loss": 0.9593, "step": 5118 }, { "epoch": 0.538367491816425, "grad_norm": 2.4559982555422057, "learning_rate": 2.237516525682614e-06, "loss": 1.0028, "step": 5119 }, { "epoch": 0.5384726622582723, "grad_norm": 3.0661627782794625, "learning_rate": 2.2366863349703574e-06, "loss": 0.9849, "step": 5120 }, { "epoch": 0.5385778327001196, "grad_norm": 2.208903743452161, "learning_rate": 2.23585617361962e-06, "loss": 0.9887, "step": 5121 }, { "epoch": 0.5386830031419669, "grad_norm": 2.5131994229325, "learning_rate": 2.2350260417229715e-06, "loss": 1.0212, "step": 5122 }, { "epoch": 0.5387881735838143, "grad_norm": 2.513688036331137, "learning_rate": 2.234195939372977e-06, "loss": 1.011, "step": 5123 }, { "epoch": 0.5388933440256616, "grad_norm": 1.9065741666317384, "learning_rate": 2.2333658666621995e-06, "loss": 0.9949, "step": 5124 }, { "epoch": 0.5389985144675089, "grad_norm": 2.2999327606370494, "learning_rate": 2.2325358236832e-06, "loss": 0.9497, "step": 5125 }, { "epoch": 0.5391036849093562, "grad_norm": 3.0371065795572543, "learning_rate": 2.231705810528534e-06, "loss": 1.0135, "step": 5126 }, { "epoch": 0.5392088553512036, "grad_norm": 3.086589128157086, "learning_rate": 2.230875827290755e-06, "loss": 1.0293, "step": 5127 }, { "epoch": 0.5393140257930509, "grad_norm": 2.6399411648602977, "learning_rate": 2.2300458740624102e-06, "loss": 0.9888, "step": 5128 }, { "epoch": 0.5394191962348982, "grad_norm": 1.58725667452964, "learning_rate": 2.2292159509360487e-06, "loss": 0.9702, "step": 5129 }, { "epoch": 0.5395243666767455, "grad_norm": 2.7241128959636027, "learning_rate": 2.228386058004212e-06, "loss": 1.0053, "step": 5130 }, { "epoch": 0.5396295371185929, "grad_norm": 1.7958176439133673, "learning_rate": 2.22755619535944e-06, "loss": 0.9931, "step": 5131 }, { "epoch": 0.5397347075604402, "grad_norm": 2.215221909482859, "learning_rate": 2.2267263630942682e-06, "loss": 1.0076, "step": 5132 }, { "epoch": 0.5398398780022875, "grad_norm": 2.3342570809884267, "learning_rate": 2.2258965613012293e-06, "loss": 1.0169, "step": 5133 }, { "epoch": 0.5399450484441348, "grad_norm": 2.2114565815704097, "learning_rate": 2.2250667900728543e-06, "loss": 1.0046, "step": 5134 }, { "epoch": 0.5400502188859821, "grad_norm": 2.32994956342613, "learning_rate": 2.224237049501668e-06, "loss": 1.0162, "step": 5135 }, { "epoch": 0.5401553893278294, "grad_norm": 2.2005296173151296, "learning_rate": 2.223407339680192e-06, "loss": 1.0203, "step": 5136 }, { "epoch": 0.5402605597696767, "grad_norm": 2.71416231138807, "learning_rate": 2.222577660700947e-06, "loss": 0.9696, "step": 5137 }, { "epoch": 0.540365730211524, "grad_norm": 2.3021954271008283, "learning_rate": 2.2217480126564462e-06, "loss": 1.0134, "step": 5138 }, { "epoch": 0.5404709006533713, "grad_norm": 2.069021577288529, "learning_rate": 2.220918395639205e-06, "loss": 0.9861, "step": 5139 }, { "epoch": 0.5405760710952187, "grad_norm": 2.113347825343487, "learning_rate": 2.2200888097417308e-06, "loss": 0.9842, "step": 5140 }, { "epoch": 0.540681241537066, "grad_norm": 2.862415076458137, "learning_rate": 2.219259255056528e-06, "loss": 0.9512, "step": 5141 }, { "epoch": 0.5407864119789133, "grad_norm": 2.494202998333855, "learning_rate": 2.2184297316760998e-06, "loss": 0.9957, "step": 5142 }, { "epoch": 0.5408915824207606, "grad_norm": 2.5037856881206646, "learning_rate": 2.2176002396929435e-06, "loss": 1.0187, "step": 5143 }, { "epoch": 0.540996752862608, "grad_norm": 2.300224957499141, "learning_rate": 2.2167707791995547e-06, "loss": 0.9993, "step": 5144 }, { "epoch": 0.5411019233044553, "grad_norm": 1.7557662844334694, "learning_rate": 2.2159413502884237e-06, "loss": 0.9592, "step": 5145 }, { "epoch": 0.5412070937463026, "grad_norm": 2.5946831278066744, "learning_rate": 2.2151119530520394e-06, "loss": 1.0363, "step": 5146 }, { "epoch": 0.5413122641881499, "grad_norm": 2.6143394641387623, "learning_rate": 2.2142825875828838e-06, "loss": 0.9531, "step": 5147 }, { "epoch": 0.5414174346299973, "grad_norm": 2.381679279813613, "learning_rate": 2.2134532539734406e-06, "loss": 1.0057, "step": 5148 }, { "epoch": 0.5415226050718446, "grad_norm": 1.8202176335628701, "learning_rate": 2.2126239523161854e-06, "loss": 1.0132, "step": 5149 }, { "epoch": 0.5416277755136919, "grad_norm": 2.30712857841413, "learning_rate": 2.2117946827035927e-06, "loss": 0.9888, "step": 5150 }, { "epoch": 0.5417329459555392, "grad_norm": 2.5917572577099084, "learning_rate": 2.2109654452281297e-06, "loss": 0.9757, "step": 5151 }, { "epoch": 0.5418381163973865, "grad_norm": 1.9879341436149915, "learning_rate": 2.210136239982266e-06, "loss": 1.0149, "step": 5152 }, { "epoch": 0.5419432868392339, "grad_norm": 2.784187511162275, "learning_rate": 2.2093070670584636e-06, "loss": 0.9713, "step": 5153 }, { "epoch": 0.5420484572810812, "grad_norm": 2.8215281669994905, "learning_rate": 2.2084779265491817e-06, "loss": 0.996, "step": 5154 }, { "epoch": 0.5421536277229285, "grad_norm": 2.659872718452111, "learning_rate": 2.207648818546875e-06, "loss": 0.9979, "step": 5155 }, { "epoch": 0.5422587981647757, "grad_norm": 2.4973238896568617, "learning_rate": 2.206819743143996e-06, "loss": 1.0018, "step": 5156 }, { "epoch": 0.5423639686066231, "grad_norm": 3.3899471789949573, "learning_rate": 2.2059907004329934e-06, "loss": 1.0076, "step": 5157 }, { "epoch": 0.5424691390484704, "grad_norm": 3.008953501860152, "learning_rate": 2.2051616905063112e-06, "loss": 0.9814, "step": 5158 }, { "epoch": 0.5425743094903177, "grad_norm": 2.4382683163743804, "learning_rate": 2.2043327134563917e-06, "loss": 0.9694, "step": 5159 }, { "epoch": 0.542679479932165, "grad_norm": 2.4112838713428606, "learning_rate": 2.203503769375671e-06, "loss": 0.9944, "step": 5160 }, { "epoch": 0.5427846503740124, "grad_norm": 2.3975472141330703, "learning_rate": 2.2026748583565824e-06, "loss": 0.9987, "step": 5161 }, { "epoch": 0.5428898208158597, "grad_norm": 2.4732650547216, "learning_rate": 2.201845980491558e-06, "loss": 1.0106, "step": 5162 }, { "epoch": 0.542994991257707, "grad_norm": 1.9440375264246743, "learning_rate": 2.2010171358730227e-06, "loss": 0.9711, "step": 5163 }, { "epoch": 0.5431001616995543, "grad_norm": 2.950910241043643, "learning_rate": 2.2001883245933992e-06, "loss": 1.0359, "step": 5164 }, { "epoch": 0.5432053321414017, "grad_norm": 1.7374181043854764, "learning_rate": 2.199359546745106e-06, "loss": 0.9894, "step": 5165 }, { "epoch": 0.543310502583249, "grad_norm": 1.919204253825503, "learning_rate": 2.19853080242056e-06, "loss": 0.9888, "step": 5166 }, { "epoch": 0.5434156730250963, "grad_norm": 2.194559285983916, "learning_rate": 2.1977020917121707e-06, "loss": 0.9831, "step": 5167 }, { "epoch": 0.5435208434669436, "grad_norm": 2.616426607003015, "learning_rate": 2.1968734147123467e-06, "loss": 1.0058, "step": 5168 }, { "epoch": 0.543626013908791, "grad_norm": 2.9485745152973513, "learning_rate": 2.196044771513492e-06, "loss": 1.0286, "step": 5169 }, { "epoch": 0.5437311843506383, "grad_norm": 2.3426077436696096, "learning_rate": 2.195216162208005e-06, "loss": 0.9494, "step": 5170 }, { "epoch": 0.5438363547924856, "grad_norm": 2.350276659584285, "learning_rate": 2.1943875868882853e-06, "loss": 1.0144, "step": 5171 }, { "epoch": 0.5439415252343329, "grad_norm": 1.9725474379833872, "learning_rate": 2.1935590456467232e-06, "loss": 0.9755, "step": 5172 }, { "epoch": 0.5440466956761802, "grad_norm": 2.4143702510240055, "learning_rate": 2.192730538575708e-06, "loss": 0.9701, "step": 5173 }, { "epoch": 0.5441518661180276, "grad_norm": 2.1110930182865064, "learning_rate": 2.191902065767624e-06, "loss": 1.0355, "step": 5174 }, { "epoch": 0.5442570365598749, "grad_norm": 4.361748721313425, "learning_rate": 2.191073627314854e-06, "loss": 0.9864, "step": 5175 }, { "epoch": 0.5443622070017222, "grad_norm": 2.501366700470722, "learning_rate": 2.1902452233097736e-06, "loss": 0.9758, "step": 5176 }, { "epoch": 0.5444673774435694, "grad_norm": 2.235469201389308, "learning_rate": 2.1894168538447576e-06, "loss": 1.0104, "step": 5177 }, { "epoch": 0.5445725478854168, "grad_norm": 2.6134379475508367, "learning_rate": 2.1885885190121753e-06, "loss": 0.9644, "step": 5178 }, { "epoch": 0.5446777183272641, "grad_norm": 2.6083073446693876, "learning_rate": 2.187760218904392e-06, "loss": 0.9883, "step": 5179 }, { "epoch": 0.5447828887691114, "grad_norm": 2.888337664510672, "learning_rate": 2.1869319536137693e-06, "loss": 0.9995, "step": 5180 }, { "epoch": 0.5448880592109587, "grad_norm": 2.1326399057874914, "learning_rate": 2.1861037232326666e-06, "loss": 0.9857, "step": 5181 }, { "epoch": 0.544993229652806, "grad_norm": 2.545298533806266, "learning_rate": 2.1852755278534373e-06, "loss": 0.9809, "step": 5182 }, { "epoch": 0.5450984000946534, "grad_norm": 2.0108239660027656, "learning_rate": 2.18444736756843e-06, "loss": 1.0229, "step": 5183 }, { "epoch": 0.5452035705365007, "grad_norm": 2.6337533910173057, "learning_rate": 2.183619242469994e-06, "loss": 0.9919, "step": 5184 }, { "epoch": 0.545308740978348, "grad_norm": 2.5146353222564235, "learning_rate": 2.1827911526504702e-06, "loss": 0.9946, "step": 5185 }, { "epoch": 0.5454139114201954, "grad_norm": 2.613024635280223, "learning_rate": 2.1819630982021967e-06, "loss": 0.9718, "step": 5186 }, { "epoch": 0.5455190818620427, "grad_norm": 2.26349598516355, "learning_rate": 2.1811350792175084e-06, "loss": 1.0053, "step": 5187 }, { "epoch": 0.54562425230389, "grad_norm": 2.3470184717219422, "learning_rate": 2.1803070957887348e-06, "loss": 0.9996, "step": 5188 }, { "epoch": 0.5457294227457373, "grad_norm": 2.476693756773956, "learning_rate": 2.1794791480082046e-06, "loss": 1.0368, "step": 5189 }, { "epoch": 0.5458345931875846, "grad_norm": 2.836739750714608, "learning_rate": 2.1786512359682394e-06, "loss": 0.9856, "step": 5190 }, { "epoch": 0.545939763629432, "grad_norm": 2.120438515765004, "learning_rate": 2.1778233597611576e-06, "loss": 0.9753, "step": 5191 }, { "epoch": 0.5460449340712793, "grad_norm": 2.085336690843402, "learning_rate": 2.1769955194792737e-06, "loss": 1.0106, "step": 5192 }, { "epoch": 0.5461501045131266, "grad_norm": 2.388620073312146, "learning_rate": 2.176167715214898e-06, "loss": 0.9767, "step": 5193 }, { "epoch": 0.546255274954974, "grad_norm": 2.71199718772841, "learning_rate": 2.1753399470603387e-06, "loss": 1.0116, "step": 5194 }, { "epoch": 0.5463604453968213, "grad_norm": 2.6031851920934397, "learning_rate": 2.174512215107897e-06, "loss": 0.997, "step": 5195 }, { "epoch": 0.5464656158386686, "grad_norm": 2.5270157415798815, "learning_rate": 2.173684519449872e-06, "loss": 1.0268, "step": 5196 }, { "epoch": 0.5465707862805158, "grad_norm": 3.0251465048345607, "learning_rate": 2.1728568601785564e-06, "loss": 1.0076, "step": 5197 }, { "epoch": 0.5466759567223631, "grad_norm": 3.213521672648237, "learning_rate": 2.172029237386244e-06, "loss": 1.0297, "step": 5198 }, { "epoch": 0.5467811271642105, "grad_norm": 2.4599343033374774, "learning_rate": 2.1712016511652187e-06, "loss": 1.0106, "step": 5199 }, { "epoch": 0.5468862976060578, "grad_norm": 2.744190371006768, "learning_rate": 2.170374101607764e-06, "loss": 1.0298, "step": 5200 }, { "epoch": 0.5469914680479051, "grad_norm": 2.49369766763488, "learning_rate": 2.169546588806158e-06, "loss": 1.0502, "step": 5201 }, { "epoch": 0.5470966384897524, "grad_norm": 2.381587412730638, "learning_rate": 2.168719112852673e-06, "loss": 0.9862, "step": 5202 }, { "epoch": 0.5472018089315998, "grad_norm": 3.9803983911874736, "learning_rate": 2.167891673839581e-06, "loss": 1.0081, "step": 5203 }, { "epoch": 0.5473069793734471, "grad_norm": 2.8513338869013287, "learning_rate": 2.1670642718591477e-06, "loss": 0.9999, "step": 5204 }, { "epoch": 0.5474121498152944, "grad_norm": 2.087249392253148, "learning_rate": 2.166236907003634e-06, "loss": 0.9908, "step": 5205 }, { "epoch": 0.5475173202571417, "grad_norm": 2.1537694107930956, "learning_rate": 2.1654095793652975e-06, "loss": 0.9787, "step": 5206 }, { "epoch": 0.547622490698989, "grad_norm": 2.398002826051045, "learning_rate": 2.1645822890363928e-06, "loss": 1.0128, "step": 5207 }, { "epoch": 0.5477276611408364, "grad_norm": 1.8014888313043216, "learning_rate": 2.1637550361091685e-06, "loss": 0.9712, "step": 5208 }, { "epoch": 0.5478328315826837, "grad_norm": 2.6606854949977783, "learning_rate": 2.16292782067587e-06, "loss": 1.0116, "step": 5209 }, { "epoch": 0.547938002024531, "grad_norm": 2.688100293619295, "learning_rate": 2.162100642828737e-06, "loss": 1.0044, "step": 5210 }, { "epoch": 0.5480431724663783, "grad_norm": 2.4430646701220544, "learning_rate": 2.161273502660007e-06, "loss": 1.0112, "step": 5211 }, { "epoch": 0.5481483429082257, "grad_norm": 2.0241794197127927, "learning_rate": 2.1604464002619135e-06, "loss": 1.0267, "step": 5212 }, { "epoch": 0.548253513350073, "grad_norm": 3.178008506690804, "learning_rate": 2.1596193357266844e-06, "loss": 0.9995, "step": 5213 }, { "epoch": 0.5483586837919203, "grad_norm": 2.161378205518831, "learning_rate": 2.1587923091465434e-06, "loss": 0.976, "step": 5214 }, { "epoch": 0.5484638542337676, "grad_norm": 2.2458856326219885, "learning_rate": 2.1579653206137104e-06, "loss": 1.0095, "step": 5215 }, { "epoch": 0.548569024675615, "grad_norm": 2.276838712981099, "learning_rate": 2.1571383702204006e-06, "loss": 0.9981, "step": 5216 }, { "epoch": 0.5486741951174622, "grad_norm": 2.5745480726564693, "learning_rate": 2.1563114580588267e-06, "loss": 1.0043, "step": 5217 }, { "epoch": 0.5487793655593095, "grad_norm": 1.8966939253835509, "learning_rate": 2.1554845842211954e-06, "loss": 1.0158, "step": 5218 }, { "epoch": 0.5488845360011568, "grad_norm": 2.0953028195766423, "learning_rate": 2.1546577487997087e-06, "loss": 0.9859, "step": 5219 }, { "epoch": 0.5489897064430042, "grad_norm": 2.110354152880763, "learning_rate": 2.1538309518865646e-06, "loss": 0.9917, "step": 5220 }, { "epoch": 0.5490948768848515, "grad_norm": 2.3921433816101945, "learning_rate": 2.1530041935739604e-06, "loss": 0.9732, "step": 5221 }, { "epoch": 0.5492000473266988, "grad_norm": 1.5432785190992437, "learning_rate": 2.1521774739540833e-06, "loss": 0.9604, "step": 5222 }, { "epoch": 0.5493052177685461, "grad_norm": 2.6627689190811914, "learning_rate": 2.1513507931191203e-06, "loss": 1.0003, "step": 5223 }, { "epoch": 0.5494103882103935, "grad_norm": 2.4129431035360427, "learning_rate": 2.1505241511612522e-06, "loss": 0.9286, "step": 5224 }, { "epoch": 0.5495155586522408, "grad_norm": 2.9528788317691554, "learning_rate": 2.149697548172655e-06, "loss": 0.9514, "step": 5225 }, { "epoch": 0.5496207290940881, "grad_norm": 2.2730624529958146, "learning_rate": 2.1488709842455033e-06, "loss": 0.9983, "step": 5226 }, { "epoch": 0.5497258995359354, "grad_norm": 3.166867500015208, "learning_rate": 2.1480444594719647e-06, "loss": 1.0307, "step": 5227 }, { "epoch": 0.5498310699777827, "grad_norm": 2.4113920354615916, "learning_rate": 2.1472179739442027e-06, "loss": 0.9667, "step": 5228 }, { "epoch": 0.5499362404196301, "grad_norm": 2.518675037782677, "learning_rate": 2.1463915277543766e-06, "loss": 1.0238, "step": 5229 }, { "epoch": 0.5500414108614774, "grad_norm": 2.6250890442322454, "learning_rate": 2.1455651209946428e-06, "loss": 0.9742, "step": 5230 }, { "epoch": 0.5501465813033247, "grad_norm": 2.02618334080473, "learning_rate": 2.144738753757151e-06, "loss": 0.9678, "step": 5231 }, { "epoch": 0.550251751745172, "grad_norm": 2.667267963473209, "learning_rate": 2.1439124261340484e-06, "loss": 0.9861, "step": 5232 }, { "epoch": 0.5503569221870194, "grad_norm": 2.1122311481039597, "learning_rate": 2.1430861382174763e-06, "loss": 0.9488, "step": 5233 }, { "epoch": 0.5504620926288667, "grad_norm": 1.7454212223945111, "learning_rate": 2.1422598900995715e-06, "loss": 0.9995, "step": 5234 }, { "epoch": 0.550567263070714, "grad_norm": 2.5491840982703975, "learning_rate": 2.1414336818724685e-06, "loss": 0.9631, "step": 5235 }, { "epoch": 0.5506724335125613, "grad_norm": 2.0258659355875044, "learning_rate": 2.140607513628296e-06, "loss": 0.9681, "step": 5236 }, { "epoch": 0.5507776039544087, "grad_norm": 3.187827818577956, "learning_rate": 2.1397813854591778e-06, "loss": 0.962, "step": 5237 }, { "epoch": 0.5508827743962559, "grad_norm": 2.1053841231191734, "learning_rate": 2.138955297457233e-06, "loss": 0.9881, "step": 5238 }, { "epoch": 0.5509879448381032, "grad_norm": 2.0350361564304102, "learning_rate": 2.138129249714576e-06, "loss": 0.978, "step": 5239 }, { "epoch": 0.5510931152799505, "grad_norm": 2.624945121824399, "learning_rate": 2.13730324232332e-06, "loss": 0.9699, "step": 5240 }, { "epoch": 0.5511982857217979, "grad_norm": 3.242345363363925, "learning_rate": 2.13647727537557e-06, "loss": 0.9856, "step": 5241 }, { "epoch": 0.5513034561636452, "grad_norm": 2.8837592921549278, "learning_rate": 2.135651348963428e-06, "loss": 1.0201, "step": 5242 }, { "epoch": 0.5514086266054925, "grad_norm": 2.3013474493107795, "learning_rate": 2.1348254631789895e-06, "loss": 0.9774, "step": 5243 }, { "epoch": 0.5515137970473398, "grad_norm": 2.113009140432173, "learning_rate": 2.13399961811435e-06, "loss": 0.976, "step": 5244 }, { "epoch": 0.5516189674891872, "grad_norm": 2.6604980643288236, "learning_rate": 2.133173813861596e-06, "loss": 1.0091, "step": 5245 }, { "epoch": 0.5517241379310345, "grad_norm": 3.1134737984113303, "learning_rate": 2.1323480505128115e-06, "loss": 0.9967, "step": 5246 }, { "epoch": 0.5518293083728818, "grad_norm": 2.167403323911424, "learning_rate": 2.131522328160075e-06, "loss": 0.9711, "step": 5247 }, { "epoch": 0.5519344788147291, "grad_norm": 2.3379019799741236, "learning_rate": 2.130696646895461e-06, "loss": 0.9972, "step": 5248 }, { "epoch": 0.5520396492565764, "grad_norm": 2.059963339031156, "learning_rate": 2.12987100681104e-06, "loss": 0.9488, "step": 5249 }, { "epoch": 0.5521448196984238, "grad_norm": 2.4196742713351194, "learning_rate": 2.1290454079988775e-06, "loss": 1.0105, "step": 5250 }, { "epoch": 0.5522499901402711, "grad_norm": 2.462036105246982, "learning_rate": 2.128219850551034e-06, "loss": 0.9945, "step": 5251 }, { "epoch": 0.5523551605821184, "grad_norm": 1.8416505320697634, "learning_rate": 2.1273943345595637e-06, "loss": 1.0043, "step": 5252 }, { "epoch": 0.5524603310239657, "grad_norm": 3.251515812829299, "learning_rate": 2.1265688601165206e-06, "loss": 1.0242, "step": 5253 }, { "epoch": 0.5525655014658131, "grad_norm": 2.6527329475849313, "learning_rate": 2.125743427313951e-06, "loss": 1.0045, "step": 5254 }, { "epoch": 0.5526706719076604, "grad_norm": 1.9950343766089205, "learning_rate": 2.124918036243896e-06, "loss": 0.9664, "step": 5255 }, { "epoch": 0.5527758423495077, "grad_norm": 2.2943708516221455, "learning_rate": 2.124092686998394e-06, "loss": 1.0111, "step": 5256 }, { "epoch": 0.552881012791355, "grad_norm": 1.6382639178089367, "learning_rate": 2.123267379669477e-06, "loss": 1.0042, "step": 5257 }, { "epoch": 0.5529861832332023, "grad_norm": 2.9901639984780255, "learning_rate": 2.122442114349174e-06, "loss": 1.0122, "step": 5258 }, { "epoch": 0.5530913536750496, "grad_norm": 2.1998167103396287, "learning_rate": 2.1216168911295085e-06, "loss": 1.0353, "step": 5259 }, { "epoch": 0.5531965241168969, "grad_norm": 2.9897721880211736, "learning_rate": 2.120791710102499e-06, "loss": 1.0083, "step": 5260 }, { "epoch": 0.5533016945587442, "grad_norm": 2.054680773983714, "learning_rate": 2.1199665713601593e-06, "loss": 1.0036, "step": 5261 }, { "epoch": 0.5534068650005916, "grad_norm": 2.617746462194278, "learning_rate": 2.1191414749944985e-06, "loss": 0.9665, "step": 5262 }, { "epoch": 0.5535120354424389, "grad_norm": 1.9936196114564972, "learning_rate": 2.1183164210975226e-06, "loss": 1.0104, "step": 5263 }, { "epoch": 0.5536172058842862, "grad_norm": 2.469231832845493, "learning_rate": 2.1174914097612308e-06, "loss": 0.994, "step": 5264 }, { "epoch": 0.5537223763261335, "grad_norm": 2.93259002955845, "learning_rate": 2.1166664410776184e-06, "loss": 1.0127, "step": 5265 }, { "epoch": 0.5538275467679808, "grad_norm": 2.0537797985124326, "learning_rate": 2.1158415151386746e-06, "loss": 0.9471, "step": 5266 }, { "epoch": 0.5539327172098282, "grad_norm": 2.134532328734205, "learning_rate": 2.115016632036387e-06, "loss": 0.9859, "step": 5267 }, { "epoch": 0.5540378876516755, "grad_norm": 2.2964327708883134, "learning_rate": 2.1141917918627357e-06, "loss": 1.0047, "step": 5268 }, { "epoch": 0.5541430580935228, "grad_norm": 2.8516902511053535, "learning_rate": 2.113366994709697e-06, "loss": 1.0079, "step": 5269 }, { "epoch": 0.5542482285353701, "grad_norm": 2.3546571378436743, "learning_rate": 2.1125422406692416e-06, "loss": 0.9691, "step": 5270 }, { "epoch": 0.5543533989772175, "grad_norm": 2.630944240951188, "learning_rate": 2.1117175298333347e-06, "loss": 0.983, "step": 5271 }, { "epoch": 0.5544585694190648, "grad_norm": 1.8823762990892248, "learning_rate": 2.1108928622939413e-06, "loss": 0.9626, "step": 5272 }, { "epoch": 0.5545637398609121, "grad_norm": 2.355887099553958, "learning_rate": 2.110068238143016e-06, "loss": 0.9441, "step": 5273 }, { "epoch": 0.5546689103027594, "grad_norm": 2.4481500085464205, "learning_rate": 2.1092436574725113e-06, "loss": 1.0182, "step": 5274 }, { "epoch": 0.5547740807446068, "grad_norm": 2.088588569858711, "learning_rate": 2.1084191203743732e-06, "loss": 0.9869, "step": 5275 }, { "epoch": 0.5548792511864541, "grad_norm": 2.935034768068429, "learning_rate": 2.1075946269405464e-06, "loss": 1.0415, "step": 5276 }, { "epoch": 0.5549844216283014, "grad_norm": 2.511909936791334, "learning_rate": 2.106770177262967e-06, "loss": 1.0, "step": 5277 }, { "epoch": 0.5550895920701486, "grad_norm": 2.489430938185479, "learning_rate": 2.105945771433567e-06, "loss": 1.013, "step": 5278 }, { "epoch": 0.555194762511996, "grad_norm": 2.8817643395461388, "learning_rate": 2.105121409544275e-06, "loss": 1.002, "step": 5279 }, { "epoch": 0.5552999329538433, "grad_norm": 2.9876800647989525, "learning_rate": 2.104297091687013e-06, "loss": 0.9837, "step": 5280 }, { "epoch": 0.5554051033956906, "grad_norm": 2.521676022126096, "learning_rate": 2.1034728179536996e-06, "loss": 0.9923, "step": 5281 }, { "epoch": 0.5555102738375379, "grad_norm": 2.3179342164652317, "learning_rate": 2.102648588436247e-06, "loss": 1.0151, "step": 5282 }, { "epoch": 0.5556154442793853, "grad_norm": 2.328687099694681, "learning_rate": 2.101824403226564e-06, "loss": 0.9904, "step": 5283 }, { "epoch": 0.5557206147212326, "grad_norm": 2.4299365708893346, "learning_rate": 2.1010002624165528e-06, "loss": 1.0372, "step": 5284 }, { "epoch": 0.5558257851630799, "grad_norm": 1.9606388049139083, "learning_rate": 2.100176166098111e-06, "loss": 0.9814, "step": 5285 }, { "epoch": 0.5559309556049272, "grad_norm": 1.6767864003425093, "learning_rate": 2.0993521143631335e-06, "loss": 0.9458, "step": 5286 }, { "epoch": 0.5560361260467745, "grad_norm": 2.1286537540888175, "learning_rate": 2.098528107303508e-06, "loss": 0.962, "step": 5287 }, { "epoch": 0.5561412964886219, "grad_norm": 2.5220581659039674, "learning_rate": 2.0977041450111173e-06, "loss": 1.0144, "step": 5288 }, { "epoch": 0.5562464669304692, "grad_norm": 2.168639158568607, "learning_rate": 2.0968802275778384e-06, "loss": 1.0465, "step": 5289 }, { "epoch": 0.5563516373723165, "grad_norm": 2.760268469404148, "learning_rate": 2.0960563550955465e-06, "loss": 0.9649, "step": 5290 }, { "epoch": 0.5564568078141638, "grad_norm": 2.1553647272545158, "learning_rate": 2.095232527656109e-06, "loss": 0.999, "step": 5291 }, { "epoch": 0.5565619782560112, "grad_norm": 1.619929018860887, "learning_rate": 2.0944087453513887e-06, "loss": 0.977, "step": 5292 }, { "epoch": 0.5566671486978585, "grad_norm": 3.121330671866382, "learning_rate": 2.0935850082732444e-06, "loss": 1.0139, "step": 5293 }, { "epoch": 0.5567723191397058, "grad_norm": 2.3387671861808363, "learning_rate": 2.0927613165135285e-06, "loss": 1.0007, "step": 5294 }, { "epoch": 0.5568774895815531, "grad_norm": 2.200825022390463, "learning_rate": 2.0919376701640897e-06, "loss": 0.9796, "step": 5295 }, { "epoch": 0.5569826600234005, "grad_norm": 2.3884861298168962, "learning_rate": 2.0911140693167703e-06, "loss": 0.9875, "step": 5296 }, { "epoch": 0.5570878304652478, "grad_norm": 2.5987592283460597, "learning_rate": 2.0902905140634087e-06, "loss": 1.0331, "step": 5297 }, { "epoch": 0.5571930009070951, "grad_norm": 3.301654294448108, "learning_rate": 2.0894670044958364e-06, "loss": 1.0113, "step": 5298 }, { "epoch": 0.5572981713489423, "grad_norm": 2.711878855343128, "learning_rate": 2.0886435407058836e-06, "loss": 1.0027, "step": 5299 }, { "epoch": 0.5574033417907897, "grad_norm": 2.8480009398658406, "learning_rate": 2.087820122785371e-06, "loss": 0.974, "step": 5300 }, { "epoch": 0.557508512232637, "grad_norm": 2.3312290115794108, "learning_rate": 2.0869967508261175e-06, "loss": 0.9928, "step": 5301 }, { "epoch": 0.5576136826744843, "grad_norm": 1.936637576330423, "learning_rate": 2.086173424919934e-06, "loss": 1.0031, "step": 5302 }, { "epoch": 0.5577188531163316, "grad_norm": 1.902158601451205, "learning_rate": 2.085350145158628e-06, "loss": 1.0094, "step": 5303 }, { "epoch": 0.557824023558179, "grad_norm": 2.1117431500701827, "learning_rate": 2.084526911634002e-06, "loss": 0.9817, "step": 5304 }, { "epoch": 0.5579291940000263, "grad_norm": 2.541061650720203, "learning_rate": 2.0837037244378534e-06, "loss": 0.9666, "step": 5305 }, { "epoch": 0.5580343644418736, "grad_norm": 2.1751713964862716, "learning_rate": 2.082880583661973e-06, "loss": 0.9924, "step": 5306 }, { "epoch": 0.5581395348837209, "grad_norm": 2.0010450338415353, "learning_rate": 2.082057489398148e-06, "loss": 1.012, "step": 5307 }, { "epoch": 0.5582447053255682, "grad_norm": 2.476716525076265, "learning_rate": 2.0812344417381595e-06, "loss": 1.0308, "step": 5308 }, { "epoch": 0.5583498757674156, "grad_norm": 2.563557102937592, "learning_rate": 2.0804114407737837e-06, "loss": 0.9702, "step": 5309 }, { "epoch": 0.5584550462092629, "grad_norm": 2.7438116844536573, "learning_rate": 2.0795884865967922e-06, "loss": 1.0164, "step": 5310 }, { "epoch": 0.5585602166511102, "grad_norm": 2.8471554577442273, "learning_rate": 2.07876557929895e-06, "loss": 1.007, "step": 5311 }, { "epoch": 0.5586653870929575, "grad_norm": 2.018132015140999, "learning_rate": 2.077942718972017e-06, "loss": 1.0139, "step": 5312 }, { "epoch": 0.5587705575348049, "grad_norm": 2.125285679282089, "learning_rate": 2.0771199057077507e-06, "loss": 1.028, "step": 5313 }, { "epoch": 0.5588757279766522, "grad_norm": 2.7625535043659775, "learning_rate": 2.0762971395978996e-06, "loss": 1.0062, "step": 5314 }, { "epoch": 0.5589808984184995, "grad_norm": 2.1017174794213997, "learning_rate": 2.0754744207342097e-06, "loss": 1.0082, "step": 5315 }, { "epoch": 0.5590860688603468, "grad_norm": 1.782677163365059, "learning_rate": 2.074651749208419e-06, "loss": 0.9765, "step": 5316 }, { "epoch": 0.5591912393021942, "grad_norm": 3.167958590956396, "learning_rate": 2.0738291251122624e-06, "loss": 0.9923, "step": 5317 }, { "epoch": 0.5592964097440415, "grad_norm": 1.907619393947842, "learning_rate": 2.073006548537469e-06, "loss": 1.0287, "step": 5318 }, { "epoch": 0.5594015801858887, "grad_norm": 2.7345986219575256, "learning_rate": 2.0721840195757626e-06, "loss": 0.9527, "step": 5319 }, { "epoch": 0.559506750627736, "grad_norm": 2.4886965308181384, "learning_rate": 2.0713615383188615e-06, "loss": 1.0136, "step": 5320 }, { "epoch": 0.5596119210695834, "grad_norm": 2.6055536744435512, "learning_rate": 2.0705391048584775e-06, "loss": 0.9965, "step": 5321 }, { "epoch": 0.5597170915114307, "grad_norm": 2.087005530742932, "learning_rate": 2.0697167192863205e-06, "loss": 1.0241, "step": 5322 }, { "epoch": 0.559822261953278, "grad_norm": 2.5369112749685554, "learning_rate": 2.0688943816940927e-06, "loss": 1.0075, "step": 5323 }, { "epoch": 0.5599274323951253, "grad_norm": 2.7221541004536953, "learning_rate": 2.0680720921734894e-06, "loss": 0.9315, "step": 5324 }, { "epoch": 0.5600326028369726, "grad_norm": 2.022091119318397, "learning_rate": 2.067249850816203e-06, "loss": 0.9864, "step": 5325 }, { "epoch": 0.56013777327882, "grad_norm": 2.3688556004256567, "learning_rate": 2.0664276577139193e-06, "loss": 1.0195, "step": 5326 }, { "epoch": 0.5602429437206673, "grad_norm": 2.4773968134541096, "learning_rate": 2.065605512958321e-06, "loss": 1.0232, "step": 5327 }, { "epoch": 0.5603481141625146, "grad_norm": 2.462817643536204, "learning_rate": 2.0647834166410825e-06, "loss": 0.9874, "step": 5328 }, { "epoch": 0.560453284604362, "grad_norm": 2.305518728419192, "learning_rate": 2.0639613688538733e-06, "loss": 0.9921, "step": 5329 }, { "epoch": 0.5605584550462093, "grad_norm": 2.6367842405171005, "learning_rate": 2.063139369688359e-06, "loss": 0.9613, "step": 5330 }, { "epoch": 0.5606636254880566, "grad_norm": 1.8818850010645942, "learning_rate": 2.062317419236199e-06, "loss": 0.9777, "step": 5331 }, { "epoch": 0.5607687959299039, "grad_norm": 2.1790160668617444, "learning_rate": 2.0614955175890464e-06, "loss": 1.0092, "step": 5332 }, { "epoch": 0.5608739663717512, "grad_norm": 2.0639573437892857, "learning_rate": 2.06067366483855e-06, "loss": 1.0138, "step": 5333 }, { "epoch": 0.5609791368135986, "grad_norm": 1.9579365333347298, "learning_rate": 2.0598518610763534e-06, "loss": 0.9952, "step": 5334 }, { "epoch": 0.5610843072554459, "grad_norm": 2.2186014741559656, "learning_rate": 2.0590301063940917e-06, "loss": 0.9515, "step": 5335 }, { "epoch": 0.5611894776972932, "grad_norm": 1.9011562436125449, "learning_rate": 2.0582084008834003e-06, "loss": 0.9342, "step": 5336 }, { "epoch": 0.5612946481391405, "grad_norm": 1.8095902323216666, "learning_rate": 2.057386744635904e-06, "loss": 1.0039, "step": 5337 }, { "epoch": 0.5613998185809879, "grad_norm": 1.8780136670652687, "learning_rate": 2.056565137743224e-06, "loss": 0.9827, "step": 5338 }, { "epoch": 0.5615049890228351, "grad_norm": 3.2992844080330874, "learning_rate": 2.055743580296976e-06, "loss": 0.9805, "step": 5339 }, { "epoch": 0.5616101594646824, "grad_norm": 3.092310843970352, "learning_rate": 2.0549220723887687e-06, "loss": 1.0094, "step": 5340 }, { "epoch": 0.5617153299065297, "grad_norm": 2.4046006250279435, "learning_rate": 2.0541006141102086e-06, "loss": 0.967, "step": 5341 }, { "epoch": 0.561820500348377, "grad_norm": 2.1278856112151336, "learning_rate": 2.0532792055528946e-06, "loss": 1.012, "step": 5342 }, { "epoch": 0.5619256707902244, "grad_norm": 3.1700362603693475, "learning_rate": 2.052457846808419e-06, "loss": 0.9602, "step": 5343 }, { "epoch": 0.5620308412320717, "grad_norm": 1.9326932220836064, "learning_rate": 2.0516365379683694e-06, "loss": 0.9952, "step": 5344 }, { "epoch": 0.562136011673919, "grad_norm": 2.7114145309970086, "learning_rate": 2.0508152791243296e-06, "loss": 0.948, "step": 5345 }, { "epoch": 0.5622411821157663, "grad_norm": 2.3203144056712706, "learning_rate": 2.0499940703678755e-06, "loss": 0.9971, "step": 5346 }, { "epoch": 0.5623463525576137, "grad_norm": 2.3494148393308154, "learning_rate": 2.049172911790578e-06, "loss": 0.9742, "step": 5347 }, { "epoch": 0.562451522999461, "grad_norm": 2.0875975574978782, "learning_rate": 2.0483518034840034e-06, "loss": 0.9948, "step": 5348 }, { "epoch": 0.5625566934413083, "grad_norm": 1.970777139152171, "learning_rate": 2.0475307455397103e-06, "loss": 0.946, "step": 5349 }, { "epoch": 0.5626618638831556, "grad_norm": 2.6489850276180555, "learning_rate": 2.0467097380492547e-06, "loss": 1.0242, "step": 5350 }, { "epoch": 0.562767034325003, "grad_norm": 2.199452020540128, "learning_rate": 2.0458887811041842e-06, "loss": 1.0094, "step": 5351 }, { "epoch": 0.5628722047668503, "grad_norm": 2.3720136627286084, "learning_rate": 2.045067874796043e-06, "loss": 0.9981, "step": 5352 }, { "epoch": 0.5629773752086976, "grad_norm": 2.5209154043777073, "learning_rate": 2.044247019216367e-06, "loss": 1.0028, "step": 5353 }, { "epoch": 0.5630825456505449, "grad_norm": 2.5610925238059186, "learning_rate": 2.0434262144566895e-06, "loss": 1.0665, "step": 5354 }, { "epoch": 0.5631877160923923, "grad_norm": 2.897831403216789, "learning_rate": 2.0426054606085356e-06, "loss": 1.0275, "step": 5355 }, { "epoch": 0.5632928865342396, "grad_norm": 2.376676302583607, "learning_rate": 2.0417847577634263e-06, "loss": 0.9664, "step": 5356 }, { "epoch": 0.5633980569760869, "grad_norm": 1.902345338626602, "learning_rate": 2.040964106012876e-06, "loss": 0.964, "step": 5357 }, { "epoch": 0.5635032274179342, "grad_norm": 2.8141159096441513, "learning_rate": 2.0401435054483925e-06, "loss": 0.959, "step": 5358 }, { "epoch": 0.5636083978597816, "grad_norm": 2.213475597622369, "learning_rate": 2.0393229561614817e-06, "loss": 0.9913, "step": 5359 }, { "epoch": 0.5637135683016288, "grad_norm": 2.8077710198008616, "learning_rate": 2.03850245824364e-06, "loss": 0.9795, "step": 5360 }, { "epoch": 0.5638187387434761, "grad_norm": 1.7009560943465891, "learning_rate": 2.037682011786359e-06, "loss": 0.9519, "step": 5361 }, { "epoch": 0.5639239091853234, "grad_norm": 1.9158342012022915, "learning_rate": 2.036861616881125e-06, "loss": 1.0132, "step": 5362 }, { "epoch": 0.5640290796271707, "grad_norm": 2.4899098257579384, "learning_rate": 2.036041273619418e-06, "loss": 0.9862, "step": 5363 }, { "epoch": 0.5641342500690181, "grad_norm": 2.8756220189009487, "learning_rate": 2.035220982092714e-06, "loss": 0.9549, "step": 5364 }, { "epoch": 0.5642394205108654, "grad_norm": 2.466341733072218, "learning_rate": 2.0344007423924807e-06, "loss": 0.9962, "step": 5365 }, { "epoch": 0.5643445909527127, "grad_norm": 2.1782294813736254, "learning_rate": 2.0335805546101817e-06, "loss": 0.9483, "step": 5366 }, { "epoch": 0.56444976139456, "grad_norm": 2.117012458223317, "learning_rate": 2.0327604188372735e-06, "loss": 1.0088, "step": 5367 }, { "epoch": 0.5645549318364074, "grad_norm": 1.9302647942163518, "learning_rate": 2.0319403351652086e-06, "loss": 0.9985, "step": 5368 }, { "epoch": 0.5646601022782547, "grad_norm": 2.2737930556012915, "learning_rate": 2.0311203036854326e-06, "loss": 0.9287, "step": 5369 }, { "epoch": 0.564765272720102, "grad_norm": 1.6722842954733659, "learning_rate": 2.0303003244893853e-06, "loss": 1.0125, "step": 5370 }, { "epoch": 0.5648704431619493, "grad_norm": 2.9762602394882434, "learning_rate": 2.0294803976685006e-06, "loss": 0.9461, "step": 5371 }, { "epoch": 0.5649756136037967, "grad_norm": 2.979884850333689, "learning_rate": 2.028660523314205e-06, "loss": 1.0165, "step": 5372 }, { "epoch": 0.565080784045644, "grad_norm": 2.4718005740112305, "learning_rate": 2.0278407015179243e-06, "loss": 0.9953, "step": 5373 }, { "epoch": 0.5651859544874913, "grad_norm": 2.038230117240658, "learning_rate": 2.027020932371073e-06, "loss": 0.9773, "step": 5374 }, { "epoch": 0.5652911249293386, "grad_norm": 2.3536455408799988, "learning_rate": 2.0262012159650624e-06, "loss": 0.9947, "step": 5375 }, { "epoch": 0.565396295371186, "grad_norm": 2.4699699581202252, "learning_rate": 2.0253815523912955e-06, "loss": 1.0202, "step": 5376 }, { "epoch": 0.5655014658130333, "grad_norm": 2.7568128217452608, "learning_rate": 2.024561941741173e-06, "loss": 0.9816, "step": 5377 }, { "epoch": 0.5656066362548806, "grad_norm": 2.5880748676931025, "learning_rate": 2.0237423841060877e-06, "loss": 0.9817, "step": 5378 }, { "epoch": 0.5657118066967279, "grad_norm": 2.3807827153101138, "learning_rate": 2.0229228795774264e-06, "loss": 0.9829, "step": 5379 }, { "epoch": 0.5658169771385752, "grad_norm": 2.419880425354416, "learning_rate": 2.02210342824657e-06, "loss": 0.9412, "step": 5380 }, { "epoch": 0.5659221475804225, "grad_norm": 3.206944366362426, "learning_rate": 2.021284030204893e-06, "loss": 1.0397, "step": 5381 }, { "epoch": 0.5660273180222698, "grad_norm": 2.48547978811849, "learning_rate": 2.020464685543766e-06, "loss": 0.971, "step": 5382 }, { "epoch": 0.5661324884641171, "grad_norm": 2.6366008661640525, "learning_rate": 2.0196453943545517e-06, "loss": 1.0017, "step": 5383 }, { "epoch": 0.5662376589059644, "grad_norm": 2.6842165173464867, "learning_rate": 2.0188261567286076e-06, "loss": 1.0313, "step": 5384 }, { "epoch": 0.5663428293478118, "grad_norm": 2.6982167121358684, "learning_rate": 2.018006972757285e-06, "loss": 0.9802, "step": 5385 }, { "epoch": 0.5664479997896591, "grad_norm": 2.4433736862921362, "learning_rate": 2.0171878425319283e-06, "loss": 0.962, "step": 5386 }, { "epoch": 0.5665531702315064, "grad_norm": 2.475980829378924, "learning_rate": 2.0163687661438786e-06, "loss": 0.9586, "step": 5387 }, { "epoch": 0.5666583406733537, "grad_norm": 2.6704965382072063, "learning_rate": 2.0155497436844684e-06, "loss": 1.0241, "step": 5388 }, { "epoch": 0.5667635111152011, "grad_norm": 2.6472388985915325, "learning_rate": 2.0147307752450253e-06, "loss": 0.9816, "step": 5389 }, { "epoch": 0.5668686815570484, "grad_norm": 2.2155377465887067, "learning_rate": 2.0139118609168697e-06, "loss": 0.996, "step": 5390 }, { "epoch": 0.5669738519988957, "grad_norm": 2.6569851803737827, "learning_rate": 2.0130930007913184e-06, "loss": 1.003, "step": 5391 }, { "epoch": 0.567079022440743, "grad_norm": 2.2619946690776236, "learning_rate": 2.01227419495968e-06, "loss": 1.0121, "step": 5392 }, { "epoch": 0.5671841928825904, "grad_norm": 3.0146876977488497, "learning_rate": 2.011455443513257e-06, "loss": 1.0017, "step": 5393 }, { "epoch": 0.5672893633244377, "grad_norm": 2.7358969902861303, "learning_rate": 2.010636746543348e-06, "loss": 1.0223, "step": 5394 }, { "epoch": 0.567394533766285, "grad_norm": 2.5446999109646975, "learning_rate": 2.009818104141242e-06, "loss": 0.9872, "step": 5395 }, { "epoch": 0.5674997042081323, "grad_norm": 2.5508462818540094, "learning_rate": 2.0089995163982263e-06, "loss": 1.0033, "step": 5396 }, { "epoch": 0.5676048746499797, "grad_norm": 1.9993969226847457, "learning_rate": 2.0081809834055787e-06, "loss": 0.9702, "step": 5397 }, { "epoch": 0.567710045091827, "grad_norm": 2.836401177949353, "learning_rate": 2.007362505254572e-06, "loss": 0.9877, "step": 5398 }, { "epoch": 0.5678152155336743, "grad_norm": 2.653491277012509, "learning_rate": 2.0065440820364718e-06, "loss": 0.9803, "step": 5399 }, { "epoch": 0.5679203859755215, "grad_norm": 3.4470366242642854, "learning_rate": 2.005725713842541e-06, "loss": 1.0328, "step": 5400 }, { "epoch": 0.5680255564173688, "grad_norm": 2.9653592066566667, "learning_rate": 2.0049074007640324e-06, "loss": 1.0015, "step": 5401 }, { "epoch": 0.5681307268592162, "grad_norm": 1.4100311048508116, "learning_rate": 2.004089142892195e-06, "loss": 0.9846, "step": 5402 }, { "epoch": 0.5682358973010635, "grad_norm": 2.9910996927005007, "learning_rate": 2.0032709403182705e-06, "loss": 1.0175, "step": 5403 }, { "epoch": 0.5683410677429108, "grad_norm": 1.6159196986325306, "learning_rate": 2.002452793133494e-06, "loss": 0.958, "step": 5404 }, { "epoch": 0.5684462381847581, "grad_norm": 2.343270925363438, "learning_rate": 2.001634701429097e-06, "loss": 1.0338, "step": 5405 }, { "epoch": 0.5685514086266055, "grad_norm": 2.6339615242917187, "learning_rate": 2.000816665296302e-06, "loss": 0.9807, "step": 5406 }, { "epoch": 0.5686565790684528, "grad_norm": 2.928072201278944, "learning_rate": 1.999998684826327e-06, "loss": 1.0176, "step": 5407 }, { "epoch": 0.5687617495103001, "grad_norm": 2.4139743488107976, "learning_rate": 1.9991807601103823e-06, "loss": 0.984, "step": 5408 }, { "epoch": 0.5688669199521474, "grad_norm": 2.171791427192439, "learning_rate": 1.9983628912396726e-06, "loss": 1.0245, "step": 5409 }, { "epoch": 0.5689720903939948, "grad_norm": 2.804934183732406, "learning_rate": 1.997545078305399e-06, "loss": 1.0397, "step": 5410 }, { "epoch": 0.5690772608358421, "grad_norm": 1.974490805904351, "learning_rate": 1.9967273213987515e-06, "loss": 0.9931, "step": 5411 }, { "epoch": 0.5691824312776894, "grad_norm": 2.2394117401253917, "learning_rate": 1.995909620610918e-06, "loss": 1.0064, "step": 5412 }, { "epoch": 0.5692876017195367, "grad_norm": 2.7031568108174393, "learning_rate": 1.9950919760330757e-06, "loss": 0.9608, "step": 5413 }, { "epoch": 0.5693927721613841, "grad_norm": 1.7767471050410926, "learning_rate": 1.9942743877564018e-06, "loss": 0.9177, "step": 5414 }, { "epoch": 0.5694979426032314, "grad_norm": 1.9741959110358989, "learning_rate": 1.9934568558720623e-06, "loss": 1.0155, "step": 5415 }, { "epoch": 0.5696031130450787, "grad_norm": 2.120718009384387, "learning_rate": 1.9926393804712183e-06, "loss": 0.9933, "step": 5416 }, { "epoch": 0.569708283486926, "grad_norm": 2.3893886698304554, "learning_rate": 1.9918219616450246e-06, "loss": 0.9487, "step": 5417 }, { "epoch": 0.5698134539287734, "grad_norm": 2.1203012449126217, "learning_rate": 1.9910045994846294e-06, "loss": 1.0161, "step": 5418 }, { "epoch": 0.5699186243706207, "grad_norm": 2.402604527740921, "learning_rate": 1.990187294081176e-06, "loss": 0.9735, "step": 5419 }, { "epoch": 0.570023794812468, "grad_norm": 2.3672965950739213, "learning_rate": 1.9893700455257996e-06, "loss": 0.9659, "step": 5420 }, { "epoch": 0.5701289652543152, "grad_norm": 2.210201260927024, "learning_rate": 1.98855285390963e-06, "loss": 0.9688, "step": 5421 }, { "epoch": 0.5702341356961625, "grad_norm": 2.653764999915913, "learning_rate": 1.987735719323789e-06, "loss": 0.9776, "step": 5422 }, { "epoch": 0.5703393061380099, "grad_norm": 2.5642420040701692, "learning_rate": 1.986918641859396e-06, "loss": 0.995, "step": 5423 }, { "epoch": 0.5704444765798572, "grad_norm": 2.2961949153030443, "learning_rate": 1.9861016216075596e-06, "loss": 0.9892, "step": 5424 }, { "epoch": 0.5705496470217045, "grad_norm": 2.767041920692721, "learning_rate": 1.985284658659385e-06, "loss": 1.0365, "step": 5425 }, { "epoch": 0.5706548174635518, "grad_norm": 1.648569046551329, "learning_rate": 1.9844677531059698e-06, "loss": 0.9728, "step": 5426 }, { "epoch": 0.5707599879053992, "grad_norm": 2.6969141927182005, "learning_rate": 1.9836509050384035e-06, "loss": 0.9942, "step": 5427 }, { "epoch": 0.5708651583472465, "grad_norm": 2.2808653469120084, "learning_rate": 1.982834114547773e-06, "loss": 0.9663, "step": 5428 }, { "epoch": 0.5709703287890938, "grad_norm": 2.167902550863519, "learning_rate": 1.982017381725157e-06, "loss": 0.992, "step": 5429 }, { "epoch": 0.5710754992309411, "grad_norm": 2.3499387994244323, "learning_rate": 1.981200706661626e-06, "loss": 0.9837, "step": 5430 }, { "epoch": 0.5711806696727885, "grad_norm": 2.182644078032054, "learning_rate": 1.9803840894482468e-06, "loss": 0.9578, "step": 5431 }, { "epoch": 0.5712858401146358, "grad_norm": 3.3676977139942434, "learning_rate": 1.9795675301760776e-06, "loss": 0.9728, "step": 5432 }, { "epoch": 0.5713910105564831, "grad_norm": 2.353096165552454, "learning_rate": 1.978751028936172e-06, "loss": 0.9739, "step": 5433 }, { "epoch": 0.5714961809983304, "grad_norm": 2.5627941445914373, "learning_rate": 1.9779345858195757e-06, "loss": 0.9801, "step": 5434 }, { "epoch": 0.5716013514401778, "grad_norm": 2.8224949763649367, "learning_rate": 1.977118200917329e-06, "loss": 1.0196, "step": 5435 }, { "epoch": 0.5717065218820251, "grad_norm": 2.41926342232906, "learning_rate": 1.9763018743204637e-06, "loss": 0.9623, "step": 5436 }, { "epoch": 0.5718116923238724, "grad_norm": 2.5803768185373457, "learning_rate": 1.9754856061200085e-06, "loss": 1.0174, "step": 5437 }, { "epoch": 0.5719168627657197, "grad_norm": 2.6612045523496857, "learning_rate": 1.974669396406983e-06, "loss": 0.9732, "step": 5438 }, { "epoch": 0.5720220332075671, "grad_norm": 1.9777273507957978, "learning_rate": 1.9738532452724007e-06, "loss": 0.9457, "step": 5439 }, { "epoch": 0.5721272036494144, "grad_norm": 3.0778735684461056, "learning_rate": 1.9730371528072687e-06, "loss": 1.0039, "step": 5440 }, { "epoch": 0.5722323740912616, "grad_norm": 2.7734640366863537, "learning_rate": 1.972221119102587e-06, "loss": 1.0047, "step": 5441 }, { "epoch": 0.5723375445331089, "grad_norm": 2.360250328220414, "learning_rate": 1.9714051442493516e-06, "loss": 0.9788, "step": 5442 }, { "epoch": 0.5724427149749562, "grad_norm": 2.027404275154293, "learning_rate": 1.970589228338548e-06, "loss": 0.9784, "step": 5443 }, { "epoch": 0.5725478854168036, "grad_norm": 2.3164867712708825, "learning_rate": 1.969773371461159e-06, "loss": 0.9658, "step": 5444 }, { "epoch": 0.5726530558586509, "grad_norm": 1.9144239908740546, "learning_rate": 1.9689575737081567e-06, "loss": 0.9609, "step": 5445 }, { "epoch": 0.5727582263004982, "grad_norm": 2.938174043030208, "learning_rate": 1.9681418351705116e-06, "loss": 1.0395, "step": 5446 }, { "epoch": 0.5728633967423455, "grad_norm": 2.1982822359165013, "learning_rate": 1.967326155939183e-06, "loss": 0.9507, "step": 5447 }, { "epoch": 0.5729685671841929, "grad_norm": 2.795386799438768, "learning_rate": 1.9665105361051255e-06, "loss": 0.9534, "step": 5448 }, { "epoch": 0.5730737376260402, "grad_norm": 2.748854629184378, "learning_rate": 1.965694975759288e-06, "loss": 0.99, "step": 5449 }, { "epoch": 0.5731789080678875, "grad_norm": 2.829975197200465, "learning_rate": 1.9648794749926103e-06, "loss": 1.0186, "step": 5450 }, { "epoch": 0.5732840785097348, "grad_norm": 2.7529320683500447, "learning_rate": 1.9640640338960294e-06, "loss": 0.9807, "step": 5451 }, { "epoch": 0.5733892489515822, "grad_norm": 2.558464051548766, "learning_rate": 1.9632486525604715e-06, "loss": 0.9763, "step": 5452 }, { "epoch": 0.5734944193934295, "grad_norm": 2.2378183775994893, "learning_rate": 1.962433331076859e-06, "loss": 1.0049, "step": 5453 }, { "epoch": 0.5735995898352768, "grad_norm": 1.9794409306196663, "learning_rate": 1.961618069536105e-06, "loss": 0.9516, "step": 5454 }, { "epoch": 0.5737047602771241, "grad_norm": 2.5990704751367244, "learning_rate": 1.960802868029119e-06, "loss": 0.9981, "step": 5455 }, { "epoch": 0.5738099307189715, "grad_norm": 2.111642340238224, "learning_rate": 1.9599877266468024e-06, "loss": 0.9729, "step": 5456 }, { "epoch": 0.5739151011608188, "grad_norm": 2.337460712610555, "learning_rate": 1.959172645480049e-06, "loss": 0.9886, "step": 5457 }, { "epoch": 0.5740202716026661, "grad_norm": 3.42592059314716, "learning_rate": 1.958357624619747e-06, "loss": 0.9792, "step": 5458 }, { "epoch": 0.5741254420445134, "grad_norm": 3.04481036795497, "learning_rate": 1.957542664156776e-06, "loss": 1.0186, "step": 5459 }, { "epoch": 0.5742306124863608, "grad_norm": 3.566327107470445, "learning_rate": 1.9567277641820136e-06, "loss": 1.0102, "step": 5460 }, { "epoch": 0.574335782928208, "grad_norm": 2.262412810337984, "learning_rate": 1.9559129247863253e-06, "loss": 0.9987, "step": 5461 }, { "epoch": 0.5744409533700553, "grad_norm": 2.230615471687081, "learning_rate": 1.9550981460605734e-06, "loss": 0.9789, "step": 5462 }, { "epoch": 0.5745461238119026, "grad_norm": 2.3226218833907217, "learning_rate": 1.9542834280956102e-06, "loss": 0.9984, "step": 5463 }, { "epoch": 0.57465129425375, "grad_norm": 3.038633583612286, "learning_rate": 1.9534687709822834e-06, "loss": 1.0057, "step": 5464 }, { "epoch": 0.5747564646955973, "grad_norm": 2.271266550812082, "learning_rate": 1.952654174811435e-06, "loss": 0.9788, "step": 5465 }, { "epoch": 0.5748616351374446, "grad_norm": 1.7249233590279198, "learning_rate": 1.951839639673898e-06, "loss": 1.0057, "step": 5466 }, { "epoch": 0.5749668055792919, "grad_norm": 2.7803541788635417, "learning_rate": 1.9510251656605e-06, "loss": 1.0254, "step": 5467 }, { "epoch": 0.5750719760211392, "grad_norm": 1.8011230248841785, "learning_rate": 1.9502107528620593e-06, "loss": 0.9893, "step": 5468 }, { "epoch": 0.5751771464629866, "grad_norm": 2.82475143858172, "learning_rate": 1.949396401369392e-06, "loss": 0.9615, "step": 5469 }, { "epoch": 0.5752823169048339, "grad_norm": 2.6365865937523014, "learning_rate": 1.9485821112733023e-06, "loss": 0.9794, "step": 5470 }, { "epoch": 0.5753874873466812, "grad_norm": 2.8901960806978737, "learning_rate": 1.947767882664591e-06, "loss": 1.0382, "step": 5471 }, { "epoch": 0.5754926577885285, "grad_norm": 1.6028594756674903, "learning_rate": 1.946953715634051e-06, "loss": 1.0084, "step": 5472 }, { "epoch": 0.5755978282303759, "grad_norm": 2.680056010624629, "learning_rate": 1.9461396102724666e-06, "loss": 0.9539, "step": 5473 }, { "epoch": 0.5757029986722232, "grad_norm": 1.9522822778958524, "learning_rate": 1.9453255666706193e-06, "loss": 0.9717, "step": 5474 }, { "epoch": 0.5758081691140705, "grad_norm": 1.8400125921360837, "learning_rate": 1.94451158491928e-06, "loss": 1.0238, "step": 5475 }, { "epoch": 0.5759133395559178, "grad_norm": 2.9729085425856856, "learning_rate": 1.9436976651092143e-06, "loss": 0.9872, "step": 5476 }, { "epoch": 0.5760185099977652, "grad_norm": 3.0463506648492644, "learning_rate": 1.94288380733118e-06, "loss": 1.0292, "step": 5477 }, { "epoch": 0.5761236804396125, "grad_norm": 3.312807191743345, "learning_rate": 1.9420700116759295e-06, "loss": 0.9883, "step": 5478 }, { "epoch": 0.5762288508814598, "grad_norm": 2.193873965057158, "learning_rate": 1.9412562782342067e-06, "loss": 0.9968, "step": 5479 }, { "epoch": 0.5763340213233071, "grad_norm": 2.4838716286811677, "learning_rate": 1.9404426070967495e-06, "loss": 0.9702, "step": 5480 }, { "epoch": 0.5764391917651545, "grad_norm": 3.7749376463759794, "learning_rate": 1.9396289983542884e-06, "loss": 1.0172, "step": 5481 }, { "epoch": 0.5765443622070017, "grad_norm": 2.5023157843427355, "learning_rate": 1.9388154520975465e-06, "loss": 1.0102, "step": 5482 }, { "epoch": 0.576649532648849, "grad_norm": 2.7098943623571663, "learning_rate": 1.938001968417242e-06, "loss": 1.0124, "step": 5483 }, { "epoch": 0.5767547030906963, "grad_norm": 2.548253174199779, "learning_rate": 1.9371885474040838e-06, "loss": 1.0292, "step": 5484 }, { "epoch": 0.5768598735325436, "grad_norm": 2.5033277887085417, "learning_rate": 1.936375189148774e-06, "loss": 0.9901, "step": 5485 }, { "epoch": 0.576965043974391, "grad_norm": 2.114247029054567, "learning_rate": 1.9355618937420092e-06, "loss": 0.9394, "step": 5486 }, { "epoch": 0.5770702144162383, "grad_norm": 1.8131362680388996, "learning_rate": 1.934748661274477e-06, "loss": 0.957, "step": 5487 }, { "epoch": 0.5771753848580856, "grad_norm": 2.203134930907001, "learning_rate": 1.9339354918368613e-06, "loss": 0.9839, "step": 5488 }, { "epoch": 0.5772805552999329, "grad_norm": 1.8390231714993754, "learning_rate": 1.9331223855198355e-06, "loss": 0.9737, "step": 5489 }, { "epoch": 0.5773857257417803, "grad_norm": 2.67902585867112, "learning_rate": 1.9323093424140673e-06, "loss": 0.9992, "step": 5490 }, { "epoch": 0.5774908961836276, "grad_norm": 1.7899460341786475, "learning_rate": 1.931496362610217e-06, "loss": 1.0174, "step": 5491 }, { "epoch": 0.5775960666254749, "grad_norm": 2.056265293390317, "learning_rate": 1.930683446198939e-06, "loss": 1.0018, "step": 5492 }, { "epoch": 0.5777012370673222, "grad_norm": 2.1358816380940886, "learning_rate": 1.9298705932708793e-06, "loss": 0.9516, "step": 5493 }, { "epoch": 0.5778064075091696, "grad_norm": 1.7026983219962548, "learning_rate": 1.9290578039166775e-06, "loss": 1.0184, "step": 5494 }, { "epoch": 0.5779115779510169, "grad_norm": 2.710604777509262, "learning_rate": 1.9282450782269657e-06, "loss": 0.9627, "step": 5495 }, { "epoch": 0.5780167483928642, "grad_norm": 2.4275052932179864, "learning_rate": 1.9274324162923685e-06, "loss": 0.981, "step": 5496 }, { "epoch": 0.5781219188347115, "grad_norm": 2.639347852054867, "learning_rate": 1.926619818203506e-06, "loss": 1.0225, "step": 5497 }, { "epoch": 0.5782270892765589, "grad_norm": 3.013422310390402, "learning_rate": 1.925807284050987e-06, "loss": 1.0114, "step": 5498 }, { "epoch": 0.5783322597184062, "grad_norm": 2.5584374470603453, "learning_rate": 1.9249948139254173e-06, "loss": 0.9257, "step": 5499 }, { "epoch": 0.5784374301602535, "grad_norm": 2.6663867612637335, "learning_rate": 1.924182407917391e-06, "loss": 0.9599, "step": 5500 }, { "epoch": 0.5785426006021008, "grad_norm": 3.6833651707957387, "learning_rate": 1.9233700661175e-06, "loss": 0.9995, "step": 5501 }, { "epoch": 0.578647771043948, "grad_norm": 1.9915527610671544, "learning_rate": 1.922557788616327e-06, "loss": 1.0072, "step": 5502 }, { "epoch": 0.5787529414857954, "grad_norm": 2.3509129428046993, "learning_rate": 1.921745575504446e-06, "loss": 0.9915, "step": 5503 }, { "epoch": 0.5788581119276427, "grad_norm": 2.4528194678305097, "learning_rate": 1.920933426872425e-06, "loss": 0.9599, "step": 5504 }, { "epoch": 0.57896328236949, "grad_norm": 2.6177871712631915, "learning_rate": 1.9201213428108246e-06, "loss": 0.9898, "step": 5505 }, { "epoch": 0.5790684528113373, "grad_norm": 2.5053984489921155, "learning_rate": 1.9193093234102e-06, "loss": 1.0196, "step": 5506 }, { "epoch": 0.5791736232531847, "grad_norm": 2.498558815345497, "learning_rate": 1.9184973687610965e-06, "loss": 0.9645, "step": 5507 }, { "epoch": 0.579278793695032, "grad_norm": 3.05754856518924, "learning_rate": 1.917685478954054e-06, "loss": 1.0264, "step": 5508 }, { "epoch": 0.5793839641368793, "grad_norm": 2.2653910526244214, "learning_rate": 1.9168736540796043e-06, "loss": 0.9704, "step": 5509 }, { "epoch": 0.5794891345787266, "grad_norm": 2.48443214283071, "learning_rate": 1.9160618942282705e-06, "loss": 1.0255, "step": 5510 }, { "epoch": 0.579594305020574, "grad_norm": 3.004247233784531, "learning_rate": 1.915250199490573e-06, "loss": 1.0294, "step": 5511 }, { "epoch": 0.5796994754624213, "grad_norm": 2.4418258201583103, "learning_rate": 1.914438569957021e-06, "loss": 1.0082, "step": 5512 }, { "epoch": 0.5798046459042686, "grad_norm": 2.4279113250200823, "learning_rate": 1.9136270057181173e-06, "loss": 0.951, "step": 5513 }, { "epoch": 0.5799098163461159, "grad_norm": 2.2619878885329263, "learning_rate": 1.9128155068643563e-06, "loss": 1.0286, "step": 5514 }, { "epoch": 0.5800149867879633, "grad_norm": 3.0238638965690576, "learning_rate": 1.912004073486229e-06, "loss": 0.998, "step": 5515 }, { "epoch": 0.5801201572298106, "grad_norm": 2.392433746833724, "learning_rate": 1.9111927056742146e-06, "loss": 0.971, "step": 5516 }, { "epoch": 0.5802253276716579, "grad_norm": 2.2813223909499403, "learning_rate": 1.9103814035187877e-06, "loss": 1.0173, "step": 5517 }, { "epoch": 0.5803304981135052, "grad_norm": 2.7032063827189092, "learning_rate": 1.909570167110415e-06, "loss": 1.0182, "step": 5518 }, { "epoch": 0.5804356685553526, "grad_norm": 2.30490410235801, "learning_rate": 1.9087589965395547e-06, "loss": 0.9975, "step": 5519 }, { "epoch": 0.5805408389971999, "grad_norm": 2.769410328624913, "learning_rate": 1.9079478918966595e-06, "loss": 0.9609, "step": 5520 }, { "epoch": 0.5806460094390472, "grad_norm": 2.3183308581320694, "learning_rate": 1.9071368532721734e-06, "loss": 1.0247, "step": 5521 }, { "epoch": 0.5807511798808944, "grad_norm": 3.0996340248511767, "learning_rate": 1.9063258807565338e-06, "loss": 0.9765, "step": 5522 }, { "epoch": 0.5808563503227417, "grad_norm": 2.6197973686870553, "learning_rate": 1.9055149744401693e-06, "loss": 0.9358, "step": 5523 }, { "epoch": 0.5809615207645891, "grad_norm": 2.241372830499171, "learning_rate": 1.9047041344135045e-06, "loss": 0.9821, "step": 5524 }, { "epoch": 0.5810666912064364, "grad_norm": 2.9668398883596905, "learning_rate": 1.903893360766953e-06, "loss": 1.0332, "step": 5525 }, { "epoch": 0.5811718616482837, "grad_norm": 2.2000463719499064, "learning_rate": 1.9030826535909225e-06, "loss": 0.9604, "step": 5526 }, { "epoch": 0.581277032090131, "grad_norm": 2.212791966762905, "learning_rate": 1.9022720129758132e-06, "loss": 0.9615, "step": 5527 }, { "epoch": 0.5813822025319784, "grad_norm": 2.2976157836326805, "learning_rate": 1.9014614390120174e-06, "loss": 0.9992, "step": 5528 }, { "epoch": 0.5814873729738257, "grad_norm": 3.038579357250074, "learning_rate": 1.9006509317899207e-06, "loss": 1.0264, "step": 5529 }, { "epoch": 0.581592543415673, "grad_norm": 2.8863407093044735, "learning_rate": 1.8998404913999016e-06, "loss": 0.9813, "step": 5530 }, { "epoch": 0.5816977138575203, "grad_norm": 2.8846661345060953, "learning_rate": 1.8990301179323298e-06, "loss": 0.9626, "step": 5531 }, { "epoch": 0.5818028842993677, "grad_norm": 2.4582156857408557, "learning_rate": 1.8982198114775683e-06, "loss": 0.9984, "step": 5532 }, { "epoch": 0.581908054741215, "grad_norm": 2.178455937450243, "learning_rate": 1.8974095721259718e-06, "loss": 0.9677, "step": 5533 }, { "epoch": 0.5820132251830623, "grad_norm": 2.4539058224075823, "learning_rate": 1.89659939996789e-06, "loss": 0.947, "step": 5534 }, { "epoch": 0.5821183956249096, "grad_norm": 2.9776623960749364, "learning_rate": 1.8957892950936623e-06, "loss": 1.0148, "step": 5535 }, { "epoch": 0.582223566066757, "grad_norm": 3.114821126421381, "learning_rate": 1.8949792575936222e-06, "loss": 0.9682, "step": 5536 }, { "epoch": 0.5823287365086043, "grad_norm": 1.7570932709447848, "learning_rate": 1.8941692875580934e-06, "loss": 0.9917, "step": 5537 }, { "epoch": 0.5824339069504516, "grad_norm": 2.612838069227518, "learning_rate": 1.8933593850773963e-06, "loss": 1.0041, "step": 5538 }, { "epoch": 0.5825390773922989, "grad_norm": 2.0944013087449957, "learning_rate": 1.8925495502418407e-06, "loss": 0.9842, "step": 5539 }, { "epoch": 0.5826442478341463, "grad_norm": 1.8402090587110502, "learning_rate": 1.8917397831417285e-06, "loss": 0.9741, "step": 5540 }, { "epoch": 0.5827494182759936, "grad_norm": 2.3971865882038634, "learning_rate": 1.8909300838673562e-06, "loss": 0.9897, "step": 5541 }, { "epoch": 0.5828545887178409, "grad_norm": 2.7386768450510988, "learning_rate": 1.89012045250901e-06, "loss": 1.0536, "step": 5542 }, { "epoch": 0.5829597591596881, "grad_norm": 2.4719585825244157, "learning_rate": 1.889310889156972e-06, "loss": 0.9932, "step": 5543 }, { "epoch": 0.5830649296015354, "grad_norm": 1.9873200200685415, "learning_rate": 1.8885013939015133e-06, "loss": 1.0105, "step": 5544 }, { "epoch": 0.5831701000433828, "grad_norm": 2.0027592555824563, "learning_rate": 1.8876919668329002e-06, "loss": 0.9932, "step": 5545 }, { "epoch": 0.5832752704852301, "grad_norm": 2.9904414990454433, "learning_rate": 1.8868826080413876e-06, "loss": 1.0046, "step": 5546 }, { "epoch": 0.5833804409270774, "grad_norm": 2.7824243964175452, "learning_rate": 1.8860733176172286e-06, "loss": 1.0286, "step": 5547 }, { "epoch": 0.5834856113689247, "grad_norm": 2.1679565097639917, "learning_rate": 1.8852640956506643e-06, "loss": 0.9693, "step": 5548 }, { "epoch": 0.5835907818107721, "grad_norm": 2.0987314485767423, "learning_rate": 1.884454942231928e-06, "loss": 0.9306, "step": 5549 }, { "epoch": 0.5836959522526194, "grad_norm": 2.74265547993407, "learning_rate": 1.8836458574512478e-06, "loss": 0.9962, "step": 5550 }, { "epoch": 0.5838011226944667, "grad_norm": 1.8785431998355895, "learning_rate": 1.882836841398841e-06, "loss": 0.9127, "step": 5551 }, { "epoch": 0.583906293136314, "grad_norm": 2.4599246039003364, "learning_rate": 1.882027894164922e-06, "loss": 0.9913, "step": 5552 }, { "epoch": 0.5840114635781614, "grad_norm": 2.296968019204936, "learning_rate": 1.8812190158396931e-06, "loss": 0.9596, "step": 5553 }, { "epoch": 0.5841166340200087, "grad_norm": 2.65439428097566, "learning_rate": 1.880410206513351e-06, "loss": 1.025, "step": 5554 }, { "epoch": 0.584221804461856, "grad_norm": 2.1109947099788995, "learning_rate": 1.8796014662760842e-06, "loss": 1.0029, "step": 5555 }, { "epoch": 0.5843269749037033, "grad_norm": 2.7664051470715405, "learning_rate": 1.8787927952180724e-06, "loss": 0.971, "step": 5556 }, { "epoch": 0.5844321453455507, "grad_norm": 2.202625098363754, "learning_rate": 1.8779841934294904e-06, "loss": 0.9731, "step": 5557 }, { "epoch": 0.584537315787398, "grad_norm": 2.2262523402556313, "learning_rate": 1.8771756610005028e-06, "loss": 0.9645, "step": 5558 }, { "epoch": 0.5846424862292453, "grad_norm": 2.203570080279773, "learning_rate": 1.8763671980212673e-06, "loss": 0.9762, "step": 5559 }, { "epoch": 0.5847476566710926, "grad_norm": 3.406979001707062, "learning_rate": 1.8755588045819325e-06, "loss": 0.988, "step": 5560 }, { "epoch": 0.58485282711294, "grad_norm": 2.3316323807570685, "learning_rate": 1.8747504807726433e-06, "loss": 0.9723, "step": 5561 }, { "epoch": 0.5849579975547873, "grad_norm": 2.5275255811504995, "learning_rate": 1.8739422266835321e-06, "loss": 1.0129, "step": 5562 }, { "epoch": 0.5850631679966345, "grad_norm": 2.700141617045467, "learning_rate": 1.8731340424047265e-06, "loss": 0.9889, "step": 5563 }, { "epoch": 0.5851683384384818, "grad_norm": 2.367236088955419, "learning_rate": 1.8723259280263451e-06, "loss": 0.9935, "step": 5564 }, { "epoch": 0.5852735088803291, "grad_norm": 2.0726305566915997, "learning_rate": 1.871517883638497e-06, "loss": 0.9489, "step": 5565 }, { "epoch": 0.5853786793221765, "grad_norm": 2.140334180895899, "learning_rate": 1.8707099093312883e-06, "loss": 0.9789, "step": 5566 }, { "epoch": 0.5854838497640238, "grad_norm": 2.3801510907541035, "learning_rate": 1.869902005194813e-06, "loss": 0.9661, "step": 5567 }, { "epoch": 0.5855890202058711, "grad_norm": 1.94348395585535, "learning_rate": 1.869094171319159e-06, "loss": 0.9675, "step": 5568 }, { "epoch": 0.5856941906477184, "grad_norm": 1.8197908570293184, "learning_rate": 1.8682864077944058e-06, "loss": 0.9888, "step": 5569 }, { "epoch": 0.5857993610895658, "grad_norm": 2.3503761909453478, "learning_rate": 1.8674787147106255e-06, "loss": 0.9775, "step": 5570 }, { "epoch": 0.5859045315314131, "grad_norm": 2.424973046847033, "learning_rate": 1.8666710921578823e-06, "loss": 1.0109, "step": 5571 }, { "epoch": 0.5860097019732604, "grad_norm": 3.1180462687524346, "learning_rate": 1.865863540226232e-06, "loss": 1.001, "step": 5572 }, { "epoch": 0.5861148724151077, "grad_norm": 2.5532427885272946, "learning_rate": 1.8650560590057235e-06, "loss": 1.0227, "step": 5573 }, { "epoch": 0.5862200428569551, "grad_norm": 2.2339931967688704, "learning_rate": 1.8642486485863954e-06, "loss": 1.0216, "step": 5574 }, { "epoch": 0.5863252132988024, "grad_norm": 1.9277968092888864, "learning_rate": 1.8634413090582831e-06, "loss": 0.98, "step": 5575 }, { "epoch": 0.5864303837406497, "grad_norm": 2.7536242032196583, "learning_rate": 1.8626340405114097e-06, "loss": 0.9137, "step": 5576 }, { "epoch": 0.586535554182497, "grad_norm": 2.4161018794939157, "learning_rate": 1.8618268430357916e-06, "loss": 1.0122, "step": 5577 }, { "epoch": 0.5866407246243444, "grad_norm": 2.243061062715193, "learning_rate": 1.8610197167214383e-06, "loss": 0.9863, "step": 5578 }, { "epoch": 0.5867458950661917, "grad_norm": 2.371022287772955, "learning_rate": 1.8602126616583499e-06, "loss": 0.9608, "step": 5579 }, { "epoch": 0.586851065508039, "grad_norm": 1.9392011011181172, "learning_rate": 1.8594056779365202e-06, "loss": 0.9605, "step": 5580 }, { "epoch": 0.5869562359498863, "grad_norm": 2.688618985702904, "learning_rate": 1.8585987656459337e-06, "loss": 0.9798, "step": 5581 }, { "epoch": 0.5870614063917337, "grad_norm": 2.0172735352124884, "learning_rate": 1.8577919248765675e-06, "loss": 0.9679, "step": 5582 }, { "epoch": 0.5871665768335809, "grad_norm": 2.4283093844913197, "learning_rate": 1.8569851557183894e-06, "loss": 1.0153, "step": 5583 }, { "epoch": 0.5872717472754282, "grad_norm": 2.4357436603887823, "learning_rate": 1.8561784582613631e-06, "loss": 0.9547, "step": 5584 }, { "epoch": 0.5873769177172755, "grad_norm": 1.6496752603527884, "learning_rate": 1.8553718325954395e-06, "loss": 1.0061, "step": 5585 }, { "epoch": 0.5874820881591228, "grad_norm": 2.3550989815363454, "learning_rate": 1.8545652788105644e-06, "loss": 1.0181, "step": 5586 }, { "epoch": 0.5875872586009702, "grad_norm": 2.4247760719245592, "learning_rate": 1.8537587969966746e-06, "loss": 1.0417, "step": 5587 }, { "epoch": 0.5876924290428175, "grad_norm": 2.7483529202080748, "learning_rate": 1.852952387243698e-06, "loss": 0.9984, "step": 5588 }, { "epoch": 0.5877975994846648, "grad_norm": 2.3988717114809655, "learning_rate": 1.8521460496415577e-06, "loss": 1.021, "step": 5589 }, { "epoch": 0.5879027699265121, "grad_norm": 3.4921295132942594, "learning_rate": 1.8513397842801655e-06, "loss": 0.9935, "step": 5590 }, { "epoch": 0.5880079403683595, "grad_norm": 2.9336016166705066, "learning_rate": 1.8505335912494265e-06, "loss": 0.9968, "step": 5591 }, { "epoch": 0.5881131108102068, "grad_norm": 2.8340234509298563, "learning_rate": 1.8497274706392369e-06, "loss": 1.0128, "step": 5592 }, { "epoch": 0.5882182812520541, "grad_norm": 3.1961181107327, "learning_rate": 1.848921422539486e-06, "loss": 1.022, "step": 5593 }, { "epoch": 0.5883234516939014, "grad_norm": 1.9909100410145897, "learning_rate": 1.8481154470400545e-06, "loss": 0.9937, "step": 5594 }, { "epoch": 0.5884286221357488, "grad_norm": 3.066382990655515, "learning_rate": 1.8473095442308145e-06, "loss": 0.9469, "step": 5595 }, { "epoch": 0.5885337925775961, "grad_norm": 2.6179963095077716, "learning_rate": 1.8465037142016306e-06, "loss": 0.9608, "step": 5596 }, { "epoch": 0.5886389630194434, "grad_norm": 2.3037057873169524, "learning_rate": 1.8456979570423583e-06, "loss": 1.0054, "step": 5597 }, { "epoch": 0.5887441334612907, "grad_norm": 2.2460590369251268, "learning_rate": 1.8448922728428474e-06, "loss": 0.9827, "step": 5598 }, { "epoch": 0.588849303903138, "grad_norm": 2.7120869322911623, "learning_rate": 1.844086661692937e-06, "loss": 0.9871, "step": 5599 }, { "epoch": 0.5889544743449854, "grad_norm": 3.41715465139414, "learning_rate": 1.84328112368246e-06, "loss": 0.9804, "step": 5600 }, { "epoch": 0.5890596447868327, "grad_norm": 3.3908096687842604, "learning_rate": 1.8424756589012385e-06, "loss": 1.0025, "step": 5601 }, { "epoch": 0.58916481522868, "grad_norm": 2.0383431427479506, "learning_rate": 1.841670267439088e-06, "loss": 0.9688, "step": 5602 }, { "epoch": 0.5892699856705274, "grad_norm": 2.6775576431658092, "learning_rate": 1.8408649493858176e-06, "loss": 0.966, "step": 5603 }, { "epoch": 0.5893751561123746, "grad_norm": 2.571976853064117, "learning_rate": 1.8400597048312257e-06, "loss": 1.0121, "step": 5604 }, { "epoch": 0.5894803265542219, "grad_norm": 2.4059753404598236, "learning_rate": 1.8392545338651036e-06, "loss": 0.9888, "step": 5605 }, { "epoch": 0.5895854969960692, "grad_norm": 2.069543872655015, "learning_rate": 1.8384494365772333e-06, "loss": 0.9747, "step": 5606 }, { "epoch": 0.5896906674379165, "grad_norm": 2.30354396158409, "learning_rate": 1.8376444130573901e-06, "loss": 0.9904, "step": 5607 }, { "epoch": 0.5897958378797639, "grad_norm": 2.2282342710722456, "learning_rate": 1.8368394633953402e-06, "loss": 0.9951, "step": 5608 }, { "epoch": 0.5899010083216112, "grad_norm": 2.523786425607499, "learning_rate": 1.8360345876808422e-06, "loss": 1.0192, "step": 5609 }, { "epoch": 0.5900061787634585, "grad_norm": 2.435738401461322, "learning_rate": 1.835229786003645e-06, "loss": 0.9807, "step": 5610 }, { "epoch": 0.5901113492053058, "grad_norm": 2.4101222248980094, "learning_rate": 1.83442505845349e-06, "loss": 0.9915, "step": 5611 }, { "epoch": 0.5902165196471532, "grad_norm": 1.8954669177419814, "learning_rate": 1.8336204051201124e-06, "loss": 0.9815, "step": 5612 }, { "epoch": 0.5903216900890005, "grad_norm": 2.367357060810497, "learning_rate": 1.832815826093236e-06, "loss": 1.0251, "step": 5613 }, { "epoch": 0.5904268605308478, "grad_norm": 2.4881726831979085, "learning_rate": 1.8320113214625783e-06, "loss": 0.9472, "step": 5614 }, { "epoch": 0.5905320309726951, "grad_norm": 2.7845683502036715, "learning_rate": 1.8312068913178466e-06, "loss": 0.9873, "step": 5615 }, { "epoch": 0.5906372014145425, "grad_norm": 1.968736042696628, "learning_rate": 1.8304025357487427e-06, "loss": 0.9252, "step": 5616 }, { "epoch": 0.5907423718563898, "grad_norm": 2.8097759990413094, "learning_rate": 1.829598254844957e-06, "loss": 0.9349, "step": 5617 }, { "epoch": 0.5908475422982371, "grad_norm": 2.375269564916978, "learning_rate": 1.8287940486961744e-06, "loss": 1.0258, "step": 5618 }, { "epoch": 0.5909527127400844, "grad_norm": 2.3758359793157484, "learning_rate": 1.8279899173920692e-06, "loss": 0.9658, "step": 5619 }, { "epoch": 0.5910578831819318, "grad_norm": 2.9877292184232345, "learning_rate": 1.827185861022308e-06, "loss": 1.0124, "step": 5620 }, { "epoch": 0.5911630536237791, "grad_norm": 1.9064000817314557, "learning_rate": 1.8263818796765506e-06, "loss": 0.9492, "step": 5621 }, { "epoch": 0.5912682240656264, "grad_norm": 2.4552260061481768, "learning_rate": 1.8255779734444462e-06, "loss": 0.9983, "step": 5622 }, { "epoch": 0.5913733945074737, "grad_norm": 2.652810955909284, "learning_rate": 1.8247741424156373e-06, "loss": 1.0046, "step": 5623 }, { "epoch": 0.5914785649493209, "grad_norm": 2.4957564210840797, "learning_rate": 1.8239703866797553e-06, "loss": 1.0137, "step": 5624 }, { "epoch": 0.5915837353911683, "grad_norm": 2.776375551754447, "learning_rate": 1.8231667063264282e-06, "loss": 1.016, "step": 5625 }, { "epoch": 0.5916889058330156, "grad_norm": 2.2349936262589067, "learning_rate": 1.822363101445271e-06, "loss": 0.99, "step": 5626 }, { "epoch": 0.5917940762748629, "grad_norm": 2.136732395843005, "learning_rate": 1.8215595721258921e-06, "loss": 0.9894, "step": 5627 }, { "epoch": 0.5918992467167102, "grad_norm": 2.695017472417158, "learning_rate": 1.8207561184578915e-06, "loss": 1.0002, "step": 5628 }, { "epoch": 0.5920044171585576, "grad_norm": 2.69308063651352, "learning_rate": 1.8199527405308593e-06, "loss": 1.0041, "step": 5629 }, { "epoch": 0.5921095876004049, "grad_norm": 2.534730264118862, "learning_rate": 1.81914943843438e-06, "loss": 1.0193, "step": 5630 }, { "epoch": 0.5922147580422522, "grad_norm": 2.4582659603999564, "learning_rate": 1.818346212258027e-06, "loss": 1.0088, "step": 5631 }, { "epoch": 0.5923199284840995, "grad_norm": 2.495714807866576, "learning_rate": 1.817543062091367e-06, "loss": 0.9929, "step": 5632 }, { "epoch": 0.5924250989259469, "grad_norm": 2.58654760819409, "learning_rate": 1.8167399880239572e-06, "loss": 0.995, "step": 5633 }, { "epoch": 0.5925302693677942, "grad_norm": 2.876672103469995, "learning_rate": 1.815936990145345e-06, "loss": 0.954, "step": 5634 }, { "epoch": 0.5926354398096415, "grad_norm": 2.384145352263863, "learning_rate": 1.8151340685450745e-06, "loss": 0.9896, "step": 5635 }, { "epoch": 0.5927406102514888, "grad_norm": 3.345693108309314, "learning_rate": 1.8143312233126748e-06, "loss": 1.0084, "step": 5636 }, { "epoch": 0.5928457806933362, "grad_norm": 3.5913950173149782, "learning_rate": 1.81352845453767e-06, "loss": 0.983, "step": 5637 }, { "epoch": 0.5929509511351835, "grad_norm": 2.3914332678489827, "learning_rate": 1.8127257623095743e-06, "loss": 1.002, "step": 5638 }, { "epoch": 0.5930561215770308, "grad_norm": 3.185510171534475, "learning_rate": 1.811923146717896e-06, "loss": 0.9976, "step": 5639 }, { "epoch": 0.5931612920188781, "grad_norm": 2.0993299580165767, "learning_rate": 1.811120607852132e-06, "loss": 0.9655, "step": 5640 }, { "epoch": 0.5932664624607255, "grad_norm": 2.8049029671520116, "learning_rate": 1.8103181458017719e-06, "loss": 0.9852, "step": 5641 }, { "epoch": 0.5933716329025728, "grad_norm": 2.0996772552523675, "learning_rate": 1.8095157606562957e-06, "loss": 0.9822, "step": 5642 }, { "epoch": 0.5934768033444201, "grad_norm": 2.3389036391566878, "learning_rate": 1.8087134525051762e-06, "loss": 1.0309, "step": 5643 }, { "epoch": 0.5935819737862674, "grad_norm": 2.3139277809580077, "learning_rate": 1.8079112214378769e-06, "loss": 0.9498, "step": 5644 }, { "epoch": 0.5936871442281146, "grad_norm": 2.6546675909969113, "learning_rate": 1.807109067543853e-06, "loss": 1.0182, "step": 5645 }, { "epoch": 0.593792314669962, "grad_norm": 2.591901764829732, "learning_rate": 1.8063069909125502e-06, "loss": 1.0092, "step": 5646 }, { "epoch": 0.5938974851118093, "grad_norm": 1.5887306890493018, "learning_rate": 1.805504991633406e-06, "loss": 0.9922, "step": 5647 }, { "epoch": 0.5940026555536566, "grad_norm": 2.160624868604629, "learning_rate": 1.8047030697958513e-06, "loss": 0.9504, "step": 5648 }, { "epoch": 0.5941078259955039, "grad_norm": 2.0657716812683518, "learning_rate": 1.8039012254893054e-06, "loss": 0.993, "step": 5649 }, { "epoch": 0.5942129964373513, "grad_norm": 2.827332461738818, "learning_rate": 1.8030994588031804e-06, "loss": 1.009, "step": 5650 }, { "epoch": 0.5943181668791986, "grad_norm": 2.5335501756770125, "learning_rate": 1.80229776982688e-06, "loss": 0.9963, "step": 5651 }, { "epoch": 0.5944233373210459, "grad_norm": 2.394767186447968, "learning_rate": 1.801496158649797e-06, "loss": 1.0434, "step": 5652 }, { "epoch": 0.5945285077628932, "grad_norm": 3.032987653965299, "learning_rate": 1.800694625361319e-06, "loss": 1.0317, "step": 5653 }, { "epoch": 0.5946336782047406, "grad_norm": 2.3486440028502384, "learning_rate": 1.799893170050823e-06, "loss": 0.9893, "step": 5654 }, { "epoch": 0.5947388486465879, "grad_norm": 2.014009694496243, "learning_rate": 1.7990917928076768e-06, "loss": 0.9973, "step": 5655 }, { "epoch": 0.5948440190884352, "grad_norm": 1.9133594346364118, "learning_rate": 1.7982904937212409e-06, "loss": 0.9813, "step": 5656 }, { "epoch": 0.5949491895302825, "grad_norm": 3.1233038474070423, "learning_rate": 1.7974892728808653e-06, "loss": 0.9512, "step": 5657 }, { "epoch": 0.5950543599721299, "grad_norm": 2.19478691298938, "learning_rate": 1.7966881303758938e-06, "loss": 0.9901, "step": 5658 }, { "epoch": 0.5951595304139772, "grad_norm": 2.1551749316228093, "learning_rate": 1.795887066295659e-06, "loss": 1.0007, "step": 5659 }, { "epoch": 0.5952647008558245, "grad_norm": 2.5085227649724957, "learning_rate": 1.7950860807294863e-06, "loss": 1.0337, "step": 5660 }, { "epoch": 0.5953698712976718, "grad_norm": 2.385692972652722, "learning_rate": 1.7942851737666906e-06, "loss": 0.9654, "step": 5661 }, { "epoch": 0.5954750417395192, "grad_norm": 2.221445423122614, "learning_rate": 1.7934843454965808e-06, "loss": 0.9642, "step": 5662 }, { "epoch": 0.5955802121813665, "grad_norm": 2.745661167238812, "learning_rate": 1.7926835960084555e-06, "loss": 1.0256, "step": 5663 }, { "epoch": 0.5956853826232138, "grad_norm": 2.227270973619197, "learning_rate": 1.7918829253916032e-06, "loss": 1.0145, "step": 5664 }, { "epoch": 0.595790553065061, "grad_norm": 1.9107730031675045, "learning_rate": 1.7910823337353062e-06, "loss": 0.9939, "step": 5665 }, { "epoch": 0.5958957235069083, "grad_norm": 2.273694926717976, "learning_rate": 1.790281821128835e-06, "loss": 1.0495, "step": 5666 }, { "epoch": 0.5960008939487557, "grad_norm": 2.3672345502650383, "learning_rate": 1.7894813876614547e-06, "loss": 0.9918, "step": 5667 }, { "epoch": 0.596106064390603, "grad_norm": 2.338013846800265, "learning_rate": 1.7886810334224192e-06, "loss": 1.0049, "step": 5668 }, { "epoch": 0.5962112348324503, "grad_norm": 2.2878641074827413, "learning_rate": 1.7878807585009744e-06, "loss": 0.9282, "step": 5669 }, { "epoch": 0.5963164052742976, "grad_norm": 2.447388144552986, "learning_rate": 1.7870805629863563e-06, "loss": 0.9712, "step": 5670 }, { "epoch": 0.596421575716145, "grad_norm": 2.4810306355833025, "learning_rate": 1.7862804469677942e-06, "loss": 0.9651, "step": 5671 }, { "epoch": 0.5965267461579923, "grad_norm": 2.049575677232311, "learning_rate": 1.7854804105345064e-06, "loss": 0.971, "step": 5672 }, { "epoch": 0.5966319165998396, "grad_norm": 2.715949255809658, "learning_rate": 1.7846804537757034e-06, "loss": 0.9999, "step": 5673 }, { "epoch": 0.5967370870416869, "grad_norm": 2.384190947460149, "learning_rate": 1.7838805767805866e-06, "loss": 0.9835, "step": 5674 }, { "epoch": 0.5968422574835343, "grad_norm": 1.9117408819820847, "learning_rate": 1.7830807796383475e-06, "loss": 0.969, "step": 5675 }, { "epoch": 0.5969474279253816, "grad_norm": 2.26677544024507, "learning_rate": 1.782281062438172e-06, "loss": 0.9658, "step": 5676 }, { "epoch": 0.5970525983672289, "grad_norm": 2.0051996758671806, "learning_rate": 1.7814814252692333e-06, "loss": 0.9929, "step": 5677 }, { "epoch": 0.5971577688090762, "grad_norm": 2.6826187119200426, "learning_rate": 1.7806818682206972e-06, "loss": 1.0851, "step": 5678 }, { "epoch": 0.5972629392509236, "grad_norm": 2.619403217077916, "learning_rate": 1.779882391381721e-06, "loss": 0.9448, "step": 5679 }, { "epoch": 0.5973681096927709, "grad_norm": 2.5065928671207316, "learning_rate": 1.7790829948414512e-06, "loss": 1.0461, "step": 5680 }, { "epoch": 0.5974732801346182, "grad_norm": 2.450270322938859, "learning_rate": 1.778283678689029e-06, "loss": 0.9973, "step": 5681 }, { "epoch": 0.5975784505764655, "grad_norm": 2.519020526501517, "learning_rate": 1.7774844430135823e-06, "loss": 0.9834, "step": 5682 }, { "epoch": 0.5976836210183128, "grad_norm": 1.6420026627989386, "learning_rate": 1.7766852879042335e-06, "loss": 0.9535, "step": 5683 }, { "epoch": 0.5977887914601602, "grad_norm": 2.0627308895943623, "learning_rate": 1.7758862134500926e-06, "loss": 0.9999, "step": 5684 }, { "epoch": 0.5978939619020074, "grad_norm": 2.6591210105862904, "learning_rate": 1.7750872197402652e-06, "loss": 0.9714, "step": 5685 }, { "epoch": 0.5979991323438547, "grad_norm": 2.4500227653148294, "learning_rate": 1.7742883068638447e-06, "loss": 0.9585, "step": 5686 }, { "epoch": 0.598104302785702, "grad_norm": 2.740294615631952, "learning_rate": 1.773489474909915e-06, "loss": 1.014, "step": 5687 }, { "epoch": 0.5982094732275494, "grad_norm": 3.1109859813510115, "learning_rate": 1.7726907239675523e-06, "loss": 0.9821, "step": 5688 }, { "epoch": 0.5983146436693967, "grad_norm": 2.1772995997746176, "learning_rate": 1.771892054125823e-06, "loss": 1.0209, "step": 5689 }, { "epoch": 0.598419814111244, "grad_norm": 2.8946158732426333, "learning_rate": 1.7710934654737868e-06, "loss": 1.0099, "step": 5690 }, { "epoch": 0.5985249845530913, "grad_norm": 2.1027519425308787, "learning_rate": 1.7702949581004917e-06, "loss": 1.0164, "step": 5691 }, { "epoch": 0.5986301549949387, "grad_norm": 2.931727859528439, "learning_rate": 1.769496532094977e-06, "loss": 0.9897, "step": 5692 }, { "epoch": 0.598735325436786, "grad_norm": 2.3348996193471274, "learning_rate": 1.7686981875462733e-06, "loss": 1.0096, "step": 5693 }, { "epoch": 0.5988404958786333, "grad_norm": 2.4906181074969207, "learning_rate": 1.7678999245434036e-06, "loss": 0.985, "step": 5694 }, { "epoch": 0.5989456663204806, "grad_norm": 1.712480678425093, "learning_rate": 1.7671017431753789e-06, "loss": 0.9953, "step": 5695 }, { "epoch": 0.599050836762328, "grad_norm": 1.9969216227932245, "learning_rate": 1.7663036435312037e-06, "loss": 1.0244, "step": 5696 }, { "epoch": 0.5991560072041753, "grad_norm": 2.663976765479451, "learning_rate": 1.7655056256998712e-06, "loss": 1.0014, "step": 5697 }, { "epoch": 0.5992611776460226, "grad_norm": 2.1061164190156885, "learning_rate": 1.7647076897703664e-06, "loss": 0.9526, "step": 5698 }, { "epoch": 0.5993663480878699, "grad_norm": 2.164328415014977, "learning_rate": 1.7639098358316673e-06, "loss": 0.9751, "step": 5699 }, { "epoch": 0.5994715185297173, "grad_norm": 2.926709142359428, "learning_rate": 1.7631120639727396e-06, "loss": 0.993, "step": 5700 }, { "epoch": 0.5995766889715646, "grad_norm": 2.5586116221192676, "learning_rate": 1.762314374282541e-06, "loss": 0.9988, "step": 5701 }, { "epoch": 0.5996818594134119, "grad_norm": 1.9550872865008981, "learning_rate": 1.7615167668500205e-06, "loss": 0.9676, "step": 5702 }, { "epoch": 0.5997870298552592, "grad_norm": 2.7161983180737646, "learning_rate": 1.7607192417641164e-06, "loss": 0.9666, "step": 5703 }, { "epoch": 0.5998922002971065, "grad_norm": 2.6847054400836576, "learning_rate": 1.7599217991137604e-06, "loss": 1.0108, "step": 5704 }, { "epoch": 0.5999973707389539, "grad_norm": 2.3979400888890416, "learning_rate": 1.759124438987873e-06, "loss": 0.9661, "step": 5705 }, { "epoch": 0.6001025411808011, "grad_norm": 2.2431642149168467, "learning_rate": 1.758327161475366e-06, "loss": 0.982, "step": 5706 }, { "epoch": 0.6002077116226484, "grad_norm": 2.177608584163875, "learning_rate": 1.7575299666651413e-06, "loss": 0.9424, "step": 5707 }, { "epoch": 0.6003128820644957, "grad_norm": 1.7290319576886468, "learning_rate": 1.7567328546460939e-06, "loss": 0.944, "step": 5708 }, { "epoch": 0.6004180525063431, "grad_norm": 3.080894919454507, "learning_rate": 1.7559358255071068e-06, "loss": 1.0085, "step": 5709 }, { "epoch": 0.6005232229481904, "grad_norm": 2.060775484827086, "learning_rate": 1.755138879337055e-06, "loss": 0.9477, "step": 5710 }, { "epoch": 0.6006283933900377, "grad_norm": 2.6684216105836183, "learning_rate": 1.754342016224805e-06, "loss": 1.0047, "step": 5711 }, { "epoch": 0.600733563831885, "grad_norm": 2.1366313634883154, "learning_rate": 1.7535452362592116e-06, "loss": 0.9851, "step": 5712 }, { "epoch": 0.6008387342737324, "grad_norm": 2.3649889418446284, "learning_rate": 1.7527485395291234e-06, "loss": 1.0063, "step": 5713 }, { "epoch": 0.6009439047155797, "grad_norm": 1.9751341643467382, "learning_rate": 1.7519519261233786e-06, "loss": 0.9699, "step": 5714 }, { "epoch": 0.601049075157427, "grad_norm": 3.1232686070504982, "learning_rate": 1.7511553961308048e-06, "loss": 0.9794, "step": 5715 }, { "epoch": 0.6011542455992743, "grad_norm": 1.7374660242199027, "learning_rate": 1.750358949640221e-06, "loss": 0.9681, "step": 5716 }, { "epoch": 0.6012594160411217, "grad_norm": 2.5925063706238216, "learning_rate": 1.749562586740438e-06, "loss": 1.0276, "step": 5717 }, { "epoch": 0.601364586482969, "grad_norm": 2.2871091706939097, "learning_rate": 1.7487663075202565e-06, "loss": 0.9771, "step": 5718 }, { "epoch": 0.6014697569248163, "grad_norm": 2.64298782172665, "learning_rate": 1.7479701120684678e-06, "loss": 1.0117, "step": 5719 }, { "epoch": 0.6015749273666636, "grad_norm": 2.2690844026727097, "learning_rate": 1.747174000473853e-06, "loss": 0.9819, "step": 5720 }, { "epoch": 0.601680097808511, "grad_norm": 3.5195991147155135, "learning_rate": 1.7463779728251844e-06, "loss": 1.0233, "step": 5721 }, { "epoch": 0.6017852682503583, "grad_norm": 2.001707103161917, "learning_rate": 1.745582029211228e-06, "loss": 1.0275, "step": 5722 }, { "epoch": 0.6018904386922056, "grad_norm": 2.008284524458236, "learning_rate": 1.7447861697207352e-06, "loss": 1.0409, "step": 5723 }, { "epoch": 0.6019956091340529, "grad_norm": 2.814557277899713, "learning_rate": 1.7439903944424513e-06, "loss": 0.9048, "step": 5724 }, { "epoch": 0.6021007795759002, "grad_norm": 2.115344313736399, "learning_rate": 1.7431947034651111e-06, "loss": 1.0137, "step": 5725 }, { "epoch": 0.6022059500177475, "grad_norm": 1.708140537223153, "learning_rate": 1.7423990968774397e-06, "loss": 0.93, "step": 5726 }, { "epoch": 0.6023111204595948, "grad_norm": 3.0743193234151365, "learning_rate": 1.7416035747681554e-06, "loss": 1.0175, "step": 5727 }, { "epoch": 0.6024162909014421, "grad_norm": 2.109346929311083, "learning_rate": 1.7408081372259633e-06, "loss": 0.957, "step": 5728 }, { "epoch": 0.6025214613432894, "grad_norm": 2.63470957119076, "learning_rate": 1.740012784339562e-06, "loss": 0.9973, "step": 5729 }, { "epoch": 0.6026266317851368, "grad_norm": 2.789184435937574, "learning_rate": 1.7392175161976384e-06, "loss": 0.9654, "step": 5730 }, { "epoch": 0.6027318022269841, "grad_norm": 2.891274776951523, "learning_rate": 1.7384223328888724e-06, "loss": 0.9714, "step": 5731 }, { "epoch": 0.6028369726688314, "grad_norm": 2.3509105765329568, "learning_rate": 1.7376272345019325e-06, "loss": 1.0159, "step": 5732 }, { "epoch": 0.6029421431106787, "grad_norm": 2.8871291599462374, "learning_rate": 1.736832221125478e-06, "loss": 0.9938, "step": 5733 }, { "epoch": 0.603047313552526, "grad_norm": 2.5476087471404476, "learning_rate": 1.7360372928481594e-06, "loss": 1.0238, "step": 5734 }, { "epoch": 0.6031524839943734, "grad_norm": 2.607771150913734, "learning_rate": 1.7352424497586163e-06, "loss": 0.9731, "step": 5735 }, { "epoch": 0.6032576544362207, "grad_norm": 1.834225254634065, "learning_rate": 1.7344476919454816e-06, "loss": 0.9735, "step": 5736 }, { "epoch": 0.603362824878068, "grad_norm": 2.1538425332658617, "learning_rate": 1.7336530194973766e-06, "loss": 0.9937, "step": 5737 }, { "epoch": 0.6034679953199154, "grad_norm": 2.0723158686851604, "learning_rate": 1.732858432502914e-06, "loss": 0.9705, "step": 5738 }, { "epoch": 0.6035731657617627, "grad_norm": 2.7600166097053385, "learning_rate": 1.7320639310506935e-06, "loss": 0.9406, "step": 5739 }, { "epoch": 0.60367833620361, "grad_norm": 2.4094461693884144, "learning_rate": 1.7312695152293112e-06, "loss": 1.017, "step": 5740 }, { "epoch": 0.6037835066454573, "grad_norm": 3.320433240431043, "learning_rate": 1.73047518512735e-06, "loss": 1.0132, "step": 5741 }, { "epoch": 0.6038886770873046, "grad_norm": 2.3770891741620512, "learning_rate": 1.729680940833383e-06, "loss": 1.0349, "step": 5742 }, { "epoch": 0.603993847529152, "grad_norm": 2.2580054578011373, "learning_rate": 1.7288867824359757e-06, "loss": 0.9242, "step": 5743 }, { "epoch": 0.6040990179709993, "grad_norm": 2.280728334297776, "learning_rate": 1.7280927100236816e-06, "loss": 0.9499, "step": 5744 }, { "epoch": 0.6042041884128466, "grad_norm": 3.1522624706614413, "learning_rate": 1.7272987236850475e-06, "loss": 1.0292, "step": 5745 }, { "epoch": 0.6043093588546938, "grad_norm": 2.9642417340517553, "learning_rate": 1.7265048235086078e-06, "loss": 1.0452, "step": 5746 }, { "epoch": 0.6044145292965412, "grad_norm": 2.6256509593397146, "learning_rate": 1.7257110095828894e-06, "loss": 0.9713, "step": 5747 }, { "epoch": 0.6045196997383885, "grad_norm": 2.363877425800695, "learning_rate": 1.7249172819964084e-06, "loss": 0.9749, "step": 5748 }, { "epoch": 0.6046248701802358, "grad_norm": 2.8124098873748755, "learning_rate": 1.7241236408376702e-06, "loss": 1.031, "step": 5749 }, { "epoch": 0.6047300406220831, "grad_norm": 1.9881986505849607, "learning_rate": 1.7233300861951743e-06, "loss": 1.024, "step": 5750 }, { "epoch": 0.6048352110639305, "grad_norm": 2.6635556816470602, "learning_rate": 1.7225366181574072e-06, "loss": 1.0287, "step": 5751 }, { "epoch": 0.6049403815057778, "grad_norm": 2.277134066909746, "learning_rate": 1.7217432368128468e-06, "loss": 0.9973, "step": 5752 }, { "epoch": 0.6050455519476251, "grad_norm": 2.3189335868813776, "learning_rate": 1.7209499422499607e-06, "loss": 1.0081, "step": 5753 }, { "epoch": 0.6051507223894724, "grad_norm": 2.2043466967505334, "learning_rate": 1.7201567345572084e-06, "loss": 1.0241, "step": 5754 }, { "epoch": 0.6052558928313198, "grad_norm": 2.1236697874477737, "learning_rate": 1.7193636138230382e-06, "loss": 0.9812, "step": 5755 }, { "epoch": 0.6053610632731671, "grad_norm": 2.774793111808096, "learning_rate": 1.7185705801358892e-06, "loss": 0.9876, "step": 5756 }, { "epoch": 0.6054662337150144, "grad_norm": 2.7424681309962864, "learning_rate": 1.7177776335841912e-06, "loss": 1.0057, "step": 5757 }, { "epoch": 0.6055714041568617, "grad_norm": 1.6016238303844932, "learning_rate": 1.7169847742563624e-06, "loss": 0.9981, "step": 5758 }, { "epoch": 0.605676574598709, "grad_norm": 2.5812078320001364, "learning_rate": 1.7161920022408153e-06, "loss": 0.9899, "step": 5759 }, { "epoch": 0.6057817450405564, "grad_norm": 2.099760762558072, "learning_rate": 1.7153993176259481e-06, "loss": 0.9424, "step": 5760 }, { "epoch": 0.6058869154824037, "grad_norm": 2.7673202803512353, "learning_rate": 1.714606720500152e-06, "loss": 0.9918, "step": 5761 }, { "epoch": 0.605992085924251, "grad_norm": 2.0971157563339053, "learning_rate": 1.7138142109518068e-06, "loss": 1.0119, "step": 5762 }, { "epoch": 0.6060972563660983, "grad_norm": 2.4244381472334124, "learning_rate": 1.7130217890692857e-06, "loss": 0.9751, "step": 5763 }, { "epoch": 0.6062024268079457, "grad_norm": 1.875983236644177, "learning_rate": 1.7122294549409486e-06, "loss": 0.9974, "step": 5764 }, { "epoch": 0.606307597249793, "grad_norm": 1.9342289332061562, "learning_rate": 1.7114372086551466e-06, "loss": 0.9757, "step": 5765 }, { "epoch": 0.6064127676916403, "grad_norm": 2.6297959475374175, "learning_rate": 1.7106450503002214e-06, "loss": 0.9784, "step": 5766 }, { "epoch": 0.6065179381334875, "grad_norm": 3.026075897619725, "learning_rate": 1.709852979964505e-06, "loss": 0.9993, "step": 5767 }, { "epoch": 0.6066231085753349, "grad_norm": 2.099309327822422, "learning_rate": 1.7090609977363198e-06, "loss": 0.9941, "step": 5768 }, { "epoch": 0.6067282790171822, "grad_norm": 2.839764446363711, "learning_rate": 1.7082691037039772e-06, "loss": 0.9789, "step": 5769 }, { "epoch": 0.6068334494590295, "grad_norm": 2.6592502093600325, "learning_rate": 1.7074772979557802e-06, "loss": 1.0025, "step": 5770 }, { "epoch": 0.6069386199008768, "grad_norm": 2.3396615859703527, "learning_rate": 1.7066855805800203e-06, "loss": 0.945, "step": 5771 }, { "epoch": 0.6070437903427242, "grad_norm": 2.967177827861926, "learning_rate": 1.7058939516649814e-06, "loss": 0.9851, "step": 5772 }, { "epoch": 0.6071489607845715, "grad_norm": 2.8406257980156466, "learning_rate": 1.7051024112989365e-06, "loss": 0.9895, "step": 5773 }, { "epoch": 0.6072541312264188, "grad_norm": 2.539983854743361, "learning_rate": 1.7043109595701472e-06, "loss": 1.0268, "step": 5774 }, { "epoch": 0.6073593016682661, "grad_norm": 2.991656526457313, "learning_rate": 1.7035195965668669e-06, "loss": 0.9855, "step": 5775 }, { "epoch": 0.6074644721101135, "grad_norm": 1.9105665351289631, "learning_rate": 1.7027283223773378e-06, "loss": 0.9728, "step": 5776 }, { "epoch": 0.6075696425519608, "grad_norm": 2.2526519815756303, "learning_rate": 1.7019371370897953e-06, "loss": 0.9993, "step": 5777 }, { "epoch": 0.6076748129938081, "grad_norm": 1.9285557068670938, "learning_rate": 1.7011460407924616e-06, "loss": 0.9362, "step": 5778 }, { "epoch": 0.6077799834356554, "grad_norm": 3.059051701475386, "learning_rate": 1.70035503357355e-06, "loss": 1.0401, "step": 5779 }, { "epoch": 0.6078851538775027, "grad_norm": 2.7521768591254525, "learning_rate": 1.6995641155212638e-06, "loss": 1.0341, "step": 5780 }, { "epoch": 0.6079903243193501, "grad_norm": 2.680907512225668, "learning_rate": 1.6987732867237967e-06, "loss": 0.9964, "step": 5781 }, { "epoch": 0.6080954947611974, "grad_norm": 2.8137952010868705, "learning_rate": 1.6979825472693325e-06, "loss": 0.9617, "step": 5782 }, { "epoch": 0.6082006652030447, "grad_norm": 3.038100520437388, "learning_rate": 1.6971918972460446e-06, "loss": 1.0006, "step": 5783 }, { "epoch": 0.608305835644892, "grad_norm": 2.754354820698447, "learning_rate": 1.6964013367420967e-06, "loss": 0.9527, "step": 5784 }, { "epoch": 0.6084110060867394, "grad_norm": 2.8270349910908354, "learning_rate": 1.695610865845641e-06, "loss": 0.9745, "step": 5785 }, { "epoch": 0.6085161765285867, "grad_norm": 2.7388567829030115, "learning_rate": 1.694820484644824e-06, "loss": 0.9969, "step": 5786 }, { "epoch": 0.6086213469704339, "grad_norm": 2.0265884465537396, "learning_rate": 1.6940301932277775e-06, "loss": 0.982, "step": 5787 }, { "epoch": 0.6087265174122812, "grad_norm": 2.679646606508221, "learning_rate": 1.6932399916826254e-06, "loss": 1.0164, "step": 5788 }, { "epoch": 0.6088316878541286, "grad_norm": 2.4228792065312215, "learning_rate": 1.692449880097482e-06, "loss": 0.9963, "step": 5789 }, { "epoch": 0.6089368582959759, "grad_norm": 2.5768372333502376, "learning_rate": 1.6916598585604488e-06, "loss": 0.9351, "step": 5790 }, { "epoch": 0.6090420287378232, "grad_norm": 2.1737942757800415, "learning_rate": 1.6908699271596213e-06, "loss": 0.9949, "step": 5791 }, { "epoch": 0.6091471991796705, "grad_norm": 3.049935748235343, "learning_rate": 1.690080085983083e-06, "loss": 0.9867, "step": 5792 }, { "epoch": 0.6092523696215179, "grad_norm": 2.1746481010421723, "learning_rate": 1.6892903351189065e-06, "loss": 0.982, "step": 5793 }, { "epoch": 0.6093575400633652, "grad_norm": 2.9706327728285493, "learning_rate": 1.6885006746551551e-06, "loss": 0.9631, "step": 5794 }, { "epoch": 0.6094627105052125, "grad_norm": 1.935639999637283, "learning_rate": 1.6877111046798829e-06, "loss": 0.9725, "step": 5795 }, { "epoch": 0.6095678809470598, "grad_norm": 1.936700358641335, "learning_rate": 1.6869216252811327e-06, "loss": 0.939, "step": 5796 }, { "epoch": 0.6096730513889072, "grad_norm": 2.0817904454728176, "learning_rate": 1.6861322365469373e-06, "loss": 0.9761, "step": 5797 }, { "epoch": 0.6097782218307545, "grad_norm": 1.5332951079387047, "learning_rate": 1.6853429385653196e-06, "loss": 1.0034, "step": 5798 }, { "epoch": 0.6098833922726018, "grad_norm": 1.995502353302454, "learning_rate": 1.6845537314242925e-06, "loss": 0.9639, "step": 5799 }, { "epoch": 0.6099885627144491, "grad_norm": 2.0050706202560553, "learning_rate": 1.6837646152118592e-06, "loss": 0.9861, "step": 5800 }, { "epoch": 0.6100937331562964, "grad_norm": 2.331889223890956, "learning_rate": 1.6829755900160127e-06, "loss": 0.9623, "step": 5801 }, { "epoch": 0.6101989035981438, "grad_norm": 2.77022089749751, "learning_rate": 1.6821866559247344e-06, "loss": 1.0013, "step": 5802 }, { "epoch": 0.6103040740399911, "grad_norm": 2.550781452325086, "learning_rate": 1.681397813025997e-06, "loss": 1.0148, "step": 5803 }, { "epoch": 0.6104092444818384, "grad_norm": 2.978580897273411, "learning_rate": 1.6806090614077619e-06, "loss": 0.9679, "step": 5804 }, { "epoch": 0.6105144149236857, "grad_norm": 2.394625694101748, "learning_rate": 1.6798204011579824e-06, "loss": 0.9528, "step": 5805 }, { "epoch": 0.6106195853655331, "grad_norm": 2.0977205974223407, "learning_rate": 1.6790318323645996e-06, "loss": 1.0424, "step": 5806 }, { "epoch": 0.6107247558073803, "grad_norm": 2.7073708250144715, "learning_rate": 1.678243355115545e-06, "loss": 0.9837, "step": 5807 }, { "epoch": 0.6108299262492276, "grad_norm": 2.815173180361358, "learning_rate": 1.6774549694987388e-06, "loss": 0.9832, "step": 5808 }, { "epoch": 0.6109350966910749, "grad_norm": 2.3860074360187573, "learning_rate": 1.676666675602095e-06, "loss": 1.0108, "step": 5809 }, { "epoch": 0.6110402671329223, "grad_norm": 2.5298738306057746, "learning_rate": 1.6758784735135119e-06, "loss": 1.0427, "step": 5810 }, { "epoch": 0.6111454375747696, "grad_norm": 2.5000170631689826, "learning_rate": 1.6750903633208812e-06, "loss": 0.9571, "step": 5811 }, { "epoch": 0.6112506080166169, "grad_norm": 2.50169544611617, "learning_rate": 1.6743023451120831e-06, "loss": 0.9463, "step": 5812 }, { "epoch": 0.6113557784584642, "grad_norm": 1.7081141405864757, "learning_rate": 1.673514418974987e-06, "loss": 0.9924, "step": 5813 }, { "epoch": 0.6114609489003116, "grad_norm": 3.2506880206165825, "learning_rate": 1.672726584997454e-06, "loss": 0.9991, "step": 5814 }, { "epoch": 0.6115661193421589, "grad_norm": 2.953416905081632, "learning_rate": 1.6719388432673336e-06, "loss": 1.0007, "step": 5815 }, { "epoch": 0.6116712897840062, "grad_norm": 2.818662791521142, "learning_rate": 1.6711511938724648e-06, "loss": 0.9789, "step": 5816 }, { "epoch": 0.6117764602258535, "grad_norm": 2.3521060058811187, "learning_rate": 1.670363636900676e-06, "loss": 0.9882, "step": 5817 }, { "epoch": 0.6118816306677008, "grad_norm": 2.293305321670319, "learning_rate": 1.669576172439787e-06, "loss": 1.0534, "step": 5818 }, { "epoch": 0.6119868011095482, "grad_norm": 1.6726020449012442, "learning_rate": 1.668788800577606e-06, "loss": 0.9545, "step": 5819 }, { "epoch": 0.6120919715513955, "grad_norm": 2.76117276419402, "learning_rate": 1.6680015214019305e-06, "loss": 0.977, "step": 5820 }, { "epoch": 0.6121971419932428, "grad_norm": 2.1802762486981258, "learning_rate": 1.6672143350005488e-06, "loss": 1.0135, "step": 5821 }, { "epoch": 0.6123023124350901, "grad_norm": 3.9598469389280613, "learning_rate": 1.6664272414612368e-06, "loss": 1.0158, "step": 5822 }, { "epoch": 0.6124074828769375, "grad_norm": 2.2549595245033145, "learning_rate": 1.6656402408717636e-06, "loss": 0.9378, "step": 5823 }, { "epoch": 0.6125126533187848, "grad_norm": 2.196920678193086, "learning_rate": 1.6648533333198858e-06, "loss": 1.0162, "step": 5824 }, { "epoch": 0.6126178237606321, "grad_norm": 2.6642892656099866, "learning_rate": 1.664066518893349e-06, "loss": 1.0044, "step": 5825 }, { "epoch": 0.6127229942024794, "grad_norm": 2.7734023454217027, "learning_rate": 1.6632797976798887e-06, "loss": 0.9841, "step": 5826 }, { "epoch": 0.6128281646443268, "grad_norm": 2.3104109605714718, "learning_rate": 1.6624931697672298e-06, "loss": 0.973, "step": 5827 }, { "epoch": 0.612933335086174, "grad_norm": 2.9928279355445984, "learning_rate": 1.6617066352430893e-06, "loss": 0.984, "step": 5828 }, { "epoch": 0.6130385055280213, "grad_norm": 2.5371426562781116, "learning_rate": 1.6609201941951715e-06, "loss": 0.9845, "step": 5829 }, { "epoch": 0.6131436759698686, "grad_norm": 1.8754458670700893, "learning_rate": 1.6601338467111699e-06, "loss": 0.9817, "step": 5830 }, { "epoch": 0.613248846411716, "grad_norm": 2.322347493475784, "learning_rate": 1.6593475928787683e-06, "loss": 0.9833, "step": 5831 }, { "epoch": 0.6133540168535633, "grad_norm": 2.604820493235049, "learning_rate": 1.6585614327856408e-06, "loss": 0.9869, "step": 5832 }, { "epoch": 0.6134591872954106, "grad_norm": 2.2184504978618222, "learning_rate": 1.6577753665194502e-06, "loss": 0.9635, "step": 5833 }, { "epoch": 0.6135643577372579, "grad_norm": 2.8750863642027644, "learning_rate": 1.656989394167849e-06, "loss": 1.0122, "step": 5834 }, { "epoch": 0.6136695281791053, "grad_norm": 2.429229413863499, "learning_rate": 1.6562035158184791e-06, "loss": 0.9418, "step": 5835 }, { "epoch": 0.6137746986209526, "grad_norm": 2.404031095251293, "learning_rate": 1.655417731558971e-06, "loss": 0.9868, "step": 5836 }, { "epoch": 0.6138798690627999, "grad_norm": 3.657059627061674, "learning_rate": 1.6546320414769474e-06, "loss": 1.0213, "step": 5837 }, { "epoch": 0.6139850395046472, "grad_norm": 2.3207503976843564, "learning_rate": 1.6538464456600186e-06, "loss": 0.9973, "step": 5838 }, { "epoch": 0.6140902099464945, "grad_norm": 2.1274755433279586, "learning_rate": 1.6530609441957844e-06, "loss": 0.9843, "step": 5839 }, { "epoch": 0.6141953803883419, "grad_norm": 1.7062502169444627, "learning_rate": 1.6522755371718333e-06, "loss": 0.9573, "step": 5840 }, { "epoch": 0.6143005508301892, "grad_norm": 2.7514773804668002, "learning_rate": 1.6514902246757458e-06, "loss": 1.0072, "step": 5841 }, { "epoch": 0.6144057212720365, "grad_norm": 2.251985168187469, "learning_rate": 1.6507050067950898e-06, "loss": 0.9661, "step": 5842 }, { "epoch": 0.6145108917138838, "grad_norm": 1.8065977765727852, "learning_rate": 1.649919883617423e-06, "loss": 0.9683, "step": 5843 }, { "epoch": 0.6146160621557312, "grad_norm": 2.9561496412074106, "learning_rate": 1.6491348552302927e-06, "loss": 1.0008, "step": 5844 }, { "epoch": 0.6147212325975785, "grad_norm": 1.9869101556732869, "learning_rate": 1.6483499217212357e-06, "loss": 1.0071, "step": 5845 }, { "epoch": 0.6148264030394258, "grad_norm": 2.1835075291524184, "learning_rate": 1.6475650831777784e-06, "loss": 0.9863, "step": 5846 }, { "epoch": 0.6149315734812731, "grad_norm": 2.430561118173312, "learning_rate": 1.6467803396874365e-06, "loss": 0.9679, "step": 5847 }, { "epoch": 0.6150367439231204, "grad_norm": 2.28403426164745, "learning_rate": 1.6459956913377145e-06, "loss": 1.02, "step": 5848 }, { "epoch": 0.6151419143649677, "grad_norm": 2.6620061740930017, "learning_rate": 1.6452111382161074e-06, "loss": 0.9631, "step": 5849 }, { "epoch": 0.615247084806815, "grad_norm": 1.6904530329363399, "learning_rate": 1.6444266804100972e-06, "loss": 0.9713, "step": 5850 }, { "epoch": 0.6153522552486623, "grad_norm": 2.3212203494074455, "learning_rate": 1.6436423180071598e-06, "loss": 0.968, "step": 5851 }, { "epoch": 0.6154574256905097, "grad_norm": 2.5364871090662837, "learning_rate": 1.6428580510947563e-06, "loss": 0.9652, "step": 5852 }, { "epoch": 0.615562596132357, "grad_norm": 3.030037498205161, "learning_rate": 1.6420738797603386e-06, "loss": 0.9879, "step": 5853 }, { "epoch": 0.6156677665742043, "grad_norm": 2.822749830603122, "learning_rate": 1.6412898040913471e-06, "loss": 1.0141, "step": 5854 }, { "epoch": 0.6157729370160516, "grad_norm": 2.5948534835106645, "learning_rate": 1.6405058241752142e-06, "loss": 0.9961, "step": 5855 }, { "epoch": 0.615878107457899, "grad_norm": 1.7892323129704704, "learning_rate": 1.6397219400993584e-06, "loss": 0.9357, "step": 5856 }, { "epoch": 0.6159832778997463, "grad_norm": 2.4386090952796935, "learning_rate": 1.6389381519511893e-06, "loss": 0.977, "step": 5857 }, { "epoch": 0.6160884483415936, "grad_norm": 2.621745541745822, "learning_rate": 1.6381544598181048e-06, "loss": 1.0185, "step": 5858 }, { "epoch": 0.6161936187834409, "grad_norm": 2.232535333948487, "learning_rate": 1.6373708637874925e-06, "loss": 1.0113, "step": 5859 }, { "epoch": 0.6162987892252882, "grad_norm": 3.2749764704846553, "learning_rate": 1.6365873639467314e-06, "loss": 0.9737, "step": 5860 }, { "epoch": 0.6164039596671356, "grad_norm": 1.869914950095561, "learning_rate": 1.635803960383186e-06, "loss": 1.0177, "step": 5861 }, { "epoch": 0.6165091301089829, "grad_norm": 2.285719824488398, "learning_rate": 1.6350206531842122e-06, "loss": 1.0154, "step": 5862 }, { "epoch": 0.6166143005508302, "grad_norm": 2.4241270283535825, "learning_rate": 1.634237442437154e-06, "loss": 1.0028, "step": 5863 }, { "epoch": 0.6167194709926775, "grad_norm": 2.2622451802360932, "learning_rate": 1.6334543282293475e-06, "loss": 0.9626, "step": 5864 }, { "epoch": 0.6168246414345249, "grad_norm": 3.2500786695737562, "learning_rate": 1.6326713106481148e-06, "loss": 0.9625, "step": 5865 }, { "epoch": 0.6169298118763722, "grad_norm": 3.008447944721862, "learning_rate": 1.6318883897807688e-06, "loss": 0.9794, "step": 5866 }, { "epoch": 0.6170349823182195, "grad_norm": 2.7479663807080907, "learning_rate": 1.631105565714611e-06, "loss": 0.988, "step": 5867 }, { "epoch": 0.6171401527600667, "grad_norm": 2.784120946238147, "learning_rate": 1.6303228385369318e-06, "loss": 1.0219, "step": 5868 }, { "epoch": 0.617245323201914, "grad_norm": 2.3151431857814155, "learning_rate": 1.629540208335013e-06, "loss": 1.0192, "step": 5869 }, { "epoch": 0.6173504936437614, "grad_norm": 2.5308826576755656, "learning_rate": 1.6287576751961225e-06, "loss": 0.9902, "step": 5870 }, { "epoch": 0.6174556640856087, "grad_norm": 2.3429901480764976, "learning_rate": 1.6279752392075192e-06, "loss": 0.9569, "step": 5871 }, { "epoch": 0.617560834527456, "grad_norm": 2.181050276365485, "learning_rate": 1.6271929004564514e-06, "loss": 1.0071, "step": 5872 }, { "epoch": 0.6176660049693034, "grad_norm": 2.601702484291357, "learning_rate": 1.6264106590301538e-06, "loss": 0.9857, "step": 5873 }, { "epoch": 0.6177711754111507, "grad_norm": 2.459923668745263, "learning_rate": 1.6256285150158551e-06, "loss": 0.9918, "step": 5874 }, { "epoch": 0.617876345852998, "grad_norm": 2.197411706775619, "learning_rate": 1.6248464685007692e-06, "loss": 0.9421, "step": 5875 }, { "epoch": 0.6179815162948453, "grad_norm": 2.095985627899745, "learning_rate": 1.6240645195721017e-06, "loss": 1.0025, "step": 5876 }, { "epoch": 0.6180866867366926, "grad_norm": 2.749362024760833, "learning_rate": 1.6232826683170425e-06, "loss": 1.034, "step": 5877 }, { "epoch": 0.61819185717854, "grad_norm": 2.274015303414764, "learning_rate": 1.6225009148227778e-06, "loss": 1.0224, "step": 5878 }, { "epoch": 0.6182970276203873, "grad_norm": 2.895694165275508, "learning_rate": 1.6217192591764774e-06, "loss": 1.0139, "step": 5879 }, { "epoch": 0.6184021980622346, "grad_norm": 2.7394375689175225, "learning_rate": 1.6209377014653028e-06, "loss": 1.001, "step": 5880 }, { "epoch": 0.618507368504082, "grad_norm": 2.9670712927952763, "learning_rate": 1.6201562417764028e-06, "loss": 1.0286, "step": 5881 }, { "epoch": 0.6186125389459293, "grad_norm": 2.313393062040808, "learning_rate": 1.6193748801969164e-06, "loss": 1.0052, "step": 5882 }, { "epoch": 0.6187177093877766, "grad_norm": 2.578512587304975, "learning_rate": 1.6185936168139724e-06, "loss": 1.0067, "step": 5883 }, { "epoch": 0.6188228798296239, "grad_norm": 2.489037978706978, "learning_rate": 1.617812451714687e-06, "loss": 0.9524, "step": 5884 }, { "epoch": 0.6189280502714712, "grad_norm": 2.056449619227656, "learning_rate": 1.6170313849861662e-06, "loss": 0.9762, "step": 5885 }, { "epoch": 0.6190332207133186, "grad_norm": 2.7761192879641396, "learning_rate": 1.6162504167155044e-06, "loss": 0.9772, "step": 5886 }, { "epoch": 0.6191383911551659, "grad_norm": 2.2429168320600237, "learning_rate": 1.6154695469897873e-06, "loss": 0.9714, "step": 5887 }, { "epoch": 0.6192435615970132, "grad_norm": 2.0592296766661806, "learning_rate": 1.6146887758960865e-06, "loss": 1.0256, "step": 5888 }, { "epoch": 0.6193487320388604, "grad_norm": 2.393849999328136, "learning_rate": 1.6139081035214654e-06, "loss": 0.9837, "step": 5889 }, { "epoch": 0.6194539024807078, "grad_norm": 2.618576013105238, "learning_rate": 1.6131275299529736e-06, "loss": 1.0011, "step": 5890 }, { "epoch": 0.6195590729225551, "grad_norm": 2.2774723438717595, "learning_rate": 1.612347055277651e-06, "loss": 0.9881, "step": 5891 }, { "epoch": 0.6196642433644024, "grad_norm": 1.7906851084234103, "learning_rate": 1.6115666795825284e-06, "loss": 0.9792, "step": 5892 }, { "epoch": 0.6197694138062497, "grad_norm": 2.148647148559524, "learning_rate": 1.6107864029546221e-06, "loss": 1.0106, "step": 5893 }, { "epoch": 0.619874584248097, "grad_norm": 2.6630752042411907, "learning_rate": 1.6100062254809395e-06, "loss": 1.0022, "step": 5894 }, { "epoch": 0.6199797546899444, "grad_norm": 2.1012206126791355, "learning_rate": 1.6092261472484765e-06, "loss": 0.9314, "step": 5895 }, { "epoch": 0.6200849251317917, "grad_norm": 2.581191874335221, "learning_rate": 1.6084461683442176e-06, "loss": 0.9863, "step": 5896 }, { "epoch": 0.620190095573639, "grad_norm": 2.4711381450267513, "learning_rate": 1.6076662888551373e-06, "loss": 0.9523, "step": 5897 }, { "epoch": 0.6202952660154863, "grad_norm": 1.9627152201554907, "learning_rate": 1.6068865088681975e-06, "loss": 0.9788, "step": 5898 }, { "epoch": 0.6204004364573337, "grad_norm": 3.1900135916304433, "learning_rate": 1.6061068284703502e-06, "loss": 0.9554, "step": 5899 }, { "epoch": 0.620505606899181, "grad_norm": 2.4902737582100762, "learning_rate": 1.605327247748534e-06, "loss": 0.9749, "step": 5900 }, { "epoch": 0.6206107773410283, "grad_norm": 1.979528958259765, "learning_rate": 1.604547766789681e-06, "loss": 0.9627, "step": 5901 }, { "epoch": 0.6207159477828756, "grad_norm": 2.5553977998948945, "learning_rate": 1.6037683856807083e-06, "loss": 0.9983, "step": 5902 }, { "epoch": 0.620821118224723, "grad_norm": 2.5672010413971638, "learning_rate": 1.6029891045085224e-06, "loss": 1.0212, "step": 5903 }, { "epoch": 0.6209262886665703, "grad_norm": 2.4249488163123267, "learning_rate": 1.60220992336002e-06, "loss": 1.0261, "step": 5904 }, { "epoch": 0.6210314591084176, "grad_norm": 2.6586835053107656, "learning_rate": 1.6014308423220848e-06, "loss": 0.9869, "step": 5905 }, { "epoch": 0.6211366295502649, "grad_norm": 1.9453115647301724, "learning_rate": 1.6006518614815913e-06, "loss": 1.002, "step": 5906 }, { "epoch": 0.6212417999921123, "grad_norm": 2.0323748172785425, "learning_rate": 1.5998729809254017e-06, "loss": 0.9806, "step": 5907 }, { "epoch": 0.6213469704339596, "grad_norm": 2.1843910280641445, "learning_rate": 1.599094200740367e-06, "loss": 0.9464, "step": 5908 }, { "epoch": 0.6214521408758068, "grad_norm": 2.7758359003572113, "learning_rate": 1.5983155210133267e-06, "loss": 1.0226, "step": 5909 }, { "epoch": 0.6215573113176541, "grad_norm": 2.6474203799852445, "learning_rate": 1.5975369418311113e-06, "loss": 1.0131, "step": 5910 }, { "epoch": 0.6216624817595015, "grad_norm": 2.2015849233799907, "learning_rate": 1.5967584632805378e-06, "loss": 0.9715, "step": 5911 }, { "epoch": 0.6217676522013488, "grad_norm": 2.5493391965343144, "learning_rate": 1.595980085448412e-06, "loss": 0.9627, "step": 5912 }, { "epoch": 0.6218728226431961, "grad_norm": 2.116283213919637, "learning_rate": 1.5952018084215293e-06, "loss": 0.9863, "step": 5913 }, { "epoch": 0.6219779930850434, "grad_norm": 2.4987388936618853, "learning_rate": 1.5944236322866725e-06, "loss": 0.9764, "step": 5914 }, { "epoch": 0.6220831635268907, "grad_norm": 2.327898890577115, "learning_rate": 1.5936455571306164e-06, "loss": 1.0053, "step": 5915 }, { "epoch": 0.6221883339687381, "grad_norm": 2.0393178342923215, "learning_rate": 1.592867583040122e-06, "loss": 0.9501, "step": 5916 }, { "epoch": 0.6222935044105854, "grad_norm": 2.5407681947430527, "learning_rate": 1.5920897101019384e-06, "loss": 1.0122, "step": 5917 }, { "epoch": 0.6223986748524327, "grad_norm": 2.4916758411681226, "learning_rate": 1.5913119384028048e-06, "loss": 0.9687, "step": 5918 }, { "epoch": 0.62250384529428, "grad_norm": 1.9567570424692333, "learning_rate": 1.5905342680294495e-06, "loss": 0.9773, "step": 5919 }, { "epoch": 0.6226090157361274, "grad_norm": 3.263714903866836, "learning_rate": 1.589756699068588e-06, "loss": 0.9846, "step": 5920 }, { "epoch": 0.6227141861779747, "grad_norm": 2.9575517862827936, "learning_rate": 1.5889792316069259e-06, "loss": 1.0221, "step": 5921 }, { "epoch": 0.622819356619822, "grad_norm": 2.606415686984147, "learning_rate": 1.5882018657311563e-06, "loss": 0.9988, "step": 5922 }, { "epoch": 0.6229245270616693, "grad_norm": 2.446109177024214, "learning_rate": 1.587424601527961e-06, "loss": 0.9587, "step": 5923 }, { "epoch": 0.6230296975035167, "grad_norm": 2.553224170320699, "learning_rate": 1.5866474390840126e-06, "loss": 0.9538, "step": 5924 }, { "epoch": 0.623134867945364, "grad_norm": 2.5094380956851507, "learning_rate": 1.5858703784859697e-06, "loss": 1.03, "step": 5925 }, { "epoch": 0.6232400383872113, "grad_norm": 2.0855074263373163, "learning_rate": 1.5850934198204812e-06, "loss": 0.98, "step": 5926 }, { "epoch": 0.6233452088290586, "grad_norm": 2.6961341409896455, "learning_rate": 1.5843165631741841e-06, "loss": 1.0272, "step": 5927 }, { "epoch": 0.623450379270906, "grad_norm": 2.4500422545848313, "learning_rate": 1.5835398086337017e-06, "loss": 0.9762, "step": 5928 }, { "epoch": 0.6235555497127532, "grad_norm": 1.9103182982629987, "learning_rate": 1.582763156285651e-06, "loss": 0.9472, "step": 5929 }, { "epoch": 0.6236607201546005, "grad_norm": 2.412571799050157, "learning_rate": 1.581986606216634e-06, "loss": 0.9921, "step": 5930 }, { "epoch": 0.6237658905964478, "grad_norm": 2.5521277009233536, "learning_rate": 1.5812101585132416e-06, "loss": 0.9579, "step": 5931 }, { "epoch": 0.6238710610382951, "grad_norm": 2.6571937101267458, "learning_rate": 1.5804338132620535e-06, "loss": 1.017, "step": 5932 }, { "epoch": 0.6239762314801425, "grad_norm": 2.0622319382949676, "learning_rate": 1.5796575705496392e-06, "loss": 0.9466, "step": 5933 }, { "epoch": 0.6240814019219898, "grad_norm": 3.2155213422084974, "learning_rate": 1.578881430462555e-06, "loss": 1.0423, "step": 5934 }, { "epoch": 0.6241865723638371, "grad_norm": 2.8066759471700085, "learning_rate": 1.5781053930873468e-06, "loss": 1.0102, "step": 5935 }, { "epoch": 0.6242917428056844, "grad_norm": 2.3021253355837206, "learning_rate": 1.5773294585105492e-06, "loss": 0.9828, "step": 5936 }, { "epoch": 0.6243969132475318, "grad_norm": 2.476492213812445, "learning_rate": 1.5765536268186832e-06, "loss": 1.0049, "step": 5937 }, { "epoch": 0.6245020836893791, "grad_norm": 2.5139739664998375, "learning_rate": 1.5757778980982627e-06, "loss": 1.0335, "step": 5938 }, { "epoch": 0.6246072541312264, "grad_norm": 2.2258196498122844, "learning_rate": 1.5750022724357861e-06, "loss": 0.9761, "step": 5939 }, { "epoch": 0.6247124245730737, "grad_norm": 2.726588286992223, "learning_rate": 1.5742267499177416e-06, "loss": 1.0145, "step": 5940 }, { "epoch": 0.6248175950149211, "grad_norm": 2.6669317872534544, "learning_rate": 1.5734513306306053e-06, "loss": 0.9992, "step": 5941 }, { "epoch": 0.6249227654567684, "grad_norm": 2.084506573805443, "learning_rate": 1.5726760146608443e-06, "loss": 1.0112, "step": 5942 }, { "epoch": 0.6250279358986157, "grad_norm": 2.6171495295222855, "learning_rate": 1.5719008020949114e-06, "loss": 0.9778, "step": 5943 }, { "epoch": 0.625133106340463, "grad_norm": 2.140554094520001, "learning_rate": 1.5711256930192486e-06, "loss": 1.0035, "step": 5944 }, { "epoch": 0.6252382767823104, "grad_norm": 1.9264447426695412, "learning_rate": 1.5703506875202868e-06, "loss": 0.9721, "step": 5945 }, { "epoch": 0.6253434472241577, "grad_norm": 2.5055312262881144, "learning_rate": 1.569575785684444e-06, "loss": 0.9955, "step": 5946 }, { "epoch": 0.625448617666005, "grad_norm": 2.807271348200614, "learning_rate": 1.5688009875981308e-06, "loss": 1.0316, "step": 5947 }, { "epoch": 0.6255537881078523, "grad_norm": 2.0493312335861074, "learning_rate": 1.5680262933477405e-06, "loss": 0.9913, "step": 5948 }, { "epoch": 0.6256589585496997, "grad_norm": 2.7559029051850708, "learning_rate": 1.5672517030196582e-06, "loss": 1.0043, "step": 5949 }, { "epoch": 0.6257641289915469, "grad_norm": 2.1694704480396445, "learning_rate": 1.5664772167002573e-06, "loss": 0.9878, "step": 5950 }, { "epoch": 0.6258692994333942, "grad_norm": 2.1988058558900323, "learning_rate": 1.565702834475897e-06, "loss": 0.9623, "step": 5951 }, { "epoch": 0.6259744698752415, "grad_norm": 2.6284645651941245, "learning_rate": 1.5649285564329296e-06, "loss": 0.9351, "step": 5952 }, { "epoch": 0.6260796403170888, "grad_norm": 2.2987819649896393, "learning_rate": 1.5641543826576926e-06, "loss": 0.9896, "step": 5953 }, { "epoch": 0.6261848107589362, "grad_norm": 3.0571010198179485, "learning_rate": 1.5633803132365117e-06, "loss": 1.0123, "step": 5954 }, { "epoch": 0.6262899812007835, "grad_norm": 3.3037893530878386, "learning_rate": 1.5626063482557009e-06, "loss": 0.9803, "step": 5955 }, { "epoch": 0.6263951516426308, "grad_norm": 2.9959633481925416, "learning_rate": 1.561832487801565e-06, "loss": 0.9568, "step": 5956 }, { "epoch": 0.6265003220844781, "grad_norm": 2.3258563684955638, "learning_rate": 1.5610587319603942e-06, "loss": 0.9807, "step": 5957 }, { "epoch": 0.6266054925263255, "grad_norm": 2.434441203619329, "learning_rate": 1.5602850808184695e-06, "loss": 0.9661, "step": 5958 }, { "epoch": 0.6267106629681728, "grad_norm": 2.33530559179327, "learning_rate": 1.5595115344620575e-06, "loss": 0.9838, "step": 5959 }, { "epoch": 0.6268158334100201, "grad_norm": 2.2518352944630355, "learning_rate": 1.558738092977415e-06, "loss": 1.0295, "step": 5960 }, { "epoch": 0.6269210038518674, "grad_norm": 2.4291496261842473, "learning_rate": 1.5579647564507878e-06, "loss": 1.0062, "step": 5961 }, { "epoch": 0.6270261742937148, "grad_norm": 2.436150506595444, "learning_rate": 1.5571915249684084e-06, "loss": 0.9884, "step": 5962 }, { "epoch": 0.6271313447355621, "grad_norm": 2.3348752680999665, "learning_rate": 1.5564183986164982e-06, "loss": 0.9936, "step": 5963 }, { "epoch": 0.6272365151774094, "grad_norm": 2.694260375344963, "learning_rate": 1.5556453774812652e-06, "loss": 0.9632, "step": 5964 }, { "epoch": 0.6273416856192567, "grad_norm": 3.2038875399300526, "learning_rate": 1.5548724616489095e-06, "loss": 0.9407, "step": 5965 }, { "epoch": 0.6274468560611041, "grad_norm": 2.7091848613454936, "learning_rate": 1.5540996512056164e-06, "loss": 0.9873, "step": 5966 }, { "epoch": 0.6275520265029514, "grad_norm": 2.8068933011488033, "learning_rate": 1.5533269462375603e-06, "loss": 0.9585, "step": 5967 }, { "epoch": 0.6276571969447987, "grad_norm": 2.7111104148324543, "learning_rate": 1.5525543468309035e-06, "loss": 0.987, "step": 5968 }, { "epoch": 0.627762367386646, "grad_norm": 2.744267573867375, "learning_rate": 1.5517818530717965e-06, "loss": 0.9686, "step": 5969 }, { "epoch": 0.6278675378284932, "grad_norm": 2.830012535610845, "learning_rate": 1.5510094650463797e-06, "loss": 0.9701, "step": 5970 }, { "epoch": 0.6279727082703406, "grad_norm": 3.230771720949463, "learning_rate": 1.5502371828407797e-06, "loss": 1.073, "step": 5971 }, { "epoch": 0.6280778787121879, "grad_norm": 2.3409480978908896, "learning_rate": 1.5494650065411116e-06, "loss": 0.9638, "step": 5972 }, { "epoch": 0.6281830491540352, "grad_norm": 2.444047229295055, "learning_rate": 1.548692936233479e-06, "loss": 0.9757, "step": 5973 }, { "epoch": 0.6282882195958825, "grad_norm": 2.00504969984711, "learning_rate": 1.5479209720039733e-06, "loss": 0.9657, "step": 5974 }, { "epoch": 0.6283933900377299, "grad_norm": 3.516884926223067, "learning_rate": 1.5471491139386763e-06, "loss": 0.9693, "step": 5975 }, { "epoch": 0.6284985604795772, "grad_norm": 2.794487173825674, "learning_rate": 1.546377362123655e-06, "loss": 0.9452, "step": 5976 }, { "epoch": 0.6286037309214245, "grad_norm": 2.1935018379858984, "learning_rate": 1.5456057166449657e-06, "loss": 0.9745, "step": 5977 }, { "epoch": 0.6287089013632718, "grad_norm": 2.297663462287113, "learning_rate": 1.5448341775886528e-06, "loss": 0.9751, "step": 5978 }, { "epoch": 0.6288140718051192, "grad_norm": 2.4320433680260085, "learning_rate": 1.544062745040749e-06, "loss": 0.9293, "step": 5979 }, { "epoch": 0.6289192422469665, "grad_norm": 2.872577702758348, "learning_rate": 1.5432914190872757e-06, "loss": 0.9477, "step": 5980 }, { "epoch": 0.6290244126888138, "grad_norm": 2.6721948871558783, "learning_rate": 1.542520199814241e-06, "loss": 0.9874, "step": 5981 }, { "epoch": 0.6291295831306611, "grad_norm": 2.5749173147193996, "learning_rate": 1.541749087307642e-06, "loss": 0.9875, "step": 5982 }, { "epoch": 0.6292347535725085, "grad_norm": 3.239662190499185, "learning_rate": 1.540978081653463e-06, "loss": 0.9966, "step": 5983 }, { "epoch": 0.6293399240143558, "grad_norm": 2.4691533579634926, "learning_rate": 1.5402071829376785e-06, "loss": 0.9739, "step": 5984 }, { "epoch": 0.6294450944562031, "grad_norm": 2.59932511245003, "learning_rate": 1.5394363912462492e-06, "loss": 0.9414, "step": 5985 }, { "epoch": 0.6295502648980504, "grad_norm": 3.223852238411686, "learning_rate": 1.5386657066651242e-06, "loss": 1.0248, "step": 5986 }, { "epoch": 0.6296554353398978, "grad_norm": 2.6080630359778993, "learning_rate": 1.5378951292802397e-06, "loss": 0.9842, "step": 5987 }, { "epoch": 0.6297606057817451, "grad_norm": 2.133772821565045, "learning_rate": 1.537124659177523e-06, "loss": 0.9879, "step": 5988 }, { "epoch": 0.6298657762235924, "grad_norm": 2.3424669533242573, "learning_rate": 1.5363542964428869e-06, "loss": 0.981, "step": 5989 }, { "epoch": 0.6299709466654396, "grad_norm": 2.805150094282492, "learning_rate": 1.5355840411622324e-06, "loss": 0.9469, "step": 5990 }, { "epoch": 0.630076117107287, "grad_norm": 2.1906730624214235, "learning_rate": 1.5348138934214493e-06, "loss": 0.9742, "step": 5991 }, { "epoch": 0.6301812875491343, "grad_norm": 2.1142812888162434, "learning_rate": 1.5340438533064144e-06, "loss": 0.9788, "step": 5992 }, { "epoch": 0.6302864579909816, "grad_norm": 3.1567558738222607, "learning_rate": 1.533273920902994e-06, "loss": 0.9616, "step": 5993 }, { "epoch": 0.6303916284328289, "grad_norm": 2.9030050091513866, "learning_rate": 1.5325040962970417e-06, "loss": 0.9866, "step": 5994 }, { "epoch": 0.6304967988746762, "grad_norm": 2.7460052058389817, "learning_rate": 1.5317343795743978e-06, "loss": 0.9868, "step": 5995 }, { "epoch": 0.6306019693165236, "grad_norm": 2.777830833328807, "learning_rate": 1.5309647708208928e-06, "loss": 1.0023, "step": 5996 }, { "epoch": 0.6307071397583709, "grad_norm": 2.5120232310444104, "learning_rate": 1.5301952701223422e-06, "loss": 0.972, "step": 5997 }, { "epoch": 0.6308123102002182, "grad_norm": 2.5363484810380212, "learning_rate": 1.5294258775645545e-06, "loss": 0.9653, "step": 5998 }, { "epoch": 0.6309174806420655, "grad_norm": 2.3239370528340144, "learning_rate": 1.5286565932333206e-06, "loss": 0.9899, "step": 5999 }, { "epoch": 0.6310226510839129, "grad_norm": 1.8488945840537439, "learning_rate": 1.527887417214422e-06, "loss": 0.9603, "step": 6000 }, { "epoch": 0.6311278215257602, "grad_norm": 3.48906837044783, "learning_rate": 1.5271183495936273e-06, "loss": 1.0177, "step": 6001 }, { "epoch": 0.6312329919676075, "grad_norm": 2.163852345437768, "learning_rate": 1.526349390456695e-06, "loss": 0.9964, "step": 6002 }, { "epoch": 0.6313381624094548, "grad_norm": 2.5997209044493865, "learning_rate": 1.5255805398893694e-06, "loss": 1.0004, "step": 6003 }, { "epoch": 0.6314433328513022, "grad_norm": 2.375034238569322, "learning_rate": 1.524811797977383e-06, "loss": 0.9733, "step": 6004 }, { "epoch": 0.6315485032931495, "grad_norm": 2.496467895172032, "learning_rate": 1.524043164806457e-06, "loss": 1.0152, "step": 6005 }, { "epoch": 0.6316536737349968, "grad_norm": 1.9885390550524185, "learning_rate": 1.523274640462299e-06, "loss": 0.9841, "step": 6006 }, { "epoch": 0.6317588441768441, "grad_norm": 3.025548010215934, "learning_rate": 1.5225062250306061e-06, "loss": 1.0333, "step": 6007 }, { "epoch": 0.6318640146186915, "grad_norm": 2.3886268665683668, "learning_rate": 1.521737918597063e-06, "loss": 0.9801, "step": 6008 }, { "epoch": 0.6319691850605388, "grad_norm": 2.787511865407236, "learning_rate": 1.5209697212473411e-06, "loss": 0.9432, "step": 6009 }, { "epoch": 0.6320743555023861, "grad_norm": 1.9376239960228194, "learning_rate": 1.5202016330670998e-06, "loss": 0.9643, "step": 6010 }, { "epoch": 0.6321795259442333, "grad_norm": 2.5039360525755274, "learning_rate": 1.5194336541419889e-06, "loss": 1.0006, "step": 6011 }, { "epoch": 0.6322846963860806, "grad_norm": 2.2219077385221486, "learning_rate": 1.5186657845576428e-06, "loss": 0.9957, "step": 6012 }, { "epoch": 0.632389866827928, "grad_norm": 2.1820409541460544, "learning_rate": 1.517898024399685e-06, "loss": 1.001, "step": 6013 }, { "epoch": 0.6324950372697753, "grad_norm": 3.0044306866211405, "learning_rate": 1.5171303737537268e-06, "loss": 1.0513, "step": 6014 }, { "epoch": 0.6326002077116226, "grad_norm": 2.353451046725791, "learning_rate": 1.5163628327053661e-06, "loss": 0.9949, "step": 6015 }, { "epoch": 0.6327053781534699, "grad_norm": 1.7590698297866734, "learning_rate": 1.5155954013401916e-06, "loss": 1.0115, "step": 6016 }, { "epoch": 0.6328105485953173, "grad_norm": 2.903364189025722, "learning_rate": 1.514828079743777e-06, "loss": 1.0058, "step": 6017 }, { "epoch": 0.6329157190371646, "grad_norm": 2.331986775898218, "learning_rate": 1.5140608680016843e-06, "loss": 0.9509, "step": 6018 }, { "epoch": 0.6330208894790119, "grad_norm": 2.2153310954039225, "learning_rate": 1.5132937661994642e-06, "loss": 0.9977, "step": 6019 }, { "epoch": 0.6331260599208592, "grad_norm": 2.5817333311290285, "learning_rate": 1.5125267744226538e-06, "loss": 1.0117, "step": 6020 }, { "epoch": 0.6332312303627066, "grad_norm": 2.1972799748746903, "learning_rate": 1.5117598927567791e-06, "loss": 0.9816, "step": 6021 }, { "epoch": 0.6333364008045539, "grad_norm": 2.501611715704273, "learning_rate": 1.5109931212873535e-06, "loss": 0.9624, "step": 6022 }, { "epoch": 0.6334415712464012, "grad_norm": 3.048683807069792, "learning_rate": 1.510226460099878e-06, "loss": 1.0291, "step": 6023 }, { "epoch": 0.6335467416882485, "grad_norm": 1.8577271918034934, "learning_rate": 1.5094599092798396e-06, "loss": 0.9834, "step": 6024 }, { "epoch": 0.6336519121300959, "grad_norm": 2.360244200259059, "learning_rate": 1.5086934689127173e-06, "loss": 1.0152, "step": 6025 }, { "epoch": 0.6337570825719432, "grad_norm": 3.3673054887669793, "learning_rate": 1.5079271390839739e-06, "loss": 1.0165, "step": 6026 }, { "epoch": 0.6338622530137905, "grad_norm": 2.0209824979538773, "learning_rate": 1.507160919879061e-06, "loss": 1.0005, "step": 6027 }, { "epoch": 0.6339674234556378, "grad_norm": 2.4114885968260156, "learning_rate": 1.5063948113834184e-06, "loss": 1.0082, "step": 6028 }, { "epoch": 0.6340725938974852, "grad_norm": 2.185300297703002, "learning_rate": 1.5056288136824725e-06, "loss": 0.9766, "step": 6029 }, { "epoch": 0.6341777643393325, "grad_norm": 2.0177323129193434, "learning_rate": 1.5048629268616387e-06, "loss": 0.9728, "step": 6030 }, { "epoch": 0.6342829347811797, "grad_norm": 2.653274446710612, "learning_rate": 1.5040971510063194e-06, "loss": 0.9947, "step": 6031 }, { "epoch": 0.634388105223027, "grad_norm": 2.579001884562927, "learning_rate": 1.5033314862019038e-06, "loss": 1.0079, "step": 6032 }, { "epoch": 0.6344932756648743, "grad_norm": 2.5583814952866994, "learning_rate": 1.5025659325337691e-06, "loss": 1.0113, "step": 6033 }, { "epoch": 0.6345984461067217, "grad_norm": 2.626887953110027, "learning_rate": 1.5018004900872826e-06, "loss": 1.0277, "step": 6034 }, { "epoch": 0.634703616548569, "grad_norm": 2.1091067755253032, "learning_rate": 1.5010351589477955e-06, "loss": 0.9559, "step": 6035 }, { "epoch": 0.6348087869904163, "grad_norm": 1.8200915464301803, "learning_rate": 1.500269939200648e-06, "loss": 0.9921, "step": 6036 }, { "epoch": 0.6349139574322636, "grad_norm": 3.206419511627608, "learning_rate": 1.4995048309311689e-06, "loss": 1.0505, "step": 6037 }, { "epoch": 0.635019127874111, "grad_norm": 3.2074930349618525, "learning_rate": 1.4987398342246723e-06, "loss": 0.9945, "step": 6038 }, { "epoch": 0.6351242983159583, "grad_norm": 2.533006871994517, "learning_rate": 1.4979749491664631e-06, "loss": 0.9701, "step": 6039 }, { "epoch": 0.6352294687578056, "grad_norm": 2.7185236711474596, "learning_rate": 1.4972101758418307e-06, "loss": 1.0024, "step": 6040 }, { "epoch": 0.6353346391996529, "grad_norm": 2.946674199556197, "learning_rate": 1.496445514336054e-06, "loss": 0.9976, "step": 6041 }, { "epoch": 0.6354398096415003, "grad_norm": 1.7428228667592487, "learning_rate": 1.4956809647343984e-06, "loss": 1.0117, "step": 6042 }, { "epoch": 0.6355449800833476, "grad_norm": 2.3626314845775838, "learning_rate": 1.494916527122116e-06, "loss": 1.0219, "step": 6043 }, { "epoch": 0.6356501505251949, "grad_norm": 2.6996995243978272, "learning_rate": 1.4941522015844497e-06, "loss": 0.9868, "step": 6044 }, { "epoch": 0.6357553209670422, "grad_norm": 2.428347148699208, "learning_rate": 1.4933879882066265e-06, "loss": 0.9514, "step": 6045 }, { "epoch": 0.6358604914088896, "grad_norm": 1.4275506316675006, "learning_rate": 1.4926238870738624e-06, "loss": 0.965, "step": 6046 }, { "epoch": 0.6359656618507369, "grad_norm": 2.520049309698355, "learning_rate": 1.4918598982713591e-06, "loss": 1.0004, "step": 6047 }, { "epoch": 0.6360708322925842, "grad_norm": 2.0394100491968654, "learning_rate": 1.4910960218843099e-06, "loss": 0.981, "step": 6048 }, { "epoch": 0.6361760027344315, "grad_norm": 2.761031291865611, "learning_rate": 1.4903322579978916e-06, "loss": 0.9439, "step": 6049 }, { "epoch": 0.6362811731762789, "grad_norm": 1.7561053210029363, "learning_rate": 1.4895686066972703e-06, "loss": 0.9956, "step": 6050 }, { "epoch": 0.6363863436181261, "grad_norm": 1.662203724317949, "learning_rate": 1.4888050680675983e-06, "loss": 0.9915, "step": 6051 }, { "epoch": 0.6364915140599734, "grad_norm": 2.6919552015061026, "learning_rate": 1.4880416421940155e-06, "loss": 1.0345, "step": 6052 }, { "epoch": 0.6365966845018207, "grad_norm": 2.0994310916738197, "learning_rate": 1.4872783291616516e-06, "loss": 0.9567, "step": 6053 }, { "epoch": 0.636701854943668, "grad_norm": 2.8279048616917497, "learning_rate": 1.4865151290556213e-06, "loss": 1.0237, "step": 6054 }, { "epoch": 0.6368070253855154, "grad_norm": 1.997694763719727, "learning_rate": 1.4857520419610269e-06, "loss": 1.0362, "step": 6055 }, { "epoch": 0.6369121958273627, "grad_norm": 2.8035596983277227, "learning_rate": 1.4849890679629585e-06, "loss": 1.0114, "step": 6056 }, { "epoch": 0.63701736626921, "grad_norm": 3.067338117412502, "learning_rate": 1.4842262071464941e-06, "loss": 0.9986, "step": 6057 }, { "epoch": 0.6371225367110573, "grad_norm": 2.258041094300344, "learning_rate": 1.4834634595966987e-06, "loss": 0.9978, "step": 6058 }, { "epoch": 0.6372277071529047, "grad_norm": 1.7649409916767491, "learning_rate": 1.4827008253986242e-06, "loss": 0.9676, "step": 6059 }, { "epoch": 0.637332877594752, "grad_norm": 2.156642745687781, "learning_rate": 1.4819383046373103e-06, "loss": 1.0234, "step": 6060 }, { "epoch": 0.6374380480365993, "grad_norm": 1.9425720258392243, "learning_rate": 1.481175897397783e-06, "loss": 0.9994, "step": 6061 }, { "epoch": 0.6375432184784466, "grad_norm": 1.747020487159967, "learning_rate": 1.4804136037650587e-06, "loss": 0.9864, "step": 6062 }, { "epoch": 0.637648388920294, "grad_norm": 2.71636510704543, "learning_rate": 1.4796514238241384e-06, "loss": 1.0384, "step": 6063 }, { "epoch": 0.6377535593621413, "grad_norm": 2.528649446654802, "learning_rate": 1.47888935766001e-06, "loss": 0.9637, "step": 6064 }, { "epoch": 0.6378587298039886, "grad_norm": 2.2556064050120743, "learning_rate": 1.4781274053576502e-06, "loss": 1.008, "step": 6065 }, { "epoch": 0.6379639002458359, "grad_norm": 1.750081160005925, "learning_rate": 1.4773655670020235e-06, "loss": 0.9957, "step": 6066 }, { "epoch": 0.6380690706876833, "grad_norm": 2.250883027340165, "learning_rate": 1.47660384267808e-06, "loss": 0.9868, "step": 6067 }, { "epoch": 0.6381742411295306, "grad_norm": 2.060415813689357, "learning_rate": 1.4758422324707583e-06, "loss": 0.9704, "step": 6068 }, { "epoch": 0.6382794115713779, "grad_norm": 1.9461628426655384, "learning_rate": 1.4750807364649833e-06, "loss": 0.9545, "step": 6069 }, { "epoch": 0.6383845820132252, "grad_norm": 2.1407118008400485, "learning_rate": 1.474319354745668e-06, "loss": 0.9842, "step": 6070 }, { "epoch": 0.6384897524550726, "grad_norm": 2.469491976655806, "learning_rate": 1.4735580873977125e-06, "loss": 0.9548, "step": 6071 }, { "epoch": 0.6385949228969198, "grad_norm": 2.115135764982275, "learning_rate": 1.4727969345060041e-06, "loss": 1.0031, "step": 6072 }, { "epoch": 0.6387000933387671, "grad_norm": 3.4636565981184195, "learning_rate": 1.472035896155417e-06, "loss": 1.0143, "step": 6073 }, { "epoch": 0.6388052637806144, "grad_norm": 2.4623855590153383, "learning_rate": 1.4712749724308135e-06, "loss": 1.0005, "step": 6074 }, { "epoch": 0.6389104342224617, "grad_norm": 3.4935329001539603, "learning_rate": 1.4705141634170402e-06, "loss": 0.9797, "step": 6075 }, { "epoch": 0.6390156046643091, "grad_norm": 2.981348278086113, "learning_rate": 1.4697534691989362e-06, "loss": 0.9747, "step": 6076 }, { "epoch": 0.6391207751061564, "grad_norm": 2.3924446707785187, "learning_rate": 1.468992889861324e-06, "loss": 0.9882, "step": 6077 }, { "epoch": 0.6392259455480037, "grad_norm": 2.23110800034559, "learning_rate": 1.4682324254890135e-06, "loss": 1.009, "step": 6078 }, { "epoch": 0.639331115989851, "grad_norm": 2.468541532847342, "learning_rate": 1.467472076166802e-06, "loss": 0.9841, "step": 6079 }, { "epoch": 0.6394362864316984, "grad_norm": 2.1517955193808342, "learning_rate": 1.4667118419794756e-06, "loss": 0.9967, "step": 6080 }, { "epoch": 0.6395414568735457, "grad_norm": 2.5661813956881527, "learning_rate": 1.4659517230118059e-06, "loss": 0.9628, "step": 6081 }, { "epoch": 0.639646627315393, "grad_norm": 2.181840326831563, "learning_rate": 1.4651917193485516e-06, "loss": 0.9237, "step": 6082 }, { "epoch": 0.6397517977572403, "grad_norm": 2.1097264744955604, "learning_rate": 1.4644318310744593e-06, "loss": 1.0078, "step": 6083 }, { "epoch": 0.6398569681990877, "grad_norm": 3.493695271673113, "learning_rate": 1.4636720582742614e-06, "loss": 0.9912, "step": 6084 }, { "epoch": 0.639962138640935, "grad_norm": 1.9541985454696953, "learning_rate": 1.4629124010326808e-06, "loss": 0.9942, "step": 6085 }, { "epoch": 0.6400673090827823, "grad_norm": 1.814035910685774, "learning_rate": 1.4621528594344237e-06, "loss": 1.0028, "step": 6086 }, { "epoch": 0.6401724795246296, "grad_norm": 2.696801122775463, "learning_rate": 1.461393433564185e-06, "loss": 0.9912, "step": 6087 }, { "epoch": 0.640277649966477, "grad_norm": 1.8963092165199675, "learning_rate": 1.4606341235066452e-06, "loss": 1.0165, "step": 6088 }, { "epoch": 0.6403828204083243, "grad_norm": 3.669943765578645, "learning_rate": 1.4598749293464763e-06, "loss": 1.0253, "step": 6089 }, { "epoch": 0.6404879908501716, "grad_norm": 2.8563817058680616, "learning_rate": 1.459115851168333e-06, "loss": 0.9864, "step": 6090 }, { "epoch": 0.6405931612920189, "grad_norm": 3.028542769912519, "learning_rate": 1.458356889056857e-06, "loss": 0.9806, "step": 6091 }, { "epoch": 0.6406983317338661, "grad_norm": 2.5809797491743653, "learning_rate": 1.4575980430966808e-06, "loss": 1.0162, "step": 6092 }, { "epoch": 0.6408035021757135, "grad_norm": 2.040383893422859, "learning_rate": 1.4568393133724185e-06, "loss": 0.9846, "step": 6093 }, { "epoch": 0.6409086726175608, "grad_norm": 2.4533017761010236, "learning_rate": 1.4560806999686782e-06, "loss": 1.0269, "step": 6094 }, { "epoch": 0.6410138430594081, "grad_norm": 3.402665196483549, "learning_rate": 1.4553222029700483e-06, "loss": 1.0354, "step": 6095 }, { "epoch": 0.6411190135012554, "grad_norm": 1.8564212218833198, "learning_rate": 1.4545638224611091e-06, "loss": 0.969, "step": 6096 }, { "epoch": 0.6412241839431028, "grad_norm": 1.8685811138285715, "learning_rate": 1.453805558526424e-06, "loss": 0.9675, "step": 6097 }, { "epoch": 0.6413293543849501, "grad_norm": 2.0516034951437496, "learning_rate": 1.4530474112505462e-06, "loss": 0.9577, "step": 6098 }, { "epoch": 0.6414345248267974, "grad_norm": 2.632085890015541, "learning_rate": 1.4522893807180163e-06, "loss": 1.0097, "step": 6099 }, { "epoch": 0.6415396952686447, "grad_norm": 2.3357227858145326, "learning_rate": 1.4515314670133582e-06, "loss": 0.99, "step": 6100 }, { "epoch": 0.6416448657104921, "grad_norm": 2.031648424124929, "learning_rate": 1.4507736702210872e-06, "loss": 0.9714, "step": 6101 }, { "epoch": 0.6417500361523394, "grad_norm": 2.9746248980463488, "learning_rate": 1.4500159904257008e-06, "loss": 0.995, "step": 6102 }, { "epoch": 0.6418552065941867, "grad_norm": 2.4550674213700825, "learning_rate": 1.4492584277116901e-06, "loss": 1.0096, "step": 6103 }, { "epoch": 0.641960377036034, "grad_norm": 2.5455704526475222, "learning_rate": 1.4485009821635269e-06, "loss": 0.9562, "step": 6104 }, { "epoch": 0.6420655474778814, "grad_norm": 2.233105772105395, "learning_rate": 1.4477436538656715e-06, "loss": 0.9256, "step": 6105 }, { "epoch": 0.6421707179197287, "grad_norm": 2.738769620298446, "learning_rate": 1.446986442902574e-06, "loss": 1.0144, "step": 6106 }, { "epoch": 0.642275888361576, "grad_norm": 1.7815706347021845, "learning_rate": 1.4462293493586662e-06, "loss": 0.9717, "step": 6107 }, { "epoch": 0.6423810588034233, "grad_norm": 2.0497666168497335, "learning_rate": 1.445472373318374e-06, "loss": 0.9561, "step": 6108 }, { "epoch": 0.6424862292452707, "grad_norm": 2.6274997137270244, "learning_rate": 1.444715514866103e-06, "loss": 1.0196, "step": 6109 }, { "epoch": 0.642591399687118, "grad_norm": 3.440273225742282, "learning_rate": 1.443958774086251e-06, "loss": 1.0102, "step": 6110 }, { "epoch": 0.6426965701289653, "grad_norm": 2.483691809703666, "learning_rate": 1.443202151063198e-06, "loss": 1.0265, "step": 6111 }, { "epoch": 0.6428017405708125, "grad_norm": 2.447339258191458, "learning_rate": 1.4424456458813147e-06, "loss": 1.0198, "step": 6112 }, { "epoch": 0.6429069110126598, "grad_norm": 2.47035563695523, "learning_rate": 1.4416892586249586e-06, "loss": 1.0044, "step": 6113 }, { "epoch": 0.6430120814545072, "grad_norm": 2.1915716837499333, "learning_rate": 1.4409329893784702e-06, "loss": 0.9784, "step": 6114 }, { "epoch": 0.6431172518963545, "grad_norm": 2.2232422956545825, "learning_rate": 1.4401768382261813e-06, "loss": 1.0041, "step": 6115 }, { "epoch": 0.6432224223382018, "grad_norm": 2.022035643282418, "learning_rate": 1.4394208052524062e-06, "loss": 1.0323, "step": 6116 }, { "epoch": 0.6433275927800491, "grad_norm": 2.1999594819259496, "learning_rate": 1.4386648905414525e-06, "loss": 0.9813, "step": 6117 }, { "epoch": 0.6434327632218965, "grad_norm": 2.4472550369492936, "learning_rate": 1.4379090941776067e-06, "loss": 0.9525, "step": 6118 }, { "epoch": 0.6435379336637438, "grad_norm": 2.4214125950190817, "learning_rate": 1.4371534162451487e-06, "loss": 0.9848, "step": 6119 }, { "epoch": 0.6436431041055911, "grad_norm": 2.6024268782469013, "learning_rate": 1.4363978568283412e-06, "loss": 1.0264, "step": 6120 }, { "epoch": 0.6437482745474384, "grad_norm": 2.842563298958492, "learning_rate": 1.4356424160114332e-06, "loss": 0.9535, "step": 6121 }, { "epoch": 0.6438534449892858, "grad_norm": 2.086520974335246, "learning_rate": 1.4348870938786657e-06, "loss": 0.9933, "step": 6122 }, { "epoch": 0.6439586154311331, "grad_norm": 2.6018674216608306, "learning_rate": 1.434131890514261e-06, "loss": 0.9847, "step": 6123 }, { "epoch": 0.6440637858729804, "grad_norm": 1.7399725974174627, "learning_rate": 1.4333768060024308e-06, "loss": 0.9887, "step": 6124 }, { "epoch": 0.6441689563148277, "grad_norm": 2.33460686855743, "learning_rate": 1.4326218404273718e-06, "loss": 1.0107, "step": 6125 }, { "epoch": 0.6442741267566751, "grad_norm": 2.304286713750231, "learning_rate": 1.4318669938732694e-06, "loss": 0.9888, "step": 6126 }, { "epoch": 0.6443792971985224, "grad_norm": 2.617604600587451, "learning_rate": 1.4311122664242955e-06, "loss": 0.9804, "step": 6127 }, { "epoch": 0.6444844676403697, "grad_norm": 2.884591813362857, "learning_rate": 1.430357658164606e-06, "loss": 0.9886, "step": 6128 }, { "epoch": 0.644589638082217, "grad_norm": 2.437029381096377, "learning_rate": 1.4296031691783485e-06, "loss": 1.0295, "step": 6129 }, { "epoch": 0.6446948085240644, "grad_norm": 2.9913792325943303, "learning_rate": 1.4288487995496508e-06, "loss": 0.9888, "step": 6130 }, { "epoch": 0.6447999789659117, "grad_norm": 2.4797754234444715, "learning_rate": 1.4280945493626347e-06, "loss": 0.9409, "step": 6131 }, { "epoch": 0.644905149407759, "grad_norm": 3.0033762892288545, "learning_rate": 1.427340418701402e-06, "loss": 1.0166, "step": 6132 }, { "epoch": 0.6450103198496062, "grad_norm": 2.37731076377192, "learning_rate": 1.4265864076500465e-06, "loss": 0.963, "step": 6133 }, { "epoch": 0.6451154902914535, "grad_norm": 2.6651860260554954, "learning_rate": 1.4258325162926441e-06, "loss": 1.0274, "step": 6134 }, { "epoch": 0.6452206607333009, "grad_norm": 2.9869829981562384, "learning_rate": 1.4250787447132607e-06, "loss": 1.0177, "step": 6135 }, { "epoch": 0.6453258311751482, "grad_norm": 2.170891852808566, "learning_rate": 1.4243250929959484e-06, "loss": 0.982, "step": 6136 }, { "epoch": 0.6454310016169955, "grad_norm": 2.170159054445325, "learning_rate": 1.4235715612247435e-06, "loss": 1.0116, "step": 6137 }, { "epoch": 0.6455361720588428, "grad_norm": 2.4494534962313868, "learning_rate": 1.4228181494836724e-06, "loss": 0.959, "step": 6138 }, { "epoch": 0.6456413425006902, "grad_norm": 1.7490013283349262, "learning_rate": 1.4220648578567444e-06, "loss": 1.0183, "step": 6139 }, { "epoch": 0.6457465129425375, "grad_norm": 2.8377357573800515, "learning_rate": 1.4213116864279586e-06, "loss": 0.977, "step": 6140 }, { "epoch": 0.6458516833843848, "grad_norm": 1.933344788178783, "learning_rate": 1.4205586352813e-06, "loss": 0.9859, "step": 6141 }, { "epoch": 0.6459568538262321, "grad_norm": 2.280844764369398, "learning_rate": 1.4198057045007384e-06, "loss": 1.0191, "step": 6142 }, { "epoch": 0.6460620242680795, "grad_norm": 2.2048132233498654, "learning_rate": 1.4190528941702328e-06, "loss": 1.0413, "step": 6143 }, { "epoch": 0.6461671947099268, "grad_norm": 2.4477864525484363, "learning_rate": 1.4183002043737246e-06, "loss": 1.0105, "step": 6144 }, { "epoch": 0.6462723651517741, "grad_norm": 2.3566748243820625, "learning_rate": 1.4175476351951484e-06, "loss": 0.9748, "step": 6145 }, { "epoch": 0.6463775355936214, "grad_norm": 2.636120161039787, "learning_rate": 1.4167951867184187e-06, "loss": 0.984, "step": 6146 }, { "epoch": 0.6464827060354688, "grad_norm": 2.367508226857458, "learning_rate": 1.4160428590274416e-06, "loss": 0.9811, "step": 6147 }, { "epoch": 0.6465878764773161, "grad_norm": 2.2275791479433167, "learning_rate": 1.415290652206105e-06, "loss": 0.9852, "step": 6148 }, { "epoch": 0.6466930469191634, "grad_norm": 2.9242032394552138, "learning_rate": 1.414538566338287e-06, "loss": 1.0, "step": 6149 }, { "epoch": 0.6467982173610107, "grad_norm": 2.200563913281183, "learning_rate": 1.4137866015078523e-06, "loss": 0.9766, "step": 6150 }, { "epoch": 0.646903387802858, "grad_norm": 2.136169934182686, "learning_rate": 1.4130347577986481e-06, "loss": 0.9891, "step": 6151 }, { "epoch": 0.6470085582447054, "grad_norm": 2.4522731115485916, "learning_rate": 1.4122830352945133e-06, "loss": 0.9605, "step": 6152 }, { "epoch": 0.6471137286865526, "grad_norm": 1.8397790333899535, "learning_rate": 1.411531434079268e-06, "loss": 1.0013, "step": 6153 }, { "epoch": 0.6472188991283999, "grad_norm": 2.2803600413168237, "learning_rate": 1.410779954236725e-06, "loss": 0.9917, "step": 6154 }, { "epoch": 0.6473240695702472, "grad_norm": 2.8291558323999912, "learning_rate": 1.4100285958506785e-06, "loss": 1.0154, "step": 6155 }, { "epoch": 0.6474292400120946, "grad_norm": 2.737018049431013, "learning_rate": 1.4092773590049098e-06, "loss": 0.9962, "step": 6156 }, { "epoch": 0.6475344104539419, "grad_norm": 2.5922522490787583, "learning_rate": 1.4085262437831886e-06, "loss": 0.9314, "step": 6157 }, { "epoch": 0.6476395808957892, "grad_norm": 2.1557844949857463, "learning_rate": 1.4077752502692704e-06, "loss": 0.9797, "step": 6158 }, { "epoch": 0.6477447513376365, "grad_norm": 2.208795077852058, "learning_rate": 1.4070243785468974e-06, "loss": 0.9637, "step": 6159 }, { "epoch": 0.6478499217794839, "grad_norm": 2.7684955688133965, "learning_rate": 1.4062736286997952e-06, "loss": 0.9932, "step": 6160 }, { "epoch": 0.6479550922213312, "grad_norm": 2.7322932255125925, "learning_rate": 1.4055230008116813e-06, "loss": 0.9904, "step": 6161 }, { "epoch": 0.6480602626631785, "grad_norm": 2.64744883621316, "learning_rate": 1.404772494966254e-06, "loss": 1.0088, "step": 6162 }, { "epoch": 0.6481654331050258, "grad_norm": 2.5697944667740513, "learning_rate": 1.4040221112472014e-06, "loss": 0.9693, "step": 6163 }, { "epoch": 0.6482706035468732, "grad_norm": 2.108197664933067, "learning_rate": 1.4032718497381981e-06, "loss": 1.032, "step": 6164 }, { "epoch": 0.6483757739887205, "grad_norm": 3.875251629143974, "learning_rate": 1.4025217105229021e-06, "loss": 1.0005, "step": 6165 }, { "epoch": 0.6484809444305678, "grad_norm": 2.3973712746700664, "learning_rate": 1.4017716936849623e-06, "loss": 0.9738, "step": 6166 }, { "epoch": 0.6485861148724151, "grad_norm": 2.2650362281817866, "learning_rate": 1.4010217993080076e-06, "loss": 0.9662, "step": 6167 }, { "epoch": 0.6486912853142625, "grad_norm": 2.1269159039754277, "learning_rate": 1.400272027475662e-06, "loss": 1.0022, "step": 6168 }, { "epoch": 0.6487964557561098, "grad_norm": 2.177929012659701, "learning_rate": 1.399522378271527e-06, "loss": 0.9988, "step": 6169 }, { "epoch": 0.6489016261979571, "grad_norm": 2.684842639627906, "learning_rate": 1.3987728517791966e-06, "loss": 0.9721, "step": 6170 }, { "epoch": 0.6490067966398044, "grad_norm": 2.838066575594318, "learning_rate": 1.3980234480822468e-06, "loss": 0.9978, "step": 6171 }, { "epoch": 0.6491119670816518, "grad_norm": 2.6540051552459465, "learning_rate": 1.397274167264243e-06, "loss": 0.9959, "step": 6172 }, { "epoch": 0.649217137523499, "grad_norm": 2.4639663876901814, "learning_rate": 1.3965250094087373e-06, "loss": 0.9836, "step": 6173 }, { "epoch": 0.6493223079653463, "grad_norm": 2.447427698722012, "learning_rate": 1.3957759745992637e-06, "loss": 0.9602, "step": 6174 }, { "epoch": 0.6494274784071936, "grad_norm": 2.2679043767007983, "learning_rate": 1.395027062919348e-06, "loss": 1.0334, "step": 6175 }, { "epoch": 0.6495326488490409, "grad_norm": 3.5789206773428317, "learning_rate": 1.3942782744524974e-06, "loss": 1.0175, "step": 6176 }, { "epoch": 0.6496378192908883, "grad_norm": 2.729641701499508, "learning_rate": 1.3935296092822087e-06, "loss": 1.0013, "step": 6177 }, { "epoch": 0.6497429897327356, "grad_norm": 2.3392616870926886, "learning_rate": 1.392781067491965e-06, "loss": 0.9527, "step": 6178 }, { "epoch": 0.6498481601745829, "grad_norm": 2.969259025385436, "learning_rate": 1.3920326491652325e-06, "loss": 0.9911, "step": 6179 }, { "epoch": 0.6499533306164302, "grad_norm": 2.055601110395823, "learning_rate": 1.3912843543854664e-06, "loss": 0.9653, "step": 6180 }, { "epoch": 0.6500585010582776, "grad_norm": 2.808238408960534, "learning_rate": 1.3905361832361078e-06, "loss": 0.9675, "step": 6181 }, { "epoch": 0.6501636715001249, "grad_norm": 2.2954725127353854, "learning_rate": 1.3897881358005843e-06, "loss": 0.9861, "step": 6182 }, { "epoch": 0.6502688419419722, "grad_norm": 2.4627639209709113, "learning_rate": 1.389040212162307e-06, "loss": 0.9841, "step": 6183 }, { "epoch": 0.6503740123838195, "grad_norm": 3.075750096193752, "learning_rate": 1.3882924124046775e-06, "loss": 1.0039, "step": 6184 }, { "epoch": 0.6504791828256669, "grad_norm": 2.715707058368657, "learning_rate": 1.387544736611079e-06, "loss": 0.9638, "step": 6185 }, { "epoch": 0.6505843532675142, "grad_norm": 1.977183746259915, "learning_rate": 1.3867971848648843e-06, "loss": 0.9912, "step": 6186 }, { "epoch": 0.6506895237093615, "grad_norm": 2.7109584352412353, "learning_rate": 1.386049757249452e-06, "loss": 1.016, "step": 6187 }, { "epoch": 0.6507946941512088, "grad_norm": 2.56279869947222, "learning_rate": 1.3853024538481241e-06, "loss": 0.9732, "step": 6188 }, { "epoch": 0.6508998645930562, "grad_norm": 2.1123247416434574, "learning_rate": 1.384555274744233e-06, "loss": 1.0225, "step": 6189 }, { "epoch": 0.6510050350349035, "grad_norm": 3.0471903006110517, "learning_rate": 1.3838082200210932e-06, "loss": 0.9687, "step": 6190 }, { "epoch": 0.6511102054767508, "grad_norm": 2.272404748311318, "learning_rate": 1.3830612897620072e-06, "loss": 1.004, "step": 6191 }, { "epoch": 0.6512153759185981, "grad_norm": 2.2705689260324875, "learning_rate": 1.3823144840502656e-06, "loss": 0.9655, "step": 6192 }, { "epoch": 0.6513205463604455, "grad_norm": 2.549896761841074, "learning_rate": 1.3815678029691399e-06, "loss": 0.951, "step": 6193 }, { "epoch": 0.6514257168022927, "grad_norm": 2.7098082789675817, "learning_rate": 1.3808212466018927e-06, "loss": 0.9638, "step": 6194 }, { "epoch": 0.65153088724414, "grad_norm": 2.473211401630005, "learning_rate": 1.3800748150317709e-06, "loss": 0.9972, "step": 6195 }, { "epoch": 0.6516360576859873, "grad_norm": 2.3649286529172047, "learning_rate": 1.3793285083420077e-06, "loss": 0.991, "step": 6196 }, { "epoch": 0.6517412281278346, "grad_norm": 2.074820659875855, "learning_rate": 1.378582326615821e-06, "loss": 0.9987, "step": 6197 }, { "epoch": 0.651846398569682, "grad_norm": 2.2908355538023453, "learning_rate": 1.3778362699364167e-06, "loss": 0.8914, "step": 6198 }, { "epoch": 0.6519515690115293, "grad_norm": 2.495491364615591, "learning_rate": 1.377090338386985e-06, "loss": 0.9856, "step": 6199 }, { "epoch": 0.6520567394533766, "grad_norm": 1.941710625750233, "learning_rate": 1.3763445320507034e-06, "loss": 1.0263, "step": 6200 }, { "epoch": 0.6521619098952239, "grad_norm": 2.2537782874692978, "learning_rate": 1.3755988510107365e-06, "loss": 0.9837, "step": 6201 }, { "epoch": 0.6522670803370713, "grad_norm": 2.14058569239182, "learning_rate": 1.3748532953502317e-06, "loss": 0.9417, "step": 6202 }, { "epoch": 0.6523722507789186, "grad_norm": 2.2055386916726043, "learning_rate": 1.3741078651523242e-06, "loss": 0.9859, "step": 6203 }, { "epoch": 0.6524774212207659, "grad_norm": 2.59614689675084, "learning_rate": 1.3733625605001365e-06, "loss": 0.9907, "step": 6204 }, { "epoch": 0.6525825916626132, "grad_norm": 2.4124820848680453, "learning_rate": 1.3726173814767763e-06, "loss": 1.0189, "step": 6205 }, { "epoch": 0.6526877621044606, "grad_norm": 2.350472903283974, "learning_rate": 1.3718723281653357e-06, "loss": 0.9483, "step": 6206 }, { "epoch": 0.6527929325463079, "grad_norm": 2.728569075199262, "learning_rate": 1.3711274006488935e-06, "loss": 1.0009, "step": 6207 }, { "epoch": 0.6528981029881552, "grad_norm": 2.1797869690723095, "learning_rate": 1.370382599010515e-06, "loss": 0.9916, "step": 6208 }, { "epoch": 0.6530032734300025, "grad_norm": 2.34014133371477, "learning_rate": 1.3696379233332518e-06, "loss": 0.9545, "step": 6209 }, { "epoch": 0.6531084438718499, "grad_norm": 2.760835763478809, "learning_rate": 1.3688933737001425e-06, "loss": 0.9907, "step": 6210 }, { "epoch": 0.6532136143136972, "grad_norm": 2.0489131366244666, "learning_rate": 1.3681489501942077e-06, "loss": 0.9922, "step": 6211 }, { "epoch": 0.6533187847555445, "grad_norm": 2.74268259260636, "learning_rate": 1.3674046528984576e-06, "loss": 0.9946, "step": 6212 }, { "epoch": 0.6534239551973918, "grad_norm": 2.3837332766442016, "learning_rate": 1.3666604818958878e-06, "loss": 0.9978, "step": 6213 }, { "epoch": 0.653529125639239, "grad_norm": 2.286480330832379, "learning_rate": 1.3659164372694771e-06, "loss": 1.0212, "step": 6214 }, { "epoch": 0.6536342960810864, "grad_norm": 3.110138767109496, "learning_rate": 1.365172519102195e-06, "loss": 1.0182, "step": 6215 }, { "epoch": 0.6537394665229337, "grad_norm": 2.3763826353787456, "learning_rate": 1.3644287274769915e-06, "loss": 0.9838, "step": 6216 }, { "epoch": 0.653844636964781, "grad_norm": 2.147543965542234, "learning_rate": 1.3636850624768065e-06, "loss": 1.065, "step": 6217 }, { "epoch": 0.6539498074066283, "grad_norm": 1.9128638684889196, "learning_rate": 1.362941524184564e-06, "loss": 0.99, "step": 6218 }, { "epoch": 0.6540549778484757, "grad_norm": 2.701396037832273, "learning_rate": 1.3621981126831755e-06, "loss": 1.0051, "step": 6219 }, { "epoch": 0.654160148290323, "grad_norm": 2.8040498677343324, "learning_rate": 1.3614548280555351e-06, "loss": 1.0138, "step": 6220 }, { "epoch": 0.6542653187321703, "grad_norm": 2.0771737084674466, "learning_rate": 1.3607116703845273e-06, "loss": 1.0207, "step": 6221 }, { "epoch": 0.6543704891740176, "grad_norm": 1.906154408918418, "learning_rate": 1.3599686397530171e-06, "loss": 1.0037, "step": 6222 }, { "epoch": 0.654475659615865, "grad_norm": 2.6242007085780337, "learning_rate": 1.35922573624386e-06, "loss": 0.9953, "step": 6223 }, { "epoch": 0.6545808300577123, "grad_norm": 2.536900445914889, "learning_rate": 1.3584829599398958e-06, "loss": 0.9774, "step": 6224 }, { "epoch": 0.6546860004995596, "grad_norm": 2.3679575872686933, "learning_rate": 1.3577403109239485e-06, "loss": 0.9633, "step": 6225 }, { "epoch": 0.6547911709414069, "grad_norm": 2.7750626080040086, "learning_rate": 1.35699778927883e-06, "loss": 0.9824, "step": 6226 }, { "epoch": 0.6548963413832543, "grad_norm": 2.2512171030808386, "learning_rate": 1.3562553950873377e-06, "loss": 0.9846, "step": 6227 }, { "epoch": 0.6550015118251016, "grad_norm": 3.2875448840376817, "learning_rate": 1.3555131284322532e-06, "loss": 0.9589, "step": 6228 }, { "epoch": 0.6551066822669489, "grad_norm": 2.026779321691147, "learning_rate": 1.3547709893963462e-06, "loss": 0.9613, "step": 6229 }, { "epoch": 0.6552118527087962, "grad_norm": 2.4724040803859197, "learning_rate": 1.3540289780623697e-06, "loss": 0.985, "step": 6230 }, { "epoch": 0.6553170231506436, "grad_norm": 2.210229193121631, "learning_rate": 1.3532870945130642e-06, "loss": 1.0203, "step": 6231 }, { "epoch": 0.6554221935924909, "grad_norm": 2.7088718437080894, "learning_rate": 1.3525453388311554e-06, "loss": 0.9994, "step": 6232 }, { "epoch": 0.6555273640343382, "grad_norm": 2.6674805083220607, "learning_rate": 1.3518037110993565e-06, "loss": 1.0023, "step": 6233 }, { "epoch": 0.6556325344761854, "grad_norm": 2.4368790272797347, "learning_rate": 1.3510622114003619e-06, "loss": 1.024, "step": 6234 }, { "epoch": 0.6557377049180327, "grad_norm": 2.355980409234641, "learning_rate": 1.3503208398168567e-06, "loss": 1.0132, "step": 6235 }, { "epoch": 0.6558428753598801, "grad_norm": 2.617783917085451, "learning_rate": 1.349579596431509e-06, "loss": 1.0071, "step": 6236 }, { "epoch": 0.6559480458017274, "grad_norm": 1.908042391800999, "learning_rate": 1.3488384813269726e-06, "loss": 0.9537, "step": 6237 }, { "epoch": 0.6560532162435747, "grad_norm": 2.4517991712108955, "learning_rate": 1.3480974945858887e-06, "loss": 0.9943, "step": 6238 }, { "epoch": 0.656158386685422, "grad_norm": 2.125250466725107, "learning_rate": 1.347356636290882e-06, "loss": 0.9884, "step": 6239 }, { "epoch": 0.6562635571272694, "grad_norm": 2.9442428748019256, "learning_rate": 1.3466159065245637e-06, "loss": 0.9988, "step": 6240 }, { "epoch": 0.6563687275691167, "grad_norm": 2.1219839011084813, "learning_rate": 1.3458753053695332e-06, "loss": 0.9689, "step": 6241 }, { "epoch": 0.656473898010964, "grad_norm": 2.175678143174604, "learning_rate": 1.3451348329083702e-06, "loss": 0.9788, "step": 6242 }, { "epoch": 0.6565790684528113, "grad_norm": 1.8922658899883003, "learning_rate": 1.344394489223646e-06, "loss": 0.9898, "step": 6243 }, { "epoch": 0.6566842388946587, "grad_norm": 2.6214577423300742, "learning_rate": 1.3436542743979125e-06, "loss": 0.9652, "step": 6244 }, { "epoch": 0.656789409336506, "grad_norm": 2.105636623831596, "learning_rate": 1.3429141885137097e-06, "loss": 0.9862, "step": 6245 }, { "epoch": 0.6568945797783533, "grad_norm": 1.9675801964204271, "learning_rate": 1.342174231653564e-06, "loss": 0.936, "step": 6246 }, { "epoch": 0.6569997502202006, "grad_norm": 2.1697661785388473, "learning_rate": 1.3414344038999862e-06, "loss": 0.9785, "step": 6247 }, { "epoch": 0.657104920662048, "grad_norm": 2.187917791791845, "learning_rate": 1.340694705335472e-06, "loss": 1.0148, "step": 6248 }, { "epoch": 0.6572100911038953, "grad_norm": 3.026719311049528, "learning_rate": 1.3399551360425035e-06, "loss": 0.9711, "step": 6249 }, { "epoch": 0.6573152615457426, "grad_norm": 1.8530024264442755, "learning_rate": 1.33921569610355e-06, "loss": 0.9588, "step": 6250 }, { "epoch": 0.6574204319875899, "grad_norm": 2.1627719960544014, "learning_rate": 1.3384763856010626e-06, "loss": 0.9773, "step": 6251 }, { "epoch": 0.6575256024294373, "grad_norm": 2.049389943315943, "learning_rate": 1.3377372046174826e-06, "loss": 0.9599, "step": 6252 }, { "epoch": 0.6576307728712846, "grad_norm": 2.0628952108415333, "learning_rate": 1.3369981532352317e-06, "loss": 1.0007, "step": 6253 }, { "epoch": 0.6577359433131319, "grad_norm": 2.491700162689158, "learning_rate": 1.3362592315367212e-06, "loss": 0.9925, "step": 6254 }, { "epoch": 0.6578411137549791, "grad_norm": 2.2480692531605677, "learning_rate": 1.3355204396043467e-06, "loss": 0.9865, "step": 6255 }, { "epoch": 0.6579462841968264, "grad_norm": 2.1560294381554073, "learning_rate": 1.3347817775204903e-06, "loss": 0.9463, "step": 6256 }, { "epoch": 0.6580514546386738, "grad_norm": 2.1594218843164397, "learning_rate": 1.3340432453675173e-06, "loss": 0.9732, "step": 6257 }, { "epoch": 0.6581566250805211, "grad_norm": 2.3800077094356666, "learning_rate": 1.3333048432277779e-06, "loss": 1.0126, "step": 6258 }, { "epoch": 0.6582617955223684, "grad_norm": 2.109956422004809, "learning_rate": 1.3325665711836138e-06, "loss": 0.9533, "step": 6259 }, { "epoch": 0.6583669659642157, "grad_norm": 3.0683493261098493, "learning_rate": 1.331828429317345e-06, "loss": 0.9883, "step": 6260 }, { "epoch": 0.6584721364060631, "grad_norm": 2.686877306764361, "learning_rate": 1.3310904177112819e-06, "loss": 0.9882, "step": 6261 }, { "epoch": 0.6585773068479104, "grad_norm": 3.1631141233592492, "learning_rate": 1.3303525364477166e-06, "loss": 1.0159, "step": 6262 }, { "epoch": 0.6586824772897577, "grad_norm": 3.0131960777287388, "learning_rate": 1.3296147856089298e-06, "loss": 0.9658, "step": 6263 }, { "epoch": 0.658787647731605, "grad_norm": 2.449844623908453, "learning_rate": 1.328877165277187e-06, "loss": 0.9998, "step": 6264 }, { "epoch": 0.6588928181734524, "grad_norm": 2.452655678014848, "learning_rate": 1.328139675534737e-06, "loss": 0.9983, "step": 6265 }, { "epoch": 0.6589979886152997, "grad_norm": 2.2755161044714, "learning_rate": 1.3274023164638178e-06, "loss": 1.0014, "step": 6266 }, { "epoch": 0.659103159057147, "grad_norm": 1.9204300885679346, "learning_rate": 1.326665088146648e-06, "loss": 0.9859, "step": 6267 }, { "epoch": 0.6592083294989943, "grad_norm": 3.1297354355899274, "learning_rate": 1.325927990665436e-06, "loss": 0.9901, "step": 6268 }, { "epoch": 0.6593134999408417, "grad_norm": 2.6490272337574448, "learning_rate": 1.325191024102373e-06, "loss": 1.006, "step": 6269 }, { "epoch": 0.659418670382689, "grad_norm": 3.235289643820615, "learning_rate": 1.3244541885396384e-06, "loss": 0.9851, "step": 6270 }, { "epoch": 0.6595238408245363, "grad_norm": 2.232938536691584, "learning_rate": 1.3237174840593927e-06, "loss": 0.9571, "step": 6271 }, { "epoch": 0.6596290112663836, "grad_norm": 2.7467215459350838, "learning_rate": 1.3229809107437852e-06, "loss": 0.9742, "step": 6272 }, { "epoch": 0.659734181708231, "grad_norm": 3.265655439864346, "learning_rate": 1.3222444686749508e-06, "loss": 0.9426, "step": 6273 }, { "epoch": 0.6598393521500783, "grad_norm": 2.7142378977870782, "learning_rate": 1.3215081579350058e-06, "loss": 0.9626, "step": 6274 }, { "epoch": 0.6599445225919255, "grad_norm": 2.424253609885278, "learning_rate": 1.3207719786060575e-06, "loss": 0.9681, "step": 6275 }, { "epoch": 0.6600496930337728, "grad_norm": 2.650710054297971, "learning_rate": 1.3200359307701926e-06, "loss": 1.0027, "step": 6276 }, { "epoch": 0.6601548634756201, "grad_norm": 2.106049814182672, "learning_rate": 1.319300014509488e-06, "loss": 0.9976, "step": 6277 }, { "epoch": 0.6602600339174675, "grad_norm": 3.0478589082735597, "learning_rate": 1.318564229906005e-06, "loss": 0.9812, "step": 6278 }, { "epoch": 0.6603652043593148, "grad_norm": 2.644175851321169, "learning_rate": 1.317828577041787e-06, "loss": 0.9984, "step": 6279 }, { "epoch": 0.6604703748011621, "grad_norm": 4.149275333858121, "learning_rate": 1.317093055998867e-06, "loss": 0.9905, "step": 6280 }, { "epoch": 0.6605755452430094, "grad_norm": 2.1753727776576413, "learning_rate": 1.3163576668592581e-06, "loss": 0.9645, "step": 6281 }, { "epoch": 0.6606807156848568, "grad_norm": 2.1321309814458433, "learning_rate": 1.315622409704967e-06, "loss": 0.9764, "step": 6282 }, { "epoch": 0.6607858861267041, "grad_norm": 2.3284143235104104, "learning_rate": 1.3148872846179761e-06, "loss": 0.9685, "step": 6283 }, { "epoch": 0.6608910565685514, "grad_norm": 2.20797024529712, "learning_rate": 1.31415229168026e-06, "loss": 0.9727, "step": 6284 }, { "epoch": 0.6609962270103987, "grad_norm": 2.27464739045528, "learning_rate": 1.3134174309737752e-06, "loss": 0.9636, "step": 6285 }, { "epoch": 0.661101397452246, "grad_norm": 2.564977023089282, "learning_rate": 1.312682702580464e-06, "loss": 0.9836, "step": 6286 }, { "epoch": 0.6612065678940934, "grad_norm": 2.088470460153833, "learning_rate": 1.3119481065822559e-06, "loss": 0.8773, "step": 6287 }, { "epoch": 0.6613117383359407, "grad_norm": 2.191541507414501, "learning_rate": 1.3112136430610623e-06, "loss": 1.0141, "step": 6288 }, { "epoch": 0.661416908777788, "grad_norm": 2.459055525272844, "learning_rate": 1.310479312098783e-06, "loss": 0.9724, "step": 6289 }, { "epoch": 0.6615220792196354, "grad_norm": 2.5763375289572004, "learning_rate": 1.3097451137772999e-06, "loss": 1.0105, "step": 6290 }, { "epoch": 0.6616272496614827, "grad_norm": 3.1403073156867753, "learning_rate": 1.3090110481784831e-06, "loss": 0.9903, "step": 6291 }, { "epoch": 0.66173242010333, "grad_norm": 2.718007886394719, "learning_rate": 1.3082771153841872e-06, "loss": 1.0049, "step": 6292 }, { "epoch": 0.6618375905451773, "grad_norm": 2.551223778632464, "learning_rate": 1.3075433154762496e-06, "loss": 0.9796, "step": 6293 }, { "epoch": 0.6619427609870246, "grad_norm": 2.1469053961592754, "learning_rate": 1.3068096485364967e-06, "loss": 1.0102, "step": 6294 }, { "epoch": 0.6620479314288719, "grad_norm": 3.4328998554698007, "learning_rate": 1.306076114646735e-06, "loss": 0.9514, "step": 6295 }, { "epoch": 0.6621531018707192, "grad_norm": 2.9569815422885153, "learning_rate": 1.3053427138887631e-06, "loss": 0.9742, "step": 6296 }, { "epoch": 0.6622582723125665, "grad_norm": 2.8570811225791637, "learning_rate": 1.3046094463443582e-06, "loss": 1.0205, "step": 6297 }, { "epoch": 0.6623634427544138, "grad_norm": 2.44062115758859, "learning_rate": 1.3038763120952871e-06, "loss": 1.004, "step": 6298 }, { "epoch": 0.6624686131962612, "grad_norm": 2.1860706563332344, "learning_rate": 1.303143311223298e-06, "loss": 0.9852, "step": 6299 }, { "epoch": 0.6625737836381085, "grad_norm": 2.305746587101714, "learning_rate": 1.302410443810127e-06, "loss": 0.9965, "step": 6300 }, { "epoch": 0.6626789540799558, "grad_norm": 1.9272014050310644, "learning_rate": 1.3016777099374962e-06, "loss": 1.025, "step": 6301 }, { "epoch": 0.6627841245218031, "grad_norm": 2.647801095541516, "learning_rate": 1.3009451096871084e-06, "loss": 0.9724, "step": 6302 }, { "epoch": 0.6628892949636505, "grad_norm": 2.479510774074513, "learning_rate": 1.3002126431406565e-06, "loss": 0.9775, "step": 6303 }, { "epoch": 0.6629944654054978, "grad_norm": 3.0571345818594478, "learning_rate": 1.2994803103798131e-06, "loss": 0.986, "step": 6304 }, { "epoch": 0.6630996358473451, "grad_norm": 2.7508548007010054, "learning_rate": 1.2987481114862427e-06, "loss": 0.9855, "step": 6305 }, { "epoch": 0.6632048062891924, "grad_norm": 1.9774859354202081, "learning_rate": 1.2980160465415891e-06, "loss": 0.9723, "step": 6306 }, { "epoch": 0.6633099767310398, "grad_norm": 2.2732675979550114, "learning_rate": 1.2972841156274843e-06, "loss": 0.9814, "step": 6307 }, { "epoch": 0.6634151471728871, "grad_norm": 2.362143185441358, "learning_rate": 1.2965523188255438e-06, "loss": 1.0208, "step": 6308 }, { "epoch": 0.6635203176147344, "grad_norm": 2.2701352939481096, "learning_rate": 1.2958206562173664e-06, "loss": 0.9388, "step": 6309 }, { "epoch": 0.6636254880565817, "grad_norm": 2.3921549016140102, "learning_rate": 1.2950891278845423e-06, "loss": 0.9988, "step": 6310 }, { "epoch": 0.663730658498429, "grad_norm": 2.573032844055122, "learning_rate": 1.2943577339086395e-06, "loss": 0.9928, "step": 6311 }, { "epoch": 0.6638358289402764, "grad_norm": 1.896933627943948, "learning_rate": 1.2936264743712159e-06, "loss": 0.9814, "step": 6312 }, { "epoch": 0.6639409993821237, "grad_norm": 2.511447955098937, "learning_rate": 1.292895349353811e-06, "loss": 0.9846, "step": 6313 }, { "epoch": 0.664046169823971, "grad_norm": 2.391646179219941, "learning_rate": 1.2921643589379517e-06, "loss": 0.9703, "step": 6314 }, { "epoch": 0.6641513402658183, "grad_norm": 2.067946485876376, "learning_rate": 1.2914335032051502e-06, "loss": 0.971, "step": 6315 }, { "epoch": 0.6642565107076656, "grad_norm": 3.1972490953679165, "learning_rate": 1.2907027822369006e-06, "loss": 0.9421, "step": 6316 }, { "epoch": 0.6643616811495129, "grad_norm": 1.7933848682194085, "learning_rate": 1.289972196114686e-06, "loss": 1.0276, "step": 6317 }, { "epoch": 0.6644668515913602, "grad_norm": 2.1050494682029797, "learning_rate": 1.2892417449199696e-06, "loss": 0.9682, "step": 6318 }, { "epoch": 0.6645720220332075, "grad_norm": 2.5224497949789235, "learning_rate": 1.2885114287342058e-06, "loss": 1.0111, "step": 6319 }, { "epoch": 0.6646771924750549, "grad_norm": 1.9853269766464046, "learning_rate": 1.287781247638828e-06, "loss": 0.9859, "step": 6320 }, { "epoch": 0.6647823629169022, "grad_norm": 3.571360120955266, "learning_rate": 1.2870512017152586e-06, "loss": 1.0006, "step": 6321 }, { "epoch": 0.6648875333587495, "grad_norm": 2.8210890079031303, "learning_rate": 1.286321291044902e-06, "loss": 0.9834, "step": 6322 }, { "epoch": 0.6649927038005968, "grad_norm": 2.1350217532955544, "learning_rate": 1.2855915157091498e-06, "loss": 1.0235, "step": 6323 }, { "epoch": 0.6650978742424442, "grad_norm": 2.939945404933564, "learning_rate": 1.2848618757893782e-06, "loss": 0.986, "step": 6324 }, { "epoch": 0.6652030446842915, "grad_norm": 2.8261629770042602, "learning_rate": 1.284132371366946e-06, "loss": 1.0038, "step": 6325 }, { "epoch": 0.6653082151261388, "grad_norm": 2.52942309852291, "learning_rate": 1.2834030025232006e-06, "loss": 0.9817, "step": 6326 }, { "epoch": 0.6654133855679861, "grad_norm": 2.432243507022762, "learning_rate": 1.2826737693394693e-06, "loss": 1.0329, "step": 6327 }, { "epoch": 0.6655185560098335, "grad_norm": 2.2735077443497937, "learning_rate": 1.2819446718970713e-06, "loss": 1.0334, "step": 6328 }, { "epoch": 0.6656237264516808, "grad_norm": 2.6901417303834467, "learning_rate": 1.2812157102773043e-06, "loss": 1.0319, "step": 6329 }, { "epoch": 0.6657288968935281, "grad_norm": 1.9151652735311289, "learning_rate": 1.2804868845614527e-06, "loss": 1.0053, "step": 6330 }, { "epoch": 0.6658340673353754, "grad_norm": 2.923158306662884, "learning_rate": 1.279758194830788e-06, "loss": 0.963, "step": 6331 }, { "epoch": 0.6659392377772227, "grad_norm": 3.174548375700343, "learning_rate": 1.2790296411665618e-06, "loss": 0.9674, "step": 6332 }, { "epoch": 0.6660444082190701, "grad_norm": 3.0051084482405717, "learning_rate": 1.2783012236500173e-06, "loss": 0.9453, "step": 6333 }, { "epoch": 0.6661495786609174, "grad_norm": 2.594123710120902, "learning_rate": 1.2775729423623759e-06, "loss": 0.9764, "step": 6334 }, { "epoch": 0.6662547491027647, "grad_norm": 2.5531587705993126, "learning_rate": 1.2768447973848485e-06, "loss": 0.9683, "step": 6335 }, { "epoch": 0.6663599195446119, "grad_norm": 3.1748751741089167, "learning_rate": 1.276116788798627e-06, "loss": 1.02, "step": 6336 }, { "epoch": 0.6664650899864593, "grad_norm": 2.7632150853823148, "learning_rate": 1.2753889166848909e-06, "loss": 0.9777, "step": 6337 }, { "epoch": 0.6665702604283066, "grad_norm": 2.0037733131489106, "learning_rate": 1.274661181124805e-06, "loss": 0.9881, "step": 6338 }, { "epoch": 0.6666754308701539, "grad_norm": 2.4827855341616885, "learning_rate": 1.2739335821995153e-06, "loss": 0.94, "step": 6339 }, { "epoch": 0.6667806013120012, "grad_norm": 2.8119810771826517, "learning_rate": 1.2732061199901563e-06, "loss": 0.994, "step": 6340 }, { "epoch": 0.6668857717538486, "grad_norm": 2.752059269835914, "learning_rate": 1.2724787945778427e-06, "loss": 1.0225, "step": 6341 }, { "epoch": 0.6669909421956959, "grad_norm": 2.2241542929333216, "learning_rate": 1.271751606043682e-06, "loss": 0.9708, "step": 6342 }, { "epoch": 0.6670961126375432, "grad_norm": 2.6235709049395997, "learning_rate": 1.2710245544687568e-06, "loss": 1.0189, "step": 6343 }, { "epoch": 0.6672012830793905, "grad_norm": 3.156266897222367, "learning_rate": 1.2702976399341422e-06, "loss": 0.9985, "step": 6344 }, { "epoch": 0.6673064535212379, "grad_norm": 2.230980642118251, "learning_rate": 1.2695708625208933e-06, "loss": 0.9875, "step": 6345 }, { "epoch": 0.6674116239630852, "grad_norm": 2.5767378584379155, "learning_rate": 1.2688442223100494e-06, "loss": 0.9804, "step": 6346 }, { "epoch": 0.6675167944049325, "grad_norm": 2.029400978615484, "learning_rate": 1.268117719382641e-06, "loss": 0.9311, "step": 6347 }, { "epoch": 0.6676219648467798, "grad_norm": 2.6045821189170235, "learning_rate": 1.2673913538196753e-06, "loss": 1.0034, "step": 6348 }, { "epoch": 0.6677271352886271, "grad_norm": 2.0673514221546965, "learning_rate": 1.26666512570215e-06, "loss": 0.9702, "step": 6349 }, { "epoch": 0.6678323057304745, "grad_norm": 2.227452564501476, "learning_rate": 1.265939035111043e-06, "loss": 0.9654, "step": 6350 }, { "epoch": 0.6679374761723218, "grad_norm": 2.389880074299462, "learning_rate": 1.26521308212732e-06, "loss": 1.024, "step": 6351 }, { "epoch": 0.6680426466141691, "grad_norm": 1.9298756073215733, "learning_rate": 1.2644872668319317e-06, "loss": 1.0022, "step": 6352 }, { "epoch": 0.6681478170560164, "grad_norm": 2.8622091569435577, "learning_rate": 1.2637615893058098e-06, "loss": 0.9935, "step": 6353 }, { "epoch": 0.6682529874978638, "grad_norm": 3.031543303955931, "learning_rate": 1.263036049629875e-06, "loss": 0.9945, "step": 6354 }, { "epoch": 0.6683581579397111, "grad_norm": 3.1179126127334835, "learning_rate": 1.262310647885028e-06, "loss": 1.0373, "step": 6355 }, { "epoch": 0.6684633283815583, "grad_norm": 2.301408947362086, "learning_rate": 1.2615853841521602e-06, "loss": 1.0232, "step": 6356 }, { "epoch": 0.6685684988234056, "grad_norm": 2.9564009207787585, "learning_rate": 1.2608602585121419e-06, "loss": 1.0009, "step": 6357 }, { "epoch": 0.668673669265253, "grad_norm": 2.338028385808441, "learning_rate": 1.2601352710458314e-06, "loss": 0.9979, "step": 6358 }, { "epoch": 0.6687788397071003, "grad_norm": 1.9749844430021521, "learning_rate": 1.2594104218340686e-06, "loss": 0.9774, "step": 6359 }, { "epoch": 0.6688840101489476, "grad_norm": 2.0803838048430623, "learning_rate": 1.2586857109576814e-06, "loss": 1.0137, "step": 6360 }, { "epoch": 0.6689891805907949, "grad_norm": 2.561718404866887, "learning_rate": 1.257961138497481e-06, "loss": 1.0203, "step": 6361 }, { "epoch": 0.6690943510326423, "grad_norm": 2.110231753776184, "learning_rate": 1.2572367045342615e-06, "loss": 0.9843, "step": 6362 }, { "epoch": 0.6691995214744896, "grad_norm": 3.728842150551063, "learning_rate": 1.256512409148804e-06, "loss": 1.0121, "step": 6363 }, { "epoch": 0.6693046919163369, "grad_norm": 2.6450699087235745, "learning_rate": 1.2557882524218722e-06, "loss": 1.0254, "step": 6364 }, { "epoch": 0.6694098623581842, "grad_norm": 3.3862188687250474, "learning_rate": 1.2550642344342155e-06, "loss": 0.9904, "step": 6365 }, { "epoch": 0.6695150328000316, "grad_norm": 2.6560070020655164, "learning_rate": 1.2543403552665684e-06, "loss": 1.001, "step": 6366 }, { "epoch": 0.6696202032418789, "grad_norm": 2.066321011865616, "learning_rate": 1.2536166149996476e-06, "loss": 1.0294, "step": 6367 }, { "epoch": 0.6697253736837262, "grad_norm": 1.841460947605558, "learning_rate": 1.252893013714157e-06, "loss": 0.955, "step": 6368 }, { "epoch": 0.6698305441255735, "grad_norm": 2.6195237655803427, "learning_rate": 1.2521695514907817e-06, "loss": 0.981, "step": 6369 }, { "epoch": 0.6699357145674208, "grad_norm": 2.5282441482759155, "learning_rate": 1.2514462284101969e-06, "loss": 1.0073, "step": 6370 }, { "epoch": 0.6700408850092682, "grad_norm": 2.420722358809797, "learning_rate": 1.2507230445530554e-06, "loss": 1.0199, "step": 6371 }, { "epoch": 0.6701460554511155, "grad_norm": 3.210358669515261, "learning_rate": 1.2500000000000007e-06, "loss": 0.935, "step": 6372 }, { "epoch": 0.6702512258929628, "grad_norm": 2.6066278908101244, "learning_rate": 1.2492770948316548e-06, "loss": 0.9807, "step": 6373 }, { "epoch": 0.6703563963348101, "grad_norm": 2.166982928665858, "learning_rate": 1.2485543291286292e-06, "loss": 1.0028, "step": 6374 }, { "epoch": 0.6704615667766575, "grad_norm": 2.3061126697659953, "learning_rate": 1.2478317029715182e-06, "loss": 0.972, "step": 6375 }, { "epoch": 0.6705667372185048, "grad_norm": 2.574290187989157, "learning_rate": 1.2471092164408985e-06, "loss": 0.9746, "step": 6376 }, { "epoch": 0.670671907660352, "grad_norm": 2.5423268036429754, "learning_rate": 1.2463868696173351e-06, "loss": 1.0209, "step": 6377 }, { "epoch": 0.6707770781021993, "grad_norm": 2.2164659344968767, "learning_rate": 1.245664662581372e-06, "loss": 0.9704, "step": 6378 }, { "epoch": 0.6708822485440467, "grad_norm": 2.582570427543782, "learning_rate": 1.2449425954135452e-06, "loss": 0.9712, "step": 6379 }, { "epoch": 0.670987418985894, "grad_norm": 2.124683883692389, "learning_rate": 1.2442206681943685e-06, "loss": 0.9725, "step": 6380 }, { "epoch": 0.6710925894277413, "grad_norm": 2.3639142162437095, "learning_rate": 1.2434988810043416e-06, "loss": 0.9681, "step": 6381 }, { "epoch": 0.6711977598695886, "grad_norm": 2.1442126123743024, "learning_rate": 1.2427772339239502e-06, "loss": 1.0037, "step": 6382 }, { "epoch": 0.671302930311436, "grad_norm": 2.625018523809141, "learning_rate": 1.2420557270336638e-06, "loss": 1.0121, "step": 6383 }, { "epoch": 0.6714081007532833, "grad_norm": 2.4345832011147217, "learning_rate": 1.241334360413937e-06, "loss": 0.972, "step": 6384 }, { "epoch": 0.6715132711951306, "grad_norm": 2.7727329895954993, "learning_rate": 1.2406131341452054e-06, "loss": 1.0481, "step": 6385 }, { "epoch": 0.6716184416369779, "grad_norm": 2.7456790530416173, "learning_rate": 1.2398920483078939e-06, "loss": 0.9972, "step": 6386 }, { "epoch": 0.6717236120788252, "grad_norm": 2.2868278468077765, "learning_rate": 1.2391711029824065e-06, "loss": 0.981, "step": 6387 }, { "epoch": 0.6718287825206726, "grad_norm": 2.193127747109827, "learning_rate": 1.2384502982491359e-06, "loss": 0.9802, "step": 6388 }, { "epoch": 0.6719339529625199, "grad_norm": 2.525039802068027, "learning_rate": 1.2377296341884578e-06, "loss": 1.0594, "step": 6389 }, { "epoch": 0.6720391234043672, "grad_norm": 2.2688658169002793, "learning_rate": 1.2370091108807307e-06, "loss": 0.9756, "step": 6390 }, { "epoch": 0.6721442938462145, "grad_norm": 2.8518070010439383, "learning_rate": 1.2362887284062994e-06, "loss": 1.0145, "step": 6391 }, { "epoch": 0.6722494642880619, "grad_norm": 3.120055638002566, "learning_rate": 1.23556848684549e-06, "loss": 1.0178, "step": 6392 }, { "epoch": 0.6723546347299092, "grad_norm": 2.677188223326482, "learning_rate": 1.2348483862786188e-06, "loss": 0.9663, "step": 6393 }, { "epoch": 0.6724598051717565, "grad_norm": 2.764351849759641, "learning_rate": 1.2341284267859796e-06, "loss": 0.9915, "step": 6394 }, { "epoch": 0.6725649756136038, "grad_norm": 1.9868623247964174, "learning_rate": 1.2334086084478553e-06, "loss": 0.9902, "step": 6395 }, { "epoch": 0.6726701460554512, "grad_norm": 1.88432409309354, "learning_rate": 1.2326889313445095e-06, "loss": 1.0214, "step": 6396 }, { "epoch": 0.6727753164972984, "grad_norm": 2.505160726077182, "learning_rate": 1.2319693955561926e-06, "loss": 0.996, "step": 6397 }, { "epoch": 0.6728804869391457, "grad_norm": 2.4305679692895326, "learning_rate": 1.2312500011631396e-06, "loss": 0.9957, "step": 6398 }, { "epoch": 0.672985657380993, "grad_norm": 2.26838492918931, "learning_rate": 1.2305307482455661e-06, "loss": 0.9879, "step": 6399 }, { "epoch": 0.6730908278228404, "grad_norm": 2.7005160503685772, "learning_rate": 1.2298116368836772e-06, "loss": 1.0049, "step": 6400 }, { "epoch": 0.6731959982646877, "grad_norm": 2.5263479523166814, "learning_rate": 1.2290926671576567e-06, "loss": 1.0359, "step": 6401 }, { "epoch": 0.673301168706535, "grad_norm": 2.1790008720432605, "learning_rate": 1.2283738391476766e-06, "loss": 1.0109, "step": 6402 }, { "epoch": 0.6734063391483823, "grad_norm": 2.6192166675598467, "learning_rate": 1.2276551529338929e-06, "loss": 0.9518, "step": 6403 }, { "epoch": 0.6735115095902297, "grad_norm": 2.905972519031942, "learning_rate": 1.2269366085964424e-06, "loss": 1.0174, "step": 6404 }, { "epoch": 0.673616680032077, "grad_norm": 2.9312474143114664, "learning_rate": 1.2262182062154498e-06, "loss": 1.0203, "step": 6405 }, { "epoch": 0.6737218504739243, "grad_norm": 2.394870761083195, "learning_rate": 1.225499945871022e-06, "loss": 0.9592, "step": 6406 }, { "epoch": 0.6738270209157716, "grad_norm": 2.025708578404046, "learning_rate": 1.2247818276432522e-06, "loss": 0.9798, "step": 6407 }, { "epoch": 0.673932191357619, "grad_norm": 2.183993520411737, "learning_rate": 1.2240638516122135e-06, "loss": 0.9284, "step": 6408 }, { "epoch": 0.6740373617994663, "grad_norm": 1.9540801713327498, "learning_rate": 1.2233460178579683e-06, "loss": 0.9748, "step": 6409 }, { "epoch": 0.6741425322413136, "grad_norm": 2.397767392403271, "learning_rate": 1.2226283264605587e-06, "loss": 0.9958, "step": 6410 }, { "epoch": 0.6742477026831609, "grad_norm": 2.63791023453177, "learning_rate": 1.2219107775000136e-06, "loss": 0.9764, "step": 6411 }, { "epoch": 0.6743528731250082, "grad_norm": 2.6612509498444563, "learning_rate": 1.2211933710563462e-06, "loss": 0.9665, "step": 6412 }, { "epoch": 0.6744580435668556, "grad_norm": 2.1922298975484, "learning_rate": 1.2204761072095511e-06, "loss": 1.0004, "step": 6413 }, { "epoch": 0.6745632140087029, "grad_norm": 2.3286284733059666, "learning_rate": 1.219758986039611e-06, "loss": 0.9977, "step": 6414 }, { "epoch": 0.6746683844505502, "grad_norm": 2.8936932199687746, "learning_rate": 1.2190420076264877e-06, "loss": 1.0052, "step": 6415 }, { "epoch": 0.6747735548923975, "grad_norm": 2.9466430859373043, "learning_rate": 1.2183251720501317e-06, "loss": 1.0172, "step": 6416 }, { "epoch": 0.6748787253342448, "grad_norm": 2.6112081025461276, "learning_rate": 1.2176084793904764e-06, "loss": 1.026, "step": 6417 }, { "epoch": 0.6749838957760921, "grad_norm": 2.623855271718345, "learning_rate": 1.2168919297274368e-06, "loss": 0.998, "step": 6418 }, { "epoch": 0.6750890662179394, "grad_norm": 2.4575121259688033, "learning_rate": 1.2161755231409142e-06, "loss": 0.9709, "step": 6419 }, { "epoch": 0.6751942366597867, "grad_norm": 2.5971196149724416, "learning_rate": 1.2154592597107942e-06, "loss": 0.9818, "step": 6420 }, { "epoch": 0.675299407101634, "grad_norm": 1.8871909749453315, "learning_rate": 1.214743139516946e-06, "loss": 1.0128, "step": 6421 }, { "epoch": 0.6754045775434814, "grad_norm": 2.2047792064214824, "learning_rate": 1.2140271626392215e-06, "loss": 0.9645, "step": 6422 }, { "epoch": 0.6755097479853287, "grad_norm": 2.1961869799992435, "learning_rate": 1.2133113291574586e-06, "loss": 0.9703, "step": 6423 }, { "epoch": 0.675614918427176, "grad_norm": 2.958708144601371, "learning_rate": 1.212595639151477e-06, "loss": 0.9813, "step": 6424 }, { "epoch": 0.6757200888690233, "grad_norm": 2.1479518310219854, "learning_rate": 1.211880092701083e-06, "loss": 1.0119, "step": 6425 }, { "epoch": 0.6758252593108707, "grad_norm": 2.390810440217156, "learning_rate": 1.2111646898860654e-06, "loss": 0.9381, "step": 6426 }, { "epoch": 0.675930429752718, "grad_norm": 2.2064670963516835, "learning_rate": 1.2104494307861963e-06, "loss": 0.9644, "step": 6427 }, { "epoch": 0.6760356001945653, "grad_norm": 2.429723636327678, "learning_rate": 1.2097343154812332e-06, "loss": 0.9703, "step": 6428 }, { "epoch": 0.6761407706364126, "grad_norm": 3.083948528744446, "learning_rate": 1.2090193440509173e-06, "loss": 1.0232, "step": 6429 }, { "epoch": 0.67624594107826, "grad_norm": 2.9234421217471187, "learning_rate": 1.208304516574974e-06, "loss": 0.9884, "step": 6430 }, { "epoch": 0.6763511115201073, "grad_norm": 1.6972930816606224, "learning_rate": 1.2075898331331112e-06, "loss": 1.0111, "step": 6431 }, { "epoch": 0.6764562819619546, "grad_norm": 3.532197380851382, "learning_rate": 1.206875293805021e-06, "loss": 1.0103, "step": 6432 }, { "epoch": 0.6765614524038019, "grad_norm": 2.534725775044316, "learning_rate": 1.206160898670381e-06, "loss": 0.9684, "step": 6433 }, { "epoch": 0.6766666228456493, "grad_norm": 2.6453642866329408, "learning_rate": 1.2054466478088515e-06, "loss": 1.0008, "step": 6434 }, { "epoch": 0.6767717932874966, "grad_norm": 2.312434495761649, "learning_rate": 1.2047325413000782e-06, "loss": 0.9743, "step": 6435 }, { "epoch": 0.6768769637293439, "grad_norm": 1.9592460033323447, "learning_rate": 1.2040185792236874e-06, "loss": 1.004, "step": 6436 }, { "epoch": 0.6769821341711912, "grad_norm": 1.8537125451220031, "learning_rate": 1.2033047616592938e-06, "loss": 0.9589, "step": 6437 }, { "epoch": 0.6770873046130385, "grad_norm": 2.3821524850998137, "learning_rate": 1.2025910886864914e-06, "loss": 1.0142, "step": 6438 }, { "epoch": 0.6771924750548858, "grad_norm": 2.6671405915081805, "learning_rate": 1.2018775603848613e-06, "loss": 0.9977, "step": 6439 }, { "epoch": 0.6772976454967331, "grad_norm": 2.9782145090252388, "learning_rate": 1.201164176833968e-06, "loss": 0.9522, "step": 6440 }, { "epoch": 0.6774028159385804, "grad_norm": 2.3275648903630803, "learning_rate": 1.2004509381133577e-06, "loss": 0.9778, "step": 6441 }, { "epoch": 0.6775079863804278, "grad_norm": 2.2558601171743953, "learning_rate": 1.1997378443025633e-06, "loss": 1.0001, "step": 6442 }, { "epoch": 0.6776131568222751, "grad_norm": 2.7943480960191405, "learning_rate": 1.1990248954811002e-06, "loss": 1.0329, "step": 6443 }, { "epoch": 0.6777183272641224, "grad_norm": 1.9980446949092163, "learning_rate": 1.1983120917284682e-06, "loss": 1.0, "step": 6444 }, { "epoch": 0.6778234977059697, "grad_norm": 2.5703128757840967, "learning_rate": 1.1975994331241491e-06, "loss": 1.0233, "step": 6445 }, { "epoch": 0.677928668147817, "grad_norm": 2.454703747591987, "learning_rate": 1.1968869197476116e-06, "loss": 0.9941, "step": 6446 }, { "epoch": 0.6780338385896644, "grad_norm": 2.1653067794036995, "learning_rate": 1.1961745516783044e-06, "loss": 0.9694, "step": 6447 }, { "epoch": 0.6781390090315117, "grad_norm": 2.2038359344425396, "learning_rate": 1.1954623289956633e-06, "loss": 0.977, "step": 6448 }, { "epoch": 0.678244179473359, "grad_norm": 2.693813467095616, "learning_rate": 1.1947502517791073e-06, "loss": 0.984, "step": 6449 }, { "epoch": 0.6783493499152063, "grad_norm": 2.1406308204159874, "learning_rate": 1.194038320108037e-06, "loss": 0.9603, "step": 6450 }, { "epoch": 0.6784545203570537, "grad_norm": 2.1294455644301076, "learning_rate": 1.1933265340618389e-06, "loss": 0.9735, "step": 6451 }, { "epoch": 0.678559690798901, "grad_norm": 2.103773447891833, "learning_rate": 1.192614893719884e-06, "loss": 0.9834, "step": 6452 }, { "epoch": 0.6786648612407483, "grad_norm": 2.0922753017568336, "learning_rate": 1.1919033991615234e-06, "loss": 1.0113, "step": 6453 }, { "epoch": 0.6787700316825956, "grad_norm": 1.9815603762507759, "learning_rate": 1.1911920504660963e-06, "loss": 0.9569, "step": 6454 }, { "epoch": 0.678875202124443, "grad_norm": 1.797769460062252, "learning_rate": 1.190480847712922e-06, "loss": 0.9873, "step": 6455 }, { "epoch": 0.6789803725662903, "grad_norm": 2.003693077137846, "learning_rate": 1.1897697909813058e-06, "loss": 1.0133, "step": 6456 }, { "epoch": 0.6790855430081376, "grad_norm": 2.1003840733140686, "learning_rate": 1.1890588803505362e-06, "loss": 0.9632, "step": 6457 }, { "epoch": 0.6791907134499848, "grad_norm": 2.080990514432014, "learning_rate": 1.1883481158998862e-06, "loss": 0.9783, "step": 6458 }, { "epoch": 0.6792958838918322, "grad_norm": 3.374047031904028, "learning_rate": 1.1876374977086094e-06, "loss": 1.0202, "step": 6459 }, { "epoch": 0.6794010543336795, "grad_norm": 2.4938920235535047, "learning_rate": 1.1869270258559477e-06, "loss": 1.008, "step": 6460 }, { "epoch": 0.6795062247755268, "grad_norm": 2.186419415119328, "learning_rate": 1.1862167004211217e-06, "loss": 0.9835, "step": 6461 }, { "epoch": 0.6796113952173741, "grad_norm": 2.9582048165354378, "learning_rate": 1.1855065214833394e-06, "loss": 0.9733, "step": 6462 }, { "epoch": 0.6797165656592214, "grad_norm": 3.8575153613107016, "learning_rate": 1.1847964891217923e-06, "loss": 1.0115, "step": 6463 }, { "epoch": 0.6798217361010688, "grad_norm": 2.242861038730955, "learning_rate": 1.1840866034156526e-06, "loss": 0.9677, "step": 6464 }, { "epoch": 0.6799269065429161, "grad_norm": 2.0912260201336648, "learning_rate": 1.1833768644440787e-06, "loss": 0.9556, "step": 6465 }, { "epoch": 0.6800320769847634, "grad_norm": 3.3182406948209993, "learning_rate": 1.1826672722862137e-06, "loss": 0.9788, "step": 6466 }, { "epoch": 0.6801372474266107, "grad_norm": 2.887753294344934, "learning_rate": 1.1819578270211801e-06, "loss": 0.9623, "step": 6467 }, { "epoch": 0.6802424178684581, "grad_norm": 3.1634544519317966, "learning_rate": 1.1812485287280886e-06, "loss": 1.0159, "step": 6468 }, { "epoch": 0.6803475883103054, "grad_norm": 2.210225867067167, "learning_rate": 1.1805393774860296e-06, "loss": 0.9579, "step": 6469 }, { "epoch": 0.6804527587521527, "grad_norm": 2.3215390072281874, "learning_rate": 1.1798303733740801e-06, "loss": 0.9572, "step": 6470 }, { "epoch": 0.680557929194, "grad_norm": 2.531063961551066, "learning_rate": 1.1791215164712993e-06, "loss": 1.0024, "step": 6471 }, { "epoch": 0.6806630996358474, "grad_norm": 2.6959358550758883, "learning_rate": 1.1784128068567316e-06, "loss": 0.996, "step": 6472 }, { "epoch": 0.6807682700776947, "grad_norm": 2.0196092769679828, "learning_rate": 1.1777042446094011e-06, "loss": 0.9526, "step": 6473 }, { "epoch": 0.680873440519542, "grad_norm": 2.020236502773026, "learning_rate": 1.1769958298083192e-06, "loss": 0.9758, "step": 6474 }, { "epoch": 0.6809786109613893, "grad_norm": 2.1941180121097825, "learning_rate": 1.176287562532481e-06, "loss": 0.9882, "step": 6475 }, { "epoch": 0.6810837814032367, "grad_norm": 2.44848887745427, "learning_rate": 1.1755794428608614e-06, "loss": 1.0088, "step": 6476 }, { "epoch": 0.681188951845084, "grad_norm": 1.6532880562870076, "learning_rate": 1.1748714708724232e-06, "loss": 0.9774, "step": 6477 }, { "epoch": 0.6812941222869312, "grad_norm": 2.6504903012905494, "learning_rate": 1.1741636466461093e-06, "loss": 0.9639, "step": 6478 }, { "epoch": 0.6813992927287785, "grad_norm": 2.1833707527018653, "learning_rate": 1.173455970260848e-06, "loss": 0.9325, "step": 6479 }, { "epoch": 0.6815044631706259, "grad_norm": 1.8991991840090796, "learning_rate": 1.1727484417955512e-06, "loss": 0.9709, "step": 6480 }, { "epoch": 0.6816096336124732, "grad_norm": 2.7755476284341594, "learning_rate": 1.1720410613291144e-06, "loss": 1.0033, "step": 6481 }, { "epoch": 0.6817148040543205, "grad_norm": 2.4340897257786587, "learning_rate": 1.1713338289404152e-06, "loss": 0.9761, "step": 6482 }, { "epoch": 0.6818199744961678, "grad_norm": 2.6322074888977873, "learning_rate": 1.1706267447083145e-06, "loss": 0.9855, "step": 6483 }, { "epoch": 0.6819251449380151, "grad_norm": 2.5435211848927963, "learning_rate": 1.169919808711659e-06, "loss": 0.9968, "step": 6484 }, { "epoch": 0.6820303153798625, "grad_norm": 2.3401833639741376, "learning_rate": 1.1692130210292767e-06, "loss": 0.9764, "step": 6485 }, { "epoch": 0.6821354858217098, "grad_norm": 2.757772162036146, "learning_rate": 1.1685063817399818e-06, "loss": 0.9863, "step": 6486 }, { "epoch": 0.6822406562635571, "grad_norm": 2.2309395979337774, "learning_rate": 1.167799890922568e-06, "loss": 0.9525, "step": 6487 }, { "epoch": 0.6823458267054044, "grad_norm": 2.2761181614261123, "learning_rate": 1.167093548655815e-06, "loss": 0.9839, "step": 6488 }, { "epoch": 0.6824509971472518, "grad_norm": 2.4073807837683474, "learning_rate": 1.1663873550184864e-06, "loss": 0.9943, "step": 6489 }, { "epoch": 0.6825561675890991, "grad_norm": 2.344425968364666, "learning_rate": 1.1656813100893271e-06, "loss": 1.0342, "step": 6490 }, { "epoch": 0.6826613380309464, "grad_norm": 3.3083451221904077, "learning_rate": 1.1649754139470679e-06, "loss": 0.9833, "step": 6491 }, { "epoch": 0.6827665084727937, "grad_norm": 2.101235305234725, "learning_rate": 1.16426966667042e-06, "loss": 1.0127, "step": 6492 }, { "epoch": 0.6828716789146411, "grad_norm": 2.4853559361171267, "learning_rate": 1.1635640683380803e-06, "loss": 0.9932, "step": 6493 }, { "epoch": 0.6829768493564884, "grad_norm": 2.823621227962317, "learning_rate": 1.1628586190287289e-06, "loss": 0.9721, "step": 6494 }, { "epoch": 0.6830820197983357, "grad_norm": 2.1446739627758924, "learning_rate": 1.1621533188210296e-06, "loss": 0.9461, "step": 6495 }, { "epoch": 0.683187190240183, "grad_norm": 2.5010687220751877, "learning_rate": 1.1614481677936274e-06, "loss": 0.9768, "step": 6496 }, { "epoch": 0.6832923606820304, "grad_norm": 2.5827579517567134, "learning_rate": 1.1607431660251523e-06, "loss": 0.9719, "step": 6497 }, { "epoch": 0.6833975311238777, "grad_norm": 2.163533594993246, "learning_rate": 1.160038313594219e-06, "loss": 0.9844, "step": 6498 }, { "epoch": 0.6835027015657249, "grad_norm": 1.6505556607260339, "learning_rate": 1.1593336105794222e-06, "loss": 0.9816, "step": 6499 }, { "epoch": 0.6836078720075722, "grad_norm": 2.5798234985766753, "learning_rate": 1.158629057059343e-06, "loss": 0.9543, "step": 6500 }, { "epoch": 0.6837130424494196, "grad_norm": 2.847502464596311, "learning_rate": 1.1579246531125435e-06, "loss": 0.9713, "step": 6501 }, { "epoch": 0.6838182128912669, "grad_norm": 2.664122177395312, "learning_rate": 1.1572203988175706e-06, "loss": 1.0001, "step": 6502 }, { "epoch": 0.6839233833331142, "grad_norm": 2.3631903419020603, "learning_rate": 1.1565162942529553e-06, "loss": 0.9806, "step": 6503 }, { "epoch": 0.6840285537749615, "grad_norm": 2.140387924285255, "learning_rate": 1.155812339497209e-06, "loss": 1.0032, "step": 6504 }, { "epoch": 0.6841337242168088, "grad_norm": 2.481719404717142, "learning_rate": 1.1551085346288296e-06, "loss": 1.0173, "step": 6505 }, { "epoch": 0.6842388946586562, "grad_norm": 2.508106903316508, "learning_rate": 1.154404879726294e-06, "loss": 0.9962, "step": 6506 }, { "epoch": 0.6843440651005035, "grad_norm": 3.216269422744731, "learning_rate": 1.1537013748680694e-06, "loss": 1.0013, "step": 6507 }, { "epoch": 0.6844492355423508, "grad_norm": 2.0004248330968206, "learning_rate": 1.152998020132599e-06, "loss": 1.0044, "step": 6508 }, { "epoch": 0.6845544059841981, "grad_norm": 1.933485559067847, "learning_rate": 1.1522948155983143e-06, "loss": 0.9502, "step": 6509 }, { "epoch": 0.6846595764260455, "grad_norm": 2.6662844264965972, "learning_rate": 1.1515917613436258e-06, "loss": 0.9901, "step": 6510 }, { "epoch": 0.6847647468678928, "grad_norm": 1.9043626170295296, "learning_rate": 1.1508888574469308e-06, "loss": 0.9483, "step": 6511 }, { "epoch": 0.6848699173097401, "grad_norm": 2.889546124540106, "learning_rate": 1.1501861039866095e-06, "loss": 0.9652, "step": 6512 }, { "epoch": 0.6849750877515874, "grad_norm": 2.297426343613282, "learning_rate": 1.1494835010410222e-06, "loss": 0.9644, "step": 6513 }, { "epoch": 0.6850802581934348, "grad_norm": 2.5762672587290227, "learning_rate": 1.1487810486885164e-06, "loss": 0.919, "step": 6514 }, { "epoch": 0.6851854286352821, "grad_norm": 3.1574284790806653, "learning_rate": 1.1480787470074197e-06, "loss": 0.9916, "step": 6515 }, { "epoch": 0.6852905990771294, "grad_norm": 2.7014959221760697, "learning_rate": 1.147376596076045e-06, "loss": 0.9377, "step": 6516 }, { "epoch": 0.6853957695189767, "grad_norm": 2.5585301645643783, "learning_rate": 1.146674595972688e-06, "loss": 0.9876, "step": 6517 }, { "epoch": 0.6855009399608241, "grad_norm": 2.2873581952413944, "learning_rate": 1.1459727467756257e-06, "loss": 1.0167, "step": 6518 }, { "epoch": 0.6856061104026713, "grad_norm": 2.0427362279367567, "learning_rate": 1.1452710485631216e-06, "loss": 1.0053, "step": 6519 }, { "epoch": 0.6857112808445186, "grad_norm": 1.6816352696983445, "learning_rate": 1.1445695014134175e-06, "loss": 0.9847, "step": 6520 }, { "epoch": 0.6858164512863659, "grad_norm": 2.8015068825343077, "learning_rate": 1.1438681054047454e-06, "loss": 0.9762, "step": 6521 }, { "epoch": 0.6859216217282132, "grad_norm": 2.8607684877029493, "learning_rate": 1.143166860615313e-06, "loss": 0.9794, "step": 6522 }, { "epoch": 0.6860267921700606, "grad_norm": 1.8264702204384304, "learning_rate": 1.1424657671233175e-06, "loss": 1.0061, "step": 6523 }, { "epoch": 0.6861319626119079, "grad_norm": 2.5820509572402357, "learning_rate": 1.1417648250069332e-06, "loss": 1.0065, "step": 6524 }, { "epoch": 0.6862371330537552, "grad_norm": 2.131759160671369, "learning_rate": 1.1410640343443222e-06, "loss": 0.9638, "step": 6525 }, { "epoch": 0.6863423034956025, "grad_norm": 2.427313320161486, "learning_rate": 1.140363395213629e-06, "loss": 0.9769, "step": 6526 }, { "epoch": 0.6864474739374499, "grad_norm": 2.762517347392917, "learning_rate": 1.139662907692978e-06, "loss": 1.0102, "step": 6527 }, { "epoch": 0.6865526443792972, "grad_norm": 1.928573110997746, "learning_rate": 1.1389625718604816e-06, "loss": 0.9984, "step": 6528 }, { "epoch": 0.6866578148211445, "grad_norm": 2.890165560340471, "learning_rate": 1.1382623877942291e-06, "loss": 1.0161, "step": 6529 }, { "epoch": 0.6867629852629918, "grad_norm": 2.287013440890568, "learning_rate": 1.1375623555723008e-06, "loss": 1.0233, "step": 6530 }, { "epoch": 0.6868681557048392, "grad_norm": 1.865259627410808, "learning_rate": 1.1368624752727529e-06, "loss": 0.9824, "step": 6531 }, { "epoch": 0.6869733261466865, "grad_norm": 2.865076892904702, "learning_rate": 1.1361627469736286e-06, "loss": 0.9781, "step": 6532 }, { "epoch": 0.6870784965885338, "grad_norm": 1.8570863276832512, "learning_rate": 1.1354631707529532e-06, "loss": 0.9515, "step": 6533 }, { "epoch": 0.6871836670303811, "grad_norm": 2.1759244504535022, "learning_rate": 1.1347637466887324e-06, "loss": 0.9344, "step": 6534 }, { "epoch": 0.6872888374722285, "grad_norm": 2.696477773718701, "learning_rate": 1.134064474858961e-06, "loss": 0.9849, "step": 6535 }, { "epoch": 0.6873940079140758, "grad_norm": 2.0827168686133066, "learning_rate": 1.1333653553416107e-06, "loss": 0.948, "step": 6536 }, { "epoch": 0.6874991783559231, "grad_norm": 2.5449063654411237, "learning_rate": 1.1326663882146407e-06, "loss": 0.9875, "step": 6537 }, { "epoch": 0.6876043487977704, "grad_norm": 2.1105732172457667, "learning_rate": 1.1319675735559894e-06, "loss": 1.0455, "step": 6538 }, { "epoch": 0.6877095192396177, "grad_norm": 2.6112809368518044, "learning_rate": 1.1312689114435806e-06, "loss": 0.9998, "step": 6539 }, { "epoch": 0.687814689681465, "grad_norm": 2.6962129988071917, "learning_rate": 1.130570401955322e-06, "loss": 0.974, "step": 6540 }, { "epoch": 0.6879198601233123, "grad_norm": 2.3841373338690954, "learning_rate": 1.129872045169101e-06, "loss": 0.9668, "step": 6541 }, { "epoch": 0.6880250305651596, "grad_norm": 2.00322624680125, "learning_rate": 1.1291738411627913e-06, "loss": 0.9997, "step": 6542 }, { "epoch": 0.688130201007007, "grad_norm": 2.391244626370067, "learning_rate": 1.1284757900142451e-06, "loss": 0.9691, "step": 6543 }, { "epoch": 0.6882353714488543, "grad_norm": 1.7816408797129248, "learning_rate": 1.1277778918013046e-06, "loss": 1.0039, "step": 6544 }, { "epoch": 0.6883405418907016, "grad_norm": 1.9496538742999459, "learning_rate": 1.127080146601788e-06, "loss": 0.9769, "step": 6545 }, { "epoch": 0.6884457123325489, "grad_norm": 2.7789172568168166, "learning_rate": 1.1263825544935015e-06, "loss": 0.9584, "step": 6546 }, { "epoch": 0.6885508827743962, "grad_norm": 2.116800668579364, "learning_rate": 1.1256851155542297e-06, "loss": 0.9908, "step": 6547 }, { "epoch": 0.6886560532162436, "grad_norm": 2.942405980467766, "learning_rate": 1.1249878298617436e-06, "loss": 0.9636, "step": 6548 }, { "epoch": 0.6887612236580909, "grad_norm": 2.213773047892503, "learning_rate": 1.124290697493797e-06, "loss": 0.9824, "step": 6549 }, { "epoch": 0.6888663940999382, "grad_norm": 2.1291578558657394, "learning_rate": 1.1235937185281234e-06, "loss": 0.9922, "step": 6550 }, { "epoch": 0.6889715645417855, "grad_norm": 2.724578811906178, "learning_rate": 1.1228968930424433e-06, "loss": 0.9929, "step": 6551 }, { "epoch": 0.6890767349836329, "grad_norm": 2.0628702408800677, "learning_rate": 1.1222002211144567e-06, "loss": 0.9745, "step": 6552 }, { "epoch": 0.6891819054254802, "grad_norm": 2.1610050278428083, "learning_rate": 1.1215037028218484e-06, "loss": 1.005, "step": 6553 }, { "epoch": 0.6892870758673275, "grad_norm": 2.7787423829228874, "learning_rate": 1.1208073382422866e-06, "loss": 1.0103, "step": 6554 }, { "epoch": 0.6893922463091748, "grad_norm": 2.3348483217669633, "learning_rate": 1.1201111274534198e-06, "loss": 0.9533, "step": 6555 }, { "epoch": 0.6894974167510222, "grad_norm": 2.11428494125051, "learning_rate": 1.1194150705328825e-06, "loss": 1.0074, "step": 6556 }, { "epoch": 0.6896025871928695, "grad_norm": 2.424483283901889, "learning_rate": 1.1187191675582878e-06, "loss": 1.0236, "step": 6557 }, { "epoch": 0.6897077576347168, "grad_norm": 2.041193732298862, "learning_rate": 1.1180234186072379e-06, "loss": 0.9219, "step": 6558 }, { "epoch": 0.6898129280765641, "grad_norm": 2.7736259010967075, "learning_rate": 1.1173278237573113e-06, "loss": 0.9917, "step": 6559 }, { "epoch": 0.6899180985184113, "grad_norm": 2.479292231955384, "learning_rate": 1.1166323830860745e-06, "loss": 0.9836, "step": 6560 }, { "epoch": 0.6900232689602587, "grad_norm": 2.286822176950764, "learning_rate": 1.1159370966710723e-06, "loss": 0.9802, "step": 6561 }, { "epoch": 0.690128439402106, "grad_norm": 1.7894272908985807, "learning_rate": 1.1152419645898355e-06, "loss": 0.9678, "step": 6562 }, { "epoch": 0.6902336098439533, "grad_norm": 3.264730137920692, "learning_rate": 1.114546986919878e-06, "loss": 1.0223, "step": 6563 }, { "epoch": 0.6903387802858006, "grad_norm": 2.1228771331780125, "learning_rate": 1.1138521637386928e-06, "loss": 0.9932, "step": 6564 }, { "epoch": 0.690443950727648, "grad_norm": 2.83441954898417, "learning_rate": 1.1131574951237607e-06, "loss": 0.9904, "step": 6565 }, { "epoch": 0.6905491211694953, "grad_norm": 2.3866086648057, "learning_rate": 1.112462981152539e-06, "loss": 0.9736, "step": 6566 }, { "epoch": 0.6906542916113426, "grad_norm": 2.0765111597644133, "learning_rate": 1.1117686219024756e-06, "loss": 0.9855, "step": 6567 }, { "epoch": 0.6907594620531899, "grad_norm": 1.9430515714587882, "learning_rate": 1.1110744174509952e-06, "loss": 0.9586, "step": 6568 }, { "epoch": 0.6908646324950373, "grad_norm": 2.722560114399417, "learning_rate": 1.1103803678755058e-06, "loss": 0.944, "step": 6569 }, { "epoch": 0.6909698029368846, "grad_norm": 2.1402961489701267, "learning_rate": 1.109686473253401e-06, "loss": 0.995, "step": 6570 }, { "epoch": 0.6910749733787319, "grad_norm": 2.7043321565248677, "learning_rate": 1.1089927336620531e-06, "loss": 1.0437, "step": 6571 }, { "epoch": 0.6911801438205792, "grad_norm": 2.434297743887707, "learning_rate": 1.108299149178823e-06, "loss": 0.9765, "step": 6572 }, { "epoch": 0.6912853142624266, "grad_norm": 1.8390287310817142, "learning_rate": 1.107605719881048e-06, "loss": 0.9612, "step": 6573 }, { "epoch": 0.6913904847042739, "grad_norm": 2.270753478584313, "learning_rate": 1.1069124458460528e-06, "loss": 0.8822, "step": 6574 }, { "epoch": 0.6914956551461212, "grad_norm": 2.1749999487851523, "learning_rate": 1.1062193271511408e-06, "loss": 0.9452, "step": 6575 }, { "epoch": 0.6916008255879685, "grad_norm": 2.202741500698131, "learning_rate": 1.1055263638736008e-06, "loss": 0.9954, "step": 6576 }, { "epoch": 0.6917059960298159, "grad_norm": 3.070731100268695, "learning_rate": 1.1048335560907047e-06, "loss": 1.0214, "step": 6577 }, { "epoch": 0.6918111664716632, "grad_norm": 2.7005615503787426, "learning_rate": 1.1041409038797047e-06, "loss": 1.0112, "step": 6578 }, { "epoch": 0.6919163369135105, "grad_norm": 2.7644388909239135, "learning_rate": 1.1034484073178377e-06, "loss": 0.9633, "step": 6579 }, { "epoch": 0.6920215073553577, "grad_norm": 2.01408411568597, "learning_rate": 1.1027560664823208e-06, "loss": 1.0022, "step": 6580 }, { "epoch": 0.692126677797205, "grad_norm": 2.4415146472180593, "learning_rate": 1.102063881450358e-06, "loss": 0.9889, "step": 6581 }, { "epoch": 0.6922318482390524, "grad_norm": 2.1342536088258277, "learning_rate": 1.1013718522991315e-06, "loss": 1.0117, "step": 6582 }, { "epoch": 0.6923370186808997, "grad_norm": 2.4132414775288296, "learning_rate": 1.100679979105809e-06, "loss": 0.9468, "step": 6583 }, { "epoch": 0.692442189122747, "grad_norm": 2.2328463531395295, "learning_rate": 1.0999882619475382e-06, "loss": 0.9449, "step": 6584 }, { "epoch": 0.6925473595645943, "grad_norm": 2.3588285134864524, "learning_rate": 1.0992967009014522e-06, "loss": 0.9846, "step": 6585 }, { "epoch": 0.6926525300064417, "grad_norm": 2.3173283765307326, "learning_rate": 1.0986052960446658e-06, "loss": 1.011, "step": 6586 }, { "epoch": 0.692757700448289, "grad_norm": 2.300519093867645, "learning_rate": 1.0979140474542743e-06, "loss": 1.0277, "step": 6587 }, { "epoch": 0.6928628708901363, "grad_norm": 2.392157404408049, "learning_rate": 1.0972229552073594e-06, "loss": 0.9473, "step": 6588 }, { "epoch": 0.6929680413319836, "grad_norm": 2.5842800419125784, "learning_rate": 1.0965320193809808e-06, "loss": 0.9278, "step": 6589 }, { "epoch": 0.693073211773831, "grad_norm": 1.891437035514997, "learning_rate": 1.0958412400521851e-06, "loss": 0.9947, "step": 6590 }, { "epoch": 0.6931783822156783, "grad_norm": 2.6484100957216365, "learning_rate": 1.0951506172979998e-06, "loss": 1.0276, "step": 6591 }, { "epoch": 0.6932835526575256, "grad_norm": 2.0840632815442985, "learning_rate": 1.0944601511954328e-06, "loss": 0.993, "step": 6592 }, { "epoch": 0.6933887230993729, "grad_norm": 2.956513595108928, "learning_rate": 1.0937698418214784e-06, "loss": 1.0338, "step": 6593 }, { "epoch": 0.6934938935412203, "grad_norm": 1.9243522516943474, "learning_rate": 1.0930796892531092e-06, "loss": 0.9915, "step": 6594 }, { "epoch": 0.6935990639830676, "grad_norm": 2.1698046683173136, "learning_rate": 1.0923896935672856e-06, "loss": 0.9747, "step": 6595 }, { "epoch": 0.6937042344249149, "grad_norm": 2.5425082569792092, "learning_rate": 1.0916998548409449e-06, "loss": 0.9799, "step": 6596 }, { "epoch": 0.6938094048667622, "grad_norm": 2.577847600360515, "learning_rate": 1.0910101731510113e-06, "loss": 0.9921, "step": 6597 }, { "epoch": 0.6939145753086096, "grad_norm": 1.8248209965382798, "learning_rate": 1.090320648574388e-06, "loss": 0.969, "step": 6598 }, { "epoch": 0.6940197457504569, "grad_norm": 1.7602736317885663, "learning_rate": 1.0896312811879634e-06, "loss": 0.9835, "step": 6599 }, { "epoch": 0.6941249161923041, "grad_norm": 2.958715201212913, "learning_rate": 1.0889420710686077e-06, "loss": 0.9786, "step": 6600 }, { "epoch": 0.6942300866341514, "grad_norm": 2.029850889738827, "learning_rate": 1.0882530182931717e-06, "loss": 0.9684, "step": 6601 }, { "epoch": 0.6943352570759987, "grad_norm": 2.032629169019649, "learning_rate": 1.0875641229384918e-06, "loss": 0.9604, "step": 6602 }, { "epoch": 0.6944404275178461, "grad_norm": 2.6284812089984864, "learning_rate": 1.0868753850813826e-06, "loss": 0.9786, "step": 6603 }, { "epoch": 0.6945455979596934, "grad_norm": 2.0592326838128967, "learning_rate": 1.0861868047986473e-06, "loss": 0.942, "step": 6604 }, { "epoch": 0.6946507684015407, "grad_norm": 1.850794804171779, "learning_rate": 1.0854983821670665e-06, "loss": 0.9761, "step": 6605 }, { "epoch": 0.694755938843388, "grad_norm": 2.5669897777189434, "learning_rate": 1.0848101172634028e-06, "loss": 1.021, "step": 6606 }, { "epoch": 0.6948611092852354, "grad_norm": 2.6120005510022666, "learning_rate": 1.0841220101644063e-06, "loss": 1.0407, "step": 6607 }, { "epoch": 0.6949662797270827, "grad_norm": 2.119486727780465, "learning_rate": 1.0834340609468022e-06, "loss": 1.0054, "step": 6608 }, { "epoch": 0.69507145016893, "grad_norm": 2.2087002638922493, "learning_rate": 1.0827462696873065e-06, "loss": 0.9827, "step": 6609 }, { "epoch": 0.6951766206107773, "grad_norm": 2.0129214447538137, "learning_rate": 1.0820586364626103e-06, "loss": 0.9977, "step": 6610 }, { "epoch": 0.6952817910526247, "grad_norm": 2.199166327772394, "learning_rate": 1.0813711613493922e-06, "loss": 0.9758, "step": 6611 }, { "epoch": 0.695386961494472, "grad_norm": 3.246740679995433, "learning_rate": 1.080683844424309e-06, "loss": 0.9628, "step": 6612 }, { "epoch": 0.6954921319363193, "grad_norm": 2.2023486928412592, "learning_rate": 1.0799966857640027e-06, "loss": 0.9798, "step": 6613 }, { "epoch": 0.6955973023781666, "grad_norm": 2.548062183974951, "learning_rate": 1.0793096854450979e-06, "loss": 0.9919, "step": 6614 }, { "epoch": 0.695702472820014, "grad_norm": 3.1434898532802324, "learning_rate": 1.0786228435441984e-06, "loss": 1.0162, "step": 6615 }, { "epoch": 0.6958076432618613, "grad_norm": 2.310428364039209, "learning_rate": 1.0779361601378946e-06, "loss": 0.9854, "step": 6616 }, { "epoch": 0.6959128137037086, "grad_norm": 2.688905242521263, "learning_rate": 1.077249635302754e-06, "loss": 1.0092, "step": 6617 }, { "epoch": 0.6960179841455559, "grad_norm": 2.192140291940976, "learning_rate": 1.0765632691153333e-06, "loss": 0.9757, "step": 6618 }, { "epoch": 0.6961231545874033, "grad_norm": 3.107859618660363, "learning_rate": 1.0758770616521646e-06, "loss": 0.9874, "step": 6619 }, { "epoch": 0.6962283250292506, "grad_norm": 1.8725342958729618, "learning_rate": 1.0751910129897678e-06, "loss": 0.9366, "step": 6620 }, { "epoch": 0.6963334954710978, "grad_norm": 2.3738236012939358, "learning_rate": 1.0745051232046399e-06, "loss": 0.9436, "step": 6621 }, { "epoch": 0.6964386659129451, "grad_norm": 2.438216862745785, "learning_rate": 1.0738193923732648e-06, "loss": 0.9679, "step": 6622 }, { "epoch": 0.6965438363547924, "grad_norm": 2.1364655426341947, "learning_rate": 1.0731338205721072e-06, "loss": 0.9434, "step": 6623 }, { "epoch": 0.6966490067966398, "grad_norm": 3.5579171402660625, "learning_rate": 1.0724484078776121e-06, "loss": 1.0199, "step": 6624 }, { "epoch": 0.6967541772384871, "grad_norm": 3.215088447682091, "learning_rate": 1.0717631543662098e-06, "loss": 0.9623, "step": 6625 }, { "epoch": 0.6968593476803344, "grad_norm": 2.7336030353955234, "learning_rate": 1.07107806011431e-06, "loss": 1.0457, "step": 6626 }, { "epoch": 0.6969645181221817, "grad_norm": 1.8278294216486561, "learning_rate": 1.0703931251983068e-06, "loss": 0.9591, "step": 6627 }, { "epoch": 0.6970696885640291, "grad_norm": 2.316066385624242, "learning_rate": 1.0697083496945766e-06, "loss": 0.9709, "step": 6628 }, { "epoch": 0.6971748590058764, "grad_norm": 2.7923043921417308, "learning_rate": 1.0690237336794753e-06, "loss": 1.0243, "step": 6629 }, { "epoch": 0.6972800294477237, "grad_norm": 2.0978384840603974, "learning_rate": 1.0683392772293446e-06, "loss": 0.9568, "step": 6630 }, { "epoch": 0.697385199889571, "grad_norm": 2.465884366366025, "learning_rate": 1.0676549804205048e-06, "loss": 0.9826, "step": 6631 }, { "epoch": 0.6974903703314184, "grad_norm": 2.218104914348849, "learning_rate": 1.0669708433292628e-06, "loss": 0.9982, "step": 6632 }, { "epoch": 0.6975955407732657, "grad_norm": 2.3301810291362233, "learning_rate": 1.0662868660319031e-06, "loss": 0.9371, "step": 6633 }, { "epoch": 0.697700711215113, "grad_norm": 2.1441525236150634, "learning_rate": 1.0656030486046965e-06, "loss": 0.9976, "step": 6634 }, { "epoch": 0.6978058816569603, "grad_norm": 2.557259001781231, "learning_rate": 1.0649193911238918e-06, "loss": 1.0343, "step": 6635 }, { "epoch": 0.6979110520988077, "grad_norm": 2.5638852296182266, "learning_rate": 1.064235893665723e-06, "loss": 1.0068, "step": 6636 }, { "epoch": 0.698016222540655, "grad_norm": 2.584073642881432, "learning_rate": 1.063552556306406e-06, "loss": 0.9623, "step": 6637 }, { "epoch": 0.6981213929825023, "grad_norm": 2.3545330761686962, "learning_rate": 1.0628693791221373e-06, "loss": 1.0107, "step": 6638 }, { "epoch": 0.6982265634243496, "grad_norm": 1.945064530584297, "learning_rate": 1.0621863621890976e-06, "loss": 0.9861, "step": 6639 }, { "epoch": 0.698331733866197, "grad_norm": 2.0623306033113145, "learning_rate": 1.061503505583447e-06, "loss": 0.9795, "step": 6640 }, { "epoch": 0.6984369043080442, "grad_norm": 2.077722057621392, "learning_rate": 1.0608208093813299e-06, "loss": 0.9851, "step": 6641 }, { "epoch": 0.6985420747498915, "grad_norm": 2.304749737440805, "learning_rate": 1.0601382736588735e-06, "loss": 0.9362, "step": 6642 }, { "epoch": 0.6986472451917388, "grad_norm": 2.6096925989976385, "learning_rate": 1.059455898492184e-06, "loss": 1.0156, "step": 6643 }, { "epoch": 0.6987524156335861, "grad_norm": 2.201610817630821, "learning_rate": 1.0587736839573525e-06, "loss": 0.9828, "step": 6644 }, { "epoch": 0.6988575860754335, "grad_norm": 1.8056953171438117, "learning_rate": 1.058091630130451e-06, "loss": 0.9644, "step": 6645 }, { "epoch": 0.6989627565172808, "grad_norm": 1.966122087361216, "learning_rate": 1.0574097370875346e-06, "loss": 0.997, "step": 6646 }, { "epoch": 0.6990679269591281, "grad_norm": 2.879109107208205, "learning_rate": 1.0567280049046383e-06, "loss": 0.9991, "step": 6647 }, { "epoch": 0.6991730974009754, "grad_norm": 2.8573357829655404, "learning_rate": 1.056046433657782e-06, "loss": 1.0177, "step": 6648 }, { "epoch": 0.6992782678428228, "grad_norm": 2.8507110133723863, "learning_rate": 1.0553650234229642e-06, "loss": 0.9884, "step": 6649 }, { "epoch": 0.6993834382846701, "grad_norm": 1.8137801088961556, "learning_rate": 1.054683774276169e-06, "loss": 0.9959, "step": 6650 }, { "epoch": 0.6994886087265174, "grad_norm": 2.6038698060831513, "learning_rate": 1.0540026862933612e-06, "loss": 1.0342, "step": 6651 }, { "epoch": 0.6995937791683647, "grad_norm": 3.033587811054729, "learning_rate": 1.0533217595504859e-06, "loss": 0.9652, "step": 6652 }, { "epoch": 0.6996989496102121, "grad_norm": 2.1979861729861923, "learning_rate": 1.0526409941234728e-06, "loss": 0.9613, "step": 6653 }, { "epoch": 0.6998041200520594, "grad_norm": 1.958123885683921, "learning_rate": 1.0519603900882322e-06, "loss": 0.9348, "step": 6654 }, { "epoch": 0.6999092904939067, "grad_norm": 2.6695995950616047, "learning_rate": 1.0512799475206576e-06, "loss": 1.0061, "step": 6655 }, { "epoch": 0.700014460935754, "grad_norm": 2.3933085010130206, "learning_rate": 1.050599666496623e-06, "loss": 0.9999, "step": 6656 }, { "epoch": 0.7001196313776014, "grad_norm": 2.4484506390441987, "learning_rate": 1.0499195470919844e-06, "loss": 0.9801, "step": 6657 }, { "epoch": 0.7002248018194487, "grad_norm": 2.10318195808962, "learning_rate": 1.0492395893825804e-06, "loss": 0.9614, "step": 6658 }, { "epoch": 0.700329972261296, "grad_norm": 2.354963154046998, "learning_rate": 1.0485597934442323e-06, "loss": 0.9967, "step": 6659 }, { "epoch": 0.7004351427031433, "grad_norm": 2.4833792329948308, "learning_rate": 1.0478801593527436e-06, "loss": 0.9805, "step": 6660 }, { "epoch": 0.7005403131449905, "grad_norm": 2.319460276654493, "learning_rate": 1.0472006871838963e-06, "loss": 0.9926, "step": 6661 }, { "epoch": 0.7006454835868379, "grad_norm": 2.6492931586569033, "learning_rate": 1.0465213770134591e-06, "loss": 1.0135, "step": 6662 }, { "epoch": 0.7007506540286852, "grad_norm": 3.0029420857299183, "learning_rate": 1.0458422289171786e-06, "loss": 0.9789, "step": 6663 }, { "epoch": 0.7008558244705325, "grad_norm": 1.924682101684733, "learning_rate": 1.0451632429707856e-06, "loss": 0.9996, "step": 6664 }, { "epoch": 0.7009609949123798, "grad_norm": 2.5935534112052725, "learning_rate": 1.0444844192499938e-06, "loss": 0.9762, "step": 6665 }, { "epoch": 0.7010661653542272, "grad_norm": 2.4891701580984837, "learning_rate": 1.043805757830495e-06, "loss": 0.9801, "step": 6666 }, { "epoch": 0.7011713357960745, "grad_norm": 3.171560808484776, "learning_rate": 1.0431272587879662e-06, "loss": 0.9521, "step": 6667 }, { "epoch": 0.7012765062379218, "grad_norm": 2.360067551193987, "learning_rate": 1.0424489221980657e-06, "loss": 0.9547, "step": 6668 }, { "epoch": 0.7013816766797691, "grad_norm": 2.136337297008342, "learning_rate": 1.0417707481364338e-06, "loss": 0.9991, "step": 6669 }, { "epoch": 0.7014868471216165, "grad_norm": 2.070557413137076, "learning_rate": 1.0410927366786904e-06, "loss": 0.9918, "step": 6670 }, { "epoch": 0.7015920175634638, "grad_norm": 2.0832696047671546, "learning_rate": 1.0404148879004415e-06, "loss": 1.0046, "step": 6671 }, { "epoch": 0.7016971880053111, "grad_norm": 3.3334838695797115, "learning_rate": 1.0397372018772694e-06, "loss": 0.996, "step": 6672 }, { "epoch": 0.7018023584471584, "grad_norm": 3.2162155727197406, "learning_rate": 1.0390596786847435e-06, "loss": 0.9941, "step": 6673 }, { "epoch": 0.7019075288890058, "grad_norm": 2.9010911694149053, "learning_rate": 1.0383823183984133e-06, "loss": 1.0102, "step": 6674 }, { "epoch": 0.7020126993308531, "grad_norm": 1.9879071940536923, "learning_rate": 1.0377051210938077e-06, "loss": 1.0026, "step": 6675 }, { "epoch": 0.7021178697727004, "grad_norm": 2.911710480192307, "learning_rate": 1.0370280868464405e-06, "loss": 1.016, "step": 6676 }, { "epoch": 0.7022230402145477, "grad_norm": 2.47624061900751, "learning_rate": 1.0363512157318076e-06, "loss": 1.0157, "step": 6677 }, { "epoch": 0.7023282106563951, "grad_norm": 2.7429443996244687, "learning_rate": 1.0356745078253833e-06, "loss": 0.9474, "step": 6678 }, { "epoch": 0.7024333810982424, "grad_norm": 1.6759259613705626, "learning_rate": 1.0349979632026272e-06, "loss": 0.9684, "step": 6679 }, { "epoch": 0.7025385515400897, "grad_norm": 2.1862048185264458, "learning_rate": 1.0343215819389782e-06, "loss": 1.0272, "step": 6680 }, { "epoch": 0.702643721981937, "grad_norm": 2.8521745744648492, "learning_rate": 1.0336453641098584e-06, "loss": 0.9889, "step": 6681 }, { "epoch": 0.7027488924237842, "grad_norm": 2.6870393262822474, "learning_rate": 1.0329693097906714e-06, "loss": 1.0578, "step": 6682 }, { "epoch": 0.7028540628656316, "grad_norm": 2.6616207546943484, "learning_rate": 1.0322934190568037e-06, "loss": 0.9533, "step": 6683 }, { "epoch": 0.7029592333074789, "grad_norm": 1.7603354792631345, "learning_rate": 1.0316176919836207e-06, "loss": 0.9708, "step": 6684 }, { "epoch": 0.7030644037493262, "grad_norm": 2.3418486553788216, "learning_rate": 1.0309421286464724e-06, "loss": 0.9603, "step": 6685 }, { "epoch": 0.7031695741911735, "grad_norm": 2.6938896407952133, "learning_rate": 1.030266729120688e-06, "loss": 1.0238, "step": 6686 }, { "epoch": 0.7032747446330209, "grad_norm": 2.035299190868889, "learning_rate": 1.0295914934815806e-06, "loss": 0.9898, "step": 6687 }, { "epoch": 0.7033799150748682, "grad_norm": 2.0038429608901978, "learning_rate": 1.0289164218044452e-06, "loss": 0.9689, "step": 6688 }, { "epoch": 0.7034850855167155, "grad_norm": 2.4755202430037913, "learning_rate": 1.0282415141645554e-06, "loss": 0.9555, "step": 6689 }, { "epoch": 0.7035902559585628, "grad_norm": 3.468729544937565, "learning_rate": 1.02756677063717e-06, "loss": 0.998, "step": 6690 }, { "epoch": 0.7036954264004102, "grad_norm": 2.6675685372943, "learning_rate": 1.0268921912975288e-06, "loss": 1.0056, "step": 6691 }, { "epoch": 0.7038005968422575, "grad_norm": 2.500797177465148, "learning_rate": 1.0262177762208508e-06, "loss": 0.9976, "step": 6692 }, { "epoch": 0.7039057672841048, "grad_norm": 1.843407519469925, "learning_rate": 1.0255435254823404e-06, "loss": 0.9219, "step": 6693 }, { "epoch": 0.7040109377259521, "grad_norm": 2.646055721221685, "learning_rate": 1.02486943915718e-06, "loss": 0.9785, "step": 6694 }, { "epoch": 0.7041161081677995, "grad_norm": 2.4428289065934554, "learning_rate": 1.0241955173205366e-06, "loss": 1.0164, "step": 6695 }, { "epoch": 0.7042212786096468, "grad_norm": 2.4160293322018522, "learning_rate": 1.0235217600475569e-06, "loss": 1.0088, "step": 6696 }, { "epoch": 0.7043264490514941, "grad_norm": 2.6869630614056756, "learning_rate": 1.0228481674133719e-06, "loss": 0.9751, "step": 6697 }, { "epoch": 0.7044316194933414, "grad_norm": 2.230312539280299, "learning_rate": 1.0221747394930904e-06, "loss": 1.0008, "step": 6698 }, { "epoch": 0.7045367899351888, "grad_norm": 2.218987057193651, "learning_rate": 1.0215014763618054e-06, "loss": 0.9617, "step": 6699 }, { "epoch": 0.7046419603770361, "grad_norm": 2.5556328251578284, "learning_rate": 1.020828378094592e-06, "loss": 1.0079, "step": 6700 }, { "epoch": 0.7047471308188834, "grad_norm": 2.1111202262894917, "learning_rate": 1.0201554447665044e-06, "loss": 0.9656, "step": 6701 }, { "epoch": 0.7048523012607306, "grad_norm": 2.3057500589924422, "learning_rate": 1.0194826764525811e-06, "loss": 0.9667, "step": 6702 }, { "epoch": 0.7049574717025779, "grad_norm": 2.438881329131322, "learning_rate": 1.01881007322784e-06, "loss": 1.0032, "step": 6703 }, { "epoch": 0.7050626421444253, "grad_norm": 1.9015647486903267, "learning_rate": 1.0181376351672817e-06, "loss": 0.9893, "step": 6704 }, { "epoch": 0.7051678125862726, "grad_norm": 2.3923551756484422, "learning_rate": 1.0174653623458886e-06, "loss": 0.987, "step": 6705 }, { "epoch": 0.7052729830281199, "grad_norm": 2.4314151560926165, "learning_rate": 1.0167932548386253e-06, "loss": 1.0085, "step": 6706 }, { "epoch": 0.7053781534699672, "grad_norm": 1.781610905130927, "learning_rate": 1.016121312720436e-06, "loss": 0.9624, "step": 6707 }, { "epoch": 0.7054833239118146, "grad_norm": 2.3479526084995106, "learning_rate": 1.0154495360662464e-06, "loss": 1.0281, "step": 6708 }, { "epoch": 0.7055884943536619, "grad_norm": 1.787929279606114, "learning_rate": 1.0147779249509662e-06, "loss": 0.9802, "step": 6709 }, { "epoch": 0.7056936647955092, "grad_norm": 2.5085426770017407, "learning_rate": 1.014106479449485e-06, "loss": 0.9289, "step": 6710 }, { "epoch": 0.7057988352373565, "grad_norm": 2.2725044679555713, "learning_rate": 1.0134351996366749e-06, "loss": 0.9641, "step": 6711 }, { "epoch": 0.7059040056792039, "grad_norm": 2.364113081705728, "learning_rate": 1.0127640855873874e-06, "loss": 0.9668, "step": 6712 }, { "epoch": 0.7060091761210512, "grad_norm": 3.297691941833725, "learning_rate": 1.0120931373764572e-06, "loss": 1.0055, "step": 6713 }, { "epoch": 0.7061143465628985, "grad_norm": 2.1743176526856316, "learning_rate": 1.011422355078702e-06, "loss": 0.993, "step": 6714 }, { "epoch": 0.7062195170047458, "grad_norm": 2.8680555666625676, "learning_rate": 1.0107517387689168e-06, "loss": 1.0052, "step": 6715 }, { "epoch": 0.7063246874465932, "grad_norm": 2.3554559333451874, "learning_rate": 1.0100812885218824e-06, "loss": 0.9962, "step": 6716 }, { "epoch": 0.7064298578884405, "grad_norm": 2.094277187480027, "learning_rate": 1.0094110044123578e-06, "loss": 0.9985, "step": 6717 }, { "epoch": 0.7065350283302878, "grad_norm": 2.515371556231332, "learning_rate": 1.0087408865150852e-06, "loss": 1.0295, "step": 6718 }, { "epoch": 0.7066401987721351, "grad_norm": 3.0627093185609064, "learning_rate": 1.0080709349047885e-06, "loss": 0.9567, "step": 6719 }, { "epoch": 0.7067453692139825, "grad_norm": 2.8156486671988894, "learning_rate": 1.007401149656173e-06, "loss": 0.986, "step": 6720 }, { "epoch": 0.7068505396558298, "grad_norm": 2.2979514616422376, "learning_rate": 1.0067315308439235e-06, "loss": 1.0037, "step": 6721 }, { "epoch": 0.706955710097677, "grad_norm": 3.19623467418787, "learning_rate": 1.0060620785427083e-06, "loss": 1.021, "step": 6722 }, { "epoch": 0.7070608805395243, "grad_norm": 2.3549166089077667, "learning_rate": 1.0053927928271775e-06, "loss": 0.9937, "step": 6723 }, { "epoch": 0.7071660509813716, "grad_norm": 2.822764978609374, "learning_rate": 1.00472367377196e-06, "loss": 1.0179, "step": 6724 }, { "epoch": 0.707271221423219, "grad_norm": 2.659932951697124, "learning_rate": 1.0040547214516698e-06, "loss": 0.9869, "step": 6725 }, { "epoch": 0.7073763918650663, "grad_norm": 3.185470330945337, "learning_rate": 1.0033859359408977e-06, "loss": 0.9823, "step": 6726 }, { "epoch": 0.7074815623069136, "grad_norm": 2.609628396699824, "learning_rate": 1.00271731731422e-06, "loss": 0.9795, "step": 6727 }, { "epoch": 0.7075867327487609, "grad_norm": 1.9750399309722628, "learning_rate": 1.0020488656461936e-06, "loss": 0.9989, "step": 6728 }, { "epoch": 0.7076919031906083, "grad_norm": 2.7806599676410335, "learning_rate": 1.001380581011354e-06, "loss": 1.015, "step": 6729 }, { "epoch": 0.7077970736324556, "grad_norm": 2.4361344876429527, "learning_rate": 1.0007124634842227e-06, "loss": 0.9652, "step": 6730 }, { "epoch": 0.7079022440743029, "grad_norm": 2.232929886411633, "learning_rate": 1.0000445131392975e-06, "loss": 0.9799, "step": 6731 }, { "epoch": 0.7080074145161502, "grad_norm": 2.1763015794617924, "learning_rate": 9.993767300510613e-07, "loss": 0.9916, "step": 6732 }, { "epoch": 0.7081125849579976, "grad_norm": 2.054775097950991, "learning_rate": 9.987091142939766e-07, "loss": 0.9821, "step": 6733 }, { "epoch": 0.7082177553998449, "grad_norm": 3.014980389348752, "learning_rate": 9.980416659424894e-07, "loss": 0.9988, "step": 6734 }, { "epoch": 0.7083229258416922, "grad_norm": 2.2677152224867507, "learning_rate": 9.97374385071023e-07, "loss": 0.9856, "step": 6735 }, { "epoch": 0.7084280962835395, "grad_norm": 2.235863059602925, "learning_rate": 9.967072717539852e-07, "loss": 0.9791, "step": 6736 }, { "epoch": 0.7085332667253869, "grad_norm": 2.2178818893528867, "learning_rate": 9.960403260657658e-07, "loss": 0.9635, "step": 6737 }, { "epoch": 0.7086384371672342, "grad_norm": 2.2511311829581184, "learning_rate": 9.953735480807322e-07, "loss": 0.9609, "step": 6738 }, { "epoch": 0.7087436076090815, "grad_norm": 2.385032459651381, "learning_rate": 9.947069378732372e-07, "loss": 0.9845, "step": 6739 }, { "epoch": 0.7088487780509288, "grad_norm": 1.9674072764447825, "learning_rate": 9.940404955176114e-07, "loss": 0.9351, "step": 6740 }, { "epoch": 0.7089539484927762, "grad_norm": 2.4743599480951404, "learning_rate": 9.933742210881688e-07, "loss": 0.9413, "step": 6741 }, { "epoch": 0.7090591189346235, "grad_norm": 2.0926323669690037, "learning_rate": 9.927081146592058e-07, "loss": 1.0089, "step": 6742 }, { "epoch": 0.7091642893764707, "grad_norm": 2.5941026743219453, "learning_rate": 9.920421763049957e-07, "loss": 1.0047, "step": 6743 }, { "epoch": 0.709269459818318, "grad_norm": 2.8240047908465216, "learning_rate": 9.913764060997982e-07, "loss": 0.972, "step": 6744 }, { "epoch": 0.7093746302601653, "grad_norm": 2.284340896157482, "learning_rate": 9.90710804117849e-07, "loss": 0.9517, "step": 6745 }, { "epoch": 0.7094798007020127, "grad_norm": 2.508946771542956, "learning_rate": 9.900453704333718e-07, "loss": 0.9857, "step": 6746 }, { "epoch": 0.70958497114386, "grad_norm": 2.836809183347872, "learning_rate": 9.893801051205643e-07, "loss": 0.9961, "step": 6747 }, { "epoch": 0.7096901415857073, "grad_norm": 2.2219358660428927, "learning_rate": 9.88715008253611e-07, "loss": 0.9792, "step": 6748 }, { "epoch": 0.7097953120275546, "grad_norm": 2.7231291268770046, "learning_rate": 9.880500799066734e-07, "loss": 1.0135, "step": 6749 }, { "epoch": 0.709900482469402, "grad_norm": 2.2397683772339847, "learning_rate": 9.873853201538972e-07, "loss": 0.9937, "step": 6750 }, { "epoch": 0.7100056529112493, "grad_norm": 2.4531999818310926, "learning_rate": 9.86720729069409e-07, "loss": 0.9821, "step": 6751 }, { "epoch": 0.7101108233530966, "grad_norm": 2.895236414074528, "learning_rate": 9.860563067273142e-07, "loss": 0.9877, "step": 6752 }, { "epoch": 0.7102159937949439, "grad_norm": 2.142507657879859, "learning_rate": 9.853920532017027e-07, "loss": 1.0331, "step": 6753 }, { "epoch": 0.7103211642367913, "grad_norm": 2.1738833228712044, "learning_rate": 9.847279685666425e-07, "loss": 0.9896, "step": 6754 }, { "epoch": 0.7104263346786386, "grad_norm": 4.407965262891168, "learning_rate": 9.840640528961849e-07, "loss": 1.0162, "step": 6755 }, { "epoch": 0.7105315051204859, "grad_norm": 3.380279233475454, "learning_rate": 9.834003062643616e-07, "loss": 0.9877, "step": 6756 }, { "epoch": 0.7106366755623332, "grad_norm": 2.2302312037350207, "learning_rate": 9.82736728745186e-07, "loss": 0.9992, "step": 6757 }, { "epoch": 0.7107418460041806, "grad_norm": 2.0718248730773996, "learning_rate": 9.82073320412652e-07, "loss": 0.9631, "step": 6758 }, { "epoch": 0.7108470164460279, "grad_norm": 2.766749356649403, "learning_rate": 9.814100813407326e-07, "loss": 1.0268, "step": 6759 }, { "epoch": 0.7109521868878752, "grad_norm": 2.7394051021844597, "learning_rate": 9.807470116033879e-07, "loss": 0.9684, "step": 6760 }, { "epoch": 0.7110573573297225, "grad_norm": 2.576083662654087, "learning_rate": 9.800841112745524e-07, "loss": 0.9926, "step": 6761 }, { "epoch": 0.7111625277715699, "grad_norm": 1.998383130036782, "learning_rate": 9.794213804281463e-07, "loss": 0.9218, "step": 6762 }, { "epoch": 0.7112676982134171, "grad_norm": 2.613732532405646, "learning_rate": 9.78758819138068e-07, "loss": 1.0229, "step": 6763 }, { "epoch": 0.7113728686552644, "grad_norm": 2.421476761648599, "learning_rate": 9.780964274781984e-07, "loss": 1.0062, "step": 6764 }, { "epoch": 0.7114780390971117, "grad_norm": 2.13715013448252, "learning_rate": 9.774342055224006e-07, "loss": 1.0194, "step": 6765 }, { "epoch": 0.711583209538959, "grad_norm": 2.3786521850695044, "learning_rate": 9.76772153344516e-07, "loss": 0.9514, "step": 6766 }, { "epoch": 0.7116883799808064, "grad_norm": 2.2713978142889677, "learning_rate": 9.761102710183698e-07, "loss": 0.9796, "step": 6767 }, { "epoch": 0.7117935504226537, "grad_norm": 2.165799232271969, "learning_rate": 9.754485586177648e-07, "loss": 0.9959, "step": 6768 }, { "epoch": 0.711898720864501, "grad_norm": 1.8842675327288372, "learning_rate": 9.747870162164903e-07, "loss": 1.0099, "step": 6769 }, { "epoch": 0.7120038913063483, "grad_norm": 1.9989490315740839, "learning_rate": 9.741256438883108e-07, "loss": 0.9783, "step": 6770 }, { "epoch": 0.7121090617481957, "grad_norm": 3.0192380084749053, "learning_rate": 9.734644417069764e-07, "loss": 0.9645, "step": 6771 }, { "epoch": 0.712214232190043, "grad_norm": 2.5407462322629355, "learning_rate": 9.728034097462144e-07, "loss": 0.9896, "step": 6772 }, { "epoch": 0.7123194026318903, "grad_norm": 2.4113869913201023, "learning_rate": 9.721425480797358e-07, "loss": 0.9704, "step": 6773 }, { "epoch": 0.7124245730737376, "grad_norm": 2.1235306653145734, "learning_rate": 9.714818567812329e-07, "loss": 1.0002, "step": 6774 }, { "epoch": 0.712529743515585, "grad_norm": 2.141043980222307, "learning_rate": 9.708213359243762e-07, "loss": 0.9717, "step": 6775 }, { "epoch": 0.7126349139574323, "grad_norm": 2.246816431557711, "learning_rate": 9.701609855828202e-07, "loss": 0.9864, "step": 6776 }, { "epoch": 0.7127400843992796, "grad_norm": 2.2375134836918664, "learning_rate": 9.695008058301978e-07, "loss": 0.9901, "step": 6777 }, { "epoch": 0.7128452548411269, "grad_norm": 2.9228849737443614, "learning_rate": 9.688407967401248e-07, "loss": 0.9737, "step": 6778 }, { "epoch": 0.7129504252829743, "grad_norm": 2.8328425019223253, "learning_rate": 9.681809583861982e-07, "loss": 0.992, "step": 6779 }, { "epoch": 0.7130555957248216, "grad_norm": 2.7116111720143743, "learning_rate": 9.675212908419937e-07, "loss": 0.9816, "step": 6780 }, { "epoch": 0.7131607661666689, "grad_norm": 2.5984134719702983, "learning_rate": 9.668617941810708e-07, "loss": 0.9631, "step": 6781 }, { "epoch": 0.7132659366085162, "grad_norm": 1.9628235706550243, "learning_rate": 9.662024684769658e-07, "loss": 0.9091, "step": 6782 }, { "epoch": 0.7133711070503634, "grad_norm": 2.953890424536143, "learning_rate": 9.655433138032022e-07, "loss": 0.9763, "step": 6783 }, { "epoch": 0.7134762774922108, "grad_norm": 1.873848783671468, "learning_rate": 9.648843302332786e-07, "loss": 0.9946, "step": 6784 }, { "epoch": 0.7135814479340581, "grad_norm": 2.812780437855715, "learning_rate": 9.642255178406782e-07, "loss": 0.9467, "step": 6785 }, { "epoch": 0.7136866183759054, "grad_norm": 2.0608044605763083, "learning_rate": 9.635668766988618e-07, "loss": 0.9883, "step": 6786 }, { "epoch": 0.7137917888177527, "grad_norm": 2.38366077999555, "learning_rate": 9.629084068812742e-07, "loss": 0.9696, "step": 6787 }, { "epoch": 0.7138969592596001, "grad_norm": 2.7383376317409844, "learning_rate": 9.622501084613407e-07, "loss": 0.9579, "step": 6788 }, { "epoch": 0.7140021297014474, "grad_norm": 3.313699091586876, "learning_rate": 9.615919815124647e-07, "loss": 0.9894, "step": 6789 }, { "epoch": 0.7141073001432947, "grad_norm": 2.0105423002645364, "learning_rate": 9.609340261080343e-07, "loss": 0.9843, "step": 6790 }, { "epoch": 0.714212470585142, "grad_norm": 3.588279258448056, "learning_rate": 9.602762423214146e-07, "loss": 1.0235, "step": 6791 }, { "epoch": 0.7143176410269894, "grad_norm": 2.430588136927392, "learning_rate": 9.596186302259563e-07, "loss": 1.0016, "step": 6792 }, { "epoch": 0.7144228114688367, "grad_norm": 2.1694545531760423, "learning_rate": 9.589611898949868e-07, "loss": 0.9588, "step": 6793 }, { "epoch": 0.714527981910684, "grad_norm": 2.4149358677574773, "learning_rate": 9.583039214018152e-07, "loss": 0.9814, "step": 6794 }, { "epoch": 0.7146331523525313, "grad_norm": 2.272690222416524, "learning_rate": 9.576468248197335e-07, "loss": 0.9828, "step": 6795 }, { "epoch": 0.7147383227943787, "grad_norm": 2.4404468267875363, "learning_rate": 9.569899002220104e-07, "loss": 0.9592, "step": 6796 }, { "epoch": 0.714843493236226, "grad_norm": 2.481227510357708, "learning_rate": 9.563331476819019e-07, "loss": 0.9432, "step": 6797 }, { "epoch": 0.7149486636780733, "grad_norm": 2.1951734767832924, "learning_rate": 9.55676567272638e-07, "loss": 1.0097, "step": 6798 }, { "epoch": 0.7150538341199206, "grad_norm": 2.4639332073414137, "learning_rate": 9.550201590674343e-07, "loss": 0.9852, "step": 6799 }, { "epoch": 0.715159004561768, "grad_norm": 2.1444939700922254, "learning_rate": 9.54363923139484e-07, "loss": 0.9932, "step": 6800 }, { "epoch": 0.7152641750036153, "grad_norm": 2.9435311101102988, "learning_rate": 9.53707859561963e-07, "loss": 0.9227, "step": 6801 }, { "epoch": 0.7153693454454626, "grad_norm": 2.7613347609604024, "learning_rate": 9.530519684080289e-07, "loss": 1.0551, "step": 6802 }, { "epoch": 0.7154745158873099, "grad_norm": 3.138642130105925, "learning_rate": 9.523962497508163e-07, "loss": 0.9815, "step": 6803 }, { "epoch": 0.7155796863291571, "grad_norm": 2.6016824562031577, "learning_rate": 9.517407036634449e-07, "loss": 0.9368, "step": 6804 }, { "epoch": 0.7156848567710045, "grad_norm": 2.9399166189880686, "learning_rate": 9.510853302190107e-07, "loss": 0.9755, "step": 6805 }, { "epoch": 0.7157900272128518, "grad_norm": 2.3854715791155123, "learning_rate": 9.504301294905966e-07, "loss": 1.0069, "step": 6806 }, { "epoch": 0.7158951976546991, "grad_norm": 2.3367169758597504, "learning_rate": 9.497751015512593e-07, "loss": 0.9986, "step": 6807 }, { "epoch": 0.7160003680965464, "grad_norm": 2.8377663825658592, "learning_rate": 9.491202464740415e-07, "loss": 1.0053, "step": 6808 }, { "epoch": 0.7161055385383938, "grad_norm": 2.390482144562161, "learning_rate": 9.484655643319643e-07, "loss": 0.9992, "step": 6809 }, { "epoch": 0.7162107089802411, "grad_norm": 2.942094456519339, "learning_rate": 9.478110551980274e-07, "loss": 1.0179, "step": 6810 }, { "epoch": 0.7163158794220884, "grad_norm": 2.360695205905589, "learning_rate": 9.471567191452175e-07, "loss": 1.0237, "step": 6811 }, { "epoch": 0.7164210498639357, "grad_norm": 2.403024984734448, "learning_rate": 9.465025562464952e-07, "loss": 0.99, "step": 6812 }, { "epoch": 0.7165262203057831, "grad_norm": 2.3364437770617554, "learning_rate": 9.458485665748071e-07, "loss": 0.9672, "step": 6813 }, { "epoch": 0.7166313907476304, "grad_norm": 2.158609622375604, "learning_rate": 9.451947502030759e-07, "loss": 0.9517, "step": 6814 }, { "epoch": 0.7167365611894777, "grad_norm": 2.1055972278729715, "learning_rate": 9.445411072042083e-07, "loss": 0.964, "step": 6815 }, { "epoch": 0.716841731631325, "grad_norm": 2.6035591950083155, "learning_rate": 9.438876376510911e-07, "loss": 0.9857, "step": 6816 }, { "epoch": 0.7169469020731724, "grad_norm": 2.834112744073984, "learning_rate": 9.432343416165899e-07, "loss": 1.0336, "step": 6817 }, { "epoch": 0.7170520725150197, "grad_norm": 2.479880902942861, "learning_rate": 9.425812191735539e-07, "loss": 1.0126, "step": 6818 }, { "epoch": 0.717157242956867, "grad_norm": 1.8559851857923326, "learning_rate": 9.419282703948085e-07, "loss": 0.9631, "step": 6819 }, { "epoch": 0.7172624133987143, "grad_norm": 1.8082543081529459, "learning_rate": 9.412754953531664e-07, "loss": 0.99, "step": 6820 }, { "epoch": 0.7173675838405617, "grad_norm": 2.2523841295243123, "learning_rate": 9.406228941214143e-07, "loss": 0.9886, "step": 6821 }, { "epoch": 0.717472754282409, "grad_norm": 2.457867045065379, "learning_rate": 9.399704667723239e-07, "loss": 1.0095, "step": 6822 }, { "epoch": 0.7175779247242563, "grad_norm": 3.355058501321726, "learning_rate": 9.393182133786443e-07, "loss": 1.0292, "step": 6823 }, { "epoch": 0.7176830951661035, "grad_norm": 2.3934295140430466, "learning_rate": 9.386661340131078e-07, "loss": 0.9797, "step": 6824 }, { "epoch": 0.7177882656079508, "grad_norm": 4.421047798769204, "learning_rate": 9.380142287484273e-07, "loss": 0.9442, "step": 6825 }, { "epoch": 0.7178934360497982, "grad_norm": 2.1419410112275603, "learning_rate": 9.373624976572931e-07, "loss": 1.0034, "step": 6826 }, { "epoch": 0.7179986064916455, "grad_norm": 2.446158128193031, "learning_rate": 9.367109408123803e-07, "loss": 0.9845, "step": 6827 }, { "epoch": 0.7181037769334928, "grad_norm": 2.8329699515453965, "learning_rate": 9.3605955828634e-07, "loss": 1.0246, "step": 6828 }, { "epoch": 0.7182089473753401, "grad_norm": 2.780651175983165, "learning_rate": 9.354083501518097e-07, "loss": 1.0073, "step": 6829 }, { "epoch": 0.7183141178171875, "grad_norm": 2.4642646857763753, "learning_rate": 9.347573164814025e-07, "loss": 0.9969, "step": 6830 }, { "epoch": 0.7184192882590348, "grad_norm": 2.8457696249018243, "learning_rate": 9.34106457347713e-07, "loss": 1.0158, "step": 6831 }, { "epoch": 0.7185244587008821, "grad_norm": 2.5543405389378275, "learning_rate": 9.334557728233185e-07, "loss": 0.9736, "step": 6832 }, { "epoch": 0.7186296291427294, "grad_norm": 1.8641262528029885, "learning_rate": 9.328052629807729e-07, "loss": 0.98, "step": 6833 }, { "epoch": 0.7187347995845768, "grad_norm": 2.5847950268121416, "learning_rate": 9.32154927892617e-07, "loss": 0.9441, "step": 6834 }, { "epoch": 0.7188399700264241, "grad_norm": 2.129043724063401, "learning_rate": 9.315047676313648e-07, "loss": 0.9554, "step": 6835 }, { "epoch": 0.7189451404682714, "grad_norm": 2.324839688594734, "learning_rate": 9.308547822695166e-07, "loss": 1.0237, "step": 6836 }, { "epoch": 0.7190503109101187, "grad_norm": 1.9345404654516318, "learning_rate": 9.302049718795489e-07, "loss": 0.9591, "step": 6837 }, { "epoch": 0.719155481351966, "grad_norm": 1.7931929866422445, "learning_rate": 9.295553365339213e-07, "loss": 0.9902, "step": 6838 }, { "epoch": 0.7192606517938134, "grad_norm": 2.244608243473127, "learning_rate": 9.289058763050743e-07, "loss": 0.9788, "step": 6839 }, { "epoch": 0.7193658222356607, "grad_norm": 2.381440258340103, "learning_rate": 9.282565912654257e-07, "loss": 0.9811, "step": 6840 }, { "epoch": 0.719470992677508, "grad_norm": 2.645643314379612, "learning_rate": 9.276074814873778e-07, "loss": 0.9656, "step": 6841 }, { "epoch": 0.7195761631193553, "grad_norm": 2.2621136426873374, "learning_rate": 9.26958547043309e-07, "loss": 0.9748, "step": 6842 }, { "epoch": 0.7196813335612027, "grad_norm": 2.3967219964591155, "learning_rate": 9.263097880055835e-07, "loss": 0.9744, "step": 6843 }, { "epoch": 0.7197865040030499, "grad_norm": 3.4195444597169553, "learning_rate": 9.256612044465407e-07, "loss": 1.0398, "step": 6844 }, { "epoch": 0.7198916744448972, "grad_norm": 1.8349056835941067, "learning_rate": 9.250127964385045e-07, "loss": 0.9782, "step": 6845 }, { "epoch": 0.7199968448867445, "grad_norm": 3.080343200685503, "learning_rate": 9.243645640537755e-07, "loss": 0.9775, "step": 6846 }, { "epoch": 0.7201020153285919, "grad_norm": 2.207569417842817, "learning_rate": 9.237165073646376e-07, "loss": 0.9864, "step": 6847 }, { "epoch": 0.7202071857704392, "grad_norm": 1.9935368383262349, "learning_rate": 9.230686264433547e-07, "loss": 0.962, "step": 6848 }, { "epoch": 0.7203123562122865, "grad_norm": 3.0768788487950114, "learning_rate": 9.224209213621693e-07, "loss": 0.9913, "step": 6849 }, { "epoch": 0.7204175266541338, "grad_norm": 3.593231825237885, "learning_rate": 9.217733921933073e-07, "loss": 1.0568, "step": 6850 }, { "epoch": 0.7205226970959812, "grad_norm": 2.35712856290603, "learning_rate": 9.21126039008971e-07, "loss": 1.0247, "step": 6851 }, { "epoch": 0.7206278675378285, "grad_norm": 2.060779548724852, "learning_rate": 9.204788618813468e-07, "loss": 0.986, "step": 6852 }, { "epoch": 0.7207330379796758, "grad_norm": 1.8881478246910306, "learning_rate": 9.198318608826001e-07, "loss": 0.9918, "step": 6853 }, { "epoch": 0.7208382084215231, "grad_norm": 1.9601162572993023, "learning_rate": 9.191850360848756e-07, "loss": 0.9542, "step": 6854 }, { "epoch": 0.7209433788633705, "grad_norm": 1.982841411610949, "learning_rate": 9.185383875603004e-07, "loss": 1.0137, "step": 6855 }, { "epoch": 0.7210485493052178, "grad_norm": 2.5279779328379943, "learning_rate": 9.178919153809787e-07, "loss": 1.0106, "step": 6856 }, { "epoch": 0.7211537197470651, "grad_norm": 3.6580469329228826, "learning_rate": 9.172456196190002e-07, "loss": 0.988, "step": 6857 }, { "epoch": 0.7212588901889124, "grad_norm": 2.1551774200448075, "learning_rate": 9.165995003464295e-07, "loss": 0.998, "step": 6858 }, { "epoch": 0.7213640606307598, "grad_norm": 2.7261206696045357, "learning_rate": 9.15953557635316e-07, "loss": 0.9893, "step": 6859 }, { "epoch": 0.7214692310726071, "grad_norm": 2.651636066544782, "learning_rate": 9.153077915576849e-07, "loss": 0.9691, "step": 6860 }, { "epoch": 0.7215744015144544, "grad_norm": 3.4772896538999896, "learning_rate": 9.146622021855455e-07, "loss": 1.0076, "step": 6861 }, { "epoch": 0.7216795719563017, "grad_norm": 2.378757397637525, "learning_rate": 9.140167895908867e-07, "loss": 1.0138, "step": 6862 }, { "epoch": 0.721784742398149, "grad_norm": 1.9309802042614799, "learning_rate": 9.133715538456753e-07, "loss": 0.9607, "step": 6863 }, { "epoch": 0.7218899128399964, "grad_norm": 2.4260010858714534, "learning_rate": 9.12726495021862e-07, "loss": 0.995, "step": 6864 }, { "epoch": 0.7219950832818436, "grad_norm": 2.755072867504919, "learning_rate": 9.12081613191374e-07, "loss": 0.9962, "step": 6865 }, { "epoch": 0.7221002537236909, "grad_norm": 2.318026979345631, "learning_rate": 9.114369084261215e-07, "loss": 0.9444, "step": 6866 }, { "epoch": 0.7222054241655382, "grad_norm": 2.6082295680717023, "learning_rate": 9.107923807979948e-07, "loss": 1.0035, "step": 6867 }, { "epoch": 0.7223105946073856, "grad_norm": 2.3969924183402185, "learning_rate": 9.101480303788623e-07, "loss": 0.9672, "step": 6868 }, { "epoch": 0.7224157650492329, "grad_norm": 2.649977228442466, "learning_rate": 9.095038572405751e-07, "loss": 1.029, "step": 6869 }, { "epoch": 0.7225209354910802, "grad_norm": 2.0300044348072186, "learning_rate": 9.088598614549629e-07, "loss": 0.9735, "step": 6870 }, { "epoch": 0.7226261059329275, "grad_norm": 1.9663312006008373, "learning_rate": 9.082160430938375e-07, "loss": 0.9986, "step": 6871 }, { "epoch": 0.7227312763747749, "grad_norm": 2.09299815663197, "learning_rate": 9.075724022289878e-07, "loss": 1.0071, "step": 6872 }, { "epoch": 0.7228364468166222, "grad_norm": 2.8696877774023886, "learning_rate": 9.069289389321864e-07, "loss": 0.9965, "step": 6873 }, { "epoch": 0.7229416172584695, "grad_norm": 1.8588664189025497, "learning_rate": 9.062856532751832e-07, "loss": 0.9622, "step": 6874 }, { "epoch": 0.7230467877003168, "grad_norm": 2.477197723105134, "learning_rate": 9.056425453297099e-07, "loss": 0.9691, "step": 6875 }, { "epoch": 0.7231519581421642, "grad_norm": 3.2970809410578554, "learning_rate": 9.04999615167479e-07, "loss": 0.9987, "step": 6876 }, { "epoch": 0.7232571285840115, "grad_norm": 3.0574404452300183, "learning_rate": 9.043568628601807e-07, "loss": 1.0004, "step": 6877 }, { "epoch": 0.7233622990258588, "grad_norm": 2.3164982947871096, "learning_rate": 9.037142884794881e-07, "loss": 0.9689, "step": 6878 }, { "epoch": 0.7234674694677061, "grad_norm": 1.705348459141377, "learning_rate": 9.030718920970513e-07, "loss": 0.99, "step": 6879 }, { "epoch": 0.7235726399095535, "grad_norm": 1.8734205753292643, "learning_rate": 9.024296737845056e-07, "loss": 1.0241, "step": 6880 }, { "epoch": 0.7236778103514008, "grad_norm": 2.226219420147319, "learning_rate": 9.017876336134615e-07, "loss": 0.9465, "step": 6881 }, { "epoch": 0.7237829807932481, "grad_norm": 2.3638808966827827, "learning_rate": 9.011457716555108e-07, "loss": 0.9356, "step": 6882 }, { "epoch": 0.7238881512350954, "grad_norm": 2.274163453037241, "learning_rate": 9.005040879822269e-07, "loss": 0.9991, "step": 6883 }, { "epoch": 0.7239933216769427, "grad_norm": 2.3270361322264423, "learning_rate": 8.998625826651624e-07, "loss": 0.9753, "step": 6884 }, { "epoch": 0.72409849211879, "grad_norm": 2.380779478990076, "learning_rate": 8.992212557758515e-07, "loss": 0.9749, "step": 6885 }, { "epoch": 0.7242036625606373, "grad_norm": 2.5279858961997146, "learning_rate": 8.985801073858047e-07, "loss": 1.0178, "step": 6886 }, { "epoch": 0.7243088330024846, "grad_norm": 2.264259809683486, "learning_rate": 8.979391375665169e-07, "loss": 0.9952, "step": 6887 }, { "epoch": 0.7244140034443319, "grad_norm": 2.3465532374579468, "learning_rate": 8.972983463894599e-07, "loss": 0.9717, "step": 6888 }, { "epoch": 0.7245191738861793, "grad_norm": 2.682561586616032, "learning_rate": 8.966577339260874e-07, "loss": 0.9911, "step": 6889 }, { "epoch": 0.7246243443280266, "grad_norm": 3.0023908265682917, "learning_rate": 8.960173002478336e-07, "loss": 0.9918, "step": 6890 }, { "epoch": 0.7247295147698739, "grad_norm": 2.2766548656250625, "learning_rate": 8.953770454261102e-07, "loss": 0.9655, "step": 6891 }, { "epoch": 0.7248346852117212, "grad_norm": 2.83034363859963, "learning_rate": 8.947369695323113e-07, "loss": 0.9809, "step": 6892 }, { "epoch": 0.7249398556535686, "grad_norm": 2.4788072341230527, "learning_rate": 8.940970726378106e-07, "loss": 1.0237, "step": 6893 }, { "epoch": 0.7250450260954159, "grad_norm": 2.711458177748327, "learning_rate": 8.934573548139621e-07, "loss": 0.9478, "step": 6894 }, { "epoch": 0.7251501965372632, "grad_norm": 2.9684298621784375, "learning_rate": 8.928178161320977e-07, "loss": 1.0247, "step": 6895 }, { "epoch": 0.7252553669791105, "grad_norm": 1.6958237240093583, "learning_rate": 8.921784566635328e-07, "loss": 1.0107, "step": 6896 }, { "epoch": 0.7253605374209579, "grad_norm": 2.608563466979351, "learning_rate": 8.915392764795592e-07, "loss": 0.9734, "step": 6897 }, { "epoch": 0.7254657078628052, "grad_norm": 2.465172273583475, "learning_rate": 8.909002756514509e-07, "loss": 0.9729, "step": 6898 }, { "epoch": 0.7255708783046525, "grad_norm": 2.179996715866093, "learning_rate": 8.902614542504631e-07, "loss": 0.9561, "step": 6899 }, { "epoch": 0.7256760487464998, "grad_norm": 1.9710483091820064, "learning_rate": 8.896228123478268e-07, "loss": 0.9673, "step": 6900 }, { "epoch": 0.7257812191883471, "grad_norm": 2.3895996851177923, "learning_rate": 8.889843500147577e-07, "loss": 0.9992, "step": 6901 }, { "epoch": 0.7258863896301945, "grad_norm": 2.4070929276655026, "learning_rate": 8.883460673224478e-07, "loss": 1.0059, "step": 6902 }, { "epoch": 0.7259915600720418, "grad_norm": 2.52826203988613, "learning_rate": 8.877079643420708e-07, "loss": 0.9768, "step": 6903 }, { "epoch": 0.7260967305138891, "grad_norm": 2.6173671239261616, "learning_rate": 8.870700411447817e-07, "loss": 0.9739, "step": 6904 }, { "epoch": 0.7262019009557364, "grad_norm": 2.022268173256108, "learning_rate": 8.864322978017114e-07, "loss": 0.9741, "step": 6905 }, { "epoch": 0.7263070713975837, "grad_norm": 3.336022838388577, "learning_rate": 8.857947343839749e-07, "loss": 0.9924, "step": 6906 }, { "epoch": 0.726412241839431, "grad_norm": 3.1049563608044135, "learning_rate": 8.851573509626649e-07, "loss": 0.9864, "step": 6907 }, { "epoch": 0.7265174122812783, "grad_norm": 2.2360381413354036, "learning_rate": 8.845201476088558e-07, "loss": 0.9817, "step": 6908 }, { "epoch": 0.7266225827231256, "grad_norm": 2.431440384321841, "learning_rate": 8.838831243935988e-07, "loss": 1.006, "step": 6909 }, { "epoch": 0.726727753164973, "grad_norm": 2.672270703566811, "learning_rate": 8.832462813879289e-07, "loss": 0.993, "step": 6910 }, { "epoch": 0.7268329236068203, "grad_norm": 2.3586907446017147, "learning_rate": 8.826096186628568e-07, "loss": 0.9827, "step": 6911 }, { "epoch": 0.7269380940486676, "grad_norm": 2.3158398975904197, "learning_rate": 8.819731362893769e-07, "loss": 0.9767, "step": 6912 }, { "epoch": 0.7270432644905149, "grad_norm": 2.7399363636718146, "learning_rate": 8.813368343384621e-07, "loss": 1.014, "step": 6913 }, { "epoch": 0.7271484349323623, "grad_norm": 2.308433695829236, "learning_rate": 8.807007128810638e-07, "loss": 1.0087, "step": 6914 }, { "epoch": 0.7272536053742096, "grad_norm": 3.0347699529423333, "learning_rate": 8.800647719881153e-07, "loss": 0.9805, "step": 6915 }, { "epoch": 0.7273587758160569, "grad_norm": 2.387065454939189, "learning_rate": 8.794290117305296e-07, "loss": 0.9813, "step": 6916 }, { "epoch": 0.7274639462579042, "grad_norm": 2.8287122692548468, "learning_rate": 8.787934321791972e-07, "loss": 0.9892, "step": 6917 }, { "epoch": 0.7275691166997516, "grad_norm": 2.377152435206834, "learning_rate": 8.781580334049919e-07, "loss": 0.9678, "step": 6918 }, { "epoch": 0.7276742871415989, "grad_norm": 3.7839274906805667, "learning_rate": 8.77522815478764e-07, "loss": 1.0138, "step": 6919 }, { "epoch": 0.7277794575834462, "grad_norm": 2.3333914671728175, "learning_rate": 8.768877784713458e-07, "loss": 1.0377, "step": 6920 }, { "epoch": 0.7278846280252935, "grad_norm": 2.4232211693938637, "learning_rate": 8.762529224535496e-07, "loss": 0.9752, "step": 6921 }, { "epoch": 0.7279897984671408, "grad_norm": 2.3166357731448946, "learning_rate": 8.756182474961666e-07, "loss": 0.9823, "step": 6922 }, { "epoch": 0.7280949689089882, "grad_norm": 2.599714079191184, "learning_rate": 8.749837536699671e-07, "loss": 1.0219, "step": 6923 }, { "epoch": 0.7282001393508355, "grad_norm": 2.758481393241142, "learning_rate": 8.743494410457032e-07, "loss": 1.0016, "step": 6924 }, { "epoch": 0.7283053097926828, "grad_norm": 2.801903529605539, "learning_rate": 8.737153096941045e-07, "loss": 0.9807, "step": 6925 }, { "epoch": 0.72841048023453, "grad_norm": 2.132005701744301, "learning_rate": 8.730813596858823e-07, "loss": 1.0284, "step": 6926 }, { "epoch": 0.7285156506763774, "grad_norm": 2.645513420655628, "learning_rate": 8.724475910917274e-07, "loss": 1.0083, "step": 6927 }, { "epoch": 0.7286208211182247, "grad_norm": 2.7201477772204816, "learning_rate": 8.718140039823086e-07, "loss": 0.9533, "step": 6928 }, { "epoch": 0.728725991560072, "grad_norm": 1.9346726122158562, "learning_rate": 8.711805984282767e-07, "loss": 0.972, "step": 6929 }, { "epoch": 0.7288311620019193, "grad_norm": 2.5900806982874673, "learning_rate": 8.70547374500261e-07, "loss": 1.0068, "step": 6930 }, { "epoch": 0.7289363324437667, "grad_norm": 2.269593721723228, "learning_rate": 8.699143322688719e-07, "loss": 0.982, "step": 6931 }, { "epoch": 0.729041502885614, "grad_norm": 3.3136833165123636, "learning_rate": 8.692814718046979e-07, "loss": 0.9883, "step": 6932 }, { "epoch": 0.7291466733274613, "grad_norm": 2.360602310285996, "learning_rate": 8.686487931783067e-07, "loss": 0.9928, "step": 6933 }, { "epoch": 0.7292518437693086, "grad_norm": 2.231311892385931, "learning_rate": 8.680162964602479e-07, "loss": 1.0011, "step": 6934 }, { "epoch": 0.729357014211156, "grad_norm": 2.2504241332774653, "learning_rate": 8.673839817210497e-07, "loss": 0.9558, "step": 6935 }, { "epoch": 0.7294621846530033, "grad_norm": 1.758728210506665, "learning_rate": 8.66751849031221e-07, "loss": 0.9341, "step": 6936 }, { "epoch": 0.7295673550948506, "grad_norm": 2.618947911589737, "learning_rate": 8.661198984612476e-07, "loss": 0.9822, "step": 6937 }, { "epoch": 0.7296725255366979, "grad_norm": 2.6255176262261877, "learning_rate": 8.654881300815981e-07, "loss": 0.9986, "step": 6938 }, { "epoch": 0.7297776959785452, "grad_norm": 2.858506049180034, "learning_rate": 8.648565439627205e-07, "loss": 0.9505, "step": 6939 }, { "epoch": 0.7298828664203926, "grad_norm": 2.0177047337756906, "learning_rate": 8.642251401750395e-07, "loss": 0.9817, "step": 6940 }, { "epoch": 0.7299880368622399, "grad_norm": 2.414021437249756, "learning_rate": 8.635939187889633e-07, "loss": 0.9816, "step": 6941 }, { "epoch": 0.7300932073040872, "grad_norm": 2.398138605350569, "learning_rate": 8.629628798748763e-07, "loss": 0.9552, "step": 6942 }, { "epoch": 0.7301983777459345, "grad_norm": 3.202783456390097, "learning_rate": 8.623320235031452e-07, "loss": 0.98, "step": 6943 }, { "epoch": 0.7303035481877819, "grad_norm": 1.9008924047025504, "learning_rate": 8.617013497441154e-07, "loss": 0.9407, "step": 6944 }, { "epoch": 0.7304087186296292, "grad_norm": 2.0495458812583873, "learning_rate": 8.610708586681127e-07, "loss": 1.0026, "step": 6945 }, { "epoch": 0.7305138890714764, "grad_norm": 2.281006292586853, "learning_rate": 8.604405503454399e-07, "loss": 0.968, "step": 6946 }, { "epoch": 0.7306190595133237, "grad_norm": 2.8232403754432376, "learning_rate": 8.598104248463823e-07, "loss": 0.9745, "step": 6947 }, { "epoch": 0.7307242299551711, "grad_norm": 3.0990957513005486, "learning_rate": 8.591804822412048e-07, "loss": 0.9712, "step": 6948 }, { "epoch": 0.7308294003970184, "grad_norm": 2.948097662188505, "learning_rate": 8.585507226001488e-07, "loss": 0.972, "step": 6949 }, { "epoch": 0.7309345708388657, "grad_norm": 2.4648039136536304, "learning_rate": 8.579211459934394e-07, "loss": 0.9927, "step": 6950 }, { "epoch": 0.731039741280713, "grad_norm": 3.8091080476673755, "learning_rate": 8.572917524912777e-07, "loss": 0.9932, "step": 6951 }, { "epoch": 0.7311449117225604, "grad_norm": 2.2912070211331597, "learning_rate": 8.566625421638464e-07, "loss": 1.0069, "step": 6952 }, { "epoch": 0.7312500821644077, "grad_norm": 2.769316491698749, "learning_rate": 8.560335150813081e-07, "loss": 0.9821, "step": 6953 }, { "epoch": 0.731355252606255, "grad_norm": 4.834866721749002, "learning_rate": 8.554046713138034e-07, "loss": 0.96, "step": 6954 }, { "epoch": 0.7314604230481023, "grad_norm": 2.262988850257133, "learning_rate": 8.54776010931454e-07, "loss": 0.97, "step": 6955 }, { "epoch": 0.7315655934899497, "grad_norm": 2.4207174619006913, "learning_rate": 8.54147534004359e-07, "loss": 0.9787, "step": 6956 }, { "epoch": 0.731670763931797, "grad_norm": 2.6566870598983052, "learning_rate": 8.535192406025997e-07, "loss": 0.9971, "step": 6957 }, { "epoch": 0.7317759343736443, "grad_norm": 2.3775122168529137, "learning_rate": 8.52891130796235e-07, "loss": 0.9409, "step": 6958 }, { "epoch": 0.7318811048154916, "grad_norm": 2.5975495470680183, "learning_rate": 8.522632046553056e-07, "loss": 1.0159, "step": 6959 }, { "epoch": 0.731986275257339, "grad_norm": 2.551497070086668, "learning_rate": 8.516354622498279e-07, "loss": 0.9776, "step": 6960 }, { "epoch": 0.7320914456991863, "grad_norm": 2.030647013013727, "learning_rate": 8.510079036498012e-07, "loss": 0.9495, "step": 6961 }, { "epoch": 0.7321966161410336, "grad_norm": 2.530716610861503, "learning_rate": 8.503805289252037e-07, "loss": 1.0091, "step": 6962 }, { "epoch": 0.7323017865828809, "grad_norm": 2.8009461610833113, "learning_rate": 8.497533381459914e-07, "loss": 1.0142, "step": 6963 }, { "epoch": 0.7324069570247282, "grad_norm": 2.7103271804572553, "learning_rate": 8.491263313821021e-07, "loss": 0.9479, "step": 6964 }, { "epoch": 0.7325121274665756, "grad_norm": 3.0620007557697995, "learning_rate": 8.484995087034506e-07, "loss": 0.9684, "step": 6965 }, { "epoch": 0.7326172979084229, "grad_norm": 2.310056025863538, "learning_rate": 8.47872870179933e-07, "loss": 1.0122, "step": 6966 }, { "epoch": 0.7327224683502701, "grad_norm": 2.2752199543586933, "learning_rate": 8.472464158814256e-07, "loss": 0.9815, "step": 6967 }, { "epoch": 0.7328276387921174, "grad_norm": 2.499533329716282, "learning_rate": 8.466201458777809e-07, "loss": 0.9966, "step": 6968 }, { "epoch": 0.7329328092339648, "grad_norm": 3.0605576051148433, "learning_rate": 8.459940602388345e-07, "loss": 0.9442, "step": 6969 }, { "epoch": 0.7330379796758121, "grad_norm": 3.0303954415055334, "learning_rate": 8.453681590343979e-07, "loss": 1.012, "step": 6970 }, { "epoch": 0.7331431501176594, "grad_norm": 2.3529258666071855, "learning_rate": 8.447424423342665e-07, "loss": 1.0134, "step": 6971 }, { "epoch": 0.7332483205595067, "grad_norm": 1.653851753950748, "learning_rate": 8.441169102082106e-07, "loss": 0.9661, "step": 6972 }, { "epoch": 0.733353491001354, "grad_norm": 2.3418082947379775, "learning_rate": 8.434915627259832e-07, "loss": 1.0021, "step": 6973 }, { "epoch": 0.7334586614432014, "grad_norm": 3.8928938699639724, "learning_rate": 8.428663999573142e-07, "loss": 0.9314, "step": 6974 }, { "epoch": 0.7335638318850487, "grad_norm": 3.099834663643491, "learning_rate": 8.422414219719147e-07, "loss": 0.9996, "step": 6975 }, { "epoch": 0.733669002326896, "grad_norm": 1.8811632859332335, "learning_rate": 8.416166288394751e-07, "loss": 0.9822, "step": 6976 }, { "epoch": 0.7337741727687433, "grad_norm": 2.5292346499214986, "learning_rate": 8.409920206296635e-07, "loss": 1.0268, "step": 6977 }, { "epoch": 0.7338793432105907, "grad_norm": 2.3412938045005416, "learning_rate": 8.4036759741213e-07, "loss": 0.9831, "step": 6978 }, { "epoch": 0.733984513652438, "grad_norm": 2.3394006760695123, "learning_rate": 8.39743359256501e-07, "loss": 0.9884, "step": 6979 }, { "epoch": 0.7340896840942853, "grad_norm": 1.9244006117602965, "learning_rate": 8.39119306232385e-07, "loss": 0.9443, "step": 6980 }, { "epoch": 0.7341948545361326, "grad_norm": 2.478475134799539, "learning_rate": 8.384954384093682e-07, "loss": 0.9719, "step": 6981 }, { "epoch": 0.73430002497798, "grad_norm": 3.0512883023668933, "learning_rate": 8.378717558570182e-07, "loss": 0.969, "step": 6982 }, { "epoch": 0.7344051954198273, "grad_norm": 3.1758406178171295, "learning_rate": 8.37248258644879e-07, "loss": 0.9856, "step": 6983 }, { "epoch": 0.7345103658616746, "grad_norm": 2.738349461307976, "learning_rate": 8.366249468424742e-07, "loss": 0.9978, "step": 6984 }, { "epoch": 0.7346155363035219, "grad_norm": 2.321797129857226, "learning_rate": 8.36001820519311e-07, "loss": 1.0093, "step": 6985 }, { "epoch": 0.7347207067453693, "grad_norm": 2.1761741288187255, "learning_rate": 8.353788797448703e-07, "loss": 0.9997, "step": 6986 }, { "epoch": 0.7348258771872165, "grad_norm": 2.706187150898931, "learning_rate": 8.347561245886169e-07, "loss": 0.9516, "step": 6987 }, { "epoch": 0.7349310476290638, "grad_norm": 2.061774866814489, "learning_rate": 8.341335551199903e-07, "loss": 0.9517, "step": 6988 }, { "epoch": 0.7350362180709111, "grad_norm": 1.9863690617075271, "learning_rate": 8.335111714084135e-07, "loss": 0.9947, "step": 6989 }, { "epoch": 0.7351413885127585, "grad_norm": 2.197421006470657, "learning_rate": 8.328889735232876e-07, "loss": 0.9866, "step": 6990 }, { "epoch": 0.7352465589546058, "grad_norm": 3.8124829291968165, "learning_rate": 8.322669615339909e-07, "loss": 0.984, "step": 6991 }, { "epoch": 0.7353517293964531, "grad_norm": 3.2431775985547375, "learning_rate": 8.316451355098842e-07, "loss": 1.0148, "step": 6992 }, { "epoch": 0.7354568998383004, "grad_norm": 2.3326759532763464, "learning_rate": 8.310234955203036e-07, "loss": 0.9669, "step": 6993 }, { "epoch": 0.7355620702801478, "grad_norm": 2.13604126329195, "learning_rate": 8.304020416345698e-07, "loss": 1.0038, "step": 6994 }, { "epoch": 0.7356672407219951, "grad_norm": 2.5186724644933234, "learning_rate": 8.297807739219777e-07, "loss": 1.0161, "step": 6995 }, { "epoch": 0.7357724111638424, "grad_norm": 2.8521649286716224, "learning_rate": 8.291596924518048e-07, "loss": 1.0343, "step": 6996 }, { "epoch": 0.7358775816056897, "grad_norm": 2.7048039596817137, "learning_rate": 8.285387972933045e-07, "loss": 0.9705, "step": 6997 }, { "epoch": 0.735982752047537, "grad_norm": 2.332648947659513, "learning_rate": 8.279180885157129e-07, "loss": 0.9918, "step": 6998 }, { "epoch": 0.7360879224893844, "grad_norm": 2.206028825985984, "learning_rate": 8.272975661882446e-07, "loss": 0.9916, "step": 6999 }, { "epoch": 0.7361930929312317, "grad_norm": 2.8257632521399847, "learning_rate": 8.266772303800907e-07, "loss": 0.9935, "step": 7000 }, { "epoch": 0.736298263373079, "grad_norm": 1.9735295694119066, "learning_rate": 8.260570811604252e-07, "loss": 0.9716, "step": 7001 }, { "epoch": 0.7364034338149263, "grad_norm": 2.766348296710771, "learning_rate": 8.254371185983981e-07, "loss": 0.9867, "step": 7002 }, { "epoch": 0.7365086042567737, "grad_norm": 2.5669860597872582, "learning_rate": 8.248173427631406e-07, "loss": 0.9607, "step": 7003 }, { "epoch": 0.736613774698621, "grad_norm": 2.011650558328637, "learning_rate": 8.241977537237639e-07, "loss": 0.9772, "step": 7004 }, { "epoch": 0.7367189451404683, "grad_norm": 4.378445426045064, "learning_rate": 8.235783515493545e-07, "loss": 1.0191, "step": 7005 }, { "epoch": 0.7368241155823156, "grad_norm": 3.0930192829825094, "learning_rate": 8.229591363089826e-07, "loss": 0.9812, "step": 7006 }, { "epoch": 0.7369292860241629, "grad_norm": 2.8858200910410408, "learning_rate": 8.223401080716934e-07, "loss": 1.012, "step": 7007 }, { "epoch": 0.7370344564660102, "grad_norm": 2.3565977478267763, "learning_rate": 8.217212669065161e-07, "loss": 0.9891, "step": 7008 }, { "epoch": 0.7371396269078575, "grad_norm": 2.90432653736149, "learning_rate": 8.21102612882454e-07, "loss": 0.9853, "step": 7009 }, { "epoch": 0.7372447973497048, "grad_norm": 2.636696481329649, "learning_rate": 8.204841460684934e-07, "loss": 0.9837, "step": 7010 }, { "epoch": 0.7373499677915522, "grad_norm": 2.401750673418409, "learning_rate": 8.198658665335968e-07, "loss": 0.9897, "step": 7011 }, { "epoch": 0.7374551382333995, "grad_norm": 2.187244354384058, "learning_rate": 8.192477743467078e-07, "loss": 0.9739, "step": 7012 }, { "epoch": 0.7375603086752468, "grad_norm": 2.504902025352448, "learning_rate": 8.186298695767494e-07, "loss": 1.0082, "step": 7013 }, { "epoch": 0.7376654791170941, "grad_norm": 2.656973818003429, "learning_rate": 8.18012152292621e-07, "loss": 1.0404, "step": 7014 }, { "epoch": 0.7377706495589414, "grad_norm": 1.9657665226192536, "learning_rate": 8.173946225632046e-07, "loss": 0.9986, "step": 7015 }, { "epoch": 0.7378758200007888, "grad_norm": 2.458503304437768, "learning_rate": 8.16777280457357e-07, "loss": 0.9499, "step": 7016 }, { "epoch": 0.7379809904426361, "grad_norm": 2.8152286475357635, "learning_rate": 8.1616012604392e-07, "loss": 1.0263, "step": 7017 }, { "epoch": 0.7380861608844834, "grad_norm": 2.7808079810706197, "learning_rate": 8.155431593917096e-07, "loss": 0.9589, "step": 7018 }, { "epoch": 0.7381913313263307, "grad_norm": 2.2105204331938384, "learning_rate": 8.149263805695215e-07, "loss": 1.0189, "step": 7019 }, { "epoch": 0.7382965017681781, "grad_norm": 2.152001818336773, "learning_rate": 8.143097896461327e-07, "loss": 0.9507, "step": 7020 }, { "epoch": 0.7384016722100254, "grad_norm": 2.5837569059762746, "learning_rate": 8.136933866902957e-07, "loss": 0.9991, "step": 7021 }, { "epoch": 0.7385068426518727, "grad_norm": 2.0815321522556953, "learning_rate": 8.130771717707476e-07, "loss": 0.9999, "step": 7022 }, { "epoch": 0.73861201309372, "grad_norm": 2.4905114183126327, "learning_rate": 8.124611449561981e-07, "loss": 0.9588, "step": 7023 }, { "epoch": 0.7387171835355674, "grad_norm": 2.0780844688809545, "learning_rate": 8.118453063153412e-07, "loss": 0.9142, "step": 7024 }, { "epoch": 0.7388223539774147, "grad_norm": 2.1224882708812336, "learning_rate": 8.112296559168459e-07, "loss": 0.9753, "step": 7025 }, { "epoch": 0.738927524419262, "grad_norm": 2.8656944207274395, "learning_rate": 8.106141938293627e-07, "loss": 0.979, "step": 7026 }, { "epoch": 0.7390326948611093, "grad_norm": 2.5552855508838763, "learning_rate": 8.099989201215214e-07, "loss": 0.9649, "step": 7027 }, { "epoch": 0.7391378653029566, "grad_norm": 1.591509673958794, "learning_rate": 8.093838348619282e-07, "loss": 0.9729, "step": 7028 }, { "epoch": 0.7392430357448039, "grad_norm": 2.3016915010058825, "learning_rate": 8.087689381191713e-07, "loss": 0.9691, "step": 7029 }, { "epoch": 0.7393482061866512, "grad_norm": 2.697962007116117, "learning_rate": 8.081542299618139e-07, "loss": 1.0254, "step": 7030 }, { "epoch": 0.7394533766284985, "grad_norm": 2.585421932407672, "learning_rate": 8.075397104584045e-07, "loss": 0.9704, "step": 7031 }, { "epoch": 0.7395585470703459, "grad_norm": 2.370762176558943, "learning_rate": 8.069253796774639e-07, "loss": 0.9573, "step": 7032 }, { "epoch": 0.7396637175121932, "grad_norm": 2.7384589402328117, "learning_rate": 8.063112376874965e-07, "loss": 0.9659, "step": 7033 }, { "epoch": 0.7397688879540405, "grad_norm": 2.3648476588909197, "learning_rate": 8.056972845569833e-07, "loss": 0.9979, "step": 7034 }, { "epoch": 0.7398740583958878, "grad_norm": 2.363044938151124, "learning_rate": 8.05083520354383e-07, "loss": 0.9984, "step": 7035 }, { "epoch": 0.7399792288377351, "grad_norm": 2.7144839085362498, "learning_rate": 8.044699451481383e-07, "loss": 0.98, "step": 7036 }, { "epoch": 0.7400843992795825, "grad_norm": 1.991910306378637, "learning_rate": 8.038565590066652e-07, "loss": 1.0083, "step": 7037 }, { "epoch": 0.7401895697214298, "grad_norm": 2.268190827360275, "learning_rate": 8.032433619983628e-07, "loss": 0.9803, "step": 7038 }, { "epoch": 0.7402947401632771, "grad_norm": 2.9919518122332582, "learning_rate": 8.026303541916056e-07, "loss": 1.0184, "step": 7039 }, { "epoch": 0.7403999106051244, "grad_norm": 1.9029884195579985, "learning_rate": 8.020175356547497e-07, "loss": 1.0364, "step": 7040 }, { "epoch": 0.7405050810469718, "grad_norm": 2.8980438411255025, "learning_rate": 8.014049064561294e-07, "loss": 0.9867, "step": 7041 }, { "epoch": 0.7406102514888191, "grad_norm": 2.9396255932824475, "learning_rate": 8.007924666640565e-07, "loss": 1.0225, "step": 7042 }, { "epoch": 0.7407154219306664, "grad_norm": 3.0222446896596415, "learning_rate": 8.001802163468244e-07, "loss": 0.9761, "step": 7043 }, { "epoch": 0.7408205923725137, "grad_norm": 3.017678825360528, "learning_rate": 7.995681555727011e-07, "loss": 1.0636, "step": 7044 }, { "epoch": 0.7409257628143611, "grad_norm": 2.240379927077817, "learning_rate": 7.989562844099396e-07, "loss": 0.9825, "step": 7045 }, { "epoch": 0.7410309332562084, "grad_norm": 3.079831156511648, "learning_rate": 7.983446029267658e-07, "loss": 1.0057, "step": 7046 }, { "epoch": 0.7411361036980557, "grad_norm": 3.2771958422605723, "learning_rate": 7.977331111913883e-07, "loss": 0.9918, "step": 7047 }, { "epoch": 0.7412412741399029, "grad_norm": 2.545219586879537, "learning_rate": 7.971218092719921e-07, "loss": 0.9816, "step": 7048 }, { "epoch": 0.7413464445817503, "grad_norm": 1.8833415674316218, "learning_rate": 7.965106972367423e-07, "loss": 0.9534, "step": 7049 }, { "epoch": 0.7414516150235976, "grad_norm": 2.2734406517544934, "learning_rate": 7.958997751537836e-07, "loss": 0.9753, "step": 7050 }, { "epoch": 0.7415567854654449, "grad_norm": 2.119993555931499, "learning_rate": 7.952890430912374e-07, "loss": 0.9472, "step": 7051 }, { "epoch": 0.7416619559072922, "grad_norm": 3.0421738792041166, "learning_rate": 7.946785011172062e-07, "loss": 1.0013, "step": 7052 }, { "epoch": 0.7417671263491395, "grad_norm": 2.473943886643507, "learning_rate": 7.940681492997678e-07, "loss": 1.0045, "step": 7053 }, { "epoch": 0.7418722967909869, "grad_norm": 3.1688667102775616, "learning_rate": 7.934579877069848e-07, "loss": 0.9387, "step": 7054 }, { "epoch": 0.7419774672328342, "grad_norm": 2.5868684633157324, "learning_rate": 7.92848016406893e-07, "loss": 1.0002, "step": 7055 }, { "epoch": 0.7420826376746815, "grad_norm": 2.2449015856520873, "learning_rate": 7.922382354675079e-07, "loss": 0.9367, "step": 7056 }, { "epoch": 0.7421878081165288, "grad_norm": 3.009362045627099, "learning_rate": 7.91628644956827e-07, "loss": 0.9526, "step": 7057 }, { "epoch": 0.7422929785583762, "grad_norm": 2.4357656876283156, "learning_rate": 7.910192449428216e-07, "loss": 1.0009, "step": 7058 }, { "epoch": 0.7423981490002235, "grad_norm": 1.7009514930395582, "learning_rate": 7.904100354934477e-07, "loss": 0.9459, "step": 7059 }, { "epoch": 0.7425033194420708, "grad_norm": 1.686132473222285, "learning_rate": 7.898010166766348e-07, "loss": 0.9756, "step": 7060 }, { "epoch": 0.7426084898839181, "grad_norm": 2.580697246706924, "learning_rate": 7.891921885602946e-07, "loss": 1.014, "step": 7061 }, { "epoch": 0.7427136603257655, "grad_norm": 1.8915629581945324, "learning_rate": 7.885835512123144e-07, "loss": 0.9774, "step": 7062 }, { "epoch": 0.7428188307676128, "grad_norm": 2.819291640040572, "learning_rate": 7.879751047005632e-07, "loss": 0.9752, "step": 7063 }, { "epoch": 0.7429240012094601, "grad_norm": 2.019716408325097, "learning_rate": 7.87366849092888e-07, "loss": 0.9477, "step": 7064 }, { "epoch": 0.7430291716513074, "grad_norm": 2.179030867839558, "learning_rate": 7.867587844571126e-07, "loss": 0.9877, "step": 7065 }, { "epoch": 0.7431343420931548, "grad_norm": 2.348644789987132, "learning_rate": 7.861509108610423e-07, "loss": 0.9943, "step": 7066 }, { "epoch": 0.7432395125350021, "grad_norm": 1.9825526177265085, "learning_rate": 7.855432283724576e-07, "loss": 0.9825, "step": 7067 }, { "epoch": 0.7433446829768493, "grad_norm": 2.171603481982258, "learning_rate": 7.849357370591229e-07, "loss": 1.0117, "step": 7068 }, { "epoch": 0.7434498534186966, "grad_norm": 2.648552295188202, "learning_rate": 7.843284369887757e-07, "loss": 0.9668, "step": 7069 }, { "epoch": 0.743555023860544, "grad_norm": 2.849512019966635, "learning_rate": 7.837213282291365e-07, "loss": 0.9952, "step": 7070 }, { "epoch": 0.7436601943023913, "grad_norm": 2.368367081297725, "learning_rate": 7.831144108479016e-07, "loss": 0.9449, "step": 7071 }, { "epoch": 0.7437653647442386, "grad_norm": 2.465131065995525, "learning_rate": 7.825076849127458e-07, "loss": 1.018, "step": 7072 }, { "epoch": 0.7438705351860859, "grad_norm": 2.883105606748885, "learning_rate": 7.819011504913266e-07, "loss": 0.9762, "step": 7073 }, { "epoch": 0.7439757056279332, "grad_norm": 3.0353049995873707, "learning_rate": 7.812948076512747e-07, "loss": 1.005, "step": 7074 }, { "epoch": 0.7440808760697806, "grad_norm": 2.2610828766405118, "learning_rate": 7.806886564602043e-07, "loss": 0.9639, "step": 7075 }, { "epoch": 0.7441860465116279, "grad_norm": 2.1841830172268466, "learning_rate": 7.800826969857036e-07, "loss": 0.9841, "step": 7076 }, { "epoch": 0.7442912169534752, "grad_norm": 2.129396215048809, "learning_rate": 7.79476929295343e-07, "loss": 0.9898, "step": 7077 }, { "epoch": 0.7443963873953225, "grad_norm": 2.3303104402152384, "learning_rate": 7.788713534566714e-07, "loss": 1.0202, "step": 7078 }, { "epoch": 0.7445015578371699, "grad_norm": 2.278648106170102, "learning_rate": 7.78265969537213e-07, "loss": 0.9724, "step": 7079 }, { "epoch": 0.7446067282790172, "grad_norm": 2.3135184335473524, "learning_rate": 7.776607776044748e-07, "loss": 0.9536, "step": 7080 }, { "epoch": 0.7447118987208645, "grad_norm": 2.4775548340535365, "learning_rate": 7.77055777725938e-07, "loss": 0.9794, "step": 7081 }, { "epoch": 0.7448170691627118, "grad_norm": 2.5496006086788356, "learning_rate": 7.764509699690679e-07, "loss": 1.017, "step": 7082 }, { "epoch": 0.7449222396045592, "grad_norm": 2.1357477443739787, "learning_rate": 7.758463544013026e-07, "loss": 0.9812, "step": 7083 }, { "epoch": 0.7450274100464065, "grad_norm": 2.5264510633127824, "learning_rate": 7.752419310900636e-07, "loss": 0.9786, "step": 7084 }, { "epoch": 0.7451325804882538, "grad_norm": 2.7553954412125945, "learning_rate": 7.746377001027466e-07, "loss": 0.9899, "step": 7085 }, { "epoch": 0.7452377509301011, "grad_norm": 2.1345306806782225, "learning_rate": 7.740336615067293e-07, "loss": 0.9952, "step": 7086 }, { "epoch": 0.7453429213719485, "grad_norm": 2.0732183389576258, "learning_rate": 7.734298153693671e-07, "loss": 0.9757, "step": 7087 }, { "epoch": 0.7454480918137958, "grad_norm": 2.513807387074567, "learning_rate": 7.728261617579922e-07, "loss": 0.9688, "step": 7088 }, { "epoch": 0.745553262255643, "grad_norm": 1.6046019938022835, "learning_rate": 7.722227007399183e-07, "loss": 0.9902, "step": 7089 }, { "epoch": 0.7456584326974903, "grad_norm": 2.5953536113957365, "learning_rate": 7.716194323824345e-07, "loss": 0.9866, "step": 7090 }, { "epoch": 0.7457636031393376, "grad_norm": 2.326926439775202, "learning_rate": 7.710163567528101e-07, "loss": 0.9848, "step": 7091 }, { "epoch": 0.745868773581185, "grad_norm": 2.8094515613190687, "learning_rate": 7.704134739182942e-07, "loss": 0.9781, "step": 7092 }, { "epoch": 0.7459739440230323, "grad_norm": 3.123876981837614, "learning_rate": 7.698107839461111e-07, "loss": 0.9893, "step": 7093 }, { "epoch": 0.7460791144648796, "grad_norm": 3.5591005303250935, "learning_rate": 7.692082869034662e-07, "loss": 0.995, "step": 7094 }, { "epoch": 0.746184284906727, "grad_norm": 2.346896318567527, "learning_rate": 7.686059828575423e-07, "loss": 0.9834, "step": 7095 }, { "epoch": 0.7462894553485743, "grad_norm": 2.6908060947387304, "learning_rate": 7.680038718755023e-07, "loss": 0.9847, "step": 7096 }, { "epoch": 0.7463946257904216, "grad_norm": 2.4966779519732865, "learning_rate": 7.674019540244843e-07, "loss": 0.9635, "step": 7097 }, { "epoch": 0.7464997962322689, "grad_norm": 2.173016936111415, "learning_rate": 7.668002293716084e-07, "loss": 1.0019, "step": 7098 }, { "epoch": 0.7466049666741162, "grad_norm": 2.4007122420388285, "learning_rate": 7.661986979839703e-07, "loss": 0.996, "step": 7099 }, { "epoch": 0.7467101371159636, "grad_norm": 2.305372137742398, "learning_rate": 7.655973599286459e-07, "loss": 1.0022, "step": 7100 }, { "epoch": 0.7468153075578109, "grad_norm": 2.7387496498746966, "learning_rate": 7.649962152726903e-07, "loss": 1.0304, "step": 7101 }, { "epoch": 0.7469204779996582, "grad_norm": 3.7124819307244246, "learning_rate": 7.643952640831334e-07, "loss": 0.9823, "step": 7102 }, { "epoch": 0.7470256484415055, "grad_norm": 2.3773703882173183, "learning_rate": 7.637945064269883e-07, "loss": 1.0051, "step": 7103 }, { "epoch": 0.7471308188833529, "grad_norm": 2.3915425949886466, "learning_rate": 7.631939423712414e-07, "loss": 0.9314, "step": 7104 }, { "epoch": 0.7472359893252002, "grad_norm": 2.7826299688394003, "learning_rate": 7.625935719828633e-07, "loss": 0.9793, "step": 7105 }, { "epoch": 0.7473411597670475, "grad_norm": 2.1770658600493884, "learning_rate": 7.619933953287989e-07, "loss": 0.989, "step": 7106 }, { "epoch": 0.7474463302088948, "grad_norm": 2.877859070061451, "learning_rate": 7.613934124759712e-07, "loss": 1.0117, "step": 7107 }, { "epoch": 0.7475515006507422, "grad_norm": 2.1132339489401253, "learning_rate": 7.607936234912841e-07, "loss": 1.0075, "step": 7108 }, { "epoch": 0.7476566710925894, "grad_norm": 2.04198700411287, "learning_rate": 7.601940284416187e-07, "loss": 0.9941, "step": 7109 }, { "epoch": 0.7477618415344367, "grad_norm": 3.077969718435496, "learning_rate": 7.595946273938348e-07, "loss": 0.9855, "step": 7110 }, { "epoch": 0.747867011976284, "grad_norm": 1.851022408153244, "learning_rate": 7.589954204147696e-07, "loss": 0.9606, "step": 7111 }, { "epoch": 0.7479721824181313, "grad_norm": 2.181580709946769, "learning_rate": 7.583964075712402e-07, "loss": 1.0158, "step": 7112 }, { "epoch": 0.7480773528599787, "grad_norm": 2.5644893957012824, "learning_rate": 7.577975889300395e-07, "loss": 1.0342, "step": 7113 }, { "epoch": 0.748182523301826, "grad_norm": 2.117573151952007, "learning_rate": 7.57198964557942e-07, "loss": 0.9742, "step": 7114 }, { "epoch": 0.7482876937436733, "grad_norm": 2.645246462063244, "learning_rate": 7.566005345216993e-07, "loss": 1.0054, "step": 7115 }, { "epoch": 0.7483928641855206, "grad_norm": 2.997740803186767, "learning_rate": 7.560022988880392e-07, "loss": 1.0511, "step": 7116 }, { "epoch": 0.748498034627368, "grad_norm": 2.925719317892139, "learning_rate": 7.554042577236706e-07, "loss": 1.0065, "step": 7117 }, { "epoch": 0.7486032050692153, "grad_norm": 1.9177386221309367, "learning_rate": 7.548064110952799e-07, "loss": 0.9757, "step": 7118 }, { "epoch": 0.7487083755110626, "grad_norm": 2.088839253519746, "learning_rate": 7.542087590695321e-07, "loss": 0.9794, "step": 7119 }, { "epoch": 0.7488135459529099, "grad_norm": 2.5378066740840493, "learning_rate": 7.536113017130686e-07, "loss": 0.9974, "step": 7120 }, { "epoch": 0.7489187163947573, "grad_norm": 2.3323792119173956, "learning_rate": 7.530140390925125e-07, "loss": 0.9962, "step": 7121 }, { "epoch": 0.7490238868366046, "grad_norm": 3.319157282262614, "learning_rate": 7.524169712744612e-07, "loss": 1.0356, "step": 7122 }, { "epoch": 0.7491290572784519, "grad_norm": 3.4503166532322234, "learning_rate": 7.518200983254931e-07, "loss": 0.9966, "step": 7123 }, { "epoch": 0.7492342277202992, "grad_norm": 2.23731079204491, "learning_rate": 7.512234203121655e-07, "loss": 1.0404, "step": 7124 }, { "epoch": 0.7493393981621466, "grad_norm": 2.889672775952245, "learning_rate": 7.506269373010106e-07, "loss": 0.9698, "step": 7125 }, { "epoch": 0.7494445686039939, "grad_norm": 2.3947512239243682, "learning_rate": 7.500306493585424e-07, "loss": 0.9817, "step": 7126 }, { "epoch": 0.7495497390458412, "grad_norm": 2.1520151953932376, "learning_rate": 7.494345565512504e-07, "loss": 0.973, "step": 7127 }, { "epoch": 0.7496549094876885, "grad_norm": 3.2505704587799995, "learning_rate": 7.488386589456043e-07, "loss": 1.011, "step": 7128 }, { "epoch": 0.7497600799295357, "grad_norm": 2.6688857111662054, "learning_rate": 7.482429566080518e-07, "loss": 0.99, "step": 7129 }, { "epoch": 0.7498652503713831, "grad_norm": 2.2536394568527727, "learning_rate": 7.47647449605017e-07, "loss": 0.9788, "step": 7130 }, { "epoch": 0.7499704208132304, "grad_norm": 2.3527982867557764, "learning_rate": 7.470521380029044e-07, "loss": 1.0023, "step": 7131 }, { "epoch": 0.7500755912550777, "grad_norm": 2.2238317421368268, "learning_rate": 7.464570218680958e-07, "loss": 0.9577, "step": 7132 }, { "epoch": 0.750180761696925, "grad_norm": 2.500971462662424, "learning_rate": 7.45862101266952e-07, "loss": 0.9753, "step": 7133 }, { "epoch": 0.7502859321387724, "grad_norm": 2.404293358312979, "learning_rate": 7.452673762658096e-07, "loss": 0.9654, "step": 7134 }, { "epoch": 0.7503911025806197, "grad_norm": 2.0933883750611204, "learning_rate": 7.446728469309872e-07, "loss": 0.9753, "step": 7135 }, { "epoch": 0.750496273022467, "grad_norm": 2.2681195797049076, "learning_rate": 7.440785133287773e-07, "loss": 0.9794, "step": 7136 }, { "epoch": 0.7506014434643143, "grad_norm": 2.422935286133933, "learning_rate": 7.434843755254534e-07, "loss": 1.0004, "step": 7137 }, { "epoch": 0.7507066139061617, "grad_norm": 3.881491057142409, "learning_rate": 7.428904335872675e-07, "loss": 0.9864, "step": 7138 }, { "epoch": 0.750811784348009, "grad_norm": 2.966377670151308, "learning_rate": 7.422966875804475e-07, "loss": 1.0794, "step": 7139 }, { "epoch": 0.7509169547898563, "grad_norm": 2.859330892149561, "learning_rate": 7.417031375712009e-07, "loss": 1.0196, "step": 7140 }, { "epoch": 0.7510221252317036, "grad_norm": 2.8843383108973084, "learning_rate": 7.411097836257141e-07, "loss": 0.9402, "step": 7141 }, { "epoch": 0.751127295673551, "grad_norm": 2.1645139983935056, "learning_rate": 7.405166258101495e-07, "loss": 0.998, "step": 7142 }, { "epoch": 0.7512324661153983, "grad_norm": 3.3119860219502337, "learning_rate": 7.399236641906498e-07, "loss": 0.9658, "step": 7143 }, { "epoch": 0.7513376365572456, "grad_norm": 2.0595918641126327, "learning_rate": 7.393308988333337e-07, "loss": 0.9997, "step": 7144 }, { "epoch": 0.7514428069990929, "grad_norm": 3.1286563898373543, "learning_rate": 7.387383298042994e-07, "loss": 0.9831, "step": 7145 }, { "epoch": 0.7515479774409403, "grad_norm": 2.8018989951253417, "learning_rate": 7.381459571696237e-07, "loss": 0.9272, "step": 7146 }, { "epoch": 0.7516531478827876, "grad_norm": 2.7502028796110176, "learning_rate": 7.375537809953609e-07, "loss": 0.9867, "step": 7147 }, { "epoch": 0.7517583183246349, "grad_norm": 2.678162761432206, "learning_rate": 7.369618013475419e-07, "loss": 1.0405, "step": 7148 }, { "epoch": 0.7518634887664822, "grad_norm": 2.3470597444469794, "learning_rate": 7.363700182921784e-07, "loss": 1.0009, "step": 7149 }, { "epoch": 0.7519686592083294, "grad_norm": 2.5934942008660897, "learning_rate": 7.357784318952579e-07, "loss": 1.0031, "step": 7150 }, { "epoch": 0.7520738296501768, "grad_norm": 3.16545323400702, "learning_rate": 7.35187042222747e-07, "loss": 1.0078, "step": 7151 }, { "epoch": 0.7521790000920241, "grad_norm": 2.409118323522591, "learning_rate": 7.345958493405911e-07, "loss": 0.9592, "step": 7152 }, { "epoch": 0.7522841705338714, "grad_norm": 2.8526257467266967, "learning_rate": 7.340048533147112e-07, "loss": 0.9807, "step": 7153 }, { "epoch": 0.7523893409757187, "grad_norm": 2.4382304350488035, "learning_rate": 7.334140542110093e-07, "loss": 0.9787, "step": 7154 }, { "epoch": 0.7524945114175661, "grad_norm": 1.893802244461524, "learning_rate": 7.328234520953634e-07, "loss": 1.0078, "step": 7155 }, { "epoch": 0.7525996818594134, "grad_norm": 2.3356044875747672, "learning_rate": 7.322330470336314e-07, "loss": 0.98, "step": 7156 }, { "epoch": 0.7527048523012607, "grad_norm": 2.6357389878275543, "learning_rate": 7.316428390916471e-07, "loss": 0.976, "step": 7157 }, { "epoch": 0.752810022743108, "grad_norm": 2.0816090851410043, "learning_rate": 7.310528283352225e-07, "loss": 0.9998, "step": 7158 }, { "epoch": 0.7529151931849554, "grad_norm": 2.432089883393319, "learning_rate": 7.304630148301495e-07, "loss": 1.0173, "step": 7159 }, { "epoch": 0.7530203636268027, "grad_norm": 3.2576438811204267, "learning_rate": 7.298733986421963e-07, "loss": 0.9316, "step": 7160 }, { "epoch": 0.75312553406865, "grad_norm": 2.5192616299402535, "learning_rate": 7.292839798371107e-07, "loss": 0.9752, "step": 7161 }, { "epoch": 0.7532307045104973, "grad_norm": 2.0662627309135466, "learning_rate": 7.286947584806162e-07, "loss": 1.0003, "step": 7162 }, { "epoch": 0.7533358749523447, "grad_norm": 2.3318286805896697, "learning_rate": 7.28105734638416e-07, "loss": 0.9491, "step": 7163 }, { "epoch": 0.753441045394192, "grad_norm": 1.7589980888158547, "learning_rate": 7.275169083761915e-07, "loss": 1.0146, "step": 7164 }, { "epoch": 0.7535462158360393, "grad_norm": 2.3273013030064167, "learning_rate": 7.269282797596003e-07, "loss": 0.9642, "step": 7165 }, { "epoch": 0.7536513862778866, "grad_norm": 2.912414660527191, "learning_rate": 7.263398488542805e-07, "loss": 1.0203, "step": 7166 }, { "epoch": 0.753756556719734, "grad_norm": 2.82234921943586, "learning_rate": 7.257516157258448e-07, "loss": 0.9563, "step": 7167 }, { "epoch": 0.7538617271615813, "grad_norm": 2.5257753971825565, "learning_rate": 7.251635804398871e-07, "loss": 0.9375, "step": 7168 }, { "epoch": 0.7539668976034286, "grad_norm": 2.2636444621395486, "learning_rate": 7.245757430619774e-07, "loss": 0.9048, "step": 7169 }, { "epoch": 0.7540720680452758, "grad_norm": 1.7968514845268797, "learning_rate": 7.239881036576652e-07, "loss": 1.0071, "step": 7170 }, { "epoch": 0.7541772384871231, "grad_norm": 2.5407406591367714, "learning_rate": 7.23400662292475e-07, "loss": 0.9604, "step": 7171 }, { "epoch": 0.7542824089289705, "grad_norm": 2.8060825489630976, "learning_rate": 7.228134190319131e-07, "loss": 1.016, "step": 7172 }, { "epoch": 0.7543875793708178, "grad_norm": 2.7592153012337985, "learning_rate": 7.222263739414595e-07, "loss": 0.9704, "step": 7173 }, { "epoch": 0.7544927498126651, "grad_norm": 1.982882504910971, "learning_rate": 7.216395270865759e-07, "loss": 0.9705, "step": 7174 }, { "epoch": 0.7545979202545124, "grad_norm": 2.3971758450955254, "learning_rate": 7.210528785327001e-07, "loss": 0.9987, "step": 7175 }, { "epoch": 0.7547030906963598, "grad_norm": 2.0404726366158235, "learning_rate": 7.204664283452472e-07, "loss": 0.9884, "step": 7176 }, { "epoch": 0.7548082611382071, "grad_norm": 2.169832032755285, "learning_rate": 7.198801765896115e-07, "loss": 0.9897, "step": 7177 }, { "epoch": 0.7549134315800544, "grad_norm": 3.109999050865016, "learning_rate": 7.192941233311651e-07, "loss": 1.0375, "step": 7178 }, { "epoch": 0.7550186020219017, "grad_norm": 2.4498043657390474, "learning_rate": 7.187082686352564e-07, "loss": 0.9986, "step": 7179 }, { "epoch": 0.7551237724637491, "grad_norm": 2.045678192773114, "learning_rate": 7.181226125672142e-07, "loss": 0.9928, "step": 7180 }, { "epoch": 0.7552289429055964, "grad_norm": 2.506873399635673, "learning_rate": 7.175371551923418e-07, "loss": 0.9825, "step": 7181 }, { "epoch": 0.7553341133474437, "grad_norm": 2.679384773475905, "learning_rate": 7.169518965759231e-07, "loss": 1.0099, "step": 7182 }, { "epoch": 0.755439283789291, "grad_norm": 2.6893589546797467, "learning_rate": 7.163668367832194e-07, "loss": 0.9387, "step": 7183 }, { "epoch": 0.7555444542311384, "grad_norm": 2.9496315141406906, "learning_rate": 7.157819758794699e-07, "loss": 0.9685, "step": 7184 }, { "epoch": 0.7556496246729857, "grad_norm": 2.8192968825983615, "learning_rate": 7.151973139298895e-07, "loss": 0.9811, "step": 7185 }, { "epoch": 0.755754795114833, "grad_norm": 2.823604895336445, "learning_rate": 7.146128509996736e-07, "loss": 1.0068, "step": 7186 }, { "epoch": 0.7558599655566803, "grad_norm": 2.6359789895073855, "learning_rate": 7.140285871539948e-07, "loss": 0.9866, "step": 7187 }, { "epoch": 0.7559651359985277, "grad_norm": 2.1868328887950144, "learning_rate": 7.134445224580017e-07, "loss": 0.9832, "step": 7188 }, { "epoch": 0.756070306440375, "grad_norm": 2.2297442915008046, "learning_rate": 7.128606569768237e-07, "loss": 0.9415, "step": 7189 }, { "epoch": 0.7561754768822222, "grad_norm": 2.157623716865805, "learning_rate": 7.122769907755644e-07, "loss": 1.0098, "step": 7190 }, { "epoch": 0.7562806473240695, "grad_norm": 2.393849877655623, "learning_rate": 7.116935239193085e-07, "loss": 0.9886, "step": 7191 }, { "epoch": 0.7563858177659168, "grad_norm": 2.5597033186090457, "learning_rate": 7.111102564731173e-07, "loss": 0.9664, "step": 7192 }, { "epoch": 0.7564909882077642, "grad_norm": 3.176157747900133, "learning_rate": 7.105271885020281e-07, "loss": 1.0258, "step": 7193 }, { "epoch": 0.7565961586496115, "grad_norm": 2.9605515801527513, "learning_rate": 7.099443200710595e-07, "loss": 0.9994, "step": 7194 }, { "epoch": 0.7567013290914588, "grad_norm": 2.0428204214114, "learning_rate": 7.093616512452042e-07, "loss": 0.9946, "step": 7195 }, { "epoch": 0.7568064995333061, "grad_norm": 2.5922763139539353, "learning_rate": 7.087791820894349e-07, "loss": 0.9879, "step": 7196 }, { "epoch": 0.7569116699751535, "grad_norm": 2.2625784519484355, "learning_rate": 7.081969126687014e-07, "loss": 0.9551, "step": 7197 }, { "epoch": 0.7570168404170008, "grad_norm": 2.7498123014483973, "learning_rate": 7.076148430479321e-07, "loss": 0.9919, "step": 7198 }, { "epoch": 0.7571220108588481, "grad_norm": 2.2860922030180015, "learning_rate": 7.070329732920308e-07, "loss": 0.9593, "step": 7199 }, { "epoch": 0.7572271813006954, "grad_norm": 2.1018686765589782, "learning_rate": 7.064513034658812e-07, "loss": 0.9519, "step": 7200 }, { "epoch": 0.7573323517425428, "grad_norm": 2.0969982207355655, "learning_rate": 7.05869833634345e-07, "loss": 0.964, "step": 7201 }, { "epoch": 0.7574375221843901, "grad_norm": 2.2200649084471045, "learning_rate": 7.052885638622586e-07, "loss": 0.949, "step": 7202 }, { "epoch": 0.7575426926262374, "grad_norm": 2.5288138123773556, "learning_rate": 7.047074942144399e-07, "loss": 1.0022, "step": 7203 }, { "epoch": 0.7576478630680847, "grad_norm": 2.914980411654761, "learning_rate": 7.041266247556814e-07, "loss": 0.9717, "step": 7204 }, { "epoch": 0.7577530335099321, "grad_norm": 1.7833952010134386, "learning_rate": 7.035459555507549e-07, "loss": 0.9748, "step": 7205 }, { "epoch": 0.7578582039517794, "grad_norm": 2.49777934486207, "learning_rate": 7.029654866644098e-07, "loss": 0.9616, "step": 7206 }, { "epoch": 0.7579633743936267, "grad_norm": 2.748297431592962, "learning_rate": 7.023852181613735e-07, "loss": 0.99, "step": 7207 }, { "epoch": 0.758068544835474, "grad_norm": 2.6942600142402537, "learning_rate": 7.018051501063497e-07, "loss": 1.026, "step": 7208 }, { "epoch": 0.7581737152773214, "grad_norm": 3.1894240209205713, "learning_rate": 7.01225282564019e-07, "loss": 1.0089, "step": 7209 }, { "epoch": 0.7582788857191687, "grad_norm": 1.8391440626240132, "learning_rate": 7.006456155990444e-07, "loss": 0.978, "step": 7210 }, { "epoch": 0.7583840561610159, "grad_norm": 2.4573368871580317, "learning_rate": 7.000661492760605e-07, "loss": 0.9747, "step": 7211 }, { "epoch": 0.7584892266028632, "grad_norm": 2.0727123712660758, "learning_rate": 6.994868836596841e-07, "loss": 1.0151, "step": 7212 }, { "epoch": 0.7585943970447105, "grad_norm": 1.8852340383663915, "learning_rate": 6.989078188145065e-07, "loss": 0.9984, "step": 7213 }, { "epoch": 0.7586995674865579, "grad_norm": 1.9223909811199098, "learning_rate": 6.983289548050984e-07, "loss": 1.0037, "step": 7214 }, { "epoch": 0.7588047379284052, "grad_norm": 2.520783501726942, "learning_rate": 6.977502916960083e-07, "loss": 0.9742, "step": 7215 }, { "epoch": 0.7589099083702525, "grad_norm": 2.838885326430099, "learning_rate": 6.971718295517604e-07, "loss": 1.0104, "step": 7216 }, { "epoch": 0.7590150788120998, "grad_norm": 2.9156056508616257, "learning_rate": 6.965935684368591e-07, "loss": 1.0002, "step": 7217 }, { "epoch": 0.7591202492539472, "grad_norm": 2.3167176341509514, "learning_rate": 6.960155084157835e-07, "loss": 0.9942, "step": 7218 }, { "epoch": 0.7592254196957945, "grad_norm": 2.154619542983804, "learning_rate": 6.954376495529927e-07, "loss": 0.9635, "step": 7219 }, { "epoch": 0.7593305901376418, "grad_norm": 2.708144396111361, "learning_rate": 6.94859991912922e-07, "loss": 0.9492, "step": 7220 }, { "epoch": 0.7594357605794891, "grad_norm": 2.869899495540543, "learning_rate": 6.942825355599861e-07, "loss": 0.9659, "step": 7221 }, { "epoch": 0.7595409310213365, "grad_norm": 2.7681751925235063, "learning_rate": 6.93705280558574e-07, "loss": 0.9965, "step": 7222 }, { "epoch": 0.7596461014631838, "grad_norm": 2.454449329821474, "learning_rate": 6.931282269730547e-07, "loss": 0.9723, "step": 7223 }, { "epoch": 0.7597512719050311, "grad_norm": 2.2182925652533085, "learning_rate": 6.925513748677754e-07, "loss": 0.9866, "step": 7224 }, { "epoch": 0.7598564423468784, "grad_norm": 2.3307398988105255, "learning_rate": 6.919747243070576e-07, "loss": 0.9802, "step": 7225 }, { "epoch": 0.7599616127887258, "grad_norm": 2.9729325369975705, "learning_rate": 6.91398275355204e-07, "loss": 0.9946, "step": 7226 }, { "epoch": 0.7600667832305731, "grad_norm": 2.7311855204096185, "learning_rate": 6.908220280764918e-07, "loss": 0.9488, "step": 7227 }, { "epoch": 0.7601719536724204, "grad_norm": 2.68897996658099, "learning_rate": 6.902459825351776e-07, "loss": 0.9728, "step": 7228 }, { "epoch": 0.7602771241142677, "grad_norm": 2.3536132874792934, "learning_rate": 6.896701387954957e-07, "loss": 0.9418, "step": 7229 }, { "epoch": 0.7603822945561151, "grad_norm": 2.604781814575496, "learning_rate": 6.890944969216556e-07, "loss": 0.9645, "step": 7230 }, { "epoch": 0.7604874649979623, "grad_norm": 2.703365663091066, "learning_rate": 6.885190569778477e-07, "loss": 1.0083, "step": 7231 }, { "epoch": 0.7605926354398096, "grad_norm": 2.995617326069344, "learning_rate": 6.879438190282354e-07, "loss": 0.9152, "step": 7232 }, { "epoch": 0.7606978058816569, "grad_norm": 2.4869310918420573, "learning_rate": 6.873687831369655e-07, "loss": 1.0181, "step": 7233 }, { "epoch": 0.7608029763235042, "grad_norm": 3.3720251417185545, "learning_rate": 6.867939493681563e-07, "loss": 0.9915, "step": 7234 }, { "epoch": 0.7609081467653516, "grad_norm": 2.751154189351907, "learning_rate": 6.862193177859081e-07, "loss": 1.0399, "step": 7235 }, { "epoch": 0.7610133172071989, "grad_norm": 2.9074664793566565, "learning_rate": 6.856448884542951e-07, "loss": 0.9934, "step": 7236 }, { "epoch": 0.7611184876490462, "grad_norm": 1.9259825570766822, "learning_rate": 6.850706614373715e-07, "loss": 1.0186, "step": 7237 }, { "epoch": 0.7612236580908935, "grad_norm": 2.2646689754710816, "learning_rate": 6.844966367991688e-07, "loss": 0.9753, "step": 7238 }, { "epoch": 0.7613288285327409, "grad_norm": 1.9865318190535692, "learning_rate": 6.839228146036936e-07, "loss": 0.9906, "step": 7239 }, { "epoch": 0.7614339989745882, "grad_norm": 2.101934374400583, "learning_rate": 6.833491949149329e-07, "loss": 0.9754, "step": 7240 }, { "epoch": 0.7615391694164355, "grad_norm": 2.9805412783118554, "learning_rate": 6.82775777796848e-07, "loss": 0.9668, "step": 7241 }, { "epoch": 0.7616443398582828, "grad_norm": 2.1434579040583226, "learning_rate": 6.82202563313382e-07, "loss": 1.009, "step": 7242 }, { "epoch": 0.7617495103001302, "grad_norm": 2.5155188921882763, "learning_rate": 6.816295515284513e-07, "loss": 0.9747, "step": 7243 }, { "epoch": 0.7618546807419775, "grad_norm": 2.351626885137821, "learning_rate": 6.810567425059506e-07, "loss": 0.9613, "step": 7244 }, { "epoch": 0.7619598511838248, "grad_norm": 2.499487860831982, "learning_rate": 6.804841363097536e-07, "loss": 0.998, "step": 7245 }, { "epoch": 0.7620650216256721, "grad_norm": 3.5082989671199454, "learning_rate": 6.799117330037086e-07, "loss": 1.0091, "step": 7246 }, { "epoch": 0.7621701920675195, "grad_norm": 2.5497617555706222, "learning_rate": 6.793395326516458e-07, "loss": 1.0019, "step": 7247 }, { "epoch": 0.7622753625093668, "grad_norm": 2.9850232501231315, "learning_rate": 6.787675353173675e-07, "loss": 0.9852, "step": 7248 }, { "epoch": 0.7623805329512141, "grad_norm": 2.369868434119237, "learning_rate": 6.781957410646581e-07, "loss": 0.9482, "step": 7249 }, { "epoch": 0.7624857033930614, "grad_norm": 2.2851427983587325, "learning_rate": 6.776241499572747e-07, "loss": 0.997, "step": 7250 }, { "epoch": 0.7625908738349086, "grad_norm": 2.566339810963556, "learning_rate": 6.770527620589554e-07, "loss": 0.9698, "step": 7251 }, { "epoch": 0.762696044276756, "grad_norm": 2.5226398298922685, "learning_rate": 6.764815774334149e-07, "loss": 1.0029, "step": 7252 }, { "epoch": 0.7628012147186033, "grad_norm": 2.0900487583661564, "learning_rate": 6.759105961443435e-07, "loss": 0.9886, "step": 7253 }, { "epoch": 0.7629063851604506, "grad_norm": 2.8035948742537626, "learning_rate": 6.753398182554116e-07, "loss": 0.954, "step": 7254 }, { "epoch": 0.7630115556022979, "grad_norm": 2.2764400788498347, "learning_rate": 6.747692438302624e-07, "loss": 1.0132, "step": 7255 }, { "epoch": 0.7631167260441453, "grad_norm": 3.4744512830475545, "learning_rate": 6.741988729325235e-07, "loss": 1.0112, "step": 7256 }, { "epoch": 0.7632218964859926, "grad_norm": 3.0967876034792012, "learning_rate": 6.736287056257925e-07, "loss": 0.9576, "step": 7257 }, { "epoch": 0.7633270669278399, "grad_norm": 2.346841697317024, "learning_rate": 6.730587419736492e-07, "loss": 0.9708, "step": 7258 }, { "epoch": 0.7634322373696872, "grad_norm": 2.201886938588659, "learning_rate": 6.724889820396488e-07, "loss": 0.993, "step": 7259 }, { "epoch": 0.7635374078115346, "grad_norm": 2.8480079520191044, "learning_rate": 6.719194258873216e-07, "loss": 0.9501, "step": 7260 }, { "epoch": 0.7636425782533819, "grad_norm": 2.560782072345373, "learning_rate": 6.713500735801811e-07, "loss": 0.8903, "step": 7261 }, { "epoch": 0.7637477486952292, "grad_norm": 2.159045835231064, "learning_rate": 6.707809251817121e-07, "loss": 0.9807, "step": 7262 }, { "epoch": 0.7638529191370765, "grad_norm": 1.9003145255033675, "learning_rate": 6.702119807553806e-07, "loss": 0.9616, "step": 7263 }, { "epoch": 0.7639580895789239, "grad_norm": 2.7151279520190976, "learning_rate": 6.696432403646267e-07, "loss": 0.9819, "step": 7264 }, { "epoch": 0.7640632600207712, "grad_norm": 2.657753683243195, "learning_rate": 6.690747040728702e-07, "loss": 0.9873, "step": 7265 }, { "epoch": 0.7641684304626185, "grad_norm": 1.8721650852607126, "learning_rate": 6.685063719435081e-07, "loss": 0.9638, "step": 7266 }, { "epoch": 0.7642736009044658, "grad_norm": 2.385971269188057, "learning_rate": 6.679382440399121e-07, "loss": 0.9701, "step": 7267 }, { "epoch": 0.7643787713463132, "grad_norm": 1.6808229698056358, "learning_rate": 6.673703204254348e-07, "loss": 0.9355, "step": 7268 }, { "epoch": 0.7644839417881605, "grad_norm": 2.741069590738767, "learning_rate": 6.668026011634019e-07, "loss": 0.9926, "step": 7269 }, { "epoch": 0.7645891122300078, "grad_norm": 2.117414364110104, "learning_rate": 6.662350863171207e-07, "loss": 0.9928, "step": 7270 }, { "epoch": 0.7646942826718551, "grad_norm": 3.3484101267491737, "learning_rate": 6.656677759498722e-07, "loss": 0.9656, "step": 7271 }, { "epoch": 0.7647994531137023, "grad_norm": 1.9706890021853318, "learning_rate": 6.651006701249168e-07, "loss": 1.0416, "step": 7272 }, { "epoch": 0.7649046235555497, "grad_norm": 3.0413485520399663, "learning_rate": 6.645337689054901e-07, "loss": 0.9993, "step": 7273 }, { "epoch": 0.765009793997397, "grad_norm": 2.0490262655425626, "learning_rate": 6.639670723548066e-07, "loss": 1.0095, "step": 7274 }, { "epoch": 0.7651149644392443, "grad_norm": 2.8013420666362183, "learning_rate": 6.63400580536058e-07, "loss": 0.9992, "step": 7275 }, { "epoch": 0.7652201348810916, "grad_norm": 2.047516643390938, "learning_rate": 6.628342935124113e-07, "loss": 1.0431, "step": 7276 }, { "epoch": 0.765325305322939, "grad_norm": 1.8990163407770566, "learning_rate": 6.622682113470131e-07, "loss": 0.9764, "step": 7277 }, { "epoch": 0.7654304757647863, "grad_norm": 2.450404087097505, "learning_rate": 6.617023341029835e-07, "loss": 0.9911, "step": 7278 }, { "epoch": 0.7655356462066336, "grad_norm": 2.3743125938259735, "learning_rate": 6.611366618434259e-07, "loss": 0.9901, "step": 7279 }, { "epoch": 0.7656408166484809, "grad_norm": 2.6744820836183436, "learning_rate": 6.605711946314153e-07, "loss": 0.9975, "step": 7280 }, { "epoch": 0.7657459870903283, "grad_norm": 2.2318926191610093, "learning_rate": 6.600059325300049e-07, "loss": 0.9983, "step": 7281 }, { "epoch": 0.7658511575321756, "grad_norm": 2.6271251940737734, "learning_rate": 6.594408756022272e-07, "loss": 0.9516, "step": 7282 }, { "epoch": 0.7659563279740229, "grad_norm": 2.686237334138938, "learning_rate": 6.588760239110887e-07, "loss": 1.0418, "step": 7283 }, { "epoch": 0.7660614984158702, "grad_norm": 2.7534033695073665, "learning_rate": 6.583113775195771e-07, "loss": 1.0173, "step": 7284 }, { "epoch": 0.7661666688577176, "grad_norm": 2.2831574538057744, "learning_rate": 6.577469364906527e-07, "loss": 0.9971, "step": 7285 }, { "epoch": 0.7662718392995649, "grad_norm": 2.7528580601431263, "learning_rate": 6.571827008872572e-07, "loss": 0.9858, "step": 7286 }, { "epoch": 0.7663770097414122, "grad_norm": 2.305246939209575, "learning_rate": 6.566186707723049e-07, "loss": 0.9331, "step": 7287 }, { "epoch": 0.7664821801832595, "grad_norm": 2.1919285177867747, "learning_rate": 6.560548462086911e-07, "loss": 0.9929, "step": 7288 }, { "epoch": 0.7665873506251069, "grad_norm": 2.617341622706207, "learning_rate": 6.554912272592867e-07, "loss": 0.9793, "step": 7289 }, { "epoch": 0.7666925210669542, "grad_norm": 3.2075296541728506, "learning_rate": 6.549278139869383e-07, "loss": 0.963, "step": 7290 }, { "epoch": 0.7667976915088015, "grad_norm": 2.4692337859546227, "learning_rate": 6.543646064544725e-07, "loss": 0.9813, "step": 7291 }, { "epoch": 0.7669028619506487, "grad_norm": 2.161864541098391, "learning_rate": 6.53801604724689e-07, "loss": 0.9615, "step": 7292 }, { "epoch": 0.767008032392496, "grad_norm": 2.2773528909260428, "learning_rate": 6.5323880886037e-07, "loss": 0.9771, "step": 7293 }, { "epoch": 0.7671132028343434, "grad_norm": 2.9832947081070955, "learning_rate": 6.526762189242692e-07, "loss": 1.0007, "step": 7294 }, { "epoch": 0.7672183732761907, "grad_norm": 2.715135683153078, "learning_rate": 6.521138349791209e-07, "loss": 0.9939, "step": 7295 }, { "epoch": 0.767323543718038, "grad_norm": 2.470749049910447, "learning_rate": 6.515516570876351e-07, "loss": 0.9985, "step": 7296 }, { "epoch": 0.7674287141598853, "grad_norm": 2.2143297282737655, "learning_rate": 6.509896853124972e-07, "loss": 0.9607, "step": 7297 }, { "epoch": 0.7675338846017327, "grad_norm": 2.887958915857869, "learning_rate": 6.504279197163746e-07, "loss": 0.9331, "step": 7298 }, { "epoch": 0.76763905504358, "grad_norm": 3.027736095235375, "learning_rate": 6.498663603619062e-07, "loss": 1.005, "step": 7299 }, { "epoch": 0.7677442254854273, "grad_norm": 2.6040148356675403, "learning_rate": 6.493050073117115e-07, "loss": 1.0007, "step": 7300 }, { "epoch": 0.7678493959272746, "grad_norm": 2.4651062503117127, "learning_rate": 6.487438606283847e-07, "loss": 0.9931, "step": 7301 }, { "epoch": 0.767954566369122, "grad_norm": 2.1995832711178593, "learning_rate": 6.481829203744985e-07, "loss": 0.9499, "step": 7302 }, { "epoch": 0.7680597368109693, "grad_norm": 2.7534359404754913, "learning_rate": 6.476221866126029e-07, "loss": 0.9385, "step": 7303 }, { "epoch": 0.7681649072528166, "grad_norm": 2.0684676604556387, "learning_rate": 6.470616594052223e-07, "loss": 1.0037, "step": 7304 }, { "epoch": 0.7682700776946639, "grad_norm": 2.474552308274405, "learning_rate": 6.465013388148616e-07, "loss": 0.9923, "step": 7305 }, { "epoch": 0.7683752481365113, "grad_norm": 1.7973959275988587, "learning_rate": 6.459412249039987e-07, "loss": 0.9635, "step": 7306 }, { "epoch": 0.7684804185783586, "grad_norm": 2.12766206020307, "learning_rate": 6.453813177350934e-07, "loss": 0.9721, "step": 7307 }, { "epoch": 0.7685855890202059, "grad_norm": 2.371873219751724, "learning_rate": 6.448216173705777e-07, "loss": 0.9784, "step": 7308 }, { "epoch": 0.7686907594620532, "grad_norm": 2.0694569481455987, "learning_rate": 6.442621238728639e-07, "loss": 0.9855, "step": 7309 }, { "epoch": 0.7687959299039006, "grad_norm": 3.206943381862623, "learning_rate": 6.437028373043386e-07, "loss": 1.0048, "step": 7310 }, { "epoch": 0.7689011003457479, "grad_norm": 3.365367181152907, "learning_rate": 6.431437577273669e-07, "loss": 0.9555, "step": 7311 }, { "epoch": 0.7690062707875951, "grad_norm": 2.8202002768816925, "learning_rate": 6.425848852042918e-07, "loss": 1.0238, "step": 7312 }, { "epoch": 0.7691114412294424, "grad_norm": 2.47701840199492, "learning_rate": 6.420262197974297e-07, "loss": 0.991, "step": 7313 }, { "epoch": 0.7692166116712897, "grad_norm": 2.3044685577607513, "learning_rate": 6.414677615690784e-07, "loss": 0.9681, "step": 7314 }, { "epoch": 0.7693217821131371, "grad_norm": 3.4475573948156413, "learning_rate": 6.409095105815086e-07, "loss": 0.9781, "step": 7315 }, { "epoch": 0.7694269525549844, "grad_norm": 1.8908622557377632, "learning_rate": 6.403514668969702e-07, "loss": 0.976, "step": 7316 }, { "epoch": 0.7695321229968317, "grad_norm": 2.1550644787890785, "learning_rate": 6.397936305776903e-07, "loss": 0.9988, "step": 7317 }, { "epoch": 0.769637293438679, "grad_norm": 3.0769836395456025, "learning_rate": 6.392360016858703e-07, "loss": 0.9766, "step": 7318 }, { "epoch": 0.7697424638805264, "grad_norm": 2.5329631219295625, "learning_rate": 6.38678580283692e-07, "loss": 1.0409, "step": 7319 }, { "epoch": 0.7698476343223737, "grad_norm": 3.1252877879621637, "learning_rate": 6.381213664333096e-07, "loss": 0.9786, "step": 7320 }, { "epoch": 0.769952804764221, "grad_norm": 2.477880542696044, "learning_rate": 6.375643601968598e-07, "loss": 0.985, "step": 7321 }, { "epoch": 0.7700579752060683, "grad_norm": 2.8094356003054846, "learning_rate": 6.370075616364512e-07, "loss": 1.0039, "step": 7322 }, { "epoch": 0.7701631456479157, "grad_norm": 2.8079925876475476, "learning_rate": 6.364509708141725e-07, "loss": 1.0002, "step": 7323 }, { "epoch": 0.770268316089763, "grad_norm": 2.3156139981672035, "learning_rate": 6.358945877920861e-07, "loss": 0.9701, "step": 7324 }, { "epoch": 0.7703734865316103, "grad_norm": 2.3629511594914994, "learning_rate": 6.353384126322343e-07, "loss": 1.0154, "step": 7325 }, { "epoch": 0.7704786569734576, "grad_norm": 2.898189649288246, "learning_rate": 6.347824453966354e-07, "loss": 0.9892, "step": 7326 }, { "epoch": 0.770583827415305, "grad_norm": 2.122895447793783, "learning_rate": 6.342266861472823e-07, "loss": 0.9902, "step": 7327 }, { "epoch": 0.7706889978571523, "grad_norm": 2.7711699801067486, "learning_rate": 6.336711349461486e-07, "loss": 0.9745, "step": 7328 }, { "epoch": 0.7707941682989996, "grad_norm": 2.5654011499268354, "learning_rate": 6.331157918551801e-07, "loss": 1.033, "step": 7329 }, { "epoch": 0.7708993387408469, "grad_norm": 1.7248857187294304, "learning_rate": 6.325606569363044e-07, "loss": 0.9897, "step": 7330 }, { "epoch": 0.7710045091826943, "grad_norm": 2.540837264737888, "learning_rate": 6.320057302514223e-07, "loss": 0.9716, "step": 7331 }, { "epoch": 0.7711096796245416, "grad_norm": 2.1994727634563467, "learning_rate": 6.314510118624121e-07, "loss": 1.0289, "step": 7332 }, { "epoch": 0.7712148500663888, "grad_norm": 3.2728653021918515, "learning_rate": 6.308965018311289e-07, "loss": 0.9727, "step": 7333 }, { "epoch": 0.7713200205082361, "grad_norm": 2.323117451826077, "learning_rate": 6.303422002194057e-07, "loss": 1.0584, "step": 7334 }, { "epoch": 0.7714251909500834, "grad_norm": 2.991321743338717, "learning_rate": 6.297881070890519e-07, "loss": 0.9433, "step": 7335 }, { "epoch": 0.7715303613919308, "grad_norm": 1.953839287290166, "learning_rate": 6.292342225018517e-07, "loss": 1.0006, "step": 7336 }, { "epoch": 0.7716355318337781, "grad_norm": 2.0866972087128044, "learning_rate": 6.286805465195691e-07, "loss": 0.964, "step": 7337 }, { "epoch": 0.7717407022756254, "grad_norm": 2.647572811142748, "learning_rate": 6.281270792039418e-07, "loss": 1.0017, "step": 7338 }, { "epoch": 0.7718458727174727, "grad_norm": 2.3592122810731553, "learning_rate": 6.275738206166862e-07, "loss": 0.9775, "step": 7339 }, { "epoch": 0.7719510431593201, "grad_norm": 1.9493966901790556, "learning_rate": 6.270207708194959e-07, "loss": 1.0169, "step": 7340 }, { "epoch": 0.7720562136011674, "grad_norm": 2.4294207839002384, "learning_rate": 6.264679298740389e-07, "loss": 0.9461, "step": 7341 }, { "epoch": 0.7721613840430147, "grad_norm": 2.1711248655387347, "learning_rate": 6.259152978419625e-07, "loss": 0.9646, "step": 7342 }, { "epoch": 0.772266554484862, "grad_norm": 2.526321431390834, "learning_rate": 6.253628747848872e-07, "loss": 0.9981, "step": 7343 }, { "epoch": 0.7723717249267094, "grad_norm": 2.4845350540111806, "learning_rate": 6.248106607644155e-07, "loss": 0.9788, "step": 7344 }, { "epoch": 0.7724768953685567, "grad_norm": 2.3629873655101923, "learning_rate": 6.242586558421216e-07, "loss": 0.9314, "step": 7345 }, { "epoch": 0.772582065810404, "grad_norm": 3.0934894712871963, "learning_rate": 6.237068600795593e-07, "loss": 0.9113, "step": 7346 }, { "epoch": 0.7726872362522513, "grad_norm": 2.022489507710269, "learning_rate": 6.23155273538257e-07, "loss": 1.0347, "step": 7347 }, { "epoch": 0.7727924066940987, "grad_norm": 1.9206629909055462, "learning_rate": 6.226038962797218e-07, "loss": 0.9649, "step": 7348 }, { "epoch": 0.772897577135946, "grad_norm": 2.4285553532799167, "learning_rate": 6.220527283654368e-07, "loss": 0.9841, "step": 7349 }, { "epoch": 0.7730027475777933, "grad_norm": 2.2432647116261912, "learning_rate": 6.215017698568604e-07, "loss": 0.9608, "step": 7350 }, { "epoch": 0.7731079180196406, "grad_norm": 2.2176480767044664, "learning_rate": 6.209510208154299e-07, "loss": 0.9774, "step": 7351 }, { "epoch": 0.773213088461488, "grad_norm": 2.250001090894873, "learning_rate": 6.204004813025569e-07, "loss": 1.0104, "step": 7352 }, { "epoch": 0.7733182589033352, "grad_norm": 1.5454534677927962, "learning_rate": 6.198501513796315e-07, "loss": 0.9279, "step": 7353 }, { "epoch": 0.7734234293451825, "grad_norm": 2.839261743707263, "learning_rate": 6.193000311080203e-07, "loss": 0.967, "step": 7354 }, { "epoch": 0.7735285997870298, "grad_norm": 2.6135585390560903, "learning_rate": 6.187501205490648e-07, "loss": 0.9666, "step": 7355 }, { "epoch": 0.7736337702288771, "grad_norm": 2.441585421023636, "learning_rate": 6.18200419764085e-07, "loss": 0.9487, "step": 7356 }, { "epoch": 0.7737389406707245, "grad_norm": 2.154135654667443, "learning_rate": 6.176509288143768e-07, "loss": 0.9512, "step": 7357 }, { "epoch": 0.7738441111125718, "grad_norm": 2.0470908979748317, "learning_rate": 6.17101647761213e-07, "loss": 1.009, "step": 7358 }, { "epoch": 0.7739492815544191, "grad_norm": 2.30034891783103, "learning_rate": 6.16552576665842e-07, "loss": 0.9784, "step": 7359 }, { "epoch": 0.7740544519962664, "grad_norm": 2.796522111094815, "learning_rate": 6.160037155894902e-07, "loss": 0.9799, "step": 7360 }, { "epoch": 0.7741596224381138, "grad_norm": 2.635375796645631, "learning_rate": 6.154550645933591e-07, "loss": 0.9638, "step": 7361 }, { "epoch": 0.7742647928799611, "grad_norm": 2.5343887605480924, "learning_rate": 6.149066237386278e-07, "loss": 1.0155, "step": 7362 }, { "epoch": 0.7743699633218084, "grad_norm": 2.3060590549923874, "learning_rate": 6.143583930864527e-07, "loss": 0.9765, "step": 7363 }, { "epoch": 0.7744751337636557, "grad_norm": 2.196968553332674, "learning_rate": 6.138103726979641e-07, "loss": 0.9767, "step": 7364 }, { "epoch": 0.7745803042055031, "grad_norm": 1.8053420148777697, "learning_rate": 6.132625626342723e-07, "loss": 1.0004, "step": 7365 }, { "epoch": 0.7746854746473504, "grad_norm": 1.974966755215082, "learning_rate": 6.127149629564605e-07, "loss": 1.0041, "step": 7366 }, { "epoch": 0.7747906450891977, "grad_norm": 2.4074095725370777, "learning_rate": 6.121675737255913e-07, "loss": 0.9991, "step": 7367 }, { "epoch": 0.774895815531045, "grad_norm": 2.771485579995371, "learning_rate": 6.116203950027036e-07, "loss": 0.9738, "step": 7368 }, { "epoch": 0.7750009859728924, "grad_norm": 2.5495785766223857, "learning_rate": 6.110734268488106e-07, "loss": 1.0109, "step": 7369 }, { "epoch": 0.7751061564147397, "grad_norm": 2.665289353085339, "learning_rate": 6.105266693249043e-07, "loss": 0.9922, "step": 7370 }, { "epoch": 0.775211326856587, "grad_norm": 3.2766595461201926, "learning_rate": 6.099801224919522e-07, "loss": 1.0238, "step": 7371 }, { "epoch": 0.7753164972984343, "grad_norm": 2.810829062797779, "learning_rate": 6.094337864108993e-07, "loss": 0.9732, "step": 7372 }, { "epoch": 0.7754216677402815, "grad_norm": 2.2147571794146486, "learning_rate": 6.08887661142665e-07, "loss": 0.9908, "step": 7373 }, { "epoch": 0.7755268381821289, "grad_norm": 2.1162563329579482, "learning_rate": 6.083417467481479e-07, "loss": 0.9771, "step": 7374 }, { "epoch": 0.7756320086239762, "grad_norm": 3.0165229052813265, "learning_rate": 6.077960432882202e-07, "loss": 0.9558, "step": 7375 }, { "epoch": 0.7757371790658235, "grad_norm": 2.780788058142792, "learning_rate": 6.072505508237328e-07, "loss": 0.9393, "step": 7376 }, { "epoch": 0.7758423495076708, "grad_norm": 2.746344323741756, "learning_rate": 6.067052694155132e-07, "loss": 0.9889, "step": 7377 }, { "epoch": 0.7759475199495182, "grad_norm": 2.8304408738719853, "learning_rate": 6.061601991243629e-07, "loss": 0.976, "step": 7378 }, { "epoch": 0.7760526903913655, "grad_norm": 2.7760091448267428, "learning_rate": 6.056153400110623e-07, "loss": 0.9813, "step": 7379 }, { "epoch": 0.7761578608332128, "grad_norm": 2.5521937340727234, "learning_rate": 6.050706921363672e-07, "loss": 0.9967, "step": 7380 }, { "epoch": 0.7762630312750601, "grad_norm": 2.179859514373383, "learning_rate": 6.045262555610113e-07, "loss": 0.9829, "step": 7381 }, { "epoch": 0.7763682017169075, "grad_norm": 2.7632582697479187, "learning_rate": 6.039820303457022e-07, "loss": 1.0111, "step": 7382 }, { "epoch": 0.7764733721587548, "grad_norm": 2.822582215401159, "learning_rate": 6.034380165511247e-07, "loss": 0.996, "step": 7383 }, { "epoch": 0.7765785426006021, "grad_norm": 2.8025819156968517, "learning_rate": 6.028942142379416e-07, "loss": 0.9985, "step": 7384 }, { "epoch": 0.7766837130424494, "grad_norm": 2.311116198164196, "learning_rate": 6.023506234667908e-07, "loss": 0.9911, "step": 7385 }, { "epoch": 0.7767888834842968, "grad_norm": 2.7484844931535504, "learning_rate": 6.018072442982875e-07, "loss": 1.0146, "step": 7386 }, { "epoch": 0.7768940539261441, "grad_norm": 2.3912901717058896, "learning_rate": 6.012640767930217e-07, "loss": 0.9586, "step": 7387 }, { "epoch": 0.7769992243679914, "grad_norm": 2.7928691824138614, "learning_rate": 6.00721121011561e-07, "loss": 0.9506, "step": 7388 }, { "epoch": 0.7771043948098387, "grad_norm": 2.0128303006970305, "learning_rate": 6.001783770144504e-07, "loss": 1.0171, "step": 7389 }, { "epoch": 0.777209565251686, "grad_norm": 2.240404598690575, "learning_rate": 5.99635844862208e-07, "loss": 0.9801, "step": 7390 }, { "epoch": 0.7773147356935334, "grad_norm": 2.001832281945611, "learning_rate": 5.990935246153326e-07, "loss": 0.9841, "step": 7391 }, { "epoch": 0.7774199061353807, "grad_norm": 2.653987884349642, "learning_rate": 5.985514163342948e-07, "loss": 0.9892, "step": 7392 }, { "epoch": 0.777525076577228, "grad_norm": 2.5723150950012834, "learning_rate": 5.980095200795452e-07, "loss": 0.9935, "step": 7393 }, { "epoch": 0.7776302470190752, "grad_norm": 2.0286439737482875, "learning_rate": 5.974678359115094e-07, "loss": 0.9336, "step": 7394 }, { "epoch": 0.7777354174609226, "grad_norm": 2.672262654260771, "learning_rate": 5.9692636389059e-07, "loss": 0.9862, "step": 7395 }, { "epoch": 0.7778405879027699, "grad_norm": 2.9017249311317173, "learning_rate": 5.963851040771639e-07, "loss": 0.9836, "step": 7396 }, { "epoch": 0.7779457583446172, "grad_norm": 3.0804120297128468, "learning_rate": 5.958440565315871e-07, "loss": 1.0163, "step": 7397 }, { "epoch": 0.7780509287864645, "grad_norm": 2.6802032536538176, "learning_rate": 5.953032213141894e-07, "loss": 0.9765, "step": 7398 }, { "epoch": 0.7781560992283119, "grad_norm": 2.051038288544476, "learning_rate": 5.947625984852787e-07, "loss": 0.9461, "step": 7399 }, { "epoch": 0.7782612696701592, "grad_norm": 2.1836813883193362, "learning_rate": 5.942221881051394e-07, "loss": 0.9772, "step": 7400 }, { "epoch": 0.7783664401120065, "grad_norm": 2.6840419851820956, "learning_rate": 5.936819902340299e-07, "loss": 1.0152, "step": 7401 }, { "epoch": 0.7784716105538538, "grad_norm": 2.431122357081175, "learning_rate": 5.931420049321873e-07, "loss": 0.9785, "step": 7402 }, { "epoch": 0.7785767809957012, "grad_norm": 2.439690169026877, "learning_rate": 5.926022322598249e-07, "loss": 0.9506, "step": 7403 }, { "epoch": 0.7786819514375485, "grad_norm": 3.004232477345118, "learning_rate": 5.920626722771303e-07, "loss": 0.9238, "step": 7404 }, { "epoch": 0.7787871218793958, "grad_norm": 2.7134097989656207, "learning_rate": 5.915233250442695e-07, "loss": 0.9726, "step": 7405 }, { "epoch": 0.7788922923212431, "grad_norm": 3.4146505334812702, "learning_rate": 5.909841906213828e-07, "loss": 1.0107, "step": 7406 }, { "epoch": 0.7789974627630905, "grad_norm": 1.9095501012682825, "learning_rate": 5.904452690685888e-07, "loss": 1.0127, "step": 7407 }, { "epoch": 0.7791026332049378, "grad_norm": 2.5125920673209254, "learning_rate": 5.899065604459814e-07, "loss": 0.9826, "step": 7408 }, { "epoch": 0.7792078036467851, "grad_norm": 2.028561314466869, "learning_rate": 5.893680648136311e-07, "loss": 0.9689, "step": 7409 }, { "epoch": 0.7793129740886324, "grad_norm": 2.20112168268948, "learning_rate": 5.888297822315831e-07, "loss": 0.9506, "step": 7410 }, { "epoch": 0.7794181445304798, "grad_norm": 1.8066767860491197, "learning_rate": 5.882917127598608e-07, "loss": 0.9993, "step": 7411 }, { "epoch": 0.7795233149723271, "grad_norm": 2.975379972499003, "learning_rate": 5.877538564584642e-07, "loss": 0.9631, "step": 7412 }, { "epoch": 0.7796284854141744, "grad_norm": 2.533926267530022, "learning_rate": 5.872162133873666e-07, "loss": 0.9686, "step": 7413 }, { "epoch": 0.7797336558560216, "grad_norm": 2.5144691976081175, "learning_rate": 5.866787836065211e-07, "loss": 1.0199, "step": 7414 }, { "epoch": 0.7798388262978689, "grad_norm": 1.9435003292682747, "learning_rate": 5.861415671758536e-07, "loss": 0.977, "step": 7415 }, { "epoch": 0.7799439967397163, "grad_norm": 2.516714132211873, "learning_rate": 5.856045641552685e-07, "loss": 1.0082, "step": 7416 }, { "epoch": 0.7800491671815636, "grad_norm": 2.283896919126085, "learning_rate": 5.850677746046471e-07, "loss": 0.9556, "step": 7417 }, { "epoch": 0.7801543376234109, "grad_norm": 2.396317543406843, "learning_rate": 5.845311985838437e-07, "loss": 1.0118, "step": 7418 }, { "epoch": 0.7802595080652582, "grad_norm": 1.8411662172460828, "learning_rate": 5.83994836152692e-07, "loss": 1.0051, "step": 7419 }, { "epoch": 0.7803646785071056, "grad_norm": 2.1120279732981038, "learning_rate": 5.83458687371e-07, "loss": 0.9711, "step": 7420 }, { "epoch": 0.7804698489489529, "grad_norm": 2.5159122284997815, "learning_rate": 5.82922752298552e-07, "loss": 1.0153, "step": 7421 }, { "epoch": 0.7805750193908002, "grad_norm": 3.0017293005335217, "learning_rate": 5.823870309951096e-07, "loss": 0.9498, "step": 7422 }, { "epoch": 0.7806801898326475, "grad_norm": 2.7311817018256264, "learning_rate": 5.818515235204109e-07, "loss": 1.0429, "step": 7423 }, { "epoch": 0.7807853602744949, "grad_norm": 2.1998485383151873, "learning_rate": 5.813162299341669e-07, "loss": 0.971, "step": 7424 }, { "epoch": 0.7808905307163422, "grad_norm": 2.364551392600312, "learning_rate": 5.807811502960683e-07, "loss": 0.9979, "step": 7425 }, { "epoch": 0.7809957011581895, "grad_norm": 2.574638685273489, "learning_rate": 5.802462846657811e-07, "loss": 1.0097, "step": 7426 }, { "epoch": 0.7811008716000368, "grad_norm": 2.1856284743115335, "learning_rate": 5.797116331029456e-07, "loss": 0.9878, "step": 7427 }, { "epoch": 0.7812060420418842, "grad_norm": 2.1441801091958776, "learning_rate": 5.79177195667181e-07, "loss": 1.005, "step": 7428 }, { "epoch": 0.7813112124837315, "grad_norm": 2.3913829746681805, "learning_rate": 5.786429724180801e-07, "loss": 0.9599, "step": 7429 }, { "epoch": 0.7814163829255788, "grad_norm": 2.4163311751474006, "learning_rate": 5.781089634152131e-07, "loss": 0.9963, "step": 7430 }, { "epoch": 0.7815215533674261, "grad_norm": 3.0068250817206343, "learning_rate": 5.775751687181266e-07, "loss": 0.9787, "step": 7431 }, { "epoch": 0.7816267238092734, "grad_norm": 2.896952363328142, "learning_rate": 5.770415883863434e-07, "loss": 1.0102, "step": 7432 }, { "epoch": 0.7817318942511208, "grad_norm": 2.342426916784213, "learning_rate": 5.765082224793611e-07, "loss": 0.9588, "step": 7433 }, { "epoch": 0.781837064692968, "grad_norm": 2.322954163018337, "learning_rate": 5.75975071056653e-07, "loss": 0.9995, "step": 7434 }, { "epoch": 0.7819422351348153, "grad_norm": 2.7124852787883476, "learning_rate": 5.75442134177672e-07, "loss": 0.9848, "step": 7435 }, { "epoch": 0.7820474055766626, "grad_norm": 2.262898311145278, "learning_rate": 5.749094119018431e-07, "loss": 1.0258, "step": 7436 }, { "epoch": 0.78215257601851, "grad_norm": 2.4401944041421793, "learning_rate": 5.743769042885697e-07, "loss": 0.9959, "step": 7437 }, { "epoch": 0.7822577464603573, "grad_norm": 3.501822098234867, "learning_rate": 5.738446113972298e-07, "loss": 1.0054, "step": 7438 }, { "epoch": 0.7823629169022046, "grad_norm": 2.4007429912831073, "learning_rate": 5.733125332871786e-07, "loss": 0.9699, "step": 7439 }, { "epoch": 0.7824680873440519, "grad_norm": 1.5286404234678088, "learning_rate": 5.727806700177477e-07, "loss": 0.9517, "step": 7440 }, { "epoch": 0.7825732577858993, "grad_norm": 1.9106204956021087, "learning_rate": 5.722490216482424e-07, "loss": 0.9594, "step": 7441 }, { "epoch": 0.7826784282277466, "grad_norm": 2.392573929678364, "learning_rate": 5.717175882379473e-07, "loss": 0.9796, "step": 7442 }, { "epoch": 0.7827835986695939, "grad_norm": 2.4076467512658484, "learning_rate": 5.711863698461198e-07, "loss": 0.9668, "step": 7443 }, { "epoch": 0.7828887691114412, "grad_norm": 2.9227877491691956, "learning_rate": 5.706553665319955e-07, "loss": 1.0166, "step": 7444 }, { "epoch": 0.7829939395532886, "grad_norm": 1.7837281364826976, "learning_rate": 5.701245783547856e-07, "loss": 0.9527, "step": 7445 }, { "epoch": 0.7830991099951359, "grad_norm": 3.8675285298144995, "learning_rate": 5.695940053736779e-07, "loss": 0.9783, "step": 7446 }, { "epoch": 0.7832042804369832, "grad_norm": 2.6749508025236657, "learning_rate": 5.690636476478337e-07, "loss": 0.9893, "step": 7447 }, { "epoch": 0.7833094508788305, "grad_norm": 2.344089841049213, "learning_rate": 5.685335052363927e-07, "loss": 0.9719, "step": 7448 }, { "epoch": 0.7834146213206779, "grad_norm": 2.6937529790986607, "learning_rate": 5.680035781984708e-07, "loss": 0.9855, "step": 7449 }, { "epoch": 0.7835197917625252, "grad_norm": 2.309732894716535, "learning_rate": 5.674738665931575e-07, "loss": 0.9556, "step": 7450 }, { "epoch": 0.7836249622043725, "grad_norm": 2.380015885121929, "learning_rate": 5.669443704795214e-07, "loss": 1.007, "step": 7451 }, { "epoch": 0.7837301326462198, "grad_norm": 2.175460160357494, "learning_rate": 5.664150899166035e-07, "loss": 0.9653, "step": 7452 }, { "epoch": 0.7838353030880671, "grad_norm": 2.493955497046477, "learning_rate": 5.658860249634237e-07, "loss": 0.9901, "step": 7453 }, { "epoch": 0.7839404735299145, "grad_norm": 3.0014222312856185, "learning_rate": 5.653571756789777e-07, "loss": 0.9816, "step": 7454 }, { "epoch": 0.7840456439717617, "grad_norm": 1.879178780187409, "learning_rate": 5.648285421222347e-07, "loss": 0.9732, "step": 7455 }, { "epoch": 0.784150814413609, "grad_norm": 2.4428944524881455, "learning_rate": 5.643001243521429e-07, "loss": 0.9955, "step": 7456 }, { "epoch": 0.7842559848554563, "grad_norm": 2.4832015579932363, "learning_rate": 5.637719224276228e-07, "loss": 0.982, "step": 7457 }, { "epoch": 0.7843611552973037, "grad_norm": 2.7676659547524247, "learning_rate": 5.632439364075759e-07, "loss": 1.0221, "step": 7458 }, { "epoch": 0.784466325739151, "grad_norm": 2.900996870688416, "learning_rate": 5.627161663508745e-07, "loss": 0.9675, "step": 7459 }, { "epoch": 0.7845714961809983, "grad_norm": 2.6087512206617434, "learning_rate": 5.621886123163708e-07, "loss": 1.0026, "step": 7460 }, { "epoch": 0.7846766666228456, "grad_norm": 2.075036647944145, "learning_rate": 5.616612743628896e-07, "loss": 0.9838, "step": 7461 }, { "epoch": 0.784781837064693, "grad_norm": 2.300780190568228, "learning_rate": 5.611341525492337e-07, "loss": 0.9434, "step": 7462 }, { "epoch": 0.7848870075065403, "grad_norm": 1.8042294075183316, "learning_rate": 5.606072469341823e-07, "loss": 0.9684, "step": 7463 }, { "epoch": 0.7849921779483876, "grad_norm": 2.5965873366492382, "learning_rate": 5.600805575764878e-07, "loss": 1.0404, "step": 7464 }, { "epoch": 0.7850973483902349, "grad_norm": 2.5263383523718552, "learning_rate": 5.595540845348815e-07, "loss": 0.96, "step": 7465 }, { "epoch": 0.7852025188320823, "grad_norm": 2.5277784570322916, "learning_rate": 5.590278278680683e-07, "loss": 1.0546, "step": 7466 }, { "epoch": 0.7853076892739296, "grad_norm": 2.7356237220044166, "learning_rate": 5.585017876347301e-07, "loss": 0.9886, "step": 7467 }, { "epoch": 0.7854128597157769, "grad_norm": 2.8495412546660472, "learning_rate": 5.579759638935254e-07, "loss": 1.0331, "step": 7468 }, { "epoch": 0.7855180301576242, "grad_norm": 2.053798417338012, "learning_rate": 5.574503567030861e-07, "loss": 0.9651, "step": 7469 }, { "epoch": 0.7856232005994715, "grad_norm": 3.106394812966373, "learning_rate": 5.569249661220233e-07, "loss": 0.9802, "step": 7470 }, { "epoch": 0.7857283710413189, "grad_norm": 2.8187679442298954, "learning_rate": 5.563997922089193e-07, "loss": 1.0118, "step": 7471 }, { "epoch": 0.7858335414831662, "grad_norm": 2.5263959039316113, "learning_rate": 5.558748350223386e-07, "loss": 0.9958, "step": 7472 }, { "epoch": 0.7859387119250135, "grad_norm": 2.6283622778437747, "learning_rate": 5.553500946208154e-07, "loss": 0.9912, "step": 7473 }, { "epoch": 0.7860438823668608, "grad_norm": 1.9388197155123486, "learning_rate": 5.548255710628639e-07, "loss": 1.0043, "step": 7474 }, { "epoch": 0.7861490528087081, "grad_norm": 3.725787391804805, "learning_rate": 5.543012644069714e-07, "loss": 0.9957, "step": 7475 }, { "epoch": 0.7862542232505554, "grad_norm": 2.8279644242846778, "learning_rate": 5.537771747116024e-07, "loss": 1.0032, "step": 7476 }, { "epoch": 0.7863593936924027, "grad_norm": 2.05494665096958, "learning_rate": 5.53253302035198e-07, "loss": 0.9707, "step": 7477 }, { "epoch": 0.78646456413425, "grad_norm": 2.2246914275505922, "learning_rate": 5.527296464361725e-07, "loss": 0.9912, "step": 7478 }, { "epoch": 0.7865697345760974, "grad_norm": 2.390526330629458, "learning_rate": 5.522062079729192e-07, "loss": 1.0125, "step": 7479 }, { "epoch": 0.7866749050179447, "grad_norm": 2.3621903521012877, "learning_rate": 5.516829867038034e-07, "loss": 0.9723, "step": 7480 }, { "epoch": 0.786780075459792, "grad_norm": 2.432774028158043, "learning_rate": 5.511599826871708e-07, "loss": 0.9861, "step": 7481 }, { "epoch": 0.7868852459016393, "grad_norm": 3.122179435897935, "learning_rate": 5.506371959813386e-07, "loss": 0.9478, "step": 7482 }, { "epoch": 0.7869904163434867, "grad_norm": 2.2616352167488705, "learning_rate": 5.501146266446031e-07, "loss": 0.9887, "step": 7483 }, { "epoch": 0.787095586785334, "grad_norm": 1.7783208579319671, "learning_rate": 5.495922747352336e-07, "loss": 0.9767, "step": 7484 }, { "epoch": 0.7872007572271813, "grad_norm": 2.7785258155561667, "learning_rate": 5.490701403114757e-07, "loss": 1.0049, "step": 7485 }, { "epoch": 0.7873059276690286, "grad_norm": 2.063969890861771, "learning_rate": 5.485482234315537e-07, "loss": 0.994, "step": 7486 }, { "epoch": 0.787411098110876, "grad_norm": 1.9172938948485974, "learning_rate": 5.480265241536636e-07, "loss": 0.981, "step": 7487 }, { "epoch": 0.7875162685527233, "grad_norm": 2.3568236845146515, "learning_rate": 5.475050425359805e-07, "loss": 0.9963, "step": 7488 }, { "epoch": 0.7876214389945706, "grad_norm": 2.365770278271118, "learning_rate": 5.469837786366514e-07, "loss": 0.9908, "step": 7489 }, { "epoch": 0.7877266094364179, "grad_norm": 2.0233878116931843, "learning_rate": 5.464627325138031e-07, "loss": 0.955, "step": 7490 }, { "epoch": 0.7878317798782652, "grad_norm": 2.8116469349236874, "learning_rate": 5.45941904225536e-07, "loss": 0.9791, "step": 7491 }, { "epoch": 0.7879369503201126, "grad_norm": 2.389189229680465, "learning_rate": 5.454212938299256e-07, "loss": 0.9532, "step": 7492 }, { "epoch": 0.7880421207619599, "grad_norm": 3.2244042201754755, "learning_rate": 5.449009013850253e-07, "loss": 1.0245, "step": 7493 }, { "epoch": 0.7881472912038072, "grad_norm": 2.350726006073612, "learning_rate": 5.443807269488613e-07, "loss": 0.9812, "step": 7494 }, { "epoch": 0.7882524616456544, "grad_norm": 2.33734989566254, "learning_rate": 5.438607705794391e-07, "loss": 0.977, "step": 7495 }, { "epoch": 0.7883576320875018, "grad_norm": 3.1617106808359474, "learning_rate": 5.43341032334736e-07, "loss": 1.0034, "step": 7496 }, { "epoch": 0.7884628025293491, "grad_norm": 2.062664190865852, "learning_rate": 5.428215122727084e-07, "loss": 0.9338, "step": 7497 }, { "epoch": 0.7885679729711964, "grad_norm": 2.541915976209122, "learning_rate": 5.423022104512854e-07, "loss": 1.01, "step": 7498 }, { "epoch": 0.7886731434130437, "grad_norm": 2.6573006964220665, "learning_rate": 5.417831269283741e-07, "loss": 0.9934, "step": 7499 }, { "epoch": 0.7887783138548911, "grad_norm": 3.667198982465556, "learning_rate": 5.412642617618565e-07, "loss": 1.0216, "step": 7500 }, { "epoch": 0.7888834842967384, "grad_norm": 2.450330102335447, "learning_rate": 5.407456150095891e-07, "loss": 0.9705, "step": 7501 }, { "epoch": 0.7889886547385857, "grad_norm": 2.6639634437563466, "learning_rate": 5.402271867294062e-07, "loss": 1.0375, "step": 7502 }, { "epoch": 0.789093825180433, "grad_norm": 2.215568756488248, "learning_rate": 5.39708976979115e-07, "loss": 0.9998, "step": 7503 }, { "epoch": 0.7891989956222804, "grad_norm": 2.7328182673914556, "learning_rate": 5.39190985816502e-07, "loss": 1.0026, "step": 7504 }, { "epoch": 0.7893041660641277, "grad_norm": 2.3163502581110684, "learning_rate": 5.386732132993264e-07, "loss": 0.9868, "step": 7505 }, { "epoch": 0.789409336505975, "grad_norm": 2.0522924122717083, "learning_rate": 5.381556594853226e-07, "loss": 0.9908, "step": 7506 }, { "epoch": 0.7895145069478223, "grad_norm": 2.6138338131436214, "learning_rate": 5.376383244322039e-07, "loss": 0.9801, "step": 7507 }, { "epoch": 0.7896196773896696, "grad_norm": 1.6691242712576417, "learning_rate": 5.371212081976548e-07, "loss": 0.9766, "step": 7508 }, { "epoch": 0.789724847831517, "grad_norm": 2.8741546215777496, "learning_rate": 5.366043108393407e-07, "loss": 0.9829, "step": 7509 }, { "epoch": 0.7898300182733643, "grad_norm": 2.2165275941322284, "learning_rate": 5.360876324148972e-07, "loss": 0.9629, "step": 7510 }, { "epoch": 0.7899351887152116, "grad_norm": 1.605507267413963, "learning_rate": 5.355711729819396e-07, "loss": 0.9566, "step": 7511 }, { "epoch": 0.790040359157059, "grad_norm": 2.2184467134191035, "learning_rate": 5.350549325980558e-07, "loss": 0.9624, "step": 7512 }, { "epoch": 0.7901455295989063, "grad_norm": 2.840349144152613, "learning_rate": 5.34538911320811e-07, "loss": 0.9807, "step": 7513 }, { "epoch": 0.7902507000407536, "grad_norm": 2.6440663804972333, "learning_rate": 5.340231092077469e-07, "loss": 0.998, "step": 7514 }, { "epoch": 0.7903558704826009, "grad_norm": 2.9746470518481423, "learning_rate": 5.335075263163774e-07, "loss": 0.9896, "step": 7515 }, { "epoch": 0.7904610409244481, "grad_norm": 2.217261654951149, "learning_rate": 5.329921627041959e-07, "loss": 0.9776, "step": 7516 }, { "epoch": 0.7905662113662955, "grad_norm": 2.275129661652413, "learning_rate": 5.324770184286668e-07, "loss": 0.9754, "step": 7517 }, { "epoch": 0.7906713818081428, "grad_norm": 2.125756351189819, "learning_rate": 5.319620935472361e-07, "loss": 0.9576, "step": 7518 }, { "epoch": 0.7907765522499901, "grad_norm": 1.8544116319858905, "learning_rate": 5.314473881173193e-07, "loss": 1.0268, "step": 7519 }, { "epoch": 0.7908817226918374, "grad_norm": 2.7586623648851947, "learning_rate": 5.309329021963116e-07, "loss": 0.9541, "step": 7520 }, { "epoch": 0.7909868931336848, "grad_norm": 2.0077432117735503, "learning_rate": 5.30418635841582e-07, "loss": 1.0106, "step": 7521 }, { "epoch": 0.7910920635755321, "grad_norm": 2.211322527248184, "learning_rate": 5.299045891104729e-07, "loss": 0.9729, "step": 7522 }, { "epoch": 0.7911972340173794, "grad_norm": 3.561907418437129, "learning_rate": 5.293907620603081e-07, "loss": 1.0103, "step": 7523 }, { "epoch": 0.7913024044592267, "grad_norm": 2.919228767233691, "learning_rate": 5.288771547483807e-07, "loss": 1.0016, "step": 7524 }, { "epoch": 0.791407574901074, "grad_norm": 2.4514739629485764, "learning_rate": 5.283637672319633e-07, "loss": 1.0114, "step": 7525 }, { "epoch": 0.7915127453429214, "grad_norm": 2.248230346805148, "learning_rate": 5.278505995683014e-07, "loss": 0.9975, "step": 7526 }, { "epoch": 0.7916179157847687, "grad_norm": 2.299965654128683, "learning_rate": 5.273376518146181e-07, "loss": 0.9858, "step": 7527 }, { "epoch": 0.791723086226616, "grad_norm": 2.631945993060661, "learning_rate": 5.268249240281112e-07, "loss": 1.053, "step": 7528 }, { "epoch": 0.7918282566684633, "grad_norm": 2.965446830432594, "learning_rate": 5.263124162659531e-07, "loss": 1.0042, "step": 7529 }, { "epoch": 0.7919334271103107, "grad_norm": 2.8523687341385866, "learning_rate": 5.258001285852931e-07, "loss": 0.998, "step": 7530 }, { "epoch": 0.792038597552158, "grad_norm": 2.8763350701028076, "learning_rate": 5.252880610432537e-07, "loss": 0.9963, "step": 7531 }, { "epoch": 0.7921437679940053, "grad_norm": 2.7401215806024384, "learning_rate": 5.247762136969367e-07, "loss": 0.9984, "step": 7532 }, { "epoch": 0.7922489384358526, "grad_norm": 2.381855054501238, "learning_rate": 5.242645866034154e-07, "loss": 0.9807, "step": 7533 }, { "epoch": 0.7923541088777, "grad_norm": 2.2923915086079303, "learning_rate": 5.237531798197415e-07, "loss": 0.9697, "step": 7534 }, { "epoch": 0.7924592793195473, "grad_norm": 2.5095749130378695, "learning_rate": 5.232419934029395e-07, "loss": 0.9681, "step": 7535 }, { "epoch": 0.7925644497613945, "grad_norm": 2.3453279983832793, "learning_rate": 5.227310274100112e-07, "loss": 1.0093, "step": 7536 }, { "epoch": 0.7926696202032418, "grad_norm": 2.140545124978573, "learning_rate": 5.222202818979338e-07, "loss": 0.9813, "step": 7537 }, { "epoch": 0.7927747906450892, "grad_norm": 2.4063998911718634, "learning_rate": 5.217097569236581e-07, "loss": 0.9952, "step": 7538 }, { "epoch": 0.7928799610869365, "grad_norm": 3.1685790964036076, "learning_rate": 5.211994525441133e-07, "loss": 0.9982, "step": 7539 }, { "epoch": 0.7929851315287838, "grad_norm": 2.789219170600396, "learning_rate": 5.20689368816201e-07, "loss": 1.0258, "step": 7540 }, { "epoch": 0.7930903019706311, "grad_norm": 2.2210430744775196, "learning_rate": 5.201795057967995e-07, "loss": 1.0254, "step": 7541 }, { "epoch": 0.7931954724124785, "grad_norm": 2.7466488855873314, "learning_rate": 5.196698635427636e-07, "loss": 0.9965, "step": 7542 }, { "epoch": 0.7933006428543258, "grad_norm": 2.225180678588441, "learning_rate": 5.191604421109211e-07, "loss": 0.986, "step": 7543 }, { "epoch": 0.7934058132961731, "grad_norm": 2.553086727079915, "learning_rate": 5.186512415580778e-07, "loss": 0.971, "step": 7544 }, { "epoch": 0.7935109837380204, "grad_norm": 2.382433854163622, "learning_rate": 5.181422619410109e-07, "loss": 0.9843, "step": 7545 }, { "epoch": 0.7936161541798677, "grad_norm": 2.3343492155076127, "learning_rate": 5.176335033164793e-07, "loss": 1.0111, "step": 7546 }, { "epoch": 0.7937213246217151, "grad_norm": 2.256309064903577, "learning_rate": 5.171249657412106e-07, "loss": 0.9924, "step": 7547 }, { "epoch": 0.7938264950635624, "grad_norm": 2.8424568000686192, "learning_rate": 5.166166492719124e-07, "loss": 0.9672, "step": 7548 }, { "epoch": 0.7939316655054097, "grad_norm": 2.563574424214626, "learning_rate": 5.161085539652649e-07, "loss": 0.9811, "step": 7549 }, { "epoch": 0.794036835947257, "grad_norm": 1.8740387198796324, "learning_rate": 5.156006798779248e-07, "loss": 1.0023, "step": 7550 }, { "epoch": 0.7941420063891044, "grad_norm": 2.7294780666990612, "learning_rate": 5.15093027066525e-07, "loss": 0.9843, "step": 7551 }, { "epoch": 0.7942471768309517, "grad_norm": 2.4042826855611787, "learning_rate": 5.145855955876713e-07, "loss": 1.0067, "step": 7552 }, { "epoch": 0.794352347272799, "grad_norm": 2.6315789434241643, "learning_rate": 5.140783854979475e-07, "loss": 0.9392, "step": 7553 }, { "epoch": 0.7944575177146463, "grad_norm": 2.3247912098399364, "learning_rate": 5.135713968539096e-07, "loss": 1.0035, "step": 7554 }, { "epoch": 0.7945626881564937, "grad_norm": 2.311734008689543, "learning_rate": 5.130646297120936e-07, "loss": 1.0073, "step": 7555 }, { "epoch": 0.7946678585983409, "grad_norm": 2.771031149106594, "learning_rate": 5.125580841290063e-07, "loss": 0.9839, "step": 7556 }, { "epoch": 0.7947730290401882, "grad_norm": 2.5613460622634263, "learning_rate": 5.12051760161131e-07, "loss": 0.964, "step": 7557 }, { "epoch": 0.7948781994820355, "grad_norm": 2.9699017410914097, "learning_rate": 5.115456578649272e-07, "loss": 0.9706, "step": 7558 }, { "epoch": 0.7949833699238829, "grad_norm": 2.8383038959737035, "learning_rate": 5.110397772968295e-07, "loss": 1.0041, "step": 7559 }, { "epoch": 0.7950885403657302, "grad_norm": 2.4690199798689925, "learning_rate": 5.105341185132481e-07, "loss": 1.0097, "step": 7560 }, { "epoch": 0.7951937108075775, "grad_norm": 2.253149181816096, "learning_rate": 5.100286815705665e-07, "loss": 0.973, "step": 7561 }, { "epoch": 0.7952988812494248, "grad_norm": 2.6862303113933934, "learning_rate": 5.095234665251464e-07, "loss": 0.9876, "step": 7562 }, { "epoch": 0.7954040516912722, "grad_norm": 2.7435662802894734, "learning_rate": 5.090184734333214e-07, "loss": 0.9361, "step": 7563 }, { "epoch": 0.7955092221331195, "grad_norm": 2.149989633339049, "learning_rate": 5.085137023514034e-07, "loss": 1.0072, "step": 7564 }, { "epoch": 0.7956143925749668, "grad_norm": 2.5740579082453494, "learning_rate": 5.080091533356784e-07, "loss": 0.9908, "step": 7565 }, { "epoch": 0.7957195630168141, "grad_norm": 2.3471609710728822, "learning_rate": 5.075048264424065e-07, "loss": 1.0258, "step": 7566 }, { "epoch": 0.7958247334586614, "grad_norm": 2.406035264757779, "learning_rate": 5.070007217278253e-07, "loss": 0.9846, "step": 7567 }, { "epoch": 0.7959299039005088, "grad_norm": 2.117858329168068, "learning_rate": 5.064968392481448e-07, "loss": 0.9419, "step": 7568 }, { "epoch": 0.7960350743423561, "grad_norm": 2.193661332028775, "learning_rate": 5.059931790595538e-07, "loss": 0.9726, "step": 7569 }, { "epoch": 0.7961402447842034, "grad_norm": 3.4342436943341283, "learning_rate": 5.054897412182128e-07, "loss": 0.9513, "step": 7570 }, { "epoch": 0.7962454152260507, "grad_norm": 2.6107851702888367, "learning_rate": 5.049865257802599e-07, "loss": 1.0154, "step": 7571 }, { "epoch": 0.7963505856678981, "grad_norm": 2.593692528488154, "learning_rate": 5.044835328018069e-07, "loss": 0.9833, "step": 7572 }, { "epoch": 0.7964557561097454, "grad_norm": 2.374728480707458, "learning_rate": 5.039807623389418e-07, "loss": 0.9846, "step": 7573 }, { "epoch": 0.7965609265515927, "grad_norm": 3.1091749055796667, "learning_rate": 5.034782144477276e-07, "loss": 1.0286, "step": 7574 }, { "epoch": 0.79666609699344, "grad_norm": 2.3409299896756965, "learning_rate": 5.029758891842015e-07, "loss": 1.0111, "step": 7575 }, { "epoch": 0.7967712674352874, "grad_norm": 2.659765654383437, "learning_rate": 5.02473786604378e-07, "loss": 0.9769, "step": 7576 }, { "epoch": 0.7968764378771346, "grad_norm": 2.6745888226219923, "learning_rate": 5.019719067642437e-07, "loss": 0.9902, "step": 7577 }, { "epoch": 0.7969816083189819, "grad_norm": 2.987835654743405, "learning_rate": 5.014702497197632e-07, "loss": 1.028, "step": 7578 }, { "epoch": 0.7970867787608292, "grad_norm": 2.401381290760075, "learning_rate": 5.009688155268755e-07, "loss": 0.9726, "step": 7579 }, { "epoch": 0.7971919492026766, "grad_norm": 2.120903536373421, "learning_rate": 5.004676042414936e-07, "loss": 0.9738, "step": 7580 }, { "epoch": 0.7972971196445239, "grad_norm": 2.2322743664947065, "learning_rate": 4.999666159195063e-07, "loss": 0.9493, "step": 7581 }, { "epoch": 0.7974022900863712, "grad_norm": 3.0099693723923004, "learning_rate": 4.994658506167782e-07, "loss": 1.0182, "step": 7582 }, { "epoch": 0.7975074605282185, "grad_norm": 1.656585929583154, "learning_rate": 4.989653083891493e-07, "loss": 0.9718, "step": 7583 }, { "epoch": 0.7976126309700659, "grad_norm": 2.5313550286466056, "learning_rate": 4.984649892924323e-07, "loss": 0.9698, "step": 7584 }, { "epoch": 0.7977178014119132, "grad_norm": 3.027950770126478, "learning_rate": 4.979648933824183e-07, "loss": 1.0244, "step": 7585 }, { "epoch": 0.7978229718537605, "grad_norm": 3.251073308685086, "learning_rate": 4.974650207148701e-07, "loss": 0.9823, "step": 7586 }, { "epoch": 0.7979281422956078, "grad_norm": 2.2190156660054363, "learning_rate": 4.969653713455286e-07, "loss": 0.979, "step": 7587 }, { "epoch": 0.7980333127374551, "grad_norm": 2.1132623922709732, "learning_rate": 4.964659453301088e-07, "loss": 0.9428, "step": 7588 }, { "epoch": 0.7981384831793025, "grad_norm": 2.2951743727023306, "learning_rate": 4.959667427242995e-07, "loss": 0.9855, "step": 7589 }, { "epoch": 0.7982436536211498, "grad_norm": 2.2665071186589705, "learning_rate": 4.954677635837668e-07, "loss": 0.9737, "step": 7590 }, { "epoch": 0.7983488240629971, "grad_norm": 2.327325568834645, "learning_rate": 4.949690079641498e-07, "loss": 0.9589, "step": 7591 }, { "epoch": 0.7984539945048444, "grad_norm": 2.8604374797701353, "learning_rate": 4.944704759210637e-07, "loss": 0.9786, "step": 7592 }, { "epoch": 0.7985591649466918, "grad_norm": 2.2699713114872684, "learning_rate": 4.939721675101001e-07, "loss": 0.9744, "step": 7593 }, { "epoch": 0.7986643353885391, "grad_norm": 2.1867243451965, "learning_rate": 4.934740827868221e-07, "loss": 0.9416, "step": 7594 }, { "epoch": 0.7987695058303864, "grad_norm": 2.032577080022669, "learning_rate": 4.929762218067713e-07, "loss": 0.9277, "step": 7595 }, { "epoch": 0.7988746762722337, "grad_norm": 2.469836920824304, "learning_rate": 4.924785846254629e-07, "loss": 1.0052, "step": 7596 }, { "epoch": 0.798979846714081, "grad_norm": 2.2285999066069544, "learning_rate": 4.91981171298388e-07, "loss": 1.0035, "step": 7597 }, { "epoch": 0.7990850171559283, "grad_norm": 2.7998486974210626, "learning_rate": 4.914839818810105e-07, "loss": 0.9873, "step": 7598 }, { "epoch": 0.7991901875977756, "grad_norm": 2.2023794298511308, "learning_rate": 4.909870164287725e-07, "loss": 0.9966, "step": 7599 }, { "epoch": 0.7992953580396229, "grad_norm": 2.5707030773678423, "learning_rate": 4.904902749970883e-07, "loss": 0.9916, "step": 7600 }, { "epoch": 0.7994005284814703, "grad_norm": 2.3180485019623442, "learning_rate": 4.899937576413486e-07, "loss": 0.961, "step": 7601 }, { "epoch": 0.7995056989233176, "grad_norm": 2.3791731807882335, "learning_rate": 4.894974644169198e-07, "loss": 0.9891, "step": 7602 }, { "epoch": 0.7996108693651649, "grad_norm": 2.479486951922807, "learning_rate": 4.890013953791411e-07, "loss": 0.9645, "step": 7603 }, { "epoch": 0.7997160398070122, "grad_norm": 2.633404751177737, "learning_rate": 4.885055505833291e-07, "loss": 0.9565, "step": 7604 }, { "epoch": 0.7998212102488595, "grad_norm": 2.684117417959632, "learning_rate": 4.880099300847741e-07, "loss": 0.9838, "step": 7605 }, { "epoch": 0.7999263806907069, "grad_norm": 2.8746409642803243, "learning_rate": 4.875145339387418e-07, "loss": 1.036, "step": 7606 }, { "epoch": 0.8000315511325542, "grad_norm": 2.0807055982824276, "learning_rate": 4.870193622004729e-07, "loss": 0.9486, "step": 7607 }, { "epoch": 0.8001367215744015, "grad_norm": 2.471988925752685, "learning_rate": 4.865244149251815e-07, "loss": 0.9727, "step": 7608 }, { "epoch": 0.8002418920162488, "grad_norm": 1.9412511005330855, "learning_rate": 4.860296921680593e-07, "loss": 0.9654, "step": 7609 }, { "epoch": 0.8003470624580962, "grad_norm": 1.9790419108013764, "learning_rate": 4.855351939842717e-07, "loss": 0.9808, "step": 7610 }, { "epoch": 0.8004522328999435, "grad_norm": 2.4043654013707254, "learning_rate": 4.850409204289591e-07, "loss": 0.9727, "step": 7611 }, { "epoch": 0.8005574033417908, "grad_norm": 2.7262779181762666, "learning_rate": 4.845468715572363e-07, "loss": 0.9834, "step": 7612 }, { "epoch": 0.8006625737836381, "grad_norm": 2.2391899912644715, "learning_rate": 4.840530474241944e-07, "loss": 0.9767, "step": 7613 }, { "epoch": 0.8007677442254855, "grad_norm": 2.4174326455832977, "learning_rate": 4.835594480848974e-07, "loss": 0.9769, "step": 7614 }, { "epoch": 0.8008729146673328, "grad_norm": 2.6250830234736307, "learning_rate": 4.830660735943865e-07, "loss": 0.9464, "step": 7615 }, { "epoch": 0.8009780851091801, "grad_norm": 2.34149423966112, "learning_rate": 4.825729240076768e-07, "loss": 1.0037, "step": 7616 }, { "epoch": 0.8010832555510273, "grad_norm": 1.8300856800483398, "learning_rate": 4.820799993797576e-07, "loss": 0.9735, "step": 7617 }, { "epoch": 0.8011884259928747, "grad_norm": 1.764340560176855, "learning_rate": 4.815872997655941e-07, "loss": 0.9249, "step": 7618 }, { "epoch": 0.801293596434722, "grad_norm": 2.320272163563018, "learning_rate": 4.810948252201261e-07, "loss": 0.9664, "step": 7619 }, { "epoch": 0.8013987668765693, "grad_norm": 3.0912542656654733, "learning_rate": 4.806025757982694e-07, "loss": 0.9396, "step": 7620 }, { "epoch": 0.8015039373184166, "grad_norm": 2.2930284148085316, "learning_rate": 4.80110551554912e-07, "loss": 0.9745, "step": 7621 }, { "epoch": 0.801609107760264, "grad_norm": 2.3498868797024106, "learning_rate": 4.796187525449198e-07, "loss": 1.0354, "step": 7622 }, { "epoch": 0.8017142782021113, "grad_norm": 2.2536909100182534, "learning_rate": 4.791271788231309e-07, "loss": 1.011, "step": 7623 }, { "epoch": 0.8018194486439586, "grad_norm": 2.5584982606240603, "learning_rate": 4.786358304443603e-07, "loss": 1.0019, "step": 7624 }, { "epoch": 0.8019246190858059, "grad_norm": 2.356806815511279, "learning_rate": 4.781447074633977e-07, "loss": 0.9796, "step": 7625 }, { "epoch": 0.8020297895276532, "grad_norm": 2.654243031206716, "learning_rate": 4.776538099350058e-07, "loss": 0.966, "step": 7626 }, { "epoch": 0.8021349599695006, "grad_norm": 2.7182415623648843, "learning_rate": 4.771631379139244e-07, "loss": 0.9779, "step": 7627 }, { "epoch": 0.8022401304113479, "grad_norm": 2.5630762496199044, "learning_rate": 4.7667269145486777e-07, "loss": 0.9753, "step": 7628 }, { "epoch": 0.8023453008531952, "grad_norm": 2.485238287825221, "learning_rate": 4.7618247061252286e-07, "loss": 0.9542, "step": 7629 }, { "epoch": 0.8024504712950425, "grad_norm": 3.0101852738819375, "learning_rate": 4.7569247544155486e-07, "loss": 1.0098, "step": 7630 }, { "epoch": 0.8025556417368899, "grad_norm": 2.4862972563714694, "learning_rate": 4.7520270599660053e-07, "loss": 0.907, "step": 7631 }, { "epoch": 0.8026608121787372, "grad_norm": 3.043564384568565, "learning_rate": 4.747131623322737e-07, "loss": 1.0138, "step": 7632 }, { "epoch": 0.8027659826205845, "grad_norm": 2.1240214301887583, "learning_rate": 4.7422384450316244e-07, "loss": 1.0008, "step": 7633 }, { "epoch": 0.8028711530624318, "grad_norm": 2.6774258034215537, "learning_rate": 4.7373475256382973e-07, "loss": 0.9516, "step": 7634 }, { "epoch": 0.8029763235042792, "grad_norm": 2.5417333491458285, "learning_rate": 4.7324588656881194e-07, "loss": 1.0021, "step": 7635 }, { "epoch": 0.8030814939461265, "grad_norm": 2.8488514697489786, "learning_rate": 4.7275724657262293e-07, "loss": 0.9775, "step": 7636 }, { "epoch": 0.8031866643879738, "grad_norm": 2.3577591104181437, "learning_rate": 4.7226883262974865e-07, "loss": 1.0096, "step": 7637 }, { "epoch": 0.803291834829821, "grad_norm": 2.6074240401396542, "learning_rate": 4.717806447946513e-07, "loss": 0.9777, "step": 7638 }, { "epoch": 0.8033970052716684, "grad_norm": 3.3809444206445995, "learning_rate": 4.712926831217685e-07, "loss": 0.9335, "step": 7639 }, { "epoch": 0.8035021757135157, "grad_norm": 2.0108425607516263, "learning_rate": 4.708049476655105e-07, "loss": 0.9809, "step": 7640 }, { "epoch": 0.803607346155363, "grad_norm": 2.6521203726038514, "learning_rate": 4.703174384802639e-07, "loss": 1.0248, "step": 7641 }, { "epoch": 0.8037125165972103, "grad_norm": 2.848291027442207, "learning_rate": 4.69830155620391e-07, "loss": 0.9835, "step": 7642 }, { "epoch": 0.8038176870390576, "grad_norm": 2.475573839743065, "learning_rate": 4.693430991402259e-07, "loss": 0.9945, "step": 7643 }, { "epoch": 0.803922857480905, "grad_norm": 2.345453581854106, "learning_rate": 4.6885626909408037e-07, "loss": 0.9622, "step": 7644 }, { "epoch": 0.8040280279227523, "grad_norm": 2.7478312834618808, "learning_rate": 4.683696655362391e-07, "loss": 0.9866, "step": 7645 }, { "epoch": 0.8041331983645996, "grad_norm": 1.9800930016394065, "learning_rate": 4.678832885209622e-07, "loss": 1.0, "step": 7646 }, { "epoch": 0.804238368806447, "grad_norm": 1.6413329542669401, "learning_rate": 4.6739713810248443e-07, "loss": 0.9856, "step": 7647 }, { "epoch": 0.8043435392482943, "grad_norm": 2.414821706263618, "learning_rate": 4.6691121433501657e-07, "loss": 0.9904, "step": 7648 }, { "epoch": 0.8044487096901416, "grad_norm": 2.4848547284531506, "learning_rate": 4.664255172727411e-07, "loss": 0.9819, "step": 7649 }, { "epoch": 0.8045538801319889, "grad_norm": 1.9804328938918532, "learning_rate": 4.6594004696981774e-07, "loss": 0.9987, "step": 7650 }, { "epoch": 0.8046590505738362, "grad_norm": 2.5014058610541983, "learning_rate": 4.65454803480381e-07, "loss": 0.9909, "step": 7651 }, { "epoch": 0.8047642210156836, "grad_norm": 2.202903476738561, "learning_rate": 4.649697868585379e-07, "loss": 0.9658, "step": 7652 }, { "epoch": 0.8048693914575309, "grad_norm": 2.380362604444181, "learning_rate": 4.644849971583729e-07, "loss": 1.0007, "step": 7653 }, { "epoch": 0.8049745618993782, "grad_norm": 6.351090296949297, "learning_rate": 4.6400043443394246e-07, "loss": 1.0105, "step": 7654 }, { "epoch": 0.8050797323412255, "grad_norm": 2.8797515711713615, "learning_rate": 4.6351609873927976e-07, "loss": 1.03, "step": 7655 }, { "epoch": 0.8051849027830729, "grad_norm": 3.2146357197391846, "learning_rate": 4.6303199012839225e-07, "loss": 1.0333, "step": 7656 }, { "epoch": 0.8052900732249202, "grad_norm": 2.079836576250059, "learning_rate": 4.6254810865526187e-07, "loss": 0.9962, "step": 7657 }, { "epoch": 0.8053952436667674, "grad_norm": 1.649972670577275, "learning_rate": 4.6206445437384504e-07, "loss": 0.9932, "step": 7658 }, { "epoch": 0.8055004141086147, "grad_norm": 1.9272657097173842, "learning_rate": 4.615810273380722e-07, "loss": 0.9981, "step": 7659 }, { "epoch": 0.805605584550462, "grad_norm": 2.8001761793669577, "learning_rate": 4.610978276018496e-07, "loss": 0.9994, "step": 7660 }, { "epoch": 0.8057107549923094, "grad_norm": 2.0720005349560995, "learning_rate": 4.6061485521905815e-07, "loss": 0.9708, "step": 7661 }, { "epoch": 0.8058159254341567, "grad_norm": 1.9105799009272426, "learning_rate": 4.6013211024355353e-07, "loss": 1.0271, "step": 7662 }, { "epoch": 0.805921095876004, "grad_norm": 1.9282586380528615, "learning_rate": 4.5964959272916415e-07, "loss": 1.0037, "step": 7663 }, { "epoch": 0.8060262663178513, "grad_norm": 1.9696455842256768, "learning_rate": 4.59167302729695e-07, "loss": 1.0094, "step": 7664 }, { "epoch": 0.8061314367596987, "grad_norm": 1.9870486254495214, "learning_rate": 4.5868524029892616e-07, "loss": 0.9653, "step": 7665 }, { "epoch": 0.806236607201546, "grad_norm": 2.631029480979719, "learning_rate": 4.582034054906098e-07, "loss": 1.0113, "step": 7666 }, { "epoch": 0.8063417776433933, "grad_norm": 1.9761375577657758, "learning_rate": 4.5772179835847584e-07, "loss": 0.9523, "step": 7667 }, { "epoch": 0.8064469480852406, "grad_norm": 2.7997469746623267, "learning_rate": 4.5724041895622546e-07, "loss": 0.9985, "step": 7668 }, { "epoch": 0.806552118527088, "grad_norm": 2.581890446856423, "learning_rate": 4.567592673375371e-07, "loss": 0.9612, "step": 7669 }, { "epoch": 0.8066572889689353, "grad_norm": 2.1149257021470946, "learning_rate": 4.56278343556063e-07, "loss": 0.9749, "step": 7670 }, { "epoch": 0.8067624594107826, "grad_norm": 2.547543034627653, "learning_rate": 4.5579764766543023e-07, "loss": 1.0053, "step": 7671 }, { "epoch": 0.8068676298526299, "grad_norm": 2.369288975718212, "learning_rate": 4.5531717971923904e-07, "loss": 0.9468, "step": 7672 }, { "epoch": 0.8069728002944773, "grad_norm": 1.9398001873861406, "learning_rate": 4.5483693977106614e-07, "loss": 0.9719, "step": 7673 }, { "epoch": 0.8070779707363246, "grad_norm": 1.7047845543366074, "learning_rate": 4.543569278744625e-07, "loss": 0.9695, "step": 7674 }, { "epoch": 0.8071831411781719, "grad_norm": 2.311255391335769, "learning_rate": 4.5387714408295174e-07, "loss": 1.021, "step": 7675 }, { "epoch": 0.8072883116200192, "grad_norm": 2.5665068402737297, "learning_rate": 4.5339758845003497e-07, "loss": 0.9434, "step": 7676 }, { "epoch": 0.8073934820618666, "grad_norm": 1.987419007348485, "learning_rate": 4.529182610291849e-07, "loss": 1.0189, "step": 7677 }, { "epoch": 0.8074986525037138, "grad_norm": 2.7148221725671258, "learning_rate": 4.5243916187385104e-07, "loss": 1.0317, "step": 7678 }, { "epoch": 0.8076038229455611, "grad_norm": 1.9771244563101253, "learning_rate": 4.5196029103745736e-07, "loss": 0.9574, "step": 7679 }, { "epoch": 0.8077089933874084, "grad_norm": 2.5643391230827577, "learning_rate": 4.514816485734003e-07, "loss": 0.9949, "step": 7680 }, { "epoch": 0.8078141638292557, "grad_norm": 2.6365658218139947, "learning_rate": 4.5100323453505344e-07, "loss": 0.991, "step": 7681 }, { "epoch": 0.8079193342711031, "grad_norm": 2.0213099184862746, "learning_rate": 4.5052504897576177e-07, "loss": 1.0065, "step": 7682 }, { "epoch": 0.8080245047129504, "grad_norm": 2.430948178647297, "learning_rate": 4.500470919488495e-07, "loss": 0.9754, "step": 7683 }, { "epoch": 0.8081296751547977, "grad_norm": 2.432965011287217, "learning_rate": 4.495693635076101e-07, "loss": 0.9688, "step": 7684 }, { "epoch": 0.808234845596645, "grad_norm": 1.9832928166525534, "learning_rate": 4.49091863705316e-07, "loss": 0.967, "step": 7685 }, { "epoch": 0.8083400160384924, "grad_norm": 2.718332781399688, "learning_rate": 4.4861459259521013e-07, "loss": 0.9604, "step": 7686 }, { "epoch": 0.8084451864803397, "grad_norm": 2.5869970801279707, "learning_rate": 4.4813755023051323e-07, "loss": 1.0265, "step": 7687 }, { "epoch": 0.808550356922187, "grad_norm": 2.249567127749935, "learning_rate": 4.476607366644192e-07, "loss": 1.0034, "step": 7688 }, { "epoch": 0.8086555273640343, "grad_norm": 1.8946857360784728, "learning_rate": 4.4718415195009577e-07, "loss": 1.009, "step": 7689 }, { "epoch": 0.8087606978058817, "grad_norm": 2.775009327722139, "learning_rate": 4.4670779614068683e-07, "loss": 0.9728, "step": 7690 }, { "epoch": 0.808865868247729, "grad_norm": 2.3078323532076626, "learning_rate": 4.4623166928930846e-07, "loss": 0.9838, "step": 7691 }, { "epoch": 0.8089710386895763, "grad_norm": 2.454669259509621, "learning_rate": 4.457557714490532e-07, "loss": 1.0128, "step": 7692 }, { "epoch": 0.8090762091314236, "grad_norm": 2.256343596048476, "learning_rate": 4.4528010267298803e-07, "loss": 1.0084, "step": 7693 }, { "epoch": 0.809181379573271, "grad_norm": 2.0591184791276738, "learning_rate": 4.4480466301415253e-07, "loss": 0.9828, "step": 7694 }, { "epoch": 0.8092865500151183, "grad_norm": 2.8039610013628145, "learning_rate": 4.4432945252556285e-07, "loss": 1.0063, "step": 7695 }, { "epoch": 0.8093917204569656, "grad_norm": 1.8867274438204686, "learning_rate": 4.4385447126020695e-07, "loss": 0.979, "step": 7696 }, { "epoch": 0.8094968908988129, "grad_norm": 2.0342847166171265, "learning_rate": 4.433797192710515e-07, "loss": 0.959, "step": 7697 }, { "epoch": 0.8096020613406603, "grad_norm": 2.143958088883331, "learning_rate": 4.4290519661103313e-07, "loss": 0.9682, "step": 7698 }, { "epoch": 0.8097072317825075, "grad_norm": 2.307795459628221, "learning_rate": 4.424309033330662e-07, "loss": 0.9781, "step": 7699 }, { "epoch": 0.8098124022243548, "grad_norm": 2.6737394534107755, "learning_rate": 4.4195683949003645e-07, "loss": 0.9879, "step": 7700 }, { "epoch": 0.8099175726662021, "grad_norm": 2.468361380665413, "learning_rate": 4.414830051348068e-07, "loss": 0.9823, "step": 7701 }, { "epoch": 0.8100227431080494, "grad_norm": 2.464813507317941, "learning_rate": 4.410094003202134e-07, "loss": 0.9437, "step": 7702 }, { "epoch": 0.8101279135498968, "grad_norm": 2.439270737619183, "learning_rate": 4.4053602509906647e-07, "loss": 0.9409, "step": 7703 }, { "epoch": 0.8102330839917441, "grad_norm": 2.2253177582591266, "learning_rate": 4.4006287952415156e-07, "loss": 1.0017, "step": 7704 }, { "epoch": 0.8103382544335914, "grad_norm": 1.8670735694432252, "learning_rate": 4.395899636482265e-07, "loss": 0.9638, "step": 7705 }, { "epoch": 0.8104434248754387, "grad_norm": 2.537791466303488, "learning_rate": 4.391172775240277e-07, "loss": 0.9782, "step": 7706 }, { "epoch": 0.8105485953172861, "grad_norm": 2.2360740386296953, "learning_rate": 4.386448212042613e-07, "loss": 0.9666, "step": 7707 }, { "epoch": 0.8106537657591334, "grad_norm": 1.9347825652393957, "learning_rate": 4.38172594741611e-07, "loss": 1.0115, "step": 7708 }, { "epoch": 0.8107589362009807, "grad_norm": 2.644131866061192, "learning_rate": 4.3770059818873335e-07, "loss": 0.9877, "step": 7709 }, { "epoch": 0.810864106642828, "grad_norm": 2.7175911960332906, "learning_rate": 4.372288315982584e-07, "loss": 0.9678, "step": 7710 }, { "epoch": 0.8109692770846754, "grad_norm": 2.336165314669402, "learning_rate": 4.3675729502279385e-07, "loss": 1.0202, "step": 7711 }, { "epoch": 0.8110744475265227, "grad_norm": 2.5282003663888952, "learning_rate": 4.3628598851491816e-07, "loss": 0.9948, "step": 7712 }, { "epoch": 0.81117961796837, "grad_norm": 2.6721797231956863, "learning_rate": 4.358149121271871e-07, "loss": 1.0091, "step": 7713 }, { "epoch": 0.8112847884102173, "grad_norm": 2.155263569819944, "learning_rate": 4.3534406591212775e-07, "loss": 0.9918, "step": 7714 }, { "epoch": 0.8113899588520647, "grad_norm": 2.878491927577863, "learning_rate": 4.3487344992224375e-07, "loss": 0.9162, "step": 7715 }, { "epoch": 0.811495129293912, "grad_norm": 2.6975923680738703, "learning_rate": 4.344030642100133e-07, "loss": 1.0168, "step": 7716 }, { "epoch": 0.8116002997357593, "grad_norm": 2.24948546017479, "learning_rate": 4.3393290882788654e-07, "loss": 0.979, "step": 7717 }, { "epoch": 0.8117054701776066, "grad_norm": 2.2782812764649836, "learning_rate": 4.3346298382829085e-07, "loss": 0.9802, "step": 7718 }, { "epoch": 0.8118106406194538, "grad_norm": 2.369431861546599, "learning_rate": 4.329932892636249e-07, "loss": 0.9852, "step": 7719 }, { "epoch": 0.8119158110613012, "grad_norm": 2.819628728763317, "learning_rate": 4.325238251862651e-07, "loss": 1.007, "step": 7720 }, { "epoch": 0.8120209815031485, "grad_norm": 2.480164594518704, "learning_rate": 4.3205459164855914e-07, "loss": 0.9727, "step": 7721 }, { "epoch": 0.8121261519449958, "grad_norm": 2.3940648557433524, "learning_rate": 4.315855887028314e-07, "loss": 0.9746, "step": 7722 }, { "epoch": 0.8122313223868431, "grad_norm": 3.393286161834988, "learning_rate": 4.3111681640137763e-07, "loss": 0.9979, "step": 7723 }, { "epoch": 0.8123364928286905, "grad_norm": 2.154801256417696, "learning_rate": 4.306482747964705e-07, "loss": 1.0169, "step": 7724 }, { "epoch": 0.8124416632705378, "grad_norm": 2.6404030152506657, "learning_rate": 4.3017996394035674e-07, "loss": 0.9906, "step": 7725 }, { "epoch": 0.8125468337123851, "grad_norm": 2.921067892801514, "learning_rate": 4.2971188388525515e-07, "loss": 1.0098, "step": 7726 }, { "epoch": 0.8126520041542324, "grad_norm": 2.151845511245677, "learning_rate": 4.292440346833618e-07, "loss": 0.9851, "step": 7727 }, { "epoch": 0.8127571745960798, "grad_norm": 2.1131586999724665, "learning_rate": 4.287764163868441e-07, "loss": 0.9912, "step": 7728 }, { "epoch": 0.8128623450379271, "grad_norm": 2.540790376974774, "learning_rate": 4.283090290478459e-07, "loss": 0.9808, "step": 7729 }, { "epoch": 0.8129675154797744, "grad_norm": 2.5175938968858476, "learning_rate": 4.278418727184849e-07, "loss": 0.9565, "step": 7730 }, { "epoch": 0.8130726859216217, "grad_norm": 3.2981499359448265, "learning_rate": 4.273749474508515e-07, "loss": 0.9686, "step": 7731 }, { "epoch": 0.8131778563634691, "grad_norm": 2.9415949773488514, "learning_rate": 4.2690825329701313e-07, "loss": 0.9655, "step": 7732 }, { "epoch": 0.8132830268053164, "grad_norm": 2.7939627395608104, "learning_rate": 4.2644179030900734e-07, "loss": 0.9893, "step": 7733 }, { "epoch": 0.8133881972471637, "grad_norm": 2.668181131671682, "learning_rate": 4.2597555853885117e-07, "loss": 1.0038, "step": 7734 }, { "epoch": 0.813493367689011, "grad_norm": 2.065827171733609, "learning_rate": 4.2550955803853125e-07, "loss": 0.9932, "step": 7735 }, { "epoch": 0.8135985381308584, "grad_norm": 2.4982865299615513, "learning_rate": 4.250437888600115e-07, "loss": 0.9941, "step": 7736 }, { "epoch": 0.8137037085727057, "grad_norm": 2.9611235171473536, "learning_rate": 4.2457825105522736e-07, "loss": 0.9584, "step": 7737 }, { "epoch": 0.813808879014553, "grad_norm": 1.856983927521474, "learning_rate": 4.241129446760905e-07, "loss": 0.9412, "step": 7738 }, { "epoch": 0.8139140494564002, "grad_norm": 2.298057826980122, "learning_rate": 4.2364786977448723e-07, "loss": 0.977, "step": 7739 }, { "epoch": 0.8140192198982475, "grad_norm": 2.857504158227023, "learning_rate": 4.231830264022757e-07, "loss": 0.9809, "step": 7740 }, { "epoch": 0.8141243903400949, "grad_norm": 1.9125168283750302, "learning_rate": 4.227184146112903e-07, "loss": 0.9547, "step": 7741 }, { "epoch": 0.8142295607819422, "grad_norm": 2.3292724206858115, "learning_rate": 4.222540344533374e-07, "loss": 0.9752, "step": 7742 }, { "epoch": 0.8143347312237895, "grad_norm": 2.8514614953686768, "learning_rate": 4.2178988598020164e-07, "loss": 0.9877, "step": 7743 }, { "epoch": 0.8144399016656368, "grad_norm": 2.3444264375718555, "learning_rate": 4.2132596924363666e-07, "loss": 0.9912, "step": 7744 }, { "epoch": 0.8145450721074842, "grad_norm": 2.51703936814917, "learning_rate": 4.208622842953747e-07, "loss": 0.9761, "step": 7745 }, { "epoch": 0.8146502425493315, "grad_norm": 2.401729234637294, "learning_rate": 4.2039883118711935e-07, "loss": 0.9613, "step": 7746 }, { "epoch": 0.8147554129911788, "grad_norm": 2.10443613373023, "learning_rate": 4.1993560997054787e-07, "loss": 0.9799, "step": 7747 }, { "epoch": 0.8148605834330261, "grad_norm": 2.7838525850839613, "learning_rate": 4.194726206973157e-07, "loss": 0.958, "step": 7748 }, { "epoch": 0.8149657538748735, "grad_norm": 2.471550286019982, "learning_rate": 4.1900986341904787e-07, "loss": 0.957, "step": 7749 }, { "epoch": 0.8150709243167208, "grad_norm": 2.6262073326290087, "learning_rate": 4.1854733818734646e-07, "loss": 0.9636, "step": 7750 }, { "epoch": 0.8151760947585681, "grad_norm": 1.7860526138744137, "learning_rate": 4.180850450537857e-07, "loss": 0.9505, "step": 7751 }, { "epoch": 0.8152812652004154, "grad_norm": 2.3407264097231866, "learning_rate": 4.1762298406991503e-07, "loss": 0.9565, "step": 7752 }, { "epoch": 0.8153864356422628, "grad_norm": 2.150623638303471, "learning_rate": 4.1716115528725896e-07, "loss": 0.9697, "step": 7753 }, { "epoch": 0.8154916060841101, "grad_norm": 3.2525410184474173, "learning_rate": 4.166995587573133e-07, "loss": 1.0229, "step": 7754 }, { "epoch": 0.8155967765259574, "grad_norm": 2.1671249003463577, "learning_rate": 4.1623819453155133e-07, "loss": 0.9757, "step": 7755 }, { "epoch": 0.8157019469678047, "grad_norm": 2.097060159238962, "learning_rate": 4.157770626614166e-07, "loss": 0.9595, "step": 7756 }, { "epoch": 0.8158071174096521, "grad_norm": 2.5745070957454947, "learning_rate": 4.153161631983313e-07, "loss": 0.9547, "step": 7757 }, { "epoch": 0.8159122878514994, "grad_norm": 2.6795685146216854, "learning_rate": 4.1485549619368765e-07, "loss": 0.9458, "step": 7758 }, { "epoch": 0.8160174582933467, "grad_norm": 2.462379782717814, "learning_rate": 4.1439506169885484e-07, "loss": 0.9906, "step": 7759 }, { "epoch": 0.8161226287351939, "grad_norm": 2.4584203736359918, "learning_rate": 4.139348597651735e-07, "loss": 0.9678, "step": 7760 }, { "epoch": 0.8162277991770412, "grad_norm": 2.373737093242701, "learning_rate": 4.134748904439603e-07, "loss": 0.9874, "step": 7761 }, { "epoch": 0.8163329696188886, "grad_norm": 2.682893399793693, "learning_rate": 4.1301515378650654e-07, "loss": 0.9744, "step": 7762 }, { "epoch": 0.8164381400607359, "grad_norm": 2.6158066777998443, "learning_rate": 4.1255564984407445e-07, "loss": 0.9914, "step": 7763 }, { "epoch": 0.8165433105025832, "grad_norm": 2.320463099049456, "learning_rate": 4.120963786679039e-07, "loss": 0.9913, "step": 7764 }, { "epoch": 0.8166484809444305, "grad_norm": 2.584830907970578, "learning_rate": 4.116373403092058e-07, "loss": 0.9571, "step": 7765 }, { "epoch": 0.8167536513862779, "grad_norm": 2.3221349508899425, "learning_rate": 4.111785348191674e-07, "loss": 0.9629, "step": 7766 }, { "epoch": 0.8168588218281252, "grad_norm": 2.3555780902934207, "learning_rate": 4.1071996224894937e-07, "loss": 0.9926, "step": 7767 }, { "epoch": 0.8169639922699725, "grad_norm": 2.4447457069951763, "learning_rate": 4.1026162264968526e-07, "loss": 0.9714, "step": 7768 }, { "epoch": 0.8170691627118198, "grad_norm": 2.5930949291002143, "learning_rate": 4.098035160724842e-07, "loss": 0.9894, "step": 7769 }, { "epoch": 0.8171743331536672, "grad_norm": 2.4978191323631975, "learning_rate": 4.093456425684275e-07, "loss": 0.9675, "step": 7770 }, { "epoch": 0.8172795035955145, "grad_norm": 1.763876023991041, "learning_rate": 4.0888800218857326e-07, "loss": 0.9674, "step": 7771 }, { "epoch": 0.8173846740373618, "grad_norm": 2.562886748389969, "learning_rate": 4.0843059498395063e-07, "loss": 0.9959, "step": 7772 }, { "epoch": 0.8174898444792091, "grad_norm": 3.542366051507965, "learning_rate": 4.0797342100556526e-07, "loss": 0.9939, "step": 7773 }, { "epoch": 0.8175950149210565, "grad_norm": 2.978184338970861, "learning_rate": 4.075164803043941e-07, "loss": 1.0054, "step": 7774 }, { "epoch": 0.8177001853629038, "grad_norm": 2.0685694903463263, "learning_rate": 4.070597729313905e-07, "loss": 0.9819, "step": 7775 }, { "epoch": 0.8178053558047511, "grad_norm": 2.4513268899881755, "learning_rate": 4.066032989374813e-07, "loss": 0.9578, "step": 7776 }, { "epoch": 0.8179105262465984, "grad_norm": 2.789567940068473, "learning_rate": 4.061470583735655e-07, "loss": 0.9826, "step": 7777 }, { "epoch": 0.8180156966884458, "grad_norm": 2.2071024352563495, "learning_rate": 4.056910512905193e-07, "loss": 0.9408, "step": 7778 }, { "epoch": 0.8181208671302931, "grad_norm": 2.462265054582973, "learning_rate": 4.052352777391885e-07, "loss": 0.9993, "step": 7779 }, { "epoch": 0.8182260375721403, "grad_norm": 2.1507474468996537, "learning_rate": 4.0477973777039854e-07, "loss": 0.9428, "step": 7780 }, { "epoch": 0.8183312080139876, "grad_norm": 1.9943322945072193, "learning_rate": 4.0432443143494373e-07, "loss": 0.9267, "step": 7781 }, { "epoch": 0.818436378455835, "grad_norm": 2.4651273882986757, "learning_rate": 4.03869358783594e-07, "loss": 0.9777, "step": 7782 }, { "epoch": 0.8185415488976823, "grad_norm": 2.3629386718998173, "learning_rate": 4.0341451986709494e-07, "loss": 1.0024, "step": 7783 }, { "epoch": 0.8186467193395296, "grad_norm": 1.98682520235926, "learning_rate": 4.029599147361624e-07, "loss": 1.0093, "step": 7784 }, { "epoch": 0.8187518897813769, "grad_norm": 2.6681064116513826, "learning_rate": 4.0250554344149094e-07, "loss": 0.998, "step": 7785 }, { "epoch": 0.8188570602232242, "grad_norm": 2.0258952155927568, "learning_rate": 4.020514060337447e-07, "loss": 0.9847, "step": 7786 }, { "epoch": 0.8189622306650716, "grad_norm": 2.2616741774571074, "learning_rate": 4.015975025635646e-07, "loss": 0.9739, "step": 7787 }, { "epoch": 0.8190674011069189, "grad_norm": 2.946241928430217, "learning_rate": 4.011438330815634e-07, "loss": 0.9951, "step": 7788 }, { "epoch": 0.8191725715487662, "grad_norm": 2.6286525561939342, "learning_rate": 4.0069039763832946e-07, "loss": 0.9628, "step": 7789 }, { "epoch": 0.8192777419906135, "grad_norm": 2.3544960061523943, "learning_rate": 4.0023719628442454e-07, "loss": 0.9692, "step": 7790 }, { "epoch": 0.8193829124324609, "grad_norm": 2.447301762694223, "learning_rate": 3.997842290703835e-07, "loss": 0.988, "step": 7791 }, { "epoch": 0.8194880828743082, "grad_norm": 2.0937992451075864, "learning_rate": 3.9933149604671624e-07, "loss": 0.9527, "step": 7792 }, { "epoch": 0.8195932533161555, "grad_norm": 2.5017080295162653, "learning_rate": 3.9887899726390455e-07, "loss": 0.9585, "step": 7793 }, { "epoch": 0.8196984237580028, "grad_norm": 2.1071679855154173, "learning_rate": 3.9842673277240813e-07, "loss": 0.9637, "step": 7794 }, { "epoch": 0.8198035941998502, "grad_norm": 2.6444030746408793, "learning_rate": 3.97974702622656e-07, "loss": 0.986, "step": 7795 }, { "epoch": 0.8199087646416975, "grad_norm": 2.2415822506715366, "learning_rate": 3.975229068650541e-07, "loss": 0.9998, "step": 7796 }, { "epoch": 0.8200139350835448, "grad_norm": 2.8744646198567714, "learning_rate": 3.970713455499803e-07, "loss": 0.9492, "step": 7797 }, { "epoch": 0.8201191055253921, "grad_norm": 2.0835911020378837, "learning_rate": 3.9662001872778746e-07, "loss": 0.9694, "step": 7798 }, { "epoch": 0.8202242759672395, "grad_norm": 2.0735661240104832, "learning_rate": 3.961689264488025e-07, "loss": 0.9942, "step": 7799 }, { "epoch": 0.8203294464090867, "grad_norm": 2.132869834888772, "learning_rate": 3.9571806876332527e-07, "loss": 0.9882, "step": 7800 }, { "epoch": 0.820434616850934, "grad_norm": 3.1197931626041218, "learning_rate": 3.9526744572163016e-07, "loss": 0.9942, "step": 7801 }, { "epoch": 0.8205397872927813, "grad_norm": 2.4680929826487352, "learning_rate": 3.9481705737396457e-07, "loss": 1.01, "step": 7802 }, { "epoch": 0.8206449577346286, "grad_norm": 3.1575237316281837, "learning_rate": 3.943669037705508e-07, "loss": 1.0134, "step": 7803 }, { "epoch": 0.820750128176476, "grad_norm": 2.5835775583665943, "learning_rate": 3.939169849615851e-07, "loss": 1.0241, "step": 7804 }, { "epoch": 0.8208552986183233, "grad_norm": 2.231742487138655, "learning_rate": 3.9346730099723533e-07, "loss": 0.9715, "step": 7805 }, { "epoch": 0.8209604690601706, "grad_norm": 2.6521036190376783, "learning_rate": 3.930178519276462e-07, "loss": 1.0162, "step": 7806 }, { "epoch": 0.8210656395020179, "grad_norm": 2.867026937723473, "learning_rate": 3.925686378029331e-07, "loss": 0.9818, "step": 7807 }, { "epoch": 0.8211708099438653, "grad_norm": 2.263917199281129, "learning_rate": 3.921196586731893e-07, "loss": 0.9731, "step": 7808 }, { "epoch": 0.8212759803857126, "grad_norm": 2.3946757029431684, "learning_rate": 3.916709145884773e-07, "loss": 0.9887, "step": 7809 }, { "epoch": 0.8213811508275599, "grad_norm": 2.280792937113479, "learning_rate": 3.912224055988373e-07, "loss": 0.9711, "step": 7810 }, { "epoch": 0.8214863212694072, "grad_norm": 2.3686260861759547, "learning_rate": 3.907741317542799e-07, "loss": 0.9954, "step": 7811 }, { "epoch": 0.8215914917112546, "grad_norm": 2.6674673001111167, "learning_rate": 3.903260931047917e-07, "loss": 0.9966, "step": 7812 }, { "epoch": 0.8216966621531019, "grad_norm": 2.7598548320790517, "learning_rate": 3.898782897003334e-07, "loss": 0.9814, "step": 7813 }, { "epoch": 0.8218018325949492, "grad_norm": 2.464756135941767, "learning_rate": 3.894307215908372e-07, "loss": 1.0448, "step": 7814 }, { "epoch": 0.8219070030367965, "grad_norm": 1.6827932013253648, "learning_rate": 3.889833888262115e-07, "loss": 0.9651, "step": 7815 }, { "epoch": 0.8220121734786439, "grad_norm": 2.3519562279418014, "learning_rate": 3.8853629145633646e-07, "loss": 1.0091, "step": 7816 }, { "epoch": 0.8221173439204912, "grad_norm": 1.9766869386337151, "learning_rate": 3.880894295310672e-07, "loss": 0.9603, "step": 7817 }, { "epoch": 0.8222225143623385, "grad_norm": 2.3412319875825585, "learning_rate": 3.87642803100233e-07, "loss": 0.987, "step": 7818 }, { "epoch": 0.8223276848041858, "grad_norm": 2.2093606730393156, "learning_rate": 3.8719641221363514e-07, "loss": 0.9679, "step": 7819 }, { "epoch": 0.8224328552460332, "grad_norm": 3.16030150590197, "learning_rate": 3.8675025692105015e-07, "loss": 0.9895, "step": 7820 }, { "epoch": 0.8225380256878804, "grad_norm": 2.3349525577022585, "learning_rate": 3.863043372722278e-07, "loss": 1.0192, "step": 7821 }, { "epoch": 0.8226431961297277, "grad_norm": 2.6371286764996618, "learning_rate": 3.858586533168923e-07, "loss": 0.96, "step": 7822 }, { "epoch": 0.822748366571575, "grad_norm": 2.30573441792884, "learning_rate": 3.8541320510473963e-07, "loss": 0.9927, "step": 7823 }, { "epoch": 0.8228535370134223, "grad_norm": 2.873334351698845, "learning_rate": 3.849679926854416e-07, "loss": 0.9672, "step": 7824 }, { "epoch": 0.8229587074552697, "grad_norm": 2.4655038088603978, "learning_rate": 3.845230161086422e-07, "loss": 1.0114, "step": 7825 }, { "epoch": 0.823063877897117, "grad_norm": 2.277321669840928, "learning_rate": 3.840782754239597e-07, "loss": 0.9895, "step": 7826 }, { "epoch": 0.8231690483389643, "grad_norm": 2.4885441680443154, "learning_rate": 3.8363377068098756e-07, "loss": 1.0021, "step": 7827 }, { "epoch": 0.8232742187808116, "grad_norm": 2.581402189491797, "learning_rate": 3.831895019292897e-07, "loss": 0.9841, "step": 7828 }, { "epoch": 0.823379389222659, "grad_norm": 2.6225815215802952, "learning_rate": 3.8274546921840653e-07, "loss": 0.98, "step": 7829 }, { "epoch": 0.8234845596645063, "grad_norm": 2.672257972613925, "learning_rate": 3.823016725978507e-07, "loss": 0.917, "step": 7830 }, { "epoch": 0.8235897301063536, "grad_norm": 2.6941147355501207, "learning_rate": 3.8185811211710997e-07, "loss": 0.9917, "step": 7831 }, { "epoch": 0.8236949005482009, "grad_norm": 2.5181042464817778, "learning_rate": 3.814147878256441e-07, "loss": 0.9903, "step": 7832 }, { "epoch": 0.8238000709900483, "grad_norm": 2.982863876704575, "learning_rate": 3.809716997728863e-07, "loss": 0.9598, "step": 7833 }, { "epoch": 0.8239052414318956, "grad_norm": 2.4829472094226075, "learning_rate": 3.805288480082453e-07, "loss": 1.0105, "step": 7834 }, { "epoch": 0.8240104118737429, "grad_norm": 2.262790895769715, "learning_rate": 3.800862325811025e-07, "loss": 0.9802, "step": 7835 }, { "epoch": 0.8241155823155902, "grad_norm": 2.3779766898281287, "learning_rate": 3.796438535408134e-07, "loss": 0.9596, "step": 7836 }, { "epoch": 0.8242207527574376, "grad_norm": 2.5486165785150594, "learning_rate": 3.792017109367052e-07, "loss": 0.9497, "step": 7837 }, { "epoch": 0.8243259231992849, "grad_norm": 2.2377493045016306, "learning_rate": 3.787598048180821e-07, "loss": 0.9521, "step": 7838 }, { "epoch": 0.8244310936411322, "grad_norm": 2.560942533796768, "learning_rate": 3.7831813523421856e-07, "loss": 0.9648, "step": 7839 }, { "epoch": 0.8245362640829795, "grad_norm": 3.155199120538444, "learning_rate": 3.7787670223436464e-07, "loss": 1.0093, "step": 7840 }, { "epoch": 0.8246414345248267, "grad_norm": 2.5558594549066047, "learning_rate": 3.7743550586774425e-07, "loss": 0.9772, "step": 7841 }, { "epoch": 0.8247466049666741, "grad_norm": 3.286085851170872, "learning_rate": 3.7699454618355306e-07, "loss": 1.0098, "step": 7842 }, { "epoch": 0.8248517754085214, "grad_norm": 3.071919558180755, "learning_rate": 3.765538232309621e-07, "loss": 0.978, "step": 7843 }, { "epoch": 0.8249569458503687, "grad_norm": 1.9611542629163186, "learning_rate": 3.7611333705911526e-07, "loss": 0.9318, "step": 7844 }, { "epoch": 0.825062116292216, "grad_norm": 2.668955460154382, "learning_rate": 3.756730877171308e-07, "loss": 0.9825, "step": 7845 }, { "epoch": 0.8251672867340634, "grad_norm": 2.097041197652859, "learning_rate": 3.752330752540989e-07, "loss": 0.951, "step": 7846 }, { "epoch": 0.8252724571759107, "grad_norm": 2.4381455759333552, "learning_rate": 3.747932997190853e-07, "loss": 0.9731, "step": 7847 }, { "epoch": 0.825377627617758, "grad_norm": 1.9389944794723009, "learning_rate": 3.7435376116112753e-07, "loss": 0.9945, "step": 7848 }, { "epoch": 0.8254827980596053, "grad_norm": 2.6544698505058437, "learning_rate": 3.7391445962923824e-07, "loss": 0.9866, "step": 7849 }, { "epoch": 0.8255879685014527, "grad_norm": 2.205561586287741, "learning_rate": 3.734753951724029e-07, "loss": 0.9731, "step": 7850 }, { "epoch": 0.8256931389433, "grad_norm": 2.4950986903703165, "learning_rate": 3.7303656783958e-07, "loss": 0.9696, "step": 7851 }, { "epoch": 0.8257983093851473, "grad_norm": 2.824630018964161, "learning_rate": 3.7259797767970255e-07, "loss": 1.0108, "step": 7852 }, { "epoch": 0.8259034798269946, "grad_norm": 1.5885466979887306, "learning_rate": 3.7215962474167754e-07, "loss": 0.9861, "step": 7853 }, { "epoch": 0.826008650268842, "grad_norm": 2.407273379719963, "learning_rate": 3.717215090743834e-07, "loss": 0.9186, "step": 7854 }, { "epoch": 0.8261138207106893, "grad_norm": 2.108109969570331, "learning_rate": 3.7128363072667455e-07, "loss": 0.9826, "step": 7855 }, { "epoch": 0.8262189911525366, "grad_norm": 2.125810712110948, "learning_rate": 3.708459897473768e-07, "loss": 0.9589, "step": 7856 }, { "epoch": 0.8263241615943839, "grad_norm": 2.276739627965222, "learning_rate": 3.70408586185291e-07, "loss": 0.9668, "step": 7857 }, { "epoch": 0.8264293320362313, "grad_norm": 1.9368576617583315, "learning_rate": 3.69971420089191e-07, "loss": 1.0004, "step": 7858 }, { "epoch": 0.8265345024780786, "grad_norm": 2.122279799459209, "learning_rate": 3.69534491507825e-07, "loss": 0.9629, "step": 7859 }, { "epoch": 0.8266396729199259, "grad_norm": 3.094014832454307, "learning_rate": 3.6909780048991264e-07, "loss": 0.9908, "step": 7860 }, { "epoch": 0.8267448433617731, "grad_norm": 2.759129120062527, "learning_rate": 3.6866134708414964e-07, "loss": 0.98, "step": 7861 }, { "epoch": 0.8268500138036204, "grad_norm": 2.276400624904361, "learning_rate": 3.682251313392024e-07, "loss": 1.0001, "step": 7862 }, { "epoch": 0.8269551842454678, "grad_norm": 1.7834147878732247, "learning_rate": 3.677891533037134e-07, "loss": 0.9976, "step": 7863 }, { "epoch": 0.8270603546873151, "grad_norm": 2.167298852018403, "learning_rate": 3.6735341302629816e-07, "loss": 0.991, "step": 7864 }, { "epoch": 0.8271655251291624, "grad_norm": 2.480960772681218, "learning_rate": 3.6691791055554344e-07, "loss": 0.9629, "step": 7865 }, { "epoch": 0.8272706955710097, "grad_norm": 2.448579622721506, "learning_rate": 3.664826459400123e-07, "loss": 0.9664, "step": 7866 }, { "epoch": 0.8273758660128571, "grad_norm": 2.2157167037720105, "learning_rate": 3.660476192282403e-07, "loss": 0.9307, "step": 7867 }, { "epoch": 0.8274810364547044, "grad_norm": 2.4536996567992246, "learning_rate": 3.656128304687354e-07, "loss": 0.9936, "step": 7868 }, { "epoch": 0.8275862068965517, "grad_norm": 3.368456460878063, "learning_rate": 3.6517827970998095e-07, "loss": 0.9781, "step": 7869 }, { "epoch": 0.827691377338399, "grad_norm": 1.71414901522867, "learning_rate": 3.647439670004316e-07, "loss": 0.9807, "step": 7870 }, { "epoch": 0.8277965477802464, "grad_norm": 2.97285183356878, "learning_rate": 3.6430989238851717e-07, "loss": 0.9555, "step": 7871 }, { "epoch": 0.8279017182220937, "grad_norm": 2.96702239531014, "learning_rate": 3.6387605592264036e-07, "loss": 0.9843, "step": 7872 }, { "epoch": 0.828006888663941, "grad_norm": 2.166596883395088, "learning_rate": 3.63442457651178e-07, "loss": 0.9585, "step": 7873 }, { "epoch": 0.8281120591057883, "grad_norm": 2.5899405106290123, "learning_rate": 3.6300909762247813e-07, "loss": 0.9518, "step": 7874 }, { "epoch": 0.8282172295476357, "grad_norm": 2.2108133293110157, "learning_rate": 3.625759758848649e-07, "loss": 0.9702, "step": 7875 }, { "epoch": 0.828322399989483, "grad_norm": 1.7475000460222763, "learning_rate": 3.621430924866348e-07, "loss": 0.9557, "step": 7876 }, { "epoch": 0.8284275704313303, "grad_norm": 2.9751242328091165, "learning_rate": 3.617104474760569e-07, "loss": 0.9627, "step": 7877 }, { "epoch": 0.8285327408731776, "grad_norm": 2.2824598602628066, "learning_rate": 3.612780409013755e-07, "loss": 0.9557, "step": 7878 }, { "epoch": 0.828637911315025, "grad_norm": 2.560107410245565, "learning_rate": 3.6084587281080605e-07, "loss": 0.9479, "step": 7879 }, { "epoch": 0.8287430817568723, "grad_norm": 2.6404332069080954, "learning_rate": 3.604139432525394e-07, "loss": 1.0084, "step": 7880 }, { "epoch": 0.8288482521987196, "grad_norm": 3.5468570151691994, "learning_rate": 3.59982252274739e-07, "loss": 0.9756, "step": 7881 }, { "epoch": 0.8289534226405668, "grad_norm": 3.1714421863078326, "learning_rate": 3.595507999255421e-07, "loss": 0.9722, "step": 7882 }, { "epoch": 0.8290585930824141, "grad_norm": 1.7670191648623403, "learning_rate": 3.591195862530586e-07, "loss": 0.9664, "step": 7883 }, { "epoch": 0.8291637635242615, "grad_norm": 2.8058619365312394, "learning_rate": 3.586886113053717e-07, "loss": 1.0019, "step": 7884 }, { "epoch": 0.8292689339661088, "grad_norm": 2.274264569970648, "learning_rate": 3.5825787513053905e-07, "loss": 1.001, "step": 7885 }, { "epoch": 0.8293741044079561, "grad_norm": 2.1691351608255953, "learning_rate": 3.5782737777659086e-07, "loss": 0.9753, "step": 7886 }, { "epoch": 0.8294792748498034, "grad_norm": 3.0588797917567683, "learning_rate": 3.573971192915315e-07, "loss": 0.9733, "step": 7887 }, { "epoch": 0.8295844452916508, "grad_norm": 2.2148340995812124, "learning_rate": 3.569670997233371e-07, "loss": 0.9621, "step": 7888 }, { "epoch": 0.8296896157334981, "grad_norm": 3.2864299842355895, "learning_rate": 3.5653731911995893e-07, "loss": 1.0221, "step": 7889 }, { "epoch": 0.8297947861753454, "grad_norm": 2.4206938864595036, "learning_rate": 3.5610777752932117e-07, "loss": 0.9424, "step": 7890 }, { "epoch": 0.8298999566171927, "grad_norm": 2.524736186816006, "learning_rate": 3.5567847499932004e-07, "loss": 1.0228, "step": 7891 }, { "epoch": 0.8300051270590401, "grad_norm": 2.8928862446277224, "learning_rate": 3.552494115778271e-07, "loss": 0.9781, "step": 7892 }, { "epoch": 0.8301102975008874, "grad_norm": 1.951995142147348, "learning_rate": 3.5482058731268503e-07, "loss": 0.9185, "step": 7893 }, { "epoch": 0.8302154679427347, "grad_norm": 1.5618142840999194, "learning_rate": 3.5439200225171193e-07, "loss": 0.8677, "step": 7894 }, { "epoch": 0.830320638384582, "grad_norm": 2.567017305726929, "learning_rate": 3.539636564426985e-07, "loss": 1.0362, "step": 7895 }, { "epoch": 0.8304258088264294, "grad_norm": 2.357574361477999, "learning_rate": 3.535355499334087e-07, "loss": 1.0022, "step": 7896 }, { "epoch": 0.8305309792682767, "grad_norm": 2.3855821109988904, "learning_rate": 3.5310768277157875e-07, "loss": 0.9882, "step": 7897 }, { "epoch": 0.830636149710124, "grad_norm": 2.4659785940655357, "learning_rate": 3.5268005500492004e-07, "loss": 0.9991, "step": 7898 }, { "epoch": 0.8307413201519713, "grad_norm": 2.8955739385288086, "learning_rate": 3.522526666811166e-07, "loss": 0.9721, "step": 7899 }, { "epoch": 0.8308464905938187, "grad_norm": 2.2199143818637275, "learning_rate": 3.518255178478247e-07, "loss": 1.028, "step": 7900 }, { "epoch": 0.830951661035666, "grad_norm": 2.198553962750598, "learning_rate": 3.513986085526755e-07, "loss": 0.949, "step": 7901 }, { "epoch": 0.8310568314775132, "grad_norm": 2.6375318358953828, "learning_rate": 3.50971938843272e-07, "loss": 1.0045, "step": 7902 }, { "epoch": 0.8311620019193605, "grad_norm": 2.2539176960571585, "learning_rate": 3.5054550876719134e-07, "loss": 0.9797, "step": 7903 }, { "epoch": 0.8312671723612078, "grad_norm": 2.621497832230979, "learning_rate": 3.5011931837198476e-07, "loss": 0.9706, "step": 7904 }, { "epoch": 0.8313723428030552, "grad_norm": 2.408630576835979, "learning_rate": 3.496933677051745e-07, "loss": 1.0101, "step": 7905 }, { "epoch": 0.8314775132449025, "grad_norm": 2.5560399097177635, "learning_rate": 3.4926765681425835e-07, "loss": 1.0304, "step": 7906 }, { "epoch": 0.8315826836867498, "grad_norm": 2.6823556900283565, "learning_rate": 3.488421857467053e-07, "loss": 0.9543, "step": 7907 }, { "epoch": 0.8316878541285971, "grad_norm": 1.7262174828332262, "learning_rate": 3.484169545499594e-07, "loss": 1.0048, "step": 7908 }, { "epoch": 0.8317930245704445, "grad_norm": 2.243625820617893, "learning_rate": 3.479919632714374e-07, "loss": 0.9714, "step": 7909 }, { "epoch": 0.8318981950122918, "grad_norm": 2.2906586964812985, "learning_rate": 3.475672119585291e-07, "loss": 0.9659, "step": 7910 }, { "epoch": 0.8320033654541391, "grad_norm": 2.6417368186284964, "learning_rate": 3.4714270065859673e-07, "loss": 0.9472, "step": 7911 }, { "epoch": 0.8321085358959864, "grad_norm": 3.0260483510274017, "learning_rate": 3.4671842941897764e-07, "loss": 1.0046, "step": 7912 }, { "epoch": 0.8322137063378338, "grad_norm": 2.59174736426387, "learning_rate": 3.4629439828698125e-07, "loss": 1.0117, "step": 7913 }, { "epoch": 0.8323188767796811, "grad_norm": 2.855621058386072, "learning_rate": 3.4587060730988943e-07, "loss": 0.9702, "step": 7914 }, { "epoch": 0.8324240472215284, "grad_norm": 2.3861443308093295, "learning_rate": 3.4544705653495936e-07, "loss": 0.9683, "step": 7915 }, { "epoch": 0.8325292176633757, "grad_norm": 2.956455393722912, "learning_rate": 3.4502374600941915e-07, "loss": 0.9796, "step": 7916 }, { "epoch": 0.8326343881052231, "grad_norm": 3.3272393954310275, "learning_rate": 3.446006757804715e-07, "loss": 0.9626, "step": 7917 }, { "epoch": 0.8327395585470704, "grad_norm": 3.117936160153246, "learning_rate": 3.44177845895293e-07, "loss": 1.0031, "step": 7918 }, { "epoch": 0.8328447289889177, "grad_norm": 2.3984852683425406, "learning_rate": 3.4375525640103103e-07, "loss": 0.9611, "step": 7919 }, { "epoch": 0.832949899430765, "grad_norm": 2.548645604067526, "learning_rate": 3.4333290734480885e-07, "loss": 0.9952, "step": 7920 }, { "epoch": 0.8330550698726124, "grad_norm": 2.5500483837731815, "learning_rate": 3.4291079877371984e-07, "loss": 0.9917, "step": 7921 }, { "epoch": 0.8331602403144596, "grad_norm": 2.514529585621814, "learning_rate": 3.4248893073483504e-07, "loss": 1.0036, "step": 7922 }, { "epoch": 0.8332654107563069, "grad_norm": 2.025207298660727, "learning_rate": 3.4206730327519365e-07, "loss": 0.978, "step": 7923 }, { "epoch": 0.8333705811981542, "grad_norm": 2.5083140341213355, "learning_rate": 3.4164591644181233e-07, "loss": 0.9394, "step": 7924 }, { "epoch": 0.8334757516400015, "grad_norm": 3.419460327667554, "learning_rate": 3.412247702816771e-07, "loss": 0.9656, "step": 7925 }, { "epoch": 0.8335809220818489, "grad_norm": 2.5984729823260144, "learning_rate": 3.408038648417503e-07, "loss": 0.9887, "step": 7926 }, { "epoch": 0.8336860925236962, "grad_norm": 2.3818846798670767, "learning_rate": 3.403832001689661e-07, "loss": 0.9715, "step": 7927 }, { "epoch": 0.8337912629655435, "grad_norm": 2.210634104658908, "learning_rate": 3.399627763102309e-07, "loss": 0.978, "step": 7928 }, { "epoch": 0.8338964334073908, "grad_norm": 2.5371862459046146, "learning_rate": 3.3954259331242664e-07, "loss": 0.9618, "step": 7929 }, { "epoch": 0.8340016038492382, "grad_norm": 2.480566157647304, "learning_rate": 3.391226512224055e-07, "loss": 0.9725, "step": 7930 }, { "epoch": 0.8341067742910855, "grad_norm": 2.1229236675007845, "learning_rate": 3.387029500869954e-07, "loss": 0.9619, "step": 7931 }, { "epoch": 0.8342119447329328, "grad_norm": 1.9698108801024228, "learning_rate": 3.3828348995299575e-07, "loss": 0.9741, "step": 7932 }, { "epoch": 0.8343171151747801, "grad_norm": 1.9418318681400226, "learning_rate": 3.3786427086718006e-07, "loss": 0.9577, "step": 7933 }, { "epoch": 0.8344222856166275, "grad_norm": 3.3957679090439017, "learning_rate": 3.3744529287629446e-07, "loss": 0.9471, "step": 7934 }, { "epoch": 0.8345274560584748, "grad_norm": 2.4124644360616436, "learning_rate": 3.3702655602705693e-07, "loss": 0.983, "step": 7935 }, { "epoch": 0.8346326265003221, "grad_norm": 2.7101113991825825, "learning_rate": 3.36608060366162e-07, "loss": 0.9811, "step": 7936 }, { "epoch": 0.8347377969421694, "grad_norm": 2.2798654842255157, "learning_rate": 3.361898059402738e-07, "loss": 0.9592, "step": 7937 }, { "epoch": 0.8348429673840168, "grad_norm": 2.982597973091621, "learning_rate": 3.357717927960316e-07, "loss": 1.0268, "step": 7938 }, { "epoch": 0.8349481378258641, "grad_norm": 2.298141064600936, "learning_rate": 3.3535402098004626e-07, "loss": 0.9903, "step": 7939 }, { "epoch": 0.8350533082677114, "grad_norm": 2.3752357601389704, "learning_rate": 3.3493649053890325e-07, "loss": 0.9704, "step": 7940 }, { "epoch": 0.8351584787095587, "grad_norm": 2.763407955749695, "learning_rate": 3.345192015191609e-07, "loss": 0.9829, "step": 7941 }, { "epoch": 0.835263649151406, "grad_norm": 2.828981552542588, "learning_rate": 3.341021539673492e-07, "loss": 1.0321, "step": 7942 }, { "epoch": 0.8353688195932533, "grad_norm": 2.2262635053414574, "learning_rate": 3.3368534792997285e-07, "loss": 0.9639, "step": 7943 }, { "epoch": 0.8354739900351006, "grad_norm": 2.446688699369652, "learning_rate": 3.3326878345350784e-07, "loss": 1.0052, "step": 7944 }, { "epoch": 0.8355791604769479, "grad_norm": 2.7455178000846363, "learning_rate": 3.3285246058440635e-07, "loss": 0.9826, "step": 7945 }, { "epoch": 0.8356843309187952, "grad_norm": 2.464176366963785, "learning_rate": 3.324363793690899e-07, "loss": 0.9878, "step": 7946 }, { "epoch": 0.8357895013606426, "grad_norm": 2.2726195863266705, "learning_rate": 3.320205398539561e-07, "loss": 0.9455, "step": 7947 }, { "epoch": 0.8358946718024899, "grad_norm": 2.2989720657926402, "learning_rate": 3.316049420853729e-07, "loss": 0.9815, "step": 7948 }, { "epoch": 0.8359998422443372, "grad_norm": 2.1279969880508705, "learning_rate": 3.311895861096831e-07, "loss": 0.9454, "step": 7949 }, { "epoch": 0.8361050126861845, "grad_norm": 2.6446950816118195, "learning_rate": 3.307744719732031e-07, "loss": 0.9773, "step": 7950 }, { "epoch": 0.8362101831280319, "grad_norm": 1.955699911337364, "learning_rate": 3.3035959972222e-07, "loss": 0.9484, "step": 7951 }, { "epoch": 0.8363153535698792, "grad_norm": 2.332225298540404, "learning_rate": 3.299449694029966e-07, "loss": 0.9963, "step": 7952 }, { "epoch": 0.8364205240117265, "grad_norm": 4.216667098594006, "learning_rate": 3.2953058106176607e-07, "loss": 1.0338, "step": 7953 }, { "epoch": 0.8365256944535738, "grad_norm": 2.0765665881975752, "learning_rate": 3.2911643474473647e-07, "loss": 0.9991, "step": 7954 }, { "epoch": 0.8366308648954212, "grad_norm": 2.044749035524921, "learning_rate": 3.28702530498089e-07, "loss": 0.9589, "step": 7955 }, { "epoch": 0.8367360353372685, "grad_norm": 3.061177661810104, "learning_rate": 3.282888683679758e-07, "loss": 1.0022, "step": 7956 }, { "epoch": 0.8368412057791158, "grad_norm": 1.852367172280801, "learning_rate": 3.2787544840052503e-07, "loss": 0.9729, "step": 7957 }, { "epoch": 0.8369463762209631, "grad_norm": 2.4129977271544436, "learning_rate": 3.274622706418346e-07, "loss": 0.9655, "step": 7958 }, { "epoch": 0.8370515466628105, "grad_norm": 2.467914364505314, "learning_rate": 3.270493351379786e-07, "loss": 0.9492, "step": 7959 }, { "epoch": 0.8371567171046578, "grad_norm": 2.311681107608195, "learning_rate": 3.2663664193500114e-07, "loss": 0.9855, "step": 7960 }, { "epoch": 0.8372618875465051, "grad_norm": 2.670758248675138, "learning_rate": 3.262241910789221e-07, "loss": 0.9683, "step": 7961 }, { "epoch": 0.8373670579883524, "grad_norm": 2.1454789719559284, "learning_rate": 3.258119826157319e-07, "loss": 0.9359, "step": 7962 }, { "epoch": 0.8374722284301996, "grad_norm": 2.4776693808188446, "learning_rate": 3.2540001659139506e-07, "loss": 1.0157, "step": 7963 }, { "epoch": 0.837577398872047, "grad_norm": 1.8011863229194303, "learning_rate": 3.249882930518497e-07, "loss": 0.9874, "step": 7964 }, { "epoch": 0.8376825693138943, "grad_norm": 2.168737945078986, "learning_rate": 3.245768120430054e-07, "loss": 0.993, "step": 7965 }, { "epoch": 0.8377877397557416, "grad_norm": 3.0895348956728617, "learning_rate": 3.241655736107466e-07, "loss": 0.9992, "step": 7966 }, { "epoch": 0.8378929101975889, "grad_norm": 3.3010610656877093, "learning_rate": 3.237545778009274e-07, "loss": 0.9793, "step": 7967 }, { "epoch": 0.8379980806394363, "grad_norm": 2.7672284112761956, "learning_rate": 3.233438246593795e-07, "loss": 0.9919, "step": 7968 }, { "epoch": 0.8381032510812836, "grad_norm": 2.0517422870765625, "learning_rate": 3.229333142319041e-07, "loss": 0.991, "step": 7969 }, { "epoch": 0.8382084215231309, "grad_norm": 2.0009951842118143, "learning_rate": 3.2252304656427566e-07, "loss": 0.9795, "step": 7970 }, { "epoch": 0.8383135919649782, "grad_norm": 2.4005492702244977, "learning_rate": 3.221130217022433e-07, "loss": 0.9675, "step": 7971 }, { "epoch": 0.8384187624068256, "grad_norm": 2.1804042366396232, "learning_rate": 3.217032396915265e-07, "loss": 0.972, "step": 7972 }, { "epoch": 0.8385239328486729, "grad_norm": 2.973961335735684, "learning_rate": 3.2129370057782106e-07, "loss": 0.9951, "step": 7973 }, { "epoch": 0.8386291032905202, "grad_norm": 2.025266752429975, "learning_rate": 3.208844044067921e-07, "loss": 1.0002, "step": 7974 }, { "epoch": 0.8387342737323675, "grad_norm": 2.4059729700613457, "learning_rate": 3.2047535122408076e-07, "loss": 0.9795, "step": 7975 }, { "epoch": 0.8388394441742149, "grad_norm": 2.753932435517075, "learning_rate": 3.200665410752982e-07, "loss": 0.9798, "step": 7976 }, { "epoch": 0.8389446146160622, "grad_norm": 2.412042721936347, "learning_rate": 3.196579740060307e-07, "loss": 0.9764, "step": 7977 }, { "epoch": 0.8390497850579095, "grad_norm": 2.922557537170381, "learning_rate": 3.192496500618372e-07, "loss": 0.9586, "step": 7978 }, { "epoch": 0.8391549554997568, "grad_norm": 2.2610207215000173, "learning_rate": 3.188415692882477e-07, "loss": 0.9687, "step": 7979 }, { "epoch": 0.8392601259416042, "grad_norm": 2.6933915853361006, "learning_rate": 3.1843373173076784e-07, "loss": 1.0122, "step": 7980 }, { "epoch": 0.8393652963834515, "grad_norm": 1.9203482516232904, "learning_rate": 3.1802613743487255e-07, "loss": 0.9712, "step": 7981 }, { "epoch": 0.8394704668252988, "grad_norm": 2.8523465380370348, "learning_rate": 3.1761878644601425e-07, "loss": 0.9694, "step": 7982 }, { "epoch": 0.839575637267146, "grad_norm": 2.2911244103217774, "learning_rate": 3.172116788096141e-07, "loss": 0.9529, "step": 7983 }, { "epoch": 0.8396808077089933, "grad_norm": 2.715895442199244, "learning_rate": 3.1680481457106895e-07, "loss": 0.9757, "step": 7984 }, { "epoch": 0.8397859781508407, "grad_norm": 2.250183946803774, "learning_rate": 3.1639819377574624e-07, "loss": 0.967, "step": 7985 }, { "epoch": 0.839891148592688, "grad_norm": 2.107649904873707, "learning_rate": 3.159918164689871e-07, "loss": 1.0024, "step": 7986 }, { "epoch": 0.8399963190345353, "grad_norm": 2.094200221460402, "learning_rate": 3.1558568269610707e-07, "loss": 0.9948, "step": 7987 }, { "epoch": 0.8401014894763826, "grad_norm": 2.144487336078061, "learning_rate": 3.151797925023922e-07, "loss": 1.03, "step": 7988 }, { "epoch": 0.84020665991823, "grad_norm": 2.264741520119065, "learning_rate": 3.147741459331033e-07, "loss": 0.9903, "step": 7989 }, { "epoch": 0.8403118303600773, "grad_norm": 2.3375622944629764, "learning_rate": 3.1436874303347185e-07, "loss": 0.9826, "step": 7990 }, { "epoch": 0.8404170008019246, "grad_norm": 2.5671078673407473, "learning_rate": 3.1396358384870426e-07, "loss": 0.9646, "step": 7991 }, { "epoch": 0.8405221712437719, "grad_norm": 2.3619671652091667, "learning_rate": 3.135586684239794e-07, "loss": 0.9667, "step": 7992 }, { "epoch": 0.8406273416856193, "grad_norm": 2.416335511787064, "learning_rate": 3.1315399680444727e-07, "loss": 0.9691, "step": 7993 }, { "epoch": 0.8407325121274666, "grad_norm": 1.9661025621449095, "learning_rate": 3.127495690352331e-07, "loss": 0.9958, "step": 7994 }, { "epoch": 0.8408376825693139, "grad_norm": 3.035263773083995, "learning_rate": 3.1234538516143194e-07, "loss": 0.9768, "step": 7995 }, { "epoch": 0.8409428530111612, "grad_norm": 2.0867961026565207, "learning_rate": 3.119414452281158e-07, "loss": 1.0095, "step": 7996 }, { "epoch": 0.8410480234530086, "grad_norm": 2.7704288005888826, "learning_rate": 3.1153774928032564e-07, "loss": 0.9915, "step": 7997 }, { "epoch": 0.8411531938948559, "grad_norm": 2.6501249494588413, "learning_rate": 3.111342973630771e-07, "loss": 0.9808, "step": 7998 }, { "epoch": 0.8412583643367032, "grad_norm": 2.438311465297352, "learning_rate": 3.107310895213578e-07, "loss": 0.962, "step": 7999 }, { "epoch": 0.8413635347785505, "grad_norm": 2.345077898806914, "learning_rate": 3.1032812580012895e-07, "loss": 0.9818, "step": 8000 }, { "epoch": 0.8414687052203979, "grad_norm": 2.891411545914225, "learning_rate": 3.0992540624432436e-07, "loss": 0.9883, "step": 8001 }, { "epoch": 0.8415738756622452, "grad_norm": 2.415834378113018, "learning_rate": 3.0952293089884984e-07, "loss": 0.9521, "step": 8002 }, { "epoch": 0.8416790461040925, "grad_norm": 2.8617855981045426, "learning_rate": 3.0912069980858496e-07, "loss": 0.9902, "step": 8003 }, { "epoch": 0.8417842165459397, "grad_norm": 2.8879149599658422, "learning_rate": 3.0871871301838053e-07, "loss": 1.0216, "step": 8004 }, { "epoch": 0.841889386987787, "grad_norm": 1.8612027892835419, "learning_rate": 3.083169705730629e-07, "loss": 0.96, "step": 8005 }, { "epoch": 0.8419945574296344, "grad_norm": 2.6330298233689122, "learning_rate": 3.079154725174288e-07, "loss": 0.9465, "step": 8006 }, { "epoch": 0.8420997278714817, "grad_norm": 2.138955256278022, "learning_rate": 3.0751421889624766e-07, "loss": 0.947, "step": 8007 }, { "epoch": 0.842204898313329, "grad_norm": 2.616727462598564, "learning_rate": 3.0711320975426367e-07, "loss": 0.9434, "step": 8008 }, { "epoch": 0.8423100687551763, "grad_norm": 2.4895885340978543, "learning_rate": 3.067124451361905e-07, "loss": 1.0038, "step": 8009 }, { "epoch": 0.8424152391970237, "grad_norm": 3.4008930173947873, "learning_rate": 3.063119250867186e-07, "loss": 0.9394, "step": 8010 }, { "epoch": 0.842520409638871, "grad_norm": 2.851466967075379, "learning_rate": 3.05911649650508e-07, "loss": 0.954, "step": 8011 }, { "epoch": 0.8426255800807183, "grad_norm": 2.9185957846098027, "learning_rate": 3.055116188721932e-07, "loss": 1.0138, "step": 8012 }, { "epoch": 0.8427307505225656, "grad_norm": 2.260085530350411, "learning_rate": 3.051118327963798e-07, "loss": 0.9917, "step": 8013 }, { "epoch": 0.842835920964413, "grad_norm": 2.458882313705006, "learning_rate": 3.047122914676473e-07, "loss": 1.0258, "step": 8014 }, { "epoch": 0.8429410914062603, "grad_norm": 3.1014214186637883, "learning_rate": 3.043129949305487e-07, "loss": 0.9228, "step": 8015 }, { "epoch": 0.8430462618481076, "grad_norm": 1.6614260018195441, "learning_rate": 3.0391394322960706e-07, "loss": 0.9825, "step": 8016 }, { "epoch": 0.8431514322899549, "grad_norm": 2.0667501456498605, "learning_rate": 3.035151364093214e-07, "loss": 1.0093, "step": 8017 }, { "epoch": 0.8432566027318023, "grad_norm": 2.598733278916, "learning_rate": 3.031165745141595e-07, "loss": 0.9792, "step": 8018 }, { "epoch": 0.8433617731736496, "grad_norm": 2.5060894728850913, "learning_rate": 3.0271825758856675e-07, "loss": 0.977, "step": 8019 }, { "epoch": 0.8434669436154969, "grad_norm": 2.55384223599344, "learning_rate": 3.02320185676957e-07, "loss": 1.0313, "step": 8020 }, { "epoch": 0.8435721140573442, "grad_norm": 3.1720208128466916, "learning_rate": 3.0192235882371913e-07, "loss": 0.9646, "step": 8021 }, { "epoch": 0.8436772844991915, "grad_norm": 2.813164544453831, "learning_rate": 3.01524777073213e-07, "loss": 0.9844, "step": 8022 }, { "epoch": 0.8437824549410389, "grad_norm": 1.7681715170092602, "learning_rate": 3.0112744046977255e-07, "loss": 0.9852, "step": 8023 }, { "epoch": 0.8438876253828861, "grad_norm": 2.8437496154777326, "learning_rate": 3.007303490577043e-07, "loss": 1.0048, "step": 8024 }, { "epoch": 0.8439927958247334, "grad_norm": 2.7174670709863884, "learning_rate": 3.003335028812862e-07, "loss": 0.978, "step": 8025 }, { "epoch": 0.8440979662665807, "grad_norm": 3.067825714603444, "learning_rate": 2.999369019847709e-07, "loss": 0.9515, "step": 8026 }, { "epoch": 0.8442031367084281, "grad_norm": 2.8915929181982105, "learning_rate": 2.99540546412381e-07, "loss": 1.0105, "step": 8027 }, { "epoch": 0.8443083071502754, "grad_norm": 1.8900985976770452, "learning_rate": 2.99144436208314e-07, "loss": 0.9914, "step": 8028 }, { "epoch": 0.8444134775921227, "grad_norm": 2.2877820769502213, "learning_rate": 2.987485714167396e-07, "loss": 0.9966, "step": 8029 }, { "epoch": 0.84451864803397, "grad_norm": 2.196650528892029, "learning_rate": 2.9835295208179907e-07, "loss": 0.9936, "step": 8030 }, { "epoch": 0.8446238184758174, "grad_norm": 3.2894078035994663, "learning_rate": 2.9795757824760796e-07, "loss": 1.0142, "step": 8031 }, { "epoch": 0.8447289889176647, "grad_norm": 2.297997513581351, "learning_rate": 2.975624499582516e-07, "loss": 0.911, "step": 8032 }, { "epoch": 0.844834159359512, "grad_norm": 2.647899001008078, "learning_rate": 2.9716756725779254e-07, "loss": 0.9509, "step": 8033 }, { "epoch": 0.8449393298013593, "grad_norm": 2.360019920855114, "learning_rate": 2.967729301902611e-07, "loss": 1.0027, "step": 8034 }, { "epoch": 0.8450445002432067, "grad_norm": 2.999008966583943, "learning_rate": 2.96378538799664e-07, "loss": 1.0052, "step": 8035 }, { "epoch": 0.845149670685054, "grad_norm": 2.6618078141442987, "learning_rate": 2.9598439312997745e-07, "loss": 0.9906, "step": 8036 }, { "epoch": 0.8452548411269013, "grad_norm": 2.4720173368158074, "learning_rate": 2.955904932251527e-07, "loss": 0.9599, "step": 8037 }, { "epoch": 0.8453600115687486, "grad_norm": 2.8992055015069678, "learning_rate": 2.9519683912911267e-07, "loss": 1.0055, "step": 8038 }, { "epoch": 0.845465182010596, "grad_norm": 2.265207100310217, "learning_rate": 2.9480343088575227e-07, "loss": 1.0136, "step": 8039 }, { "epoch": 0.8455703524524433, "grad_norm": 2.4872651865839615, "learning_rate": 2.9441026853894024e-07, "loss": 0.9741, "step": 8040 }, { "epoch": 0.8456755228942906, "grad_norm": 2.3337883217309714, "learning_rate": 2.940173521325165e-07, "loss": 0.9959, "step": 8041 }, { "epoch": 0.8457806933361379, "grad_norm": 2.4394701294128054, "learning_rate": 2.936246817102947e-07, "loss": 0.9584, "step": 8042 }, { "epoch": 0.8458858637779852, "grad_norm": 1.8882443397504602, "learning_rate": 2.9323225731606094e-07, "loss": 0.9236, "step": 8043 }, { "epoch": 0.8459910342198325, "grad_norm": 2.2667033507578913, "learning_rate": 2.9284007899357286e-07, "loss": 0.9221, "step": 8044 }, { "epoch": 0.8460962046616798, "grad_norm": 2.370795500055156, "learning_rate": 2.924481467865617e-07, "loss": 0.9892, "step": 8045 }, { "epoch": 0.8462013751035271, "grad_norm": 2.6229927783843827, "learning_rate": 2.920564607387313e-07, "loss": 1.0033, "step": 8046 }, { "epoch": 0.8463065455453744, "grad_norm": 2.675563045044748, "learning_rate": 2.916650208937577e-07, "loss": 0.9703, "step": 8047 }, { "epoch": 0.8464117159872218, "grad_norm": 2.7466454975694754, "learning_rate": 2.912738272952889e-07, "loss": 0.9759, "step": 8048 }, { "epoch": 0.8465168864290691, "grad_norm": 2.1964656610410445, "learning_rate": 2.9088287998694673e-07, "loss": 0.9888, "step": 8049 }, { "epoch": 0.8466220568709164, "grad_norm": 2.479272749557706, "learning_rate": 2.9049217901232387e-07, "loss": 0.9398, "step": 8050 }, { "epoch": 0.8467272273127637, "grad_norm": 2.787185292911654, "learning_rate": 2.901017244149873e-07, "loss": 0.9828, "step": 8051 }, { "epoch": 0.846832397754611, "grad_norm": 1.98715699218698, "learning_rate": 2.897115162384759e-07, "loss": 1.0073, "step": 8052 }, { "epoch": 0.8469375681964584, "grad_norm": 2.17127153048729, "learning_rate": 2.893215545263001e-07, "loss": 0.9981, "step": 8053 }, { "epoch": 0.8470427386383057, "grad_norm": 2.3336424406843115, "learning_rate": 2.889318393219445e-07, "loss": 0.9829, "step": 8054 }, { "epoch": 0.847147909080153, "grad_norm": 2.250375988178359, "learning_rate": 2.885423706688642e-07, "loss": 0.9889, "step": 8055 }, { "epoch": 0.8472530795220004, "grad_norm": 2.1354421736272293, "learning_rate": 2.8815314861048966e-07, "loss": 1.0016, "step": 8056 }, { "epoch": 0.8473582499638477, "grad_norm": 2.331711007943775, "learning_rate": 2.8776417319022145e-07, "loss": 0.9307, "step": 8057 }, { "epoch": 0.847463420405695, "grad_norm": 2.8514494357851214, "learning_rate": 2.8737544445143263e-07, "loss": 0.9425, "step": 8058 }, { "epoch": 0.8475685908475423, "grad_norm": 2.6329555820560557, "learning_rate": 2.869869624374699e-07, "loss": 0.9704, "step": 8059 }, { "epoch": 0.8476737612893896, "grad_norm": 2.129162011010815, "learning_rate": 2.8659872719165203e-07, "loss": 0.9776, "step": 8060 }, { "epoch": 0.847778931731237, "grad_norm": 2.334892721506637, "learning_rate": 2.86210738757271e-07, "loss": 0.9834, "step": 8061 }, { "epoch": 0.8478841021730843, "grad_norm": 2.3641357364728273, "learning_rate": 2.858229971775894e-07, "loss": 0.9409, "step": 8062 }, { "epoch": 0.8479892726149316, "grad_norm": 2.1169833059337764, "learning_rate": 2.8543550249584465e-07, "loss": 0.9716, "step": 8063 }, { "epoch": 0.848094443056779, "grad_norm": 2.2632523362465875, "learning_rate": 2.850482547552441e-07, "loss": 0.9724, "step": 8064 }, { "epoch": 0.8481996134986262, "grad_norm": 2.508398111461374, "learning_rate": 2.846612539989693e-07, "loss": 0.9645, "step": 8065 }, { "epoch": 0.8483047839404735, "grad_norm": 3.107750551526337, "learning_rate": 2.8427450027017493e-07, "loss": 0.9812, "step": 8066 }, { "epoch": 0.8484099543823208, "grad_norm": 2.1641973776169845, "learning_rate": 2.838879936119854e-07, "loss": 0.9508, "step": 8067 }, { "epoch": 0.8485151248241681, "grad_norm": 2.465015127853074, "learning_rate": 2.8350173406749975e-07, "loss": 0.9792, "step": 8068 }, { "epoch": 0.8486202952660155, "grad_norm": 1.9790492113518683, "learning_rate": 2.8311572167978947e-07, "loss": 0.9586, "step": 8069 }, { "epoch": 0.8487254657078628, "grad_norm": 3.114675329966703, "learning_rate": 2.827299564918978e-07, "loss": 0.9591, "step": 8070 }, { "epoch": 0.8488306361497101, "grad_norm": 2.733280124937022, "learning_rate": 2.823444385468399e-07, "loss": 1.0032, "step": 8071 }, { "epoch": 0.8489358065915574, "grad_norm": 2.0042271783468766, "learning_rate": 2.819591678876052e-07, "loss": 0.9599, "step": 8072 }, { "epoch": 0.8490409770334048, "grad_norm": 2.8190894222776337, "learning_rate": 2.815741445571529e-07, "loss": 0.9586, "step": 8073 }, { "epoch": 0.8491461474752521, "grad_norm": 2.8042120225394753, "learning_rate": 2.8118936859841684e-07, "loss": 0.9776, "step": 8074 }, { "epoch": 0.8492513179170994, "grad_norm": 2.0986367427377446, "learning_rate": 2.8080484005430313e-07, "loss": 0.9594, "step": 8075 }, { "epoch": 0.8493564883589467, "grad_norm": 2.8316742922493017, "learning_rate": 2.804205589676884e-07, "loss": 0.9596, "step": 8076 }, { "epoch": 0.849461658800794, "grad_norm": 3.077204387316349, "learning_rate": 2.8003652538142413e-07, "loss": 0.96, "step": 8077 }, { "epoch": 0.8495668292426414, "grad_norm": 2.310308677624084, "learning_rate": 2.796527393383322e-07, "loss": 0.9743, "step": 8078 }, { "epoch": 0.8496719996844887, "grad_norm": 2.1833465701082884, "learning_rate": 2.7926920088120786e-07, "loss": 0.9564, "step": 8079 }, { "epoch": 0.849777170126336, "grad_norm": 2.3941525202307274, "learning_rate": 2.788859100528196e-07, "loss": 0.9678, "step": 8080 }, { "epoch": 0.8498823405681833, "grad_norm": 2.6311798448564607, "learning_rate": 2.785028668959061e-07, "loss": 0.9633, "step": 8081 }, { "epoch": 0.8499875110100307, "grad_norm": 2.963362741185363, "learning_rate": 2.781200714531801e-07, "loss": 0.9707, "step": 8082 }, { "epoch": 0.850092681451878, "grad_norm": 3.184965341727443, "learning_rate": 2.7773752376732605e-07, "loss": 1.0092, "step": 8083 }, { "epoch": 0.8501978518937253, "grad_norm": 1.8532486139994901, "learning_rate": 2.7735522388100206e-07, "loss": 0.9832, "step": 8084 }, { "epoch": 0.8503030223355725, "grad_norm": 2.1835915901309075, "learning_rate": 2.76973171836836e-07, "loss": 0.9377, "step": 8085 }, { "epoch": 0.8504081927774199, "grad_norm": 3.5734845337817935, "learning_rate": 2.7659136767743096e-07, "loss": 1.0057, "step": 8086 }, { "epoch": 0.8505133632192672, "grad_norm": 2.1809470260774644, "learning_rate": 2.7620981144536014e-07, "loss": 0.943, "step": 8087 }, { "epoch": 0.8506185336611145, "grad_norm": 2.348459690619445, "learning_rate": 2.7582850318317005e-07, "loss": 0.929, "step": 8088 }, { "epoch": 0.8507237041029618, "grad_norm": 2.4740076632830146, "learning_rate": 2.7544744293338063e-07, "loss": 0.9618, "step": 8089 }, { "epoch": 0.8508288745448092, "grad_norm": 3.4907303797398517, "learning_rate": 2.750666307384814e-07, "loss": 1.0089, "step": 8090 }, { "epoch": 0.8509340449866565, "grad_norm": 3.116098339723679, "learning_rate": 2.746860666409371e-07, "loss": 0.9985, "step": 8091 }, { "epoch": 0.8510392154285038, "grad_norm": 2.302135132288717, "learning_rate": 2.743057506831834e-07, "loss": 0.9955, "step": 8092 }, { "epoch": 0.8511443858703511, "grad_norm": 2.454597566905971, "learning_rate": 2.7392568290762786e-07, "loss": 0.9345, "step": 8093 }, { "epoch": 0.8512495563121985, "grad_norm": 2.297569677220409, "learning_rate": 2.7354586335665205e-07, "loss": 0.9634, "step": 8094 }, { "epoch": 0.8513547267540458, "grad_norm": 2.5670899366081614, "learning_rate": 2.7316629207260745e-07, "loss": 0.9547, "step": 8095 }, { "epoch": 0.8514598971958931, "grad_norm": 2.111670089542108, "learning_rate": 2.727869690978202e-07, "loss": 0.9991, "step": 8096 }, { "epoch": 0.8515650676377404, "grad_norm": 2.1740992859773454, "learning_rate": 2.7240789447458756e-07, "loss": 0.9991, "step": 8097 }, { "epoch": 0.8516702380795877, "grad_norm": 3.134509753421687, "learning_rate": 2.7202906824517955e-07, "loss": 0.9657, "step": 8098 }, { "epoch": 0.8517754085214351, "grad_norm": 2.375342092275538, "learning_rate": 2.716504904518372e-07, "loss": 0.9857, "step": 8099 }, { "epoch": 0.8518805789632824, "grad_norm": 2.1830836767759045, "learning_rate": 2.7127216113677636e-07, "loss": 0.9715, "step": 8100 }, { "epoch": 0.8519857494051297, "grad_norm": 3.471010207274205, "learning_rate": 2.708940803421825e-07, "loss": 1.0784, "step": 8101 }, { "epoch": 0.852090919846977, "grad_norm": 2.6989538225629897, "learning_rate": 2.705162481102147e-07, "loss": 0.9515, "step": 8102 }, { "epoch": 0.8521960902888244, "grad_norm": 2.428855683370294, "learning_rate": 2.7013866448300506e-07, "loss": 1.0098, "step": 8103 }, { "epoch": 0.8523012607306717, "grad_norm": 1.9294068184039133, "learning_rate": 2.697613295026563e-07, "loss": 0.9569, "step": 8104 }, { "epoch": 0.8524064311725189, "grad_norm": 2.703791788987313, "learning_rate": 2.6938424321124396e-07, "loss": 1.0116, "step": 8105 }, { "epoch": 0.8525116016143662, "grad_norm": 1.8334006288217113, "learning_rate": 2.690074056508168e-07, "loss": 1.0143, "step": 8106 }, { "epoch": 0.8526167720562136, "grad_norm": 2.086771645052246, "learning_rate": 2.686308168633953e-07, "loss": 0.9975, "step": 8107 }, { "epoch": 0.8527219424980609, "grad_norm": 2.064759245549448, "learning_rate": 2.6825447689097174e-07, "loss": 0.9949, "step": 8108 }, { "epoch": 0.8528271129399082, "grad_norm": 2.5706290935033627, "learning_rate": 2.678783857755102e-07, "loss": 0.9576, "step": 8109 }, { "epoch": 0.8529322833817555, "grad_norm": 2.7378677889966974, "learning_rate": 2.675025435589482e-07, "loss": 0.9744, "step": 8110 }, { "epoch": 0.8530374538236029, "grad_norm": 2.8537643482308424, "learning_rate": 2.671269502831955e-07, "loss": 0.9979, "step": 8111 }, { "epoch": 0.8531426242654502, "grad_norm": 2.18393636096386, "learning_rate": 2.6675160599013374e-07, "loss": 0.9741, "step": 8112 }, { "epoch": 0.8532477947072975, "grad_norm": 2.101881287973468, "learning_rate": 2.66376510721616e-07, "loss": 0.9905, "step": 8113 }, { "epoch": 0.8533529651491448, "grad_norm": 2.1876358871831973, "learning_rate": 2.660016645194688e-07, "loss": 0.9924, "step": 8114 }, { "epoch": 0.8534581355909922, "grad_norm": 3.243463727452169, "learning_rate": 2.6562706742549073e-07, "loss": 0.9812, "step": 8115 }, { "epoch": 0.8535633060328395, "grad_norm": 3.2673747014657084, "learning_rate": 2.652527194814511e-07, "loss": 1.0241, "step": 8116 }, { "epoch": 0.8536684764746868, "grad_norm": 2.4360167551039074, "learning_rate": 2.6487862072909404e-07, "loss": 0.9474, "step": 8117 }, { "epoch": 0.8537736469165341, "grad_norm": 2.431103723048994, "learning_rate": 2.645047712101334e-07, "loss": 1.0166, "step": 8118 }, { "epoch": 0.8538788173583814, "grad_norm": 2.598232947614198, "learning_rate": 2.641311709662567e-07, "loss": 0.9566, "step": 8119 }, { "epoch": 0.8539839878002288, "grad_norm": 2.5875433885989123, "learning_rate": 2.637578200391233e-07, "loss": 0.9922, "step": 8120 }, { "epoch": 0.8540891582420761, "grad_norm": 2.3388693300392327, "learning_rate": 2.63384718470365e-07, "loss": 0.9561, "step": 8121 }, { "epoch": 0.8541943286839234, "grad_norm": 2.754827845758967, "learning_rate": 2.630118663015849e-07, "loss": 0.989, "step": 8122 }, { "epoch": 0.8542994991257707, "grad_norm": 2.1192941339795683, "learning_rate": 2.626392635743594e-07, "loss": 0.9967, "step": 8123 }, { "epoch": 0.8544046695676181, "grad_norm": 2.616739686808066, "learning_rate": 2.6226691033023665e-07, "loss": 0.9751, "step": 8124 }, { "epoch": 0.8545098400094654, "grad_norm": 2.0640093068786705, "learning_rate": 2.618948066107363e-07, "loss": 0.9815, "step": 8125 }, { "epoch": 0.8546150104513126, "grad_norm": 3.0722152608836364, "learning_rate": 2.615229524573518e-07, "loss": 0.9867, "step": 8126 }, { "epoch": 0.8547201808931599, "grad_norm": 2.697664599244337, "learning_rate": 2.6115134791154657e-07, "loss": 0.9973, "step": 8127 }, { "epoch": 0.8548253513350073, "grad_norm": 2.1491230648206012, "learning_rate": 2.607799930147581e-07, "loss": 0.9659, "step": 8128 }, { "epoch": 0.8549305217768546, "grad_norm": 2.091866541025715, "learning_rate": 2.604088878083957e-07, "loss": 0.9658, "step": 8129 }, { "epoch": 0.8550356922187019, "grad_norm": 2.065310881673391, "learning_rate": 2.600380323338397e-07, "loss": 0.981, "step": 8130 }, { "epoch": 0.8551408626605492, "grad_norm": 2.637190411853234, "learning_rate": 2.596674266324442e-07, "loss": 0.9403, "step": 8131 }, { "epoch": 0.8552460331023966, "grad_norm": 2.2549585723194756, "learning_rate": 2.5929707074553364e-07, "loss": 0.9679, "step": 8132 }, { "epoch": 0.8553512035442439, "grad_norm": 2.1466692322213095, "learning_rate": 2.58926964714406e-07, "loss": 0.947, "step": 8133 }, { "epoch": 0.8554563739860912, "grad_norm": 3.13947290894236, "learning_rate": 2.5855710858033096e-07, "loss": 0.9944, "step": 8134 }, { "epoch": 0.8555615444279385, "grad_norm": 2.2159814940956912, "learning_rate": 2.581875023845512e-07, "loss": 0.9693, "step": 8135 }, { "epoch": 0.8556667148697858, "grad_norm": 2.261359755971855, "learning_rate": 2.5781814616827936e-07, "loss": 0.9711, "step": 8136 }, { "epoch": 0.8557718853116332, "grad_norm": 2.1664017570010468, "learning_rate": 2.57449039972702e-07, "loss": 0.9714, "step": 8137 }, { "epoch": 0.8558770557534805, "grad_norm": 2.2579137649584418, "learning_rate": 2.5708018383897803e-07, "loss": 0.9975, "step": 8138 }, { "epoch": 0.8559822261953278, "grad_norm": 3.238109092541349, "learning_rate": 2.567115778082366e-07, "loss": 0.9784, "step": 8139 }, { "epoch": 0.8560873966371751, "grad_norm": 2.4097012019064357, "learning_rate": 2.563432219215814e-07, "loss": 0.9779, "step": 8140 }, { "epoch": 0.8561925670790225, "grad_norm": 2.2467448008644575, "learning_rate": 2.559751162200855e-07, "loss": 0.9641, "step": 8141 }, { "epoch": 0.8562977375208698, "grad_norm": 2.3268805950761577, "learning_rate": 2.556072607447965e-07, "loss": 1.0105, "step": 8142 }, { "epoch": 0.8564029079627171, "grad_norm": 2.887233445189877, "learning_rate": 2.552396555367334e-07, "loss": 0.9784, "step": 8143 }, { "epoch": 0.8565080784045644, "grad_norm": 2.2652184394813246, "learning_rate": 2.548723006368864e-07, "loss": 0.9439, "step": 8144 }, { "epoch": 0.8566132488464118, "grad_norm": 3.416084310322756, "learning_rate": 2.545051960862188e-07, "loss": 1.0174, "step": 8145 }, { "epoch": 0.856718419288259, "grad_norm": 2.2619883051129888, "learning_rate": 2.541383419256646e-07, "loss": 0.9155, "step": 8146 }, { "epoch": 0.8568235897301063, "grad_norm": 3.2034627263132034, "learning_rate": 2.5377173819613274e-07, "loss": 0.953, "step": 8147 }, { "epoch": 0.8569287601719536, "grad_norm": 2.3308420908834213, "learning_rate": 2.53405384938501e-07, "loss": 0.977, "step": 8148 }, { "epoch": 0.857033930613801, "grad_norm": 2.549797506834715, "learning_rate": 2.530392821936212e-07, "loss": 0.9436, "step": 8149 }, { "epoch": 0.8571391010556483, "grad_norm": 2.001168695818676, "learning_rate": 2.526734300023162e-07, "loss": 0.965, "step": 8150 }, { "epoch": 0.8572442714974956, "grad_norm": 2.9838680004948293, "learning_rate": 2.5230782840538147e-07, "loss": 0.9686, "step": 8151 }, { "epoch": 0.8573494419393429, "grad_norm": 2.0859980624244487, "learning_rate": 2.5194247744358523e-07, "loss": 0.9598, "step": 8152 }, { "epoch": 0.8574546123811903, "grad_norm": 1.6033395922396991, "learning_rate": 2.5157737715766544e-07, "loss": 0.9206, "step": 8153 }, { "epoch": 0.8575597828230376, "grad_norm": 2.47393713284398, "learning_rate": 2.5121252758833537e-07, "loss": 0.9946, "step": 8154 }, { "epoch": 0.8576649532648849, "grad_norm": 2.375844708577336, "learning_rate": 2.508479287762769e-07, "loss": 0.9725, "step": 8155 }, { "epoch": 0.8577701237067322, "grad_norm": 1.907812917306332, "learning_rate": 2.504835807621464e-07, "loss": 0.9562, "step": 8156 }, { "epoch": 0.8578752941485795, "grad_norm": 1.967540450625011, "learning_rate": 2.501194835865717e-07, "loss": 0.9612, "step": 8157 }, { "epoch": 0.8579804645904269, "grad_norm": 2.9450000337243147, "learning_rate": 2.4975563729015244e-07, "loss": 0.9839, "step": 8158 }, { "epoch": 0.8580856350322742, "grad_norm": 2.85718562948114, "learning_rate": 2.493920419134604e-07, "loss": 0.9857, "step": 8159 }, { "epoch": 0.8581908054741215, "grad_norm": 2.384457833137985, "learning_rate": 2.4902869749703797e-07, "loss": 0.9708, "step": 8160 }, { "epoch": 0.8582959759159688, "grad_norm": 3.4120765102011985, "learning_rate": 2.4866560408140284e-07, "loss": 0.9713, "step": 8161 }, { "epoch": 0.8584011463578162, "grad_norm": 2.19415436252959, "learning_rate": 2.483027617070413e-07, "loss": 0.953, "step": 8162 }, { "epoch": 0.8585063167996635, "grad_norm": 2.8855737475987264, "learning_rate": 2.479401704144144e-07, "loss": 0.9333, "step": 8163 }, { "epoch": 0.8586114872415108, "grad_norm": 2.3336198104728374, "learning_rate": 2.4757783024395244e-07, "loss": 0.9902, "step": 8164 }, { "epoch": 0.8587166576833581, "grad_norm": 2.645052818170446, "learning_rate": 2.472157412360596e-07, "loss": 0.9993, "step": 8165 }, { "epoch": 0.8588218281252055, "grad_norm": 2.332866176403049, "learning_rate": 2.4685390343111265e-07, "loss": 0.9832, "step": 8166 }, { "epoch": 0.8589269985670527, "grad_norm": 2.8089915163327266, "learning_rate": 2.46492316869458e-07, "loss": 0.9792, "step": 8167 }, { "epoch": 0.8590321690089, "grad_norm": 2.459421993014172, "learning_rate": 2.461309815914162e-07, "loss": 1.0708, "step": 8168 }, { "epoch": 0.8591373394507473, "grad_norm": 2.3521339437040734, "learning_rate": 2.45769897637278e-07, "loss": 1.0167, "step": 8169 }, { "epoch": 0.8592425098925947, "grad_norm": 3.0044324465369336, "learning_rate": 2.4540906504730814e-07, "loss": 0.9617, "step": 8170 }, { "epoch": 0.859347680334442, "grad_norm": 2.554374282868245, "learning_rate": 2.450484838617417e-07, "loss": 0.9495, "step": 8171 }, { "epoch": 0.8594528507762893, "grad_norm": 2.387412844879945, "learning_rate": 2.446881541207868e-07, "loss": 0.9826, "step": 8172 }, { "epoch": 0.8595580212181366, "grad_norm": 1.942667199406407, "learning_rate": 2.4432807586462214e-07, "loss": 0.9746, "step": 8173 }, { "epoch": 0.859663191659984, "grad_norm": 2.590230625555086, "learning_rate": 2.4396824913339946e-07, "loss": 0.9675, "step": 8174 }, { "epoch": 0.8597683621018313, "grad_norm": 2.544725569198336, "learning_rate": 2.436086739672433e-07, "loss": 0.9573, "step": 8175 }, { "epoch": 0.8598735325436786, "grad_norm": 2.875401976015427, "learning_rate": 2.4324935040624747e-07, "loss": 0.9746, "step": 8176 }, { "epoch": 0.8599787029855259, "grad_norm": 2.330210767863903, "learning_rate": 2.4289027849048076e-07, "loss": 0.9509, "step": 8177 }, { "epoch": 0.8600838734273732, "grad_norm": 3.8731187149712456, "learning_rate": 2.4253145825998134e-07, "loss": 0.972, "step": 8178 }, { "epoch": 0.8601890438692206, "grad_norm": 2.4303181220029746, "learning_rate": 2.4217288975476094e-07, "loss": 0.9954, "step": 8179 }, { "epoch": 0.8602942143110679, "grad_norm": 1.9710335180041265, "learning_rate": 2.418145730148033e-07, "loss": 0.957, "step": 8180 }, { "epoch": 0.8603993847529152, "grad_norm": 2.7525905486987288, "learning_rate": 2.414565080800624e-07, "loss": 0.9754, "step": 8181 }, { "epoch": 0.8605045551947625, "grad_norm": 3.292704941422295, "learning_rate": 2.4109869499046647e-07, "loss": 0.9751, "step": 8182 }, { "epoch": 0.8606097256366099, "grad_norm": 2.165266902747974, "learning_rate": 2.4074113378591254e-07, "loss": 0.9757, "step": 8183 }, { "epoch": 0.8607148960784572, "grad_norm": 2.4020231317592806, "learning_rate": 2.4038382450627396e-07, "loss": 0.9993, "step": 8184 }, { "epoch": 0.8608200665203045, "grad_norm": 2.5604039867867816, "learning_rate": 2.400267671913917e-07, "loss": 1.0048, "step": 8185 }, { "epoch": 0.8609252369621518, "grad_norm": 1.8354917739415633, "learning_rate": 2.3966996188108133e-07, "loss": 0.9127, "step": 8186 }, { "epoch": 0.861030407403999, "grad_norm": 2.865868216648736, "learning_rate": 2.3931340861512884e-07, "loss": 0.9865, "step": 8187 }, { "epoch": 0.8611355778458464, "grad_norm": 1.9746436738041568, "learning_rate": 2.389571074332928e-07, "loss": 0.9466, "step": 8188 }, { "epoch": 0.8612407482876937, "grad_norm": 3.3252842390412796, "learning_rate": 2.38601058375304e-07, "loss": 1.0068, "step": 8189 }, { "epoch": 0.861345918729541, "grad_norm": 2.847887785313545, "learning_rate": 2.382452614808642e-07, "loss": 0.999, "step": 8190 }, { "epoch": 0.8614510891713884, "grad_norm": 1.955383199190147, "learning_rate": 2.3788971678964794e-07, "loss": 0.9989, "step": 8191 }, { "epoch": 0.8615562596132357, "grad_norm": 2.285791537624384, "learning_rate": 2.3753442434129998e-07, "loss": 0.9703, "step": 8192 }, { "epoch": 0.861661430055083, "grad_norm": 2.6866877107259026, "learning_rate": 2.3717938417543995e-07, "loss": 0.9215, "step": 8193 }, { "epoch": 0.8617666004969303, "grad_norm": 2.7588675656238677, "learning_rate": 2.3682459633165704e-07, "loss": 0.974, "step": 8194 }, { "epoch": 0.8618717709387776, "grad_norm": 3.3460239543274843, "learning_rate": 2.3647006084951214e-07, "loss": 0.9426, "step": 8195 }, { "epoch": 0.861976941380625, "grad_norm": 2.3650104551290685, "learning_rate": 2.3611577776853965e-07, "loss": 0.9484, "step": 8196 }, { "epoch": 0.8620821118224723, "grad_norm": 2.834027735071727, "learning_rate": 2.3576174712824335e-07, "loss": 0.9879, "step": 8197 }, { "epoch": 0.8621872822643196, "grad_norm": 2.2266254740819966, "learning_rate": 2.354079689681024e-07, "loss": 1.0312, "step": 8198 }, { "epoch": 0.862292452706167, "grad_norm": 2.550002403776596, "learning_rate": 2.350544433275645e-07, "loss": 0.9572, "step": 8199 }, { "epoch": 0.8623976231480143, "grad_norm": 3.3029388049198825, "learning_rate": 2.3470117024605138e-07, "loss": 0.9695, "step": 8200 }, { "epoch": 0.8625027935898616, "grad_norm": 2.7620071406830635, "learning_rate": 2.3434814976295462e-07, "loss": 0.9729, "step": 8201 }, { "epoch": 0.8626079640317089, "grad_norm": 2.279717835201499, "learning_rate": 2.3399538191763937e-07, "loss": 1.0069, "step": 8202 }, { "epoch": 0.8627131344735562, "grad_norm": 2.509494166741847, "learning_rate": 2.3364286674944254e-07, "loss": 0.9409, "step": 8203 }, { "epoch": 0.8628183049154036, "grad_norm": 2.7206541859104267, "learning_rate": 2.3329060429767124e-07, "loss": 0.9442, "step": 8204 }, { "epoch": 0.8629234753572509, "grad_norm": 2.372820235661989, "learning_rate": 2.329385946016066e-07, "loss": 0.9734, "step": 8205 }, { "epoch": 0.8630286457990982, "grad_norm": 2.6935312672829874, "learning_rate": 2.325868377004986e-07, "loss": 0.9754, "step": 8206 }, { "epoch": 0.8631338162409454, "grad_norm": 2.1552030244555587, "learning_rate": 2.3223533363357308e-07, "loss": 0.9789, "step": 8207 }, { "epoch": 0.8632389866827928, "grad_norm": 2.412292284565908, "learning_rate": 2.3188408244002424e-07, "loss": 0.9809, "step": 8208 }, { "epoch": 0.8633441571246401, "grad_norm": 2.088983007031558, "learning_rate": 2.3153308415901993e-07, "loss": 1.0039, "step": 8209 }, { "epoch": 0.8634493275664874, "grad_norm": 2.4663761465549547, "learning_rate": 2.3118233882969854e-07, "loss": 1.0072, "step": 8210 }, { "epoch": 0.8635544980083347, "grad_norm": 2.147148470340049, "learning_rate": 2.3083184649117046e-07, "loss": 0.9939, "step": 8211 }, { "epoch": 0.863659668450182, "grad_norm": 2.709658500142506, "learning_rate": 2.3048160718252e-07, "loss": 0.9834, "step": 8212 }, { "epoch": 0.8637648388920294, "grad_norm": 2.7515549275388804, "learning_rate": 2.3013162094279977e-07, "loss": 1.009, "step": 8213 }, { "epoch": 0.8638700093338767, "grad_norm": 2.999999695153477, "learning_rate": 2.297818878110375e-07, "loss": 0.918, "step": 8214 }, { "epoch": 0.863975179775724, "grad_norm": 2.8176326824155358, "learning_rate": 2.2943240782623e-07, "loss": 0.9514, "step": 8215 }, { "epoch": 0.8640803502175713, "grad_norm": 2.673202502494635, "learning_rate": 2.2908318102734724e-07, "loss": 1.003, "step": 8216 }, { "epoch": 0.8641855206594187, "grad_norm": 2.9697524493518253, "learning_rate": 2.2873420745333163e-07, "loss": 0.9965, "step": 8217 }, { "epoch": 0.864290691101266, "grad_norm": 2.3065886709900365, "learning_rate": 2.283854871430949e-07, "loss": 0.9499, "step": 8218 }, { "epoch": 0.8643958615431133, "grad_norm": 2.3908258377099565, "learning_rate": 2.2803702013552364e-07, "loss": 0.9679, "step": 8219 }, { "epoch": 0.8645010319849606, "grad_norm": 2.586564468311502, "learning_rate": 2.2768880646947268e-07, "loss": 0.9946, "step": 8220 }, { "epoch": 0.864606202426808, "grad_norm": 3.179651943448657, "learning_rate": 2.2734084618377285e-07, "loss": 1.0029, "step": 8221 }, { "epoch": 0.8647113728686553, "grad_norm": 3.5059554764194156, "learning_rate": 2.2699313931722284e-07, "loss": 1.0056, "step": 8222 }, { "epoch": 0.8648165433105026, "grad_norm": 2.3482968719594823, "learning_rate": 2.2664568590859525e-07, "loss": 0.9858, "step": 8223 }, { "epoch": 0.8649217137523499, "grad_norm": 2.786091616547652, "learning_rate": 2.2629848599663357e-07, "loss": 0.9675, "step": 8224 }, { "epoch": 0.8650268841941973, "grad_norm": 2.151802230862855, "learning_rate": 2.2595153962005345e-07, "loss": 1.0044, "step": 8225 }, { "epoch": 0.8651320546360446, "grad_norm": 2.2954772594860793, "learning_rate": 2.2560484681754258e-07, "loss": 1.0249, "step": 8226 }, { "epoch": 0.8652372250778919, "grad_norm": 2.4848587884057065, "learning_rate": 2.2525840762775863e-07, "loss": 0.9688, "step": 8227 }, { "epoch": 0.8653423955197391, "grad_norm": 2.825905090752021, "learning_rate": 2.2491222208933377e-07, "loss": 1.004, "step": 8228 }, { "epoch": 0.8654475659615865, "grad_norm": 2.6469755844197635, "learning_rate": 2.245662902408688e-07, "loss": 0.999, "step": 8229 }, { "epoch": 0.8655527364034338, "grad_norm": 3.240033136066609, "learning_rate": 2.2422061212093947e-07, "loss": 0.9899, "step": 8230 }, { "epoch": 0.8656579068452811, "grad_norm": 2.8389209244754787, "learning_rate": 2.238751877680906e-07, "loss": 1.0278, "step": 8231 }, { "epoch": 0.8657630772871284, "grad_norm": 2.6429507672792374, "learning_rate": 2.2353001722083962e-07, "loss": 0.986, "step": 8232 }, { "epoch": 0.8658682477289757, "grad_norm": 2.0796610902282047, "learning_rate": 2.2318510051767632e-07, "loss": 0.9746, "step": 8233 }, { "epoch": 0.8659734181708231, "grad_norm": 2.1854334963011137, "learning_rate": 2.2284043769706026e-07, "loss": 0.9798, "step": 8234 }, { "epoch": 0.8660785886126704, "grad_norm": 2.154239959742983, "learning_rate": 2.2249602879742594e-07, "loss": 0.9868, "step": 8235 }, { "epoch": 0.8661837590545177, "grad_norm": 2.6042428551850563, "learning_rate": 2.2215187385717623e-07, "loss": 1.0048, "step": 8236 }, { "epoch": 0.866288929496365, "grad_norm": 2.3168200385005435, "learning_rate": 2.2180797291468825e-07, "loss": 0.9696, "step": 8237 }, { "epoch": 0.8663940999382124, "grad_norm": 2.612002220024735, "learning_rate": 2.2146432600830799e-07, "loss": 0.9707, "step": 8238 }, { "epoch": 0.8664992703800597, "grad_norm": 2.7310391889877494, "learning_rate": 2.2112093317635587e-07, "loss": 0.9626, "step": 8239 }, { "epoch": 0.866604440821907, "grad_norm": 2.1973508769452152, "learning_rate": 2.207777944571232e-07, "loss": 0.9735, "step": 8240 }, { "epoch": 0.8667096112637543, "grad_norm": 2.184444168526134, "learning_rate": 2.2043490988887168e-07, "loss": 0.9651, "step": 8241 }, { "epoch": 0.8668147817056017, "grad_norm": 1.7840753769682516, "learning_rate": 2.200922795098362e-07, "loss": 0.9442, "step": 8242 }, { "epoch": 0.866919952147449, "grad_norm": 2.612245040597428, "learning_rate": 2.1974990335822179e-07, "loss": 0.9973, "step": 8243 }, { "epoch": 0.8670251225892963, "grad_norm": 2.7915387255241346, "learning_rate": 2.1940778147220758e-07, "loss": 0.9892, "step": 8244 }, { "epoch": 0.8671302930311436, "grad_norm": 2.104198732577539, "learning_rate": 2.1906591388994148e-07, "loss": 0.9436, "step": 8245 }, { "epoch": 0.867235463472991, "grad_norm": 2.381480939039506, "learning_rate": 2.187243006495454e-07, "loss": 1.0353, "step": 8246 }, { "epoch": 0.8673406339148383, "grad_norm": 2.7229634312983917, "learning_rate": 2.1838294178911146e-07, "loss": 1.0069, "step": 8247 }, { "epoch": 0.8674458043566855, "grad_norm": 2.3789881685811154, "learning_rate": 2.1804183734670277e-07, "loss": 0.9485, "step": 8248 }, { "epoch": 0.8675509747985328, "grad_norm": 2.5490016088242125, "learning_rate": 2.1770098736035694e-07, "loss": 0.9878, "step": 8249 }, { "epoch": 0.8676561452403801, "grad_norm": 2.8862833601914764, "learning_rate": 2.1736039186808e-07, "loss": 0.988, "step": 8250 }, { "epoch": 0.8677613156822275, "grad_norm": 2.4021252306313747, "learning_rate": 2.170200509078521e-07, "loss": 0.9925, "step": 8251 }, { "epoch": 0.8678664861240748, "grad_norm": 2.489969509776828, "learning_rate": 2.166799645176229e-07, "loss": 1.0052, "step": 8252 }, { "epoch": 0.8679716565659221, "grad_norm": 1.9153232869677428, "learning_rate": 2.1634013273531512e-07, "loss": 0.9971, "step": 8253 }, { "epoch": 0.8680768270077694, "grad_norm": 2.0476262837462156, "learning_rate": 2.160005555988229e-07, "loss": 0.9905, "step": 8254 }, { "epoch": 0.8681819974496168, "grad_norm": 3.0949990892144665, "learning_rate": 2.1566123314601118e-07, "loss": 0.9642, "step": 8255 }, { "epoch": 0.8682871678914641, "grad_norm": 2.359080168169369, "learning_rate": 2.1532216541471778e-07, "loss": 1.0135, "step": 8256 }, { "epoch": 0.8683923383333114, "grad_norm": 1.66683761103825, "learning_rate": 2.1498335244275e-07, "loss": 0.9266, "step": 8257 }, { "epoch": 0.8684975087751587, "grad_norm": 2.133569622507562, "learning_rate": 2.1464479426789005e-07, "loss": 0.961, "step": 8258 }, { "epoch": 0.8686026792170061, "grad_norm": 2.588002312127146, "learning_rate": 2.143064909278883e-07, "loss": 0.9796, "step": 8259 }, { "epoch": 0.8687078496588534, "grad_norm": 3.192126932345157, "learning_rate": 2.1396844246046904e-07, "loss": 1.006, "step": 8260 }, { "epoch": 0.8688130201007007, "grad_norm": 2.4896171740508946, "learning_rate": 2.1363064890332658e-07, "loss": 0.9849, "step": 8261 }, { "epoch": 0.868918190542548, "grad_norm": 2.7731731799976305, "learning_rate": 2.1329311029412796e-07, "loss": 0.9458, "step": 8262 }, { "epoch": 0.8690233609843954, "grad_norm": 2.722330994924211, "learning_rate": 2.1295582667051173e-07, "loss": 1.0028, "step": 8263 }, { "epoch": 0.8691285314262427, "grad_norm": 2.1961408410319008, "learning_rate": 2.1261879807008667e-07, "loss": 1.0216, "step": 8264 }, { "epoch": 0.86923370186809, "grad_norm": 2.123750174696564, "learning_rate": 2.1228202453043522e-07, "loss": 1.01, "step": 8265 }, { "epoch": 0.8693388723099373, "grad_norm": 2.6809762221376463, "learning_rate": 2.1194550608910902e-07, "loss": 1.0123, "step": 8266 }, { "epoch": 0.8694440427517847, "grad_norm": 2.5457872775140986, "learning_rate": 2.1160924278363333e-07, "loss": 0.9877, "step": 8267 }, { "epoch": 0.8695492131936319, "grad_norm": 2.085122359184593, "learning_rate": 2.1127323465150422e-07, "loss": 0.9743, "step": 8268 }, { "epoch": 0.8696543836354792, "grad_norm": 2.146743192267201, "learning_rate": 2.1093748173018846e-07, "loss": 0.9686, "step": 8269 }, { "epoch": 0.8697595540773265, "grad_norm": 2.3756277033536812, "learning_rate": 2.106019840571255e-07, "loss": 0.954, "step": 8270 }, { "epoch": 0.8698647245191738, "grad_norm": 1.9419516360185134, "learning_rate": 2.1026674166972627e-07, "loss": 0.9669, "step": 8271 }, { "epoch": 0.8699698949610212, "grad_norm": 3.0111258367329397, "learning_rate": 2.099317546053728e-07, "loss": 1.0088, "step": 8272 }, { "epoch": 0.8700750654028685, "grad_norm": 2.3215678619891364, "learning_rate": 2.09597022901418e-07, "loss": 0.9994, "step": 8273 }, { "epoch": 0.8701802358447158, "grad_norm": 2.866407727751618, "learning_rate": 2.0926254659518835e-07, "loss": 0.9804, "step": 8274 }, { "epoch": 0.8702854062865631, "grad_norm": 2.189238826195812, "learning_rate": 2.0892832572397935e-07, "loss": 0.9907, "step": 8275 }, { "epoch": 0.8703905767284105, "grad_norm": 1.9925032781875867, "learning_rate": 2.0859436032505954e-07, "loss": 1.0121, "step": 8276 }, { "epoch": 0.8704957471702578, "grad_norm": 2.0387873295462744, "learning_rate": 2.0826065043566935e-07, "loss": 0.9812, "step": 8277 }, { "epoch": 0.8706009176121051, "grad_norm": 1.838300067613648, "learning_rate": 2.0792719609301904e-07, "loss": 0.9273, "step": 8278 }, { "epoch": 0.8707060880539524, "grad_norm": 2.587747678789586, "learning_rate": 2.075939973342922e-07, "loss": 0.9786, "step": 8279 }, { "epoch": 0.8708112584957998, "grad_norm": 3.083618118123322, "learning_rate": 2.0726105419664188e-07, "loss": 0.9509, "step": 8280 }, { "epoch": 0.8709164289376471, "grad_norm": 2.1215733170081488, "learning_rate": 2.0692836671719536e-07, "loss": 0.9764, "step": 8281 }, { "epoch": 0.8710215993794944, "grad_norm": 2.305156580274503, "learning_rate": 2.065959349330493e-07, "loss": 1.0078, "step": 8282 }, { "epoch": 0.8711267698213417, "grad_norm": 2.6860204494280153, "learning_rate": 2.0626375888127187e-07, "loss": 0.996, "step": 8283 }, { "epoch": 0.8712319402631891, "grad_norm": 2.897921994981041, "learning_rate": 2.0593183859890369e-07, "loss": 0.9731, "step": 8284 }, { "epoch": 0.8713371107050364, "grad_norm": 3.5559919783151974, "learning_rate": 2.0560017412295658e-07, "loss": 0.9663, "step": 8285 }, { "epoch": 0.8714422811468837, "grad_norm": 2.4981822525310826, "learning_rate": 2.0526876549041368e-07, "loss": 0.9962, "step": 8286 }, { "epoch": 0.871547451588731, "grad_norm": 2.1707524269042926, "learning_rate": 2.0493761273822937e-07, "loss": 1.0296, "step": 8287 }, { "epoch": 0.8716526220305784, "grad_norm": 1.9671933144654712, "learning_rate": 2.046067159033302e-07, "loss": 1.0113, "step": 8288 }, { "epoch": 0.8717577924724256, "grad_norm": 1.8601153938979094, "learning_rate": 2.0427607502261303e-07, "loss": 1.0016, "step": 8289 }, { "epoch": 0.8718629629142729, "grad_norm": 3.164956063667044, "learning_rate": 2.039456901329473e-07, "loss": 1.0117, "step": 8290 }, { "epoch": 0.8719681333561202, "grad_norm": 2.629745086363934, "learning_rate": 2.0361556127117404e-07, "loss": 0.9834, "step": 8291 }, { "epoch": 0.8720733037979675, "grad_norm": 2.1561302946827907, "learning_rate": 2.0328568847410413e-07, "loss": 0.9296, "step": 8292 }, { "epoch": 0.8721784742398149, "grad_norm": 2.281348519655876, "learning_rate": 2.0295607177852146e-07, "loss": 0.9825, "step": 8293 }, { "epoch": 0.8722836446816622, "grad_norm": 3.1673280335921876, "learning_rate": 2.0262671122118078e-07, "loss": 1.0055, "step": 8294 }, { "epoch": 0.8723888151235095, "grad_norm": 2.292148446422658, "learning_rate": 2.0229760683880884e-07, "loss": 0.9556, "step": 8295 }, { "epoch": 0.8724939855653568, "grad_norm": 3.1357836739117175, "learning_rate": 2.0196875866810266e-07, "loss": 1.0117, "step": 8296 }, { "epoch": 0.8725991560072042, "grad_norm": 2.519946732312392, "learning_rate": 2.0164016674573185e-07, "loss": 0.9937, "step": 8297 }, { "epoch": 0.8727043264490515, "grad_norm": 2.564131547407527, "learning_rate": 2.0131183110833646e-07, "loss": 0.9503, "step": 8298 }, { "epoch": 0.8728094968908988, "grad_norm": 2.280967440361249, "learning_rate": 2.0098375179252867e-07, "loss": 0.9791, "step": 8299 }, { "epoch": 0.8729146673327461, "grad_norm": 2.354741913861808, "learning_rate": 2.006559288348922e-07, "loss": 0.9536, "step": 8300 }, { "epoch": 0.8730198377745935, "grad_norm": 3.5165919413572926, "learning_rate": 2.003283622719815e-07, "loss": 0.9616, "step": 8301 }, { "epoch": 0.8731250082164408, "grad_norm": 2.355749665234238, "learning_rate": 2.0000105214032313e-07, "loss": 0.9959, "step": 8302 }, { "epoch": 0.8732301786582881, "grad_norm": 2.5396746122652822, "learning_rate": 1.99673998476414e-07, "loss": 0.9603, "step": 8303 }, { "epoch": 0.8733353491001354, "grad_norm": 2.314335599258069, "learning_rate": 1.9934720131672357e-07, "loss": 0.9933, "step": 8304 }, { "epoch": 0.8734405195419828, "grad_norm": 2.4455577788280523, "learning_rate": 1.99020660697693e-07, "loss": 1.0243, "step": 8305 }, { "epoch": 0.8735456899838301, "grad_norm": 2.393852124530319, "learning_rate": 1.9869437665573255e-07, "loss": 0.9855, "step": 8306 }, { "epoch": 0.8736508604256774, "grad_norm": 1.9162658676831326, "learning_rate": 1.9836834922722648e-07, "loss": 0.9762, "step": 8307 }, { "epoch": 0.8737560308675247, "grad_norm": 2.6740161604492907, "learning_rate": 1.980425784485293e-07, "loss": 0.9674, "step": 8308 }, { "epoch": 0.873861201309372, "grad_norm": 2.380114295557652, "learning_rate": 1.97717064355967e-07, "loss": 0.999, "step": 8309 }, { "epoch": 0.8739663717512193, "grad_norm": 3.1166913813516883, "learning_rate": 1.973918069858366e-07, "loss": 0.9538, "step": 8310 }, { "epoch": 0.8740715421930666, "grad_norm": 2.807493356835643, "learning_rate": 1.9706680637440745e-07, "loss": 0.998, "step": 8311 }, { "epoch": 0.8741767126349139, "grad_norm": 1.8540453587981012, "learning_rate": 1.9674206255791862e-07, "loss": 0.9781, "step": 8312 }, { "epoch": 0.8742818830767612, "grad_norm": 2.297351573084357, "learning_rate": 1.9641757557258223e-07, "loss": 1.0059, "step": 8313 }, { "epoch": 0.8743870535186086, "grad_norm": 2.3795558043590797, "learning_rate": 1.9609334545458132e-07, "loss": 0.9783, "step": 8314 }, { "epoch": 0.8744922239604559, "grad_norm": 2.108673183419314, "learning_rate": 1.9576937224006942e-07, "loss": 0.9516, "step": 8315 }, { "epoch": 0.8745973944023032, "grad_norm": 1.9087942867413161, "learning_rate": 1.954456559651724e-07, "loss": 0.9661, "step": 8316 }, { "epoch": 0.8747025648441505, "grad_norm": 2.3081851420364266, "learning_rate": 1.9512219666598774e-07, "loss": 0.9554, "step": 8317 }, { "epoch": 0.8748077352859979, "grad_norm": 2.9514737071246038, "learning_rate": 1.9479899437858246e-07, "loss": 1.0265, "step": 8318 }, { "epoch": 0.8749129057278452, "grad_norm": 2.74991764931712, "learning_rate": 1.9447604913899715e-07, "loss": 0.9832, "step": 8319 }, { "epoch": 0.8750180761696925, "grad_norm": 3.2909066831788985, "learning_rate": 1.941533609832419e-07, "loss": 1.0034, "step": 8320 }, { "epoch": 0.8751232466115398, "grad_norm": 2.9061285090024063, "learning_rate": 1.9383092994729956e-07, "loss": 0.9832, "step": 8321 }, { "epoch": 0.8752284170533872, "grad_norm": 2.0846159205628747, "learning_rate": 1.935087560671231e-07, "loss": 0.9958, "step": 8322 }, { "epoch": 0.8753335874952345, "grad_norm": 2.3654315207259167, "learning_rate": 1.9318683937863846e-07, "loss": 0.9666, "step": 8323 }, { "epoch": 0.8754387579370818, "grad_norm": 2.3194539073632963, "learning_rate": 1.9286517991774084e-07, "loss": 0.9446, "step": 8324 }, { "epoch": 0.8755439283789291, "grad_norm": 2.3802207813099314, "learning_rate": 1.9254377772029847e-07, "loss": 0.9544, "step": 8325 }, { "epoch": 0.8756490988207765, "grad_norm": 1.8571233807580478, "learning_rate": 1.9222263282214908e-07, "loss": 0.9913, "step": 8326 }, { "epoch": 0.8757542692626238, "grad_norm": 3.2909851431362447, "learning_rate": 1.9190174525910404e-07, "loss": 0.9587, "step": 8327 }, { "epoch": 0.8758594397044711, "grad_norm": 2.4571652278790554, "learning_rate": 1.9158111506694442e-07, "loss": 0.9733, "step": 8328 }, { "epoch": 0.8759646101463183, "grad_norm": 2.497778471844159, "learning_rate": 1.9126074228142278e-07, "loss": 0.9895, "step": 8329 }, { "epoch": 0.8760697805881656, "grad_norm": 2.9691315953675703, "learning_rate": 1.9094062693826298e-07, "loss": 1.0181, "step": 8330 }, { "epoch": 0.876174951030013, "grad_norm": 2.5598658628083664, "learning_rate": 1.9062076907316097e-07, "loss": 1.0124, "step": 8331 }, { "epoch": 0.8762801214718603, "grad_norm": 3.447326481914661, "learning_rate": 1.9030116872178317e-07, "loss": 1.0096, "step": 8332 }, { "epoch": 0.8763852919137076, "grad_norm": 2.4686568055463027, "learning_rate": 1.8998182591976776e-07, "loss": 1.0027, "step": 8333 }, { "epoch": 0.876490462355555, "grad_norm": 3.8447359211448853, "learning_rate": 1.8966274070272294e-07, "loss": 1.0149, "step": 8334 }, { "epoch": 0.8765956327974023, "grad_norm": 2.5490921354790927, "learning_rate": 1.8934391310622996e-07, "loss": 0.9944, "step": 8335 }, { "epoch": 0.8767008032392496, "grad_norm": 3.1163782715259734, "learning_rate": 1.8902534316584065e-07, "loss": 0.9888, "step": 8336 }, { "epoch": 0.8768059736810969, "grad_norm": 2.157509845767865, "learning_rate": 1.88707030917078e-07, "loss": 0.9813, "step": 8337 }, { "epoch": 0.8769111441229442, "grad_norm": 2.3749513405972773, "learning_rate": 1.883889763954358e-07, "loss": 1.0061, "step": 8338 }, { "epoch": 0.8770163145647916, "grad_norm": 3.006163202985406, "learning_rate": 1.8807117963637988e-07, "loss": 0.9441, "step": 8339 }, { "epoch": 0.8771214850066389, "grad_norm": 2.940885641311102, "learning_rate": 1.8775364067534773e-07, "loss": 0.9773, "step": 8340 }, { "epoch": 0.8772266554484862, "grad_norm": 2.95414648550892, "learning_rate": 1.874363595477463e-07, "loss": 1.0312, "step": 8341 }, { "epoch": 0.8773318258903335, "grad_norm": 1.881654030719675, "learning_rate": 1.871193362889559e-07, "loss": 0.9565, "step": 8342 }, { "epoch": 0.8774369963321809, "grad_norm": 2.7330908747232567, "learning_rate": 1.8680257093432603e-07, "loss": 0.9809, "step": 8343 }, { "epoch": 0.8775421667740282, "grad_norm": 2.3063931397393067, "learning_rate": 1.8648606351917925e-07, "loss": 0.9502, "step": 8344 }, { "epoch": 0.8776473372158755, "grad_norm": 2.6553680130796127, "learning_rate": 1.8616981407880818e-07, "loss": 1.0175, "step": 8345 }, { "epoch": 0.8777525076577228, "grad_norm": 2.198765623782212, "learning_rate": 1.8585382264847795e-07, "loss": 0.9657, "step": 8346 }, { "epoch": 0.8778576780995702, "grad_norm": 2.2406067363185103, "learning_rate": 1.8553808926342286e-07, "loss": 0.9648, "step": 8347 }, { "epoch": 0.8779628485414175, "grad_norm": 2.6426330128760926, "learning_rate": 1.852226139588506e-07, "loss": 0.9899, "step": 8348 }, { "epoch": 0.8780680189832648, "grad_norm": 2.241496860388617, "learning_rate": 1.849073967699383e-07, "loss": 1.0067, "step": 8349 }, { "epoch": 0.878173189425112, "grad_norm": 2.481058599968573, "learning_rate": 1.845924377318356e-07, "loss": 0.9788, "step": 8350 }, { "epoch": 0.8782783598669593, "grad_norm": 2.046724061828166, "learning_rate": 1.8427773687966304e-07, "loss": 1.0109, "step": 8351 }, { "epoch": 0.8783835303088067, "grad_norm": 2.376223438068095, "learning_rate": 1.8396329424851174e-07, "loss": 0.9638, "step": 8352 }, { "epoch": 0.878488700750654, "grad_norm": 2.2632885387151354, "learning_rate": 1.836491098734447e-07, "loss": 1.0035, "step": 8353 }, { "epoch": 0.8785938711925013, "grad_norm": 2.655319309933802, "learning_rate": 1.8333518378949617e-07, "loss": 0.997, "step": 8354 }, { "epoch": 0.8786990416343486, "grad_norm": 2.788664034342027, "learning_rate": 1.830215160316709e-07, "loss": 0.9811, "step": 8355 }, { "epoch": 0.878804212076196, "grad_norm": 2.4388561721428292, "learning_rate": 1.8270810663494591e-07, "loss": 0.9727, "step": 8356 }, { "epoch": 0.8789093825180433, "grad_norm": 3.2497459609934944, "learning_rate": 1.8239495563426802e-07, "loss": 0.9763, "step": 8357 }, { "epoch": 0.8790145529598906, "grad_norm": 2.7302020542205385, "learning_rate": 1.8208206306455616e-07, "loss": 0.9581, "step": 8358 }, { "epoch": 0.8791197234017379, "grad_norm": 2.96775947443148, "learning_rate": 1.8176942896070083e-07, "loss": 0.9633, "step": 8359 }, { "epoch": 0.8792248938435853, "grad_norm": 2.3368503352333776, "learning_rate": 1.8145705335756298e-07, "loss": 1.0008, "step": 8360 }, { "epoch": 0.8793300642854326, "grad_norm": 2.601467656711171, "learning_rate": 1.8114493628997448e-07, "loss": 0.9852, "step": 8361 }, { "epoch": 0.8794352347272799, "grad_norm": 2.0109472927516876, "learning_rate": 1.808330777927389e-07, "loss": 0.9794, "step": 8362 }, { "epoch": 0.8795404051691272, "grad_norm": 2.457219484542861, "learning_rate": 1.8052147790063146e-07, "loss": 0.9961, "step": 8363 }, { "epoch": 0.8796455756109746, "grad_norm": 2.36559880573984, "learning_rate": 1.8021013664839744e-07, "loss": 1.0001, "step": 8364 }, { "epoch": 0.8797507460528219, "grad_norm": 2.0670040655202695, "learning_rate": 1.7989905407075404e-07, "loss": 0.9953, "step": 8365 }, { "epoch": 0.8798559164946692, "grad_norm": 2.4646404767978787, "learning_rate": 1.795882302023891e-07, "loss": 0.9685, "step": 8366 }, { "epoch": 0.8799610869365165, "grad_norm": 2.0767215820236795, "learning_rate": 1.7927766507796208e-07, "loss": 0.9679, "step": 8367 }, { "epoch": 0.8800662573783639, "grad_norm": 2.3891164659375628, "learning_rate": 1.7896735873210364e-07, "loss": 0.9736, "step": 8368 }, { "epoch": 0.8801714278202112, "grad_norm": 3.110884943284728, "learning_rate": 1.7865731119941498e-07, "loss": 0.9882, "step": 8369 }, { "epoch": 0.8802765982620584, "grad_norm": 2.5430378783802707, "learning_rate": 1.78347522514469e-07, "loss": 0.9486, "step": 8370 }, { "epoch": 0.8803817687039057, "grad_norm": 2.3703437804084424, "learning_rate": 1.780379927118095e-07, "loss": 0.931, "step": 8371 }, { "epoch": 0.880486939145753, "grad_norm": 2.2176388854921583, "learning_rate": 1.7772872182595102e-07, "loss": 0.961, "step": 8372 }, { "epoch": 0.8805921095876004, "grad_norm": 2.930906441515779, "learning_rate": 1.7741970989138046e-07, "loss": 0.9894, "step": 8373 }, { "epoch": 0.8806972800294477, "grad_norm": 2.376144556185852, "learning_rate": 1.771109569425547e-07, "loss": 0.9908, "step": 8374 }, { "epoch": 0.880802450471295, "grad_norm": 2.3563178048249656, "learning_rate": 1.7680246301390202e-07, "loss": 0.9782, "step": 8375 }, { "epoch": 0.8809076209131423, "grad_norm": 2.407079080129235, "learning_rate": 1.7649422813982187e-07, "loss": 1.0058, "step": 8376 }, { "epoch": 0.8810127913549897, "grad_norm": 2.6815694041139504, "learning_rate": 1.7618625235468507e-07, "loss": 0.9664, "step": 8377 }, { "epoch": 0.881117961796837, "grad_norm": 3.395370750987973, "learning_rate": 1.7587853569283302e-07, "loss": 1.0168, "step": 8378 }, { "epoch": 0.8812231322386843, "grad_norm": 2.2069125097551283, "learning_rate": 1.7557107818857889e-07, "loss": 0.9825, "step": 8379 }, { "epoch": 0.8813283026805316, "grad_norm": 3.2766113793720266, "learning_rate": 1.7526387987620602e-07, "loss": 1.0089, "step": 8380 }, { "epoch": 0.881433473122379, "grad_norm": 2.811898086555635, "learning_rate": 1.7495694078996984e-07, "loss": 1.0126, "step": 8381 }, { "epoch": 0.8815386435642263, "grad_norm": 3.267487035349017, "learning_rate": 1.7465026096409598e-07, "loss": 1.006, "step": 8382 }, { "epoch": 0.8816438140060736, "grad_norm": 2.2437889546052934, "learning_rate": 1.743438404327827e-07, "loss": 0.978, "step": 8383 }, { "epoch": 0.8817489844479209, "grad_norm": 2.969311191359739, "learning_rate": 1.740376792301973e-07, "loss": 0.986, "step": 8384 }, { "epoch": 0.8818541548897683, "grad_norm": 2.937579661645254, "learning_rate": 1.7373177739047898e-07, "loss": 0.9688, "step": 8385 }, { "epoch": 0.8819593253316156, "grad_norm": 2.0161300763505494, "learning_rate": 1.7342613494773896e-07, "loss": 0.9934, "step": 8386 }, { "epoch": 0.8820644957734629, "grad_norm": 2.640765009401412, "learning_rate": 1.731207519360581e-07, "loss": 0.971, "step": 8387 }, { "epoch": 0.8821696662153102, "grad_norm": 2.4341326910073557, "learning_rate": 1.7281562838948968e-07, "loss": 1.0054, "step": 8388 }, { "epoch": 0.8822748366571576, "grad_norm": 2.611158264197989, "learning_rate": 1.725107643420565e-07, "loss": 1.0137, "step": 8389 }, { "epoch": 0.8823800070990048, "grad_norm": 2.040666613213012, "learning_rate": 1.7220615982775357e-07, "loss": 0.9651, "step": 8390 }, { "epoch": 0.8824851775408521, "grad_norm": 1.9483793255653934, "learning_rate": 1.719018148805471e-07, "loss": 0.9993, "step": 8391 }, { "epoch": 0.8825903479826994, "grad_norm": 2.2723352862096635, "learning_rate": 1.715977295343732e-07, "loss": 0.9899, "step": 8392 }, { "epoch": 0.8826955184245467, "grad_norm": 1.9036323549582663, "learning_rate": 1.7129390382314065e-07, "loss": 0.9872, "step": 8393 }, { "epoch": 0.8828006888663941, "grad_norm": 2.229306978269511, "learning_rate": 1.7099033778072732e-07, "loss": 1.0015, "step": 8394 }, { "epoch": 0.8829058593082414, "grad_norm": 2.508833961836707, "learning_rate": 1.7068703144098365e-07, "loss": 0.9562, "step": 8395 }, { "epoch": 0.8830110297500887, "grad_norm": 2.5956332163694156, "learning_rate": 1.7038398483773088e-07, "loss": 0.9996, "step": 8396 }, { "epoch": 0.883116200191936, "grad_norm": 1.9692024923776876, "learning_rate": 1.700811980047612e-07, "loss": 0.9557, "step": 8397 }, { "epoch": 0.8832213706337834, "grad_norm": 2.411668195333863, "learning_rate": 1.69778670975837e-07, "loss": 1.0025, "step": 8398 }, { "epoch": 0.8833265410756307, "grad_norm": 2.4902348906801257, "learning_rate": 1.69476403784693e-07, "loss": 1.0012, "step": 8399 }, { "epoch": 0.883431711517478, "grad_norm": 2.818533940448939, "learning_rate": 1.6917439646503415e-07, "loss": 0.9662, "step": 8400 }, { "epoch": 0.8835368819593253, "grad_norm": 3.015978971725827, "learning_rate": 1.688726490505363e-07, "loss": 0.9438, "step": 8401 }, { "epoch": 0.8836420524011727, "grad_norm": 3.354485337789241, "learning_rate": 1.6857116157484755e-07, "loss": 0.9615, "step": 8402 }, { "epoch": 0.88374722284302, "grad_norm": 2.4921532513409224, "learning_rate": 1.6826993407158488e-07, "loss": 0.9736, "step": 8403 }, { "epoch": 0.8838523932848673, "grad_norm": 2.414353246940312, "learning_rate": 1.679689665743381e-07, "loss": 0.9845, "step": 8404 }, { "epoch": 0.8839575637267146, "grad_norm": 3.235772053731161, "learning_rate": 1.6766825911666757e-07, "loss": 1.0116, "step": 8405 }, { "epoch": 0.884062734168562, "grad_norm": 2.262414448014693, "learning_rate": 1.6736781173210426e-07, "loss": 0.9497, "step": 8406 }, { "epoch": 0.8841679046104093, "grad_norm": 2.731997494521064, "learning_rate": 1.670676244541508e-07, "loss": 1.0178, "step": 8407 }, { "epoch": 0.8842730750522566, "grad_norm": 2.2117783222892786, "learning_rate": 1.6676769731627907e-07, "loss": 0.9542, "step": 8408 }, { "epoch": 0.8843782454941039, "grad_norm": 2.330534181027143, "learning_rate": 1.6646803035193532e-07, "loss": 0.9347, "step": 8409 }, { "epoch": 0.8844834159359513, "grad_norm": 2.492206689262255, "learning_rate": 1.661686235945331e-07, "loss": 1.0167, "step": 8410 }, { "epoch": 0.8845885863777985, "grad_norm": 2.63746278586202, "learning_rate": 1.6586947707745965e-07, "loss": 0.9653, "step": 8411 }, { "epoch": 0.8846937568196458, "grad_norm": 1.9434162730085702, "learning_rate": 1.655705908340713e-07, "loss": 0.893, "step": 8412 }, { "epoch": 0.8847989272614931, "grad_norm": 1.9752644333020086, "learning_rate": 1.6527196489769664e-07, "loss": 1.0008, "step": 8413 }, { "epoch": 0.8849040977033404, "grad_norm": 2.39797801920839, "learning_rate": 1.6497359930163492e-07, "loss": 1.0064, "step": 8414 }, { "epoch": 0.8850092681451878, "grad_norm": 3.023142379449854, "learning_rate": 1.6467549407915563e-07, "loss": 0.9955, "step": 8415 }, { "epoch": 0.8851144385870351, "grad_norm": 2.4594124824433967, "learning_rate": 1.6437764926350074e-07, "loss": 0.9866, "step": 8416 }, { "epoch": 0.8852196090288824, "grad_norm": 3.1186745683219406, "learning_rate": 1.640800648878807e-07, "loss": 1.015, "step": 8417 }, { "epoch": 0.8853247794707297, "grad_norm": 2.013217130768628, "learning_rate": 1.637827409854803e-07, "loss": 0.9681, "step": 8418 }, { "epoch": 0.8854299499125771, "grad_norm": 2.412821949942149, "learning_rate": 1.6348567758945277e-07, "loss": 1.0154, "step": 8419 }, { "epoch": 0.8855351203544244, "grad_norm": 2.82262481468373, "learning_rate": 1.6318887473292245e-07, "loss": 0.9982, "step": 8420 }, { "epoch": 0.8856402907962717, "grad_norm": 2.4389919981151498, "learning_rate": 1.6289233244898616e-07, "loss": 0.9898, "step": 8421 }, { "epoch": 0.885745461238119, "grad_norm": 2.5329579890512885, "learning_rate": 1.6259605077070888e-07, "loss": 0.9611, "step": 8422 }, { "epoch": 0.8858506316799664, "grad_norm": 2.242020126640096, "learning_rate": 1.6230002973113056e-07, "loss": 0.9727, "step": 8423 }, { "epoch": 0.8859558021218137, "grad_norm": 2.678622402703502, "learning_rate": 1.620042693632584e-07, "loss": 0.973, "step": 8424 }, { "epoch": 0.886060972563661, "grad_norm": 3.081880759439436, "learning_rate": 1.61708769700073e-07, "loss": 0.9799, "step": 8425 }, { "epoch": 0.8861661430055083, "grad_norm": 2.3125089299768073, "learning_rate": 1.614135307745235e-07, "loss": 0.9988, "step": 8426 }, { "epoch": 0.8862713134473557, "grad_norm": 1.7855830987719163, "learning_rate": 1.611185526195322e-07, "loss": 0.9772, "step": 8427 }, { "epoch": 0.886376483889203, "grad_norm": 1.8724555459292305, "learning_rate": 1.6082383526799196e-07, "loss": 1.0256, "step": 8428 }, { "epoch": 0.8864816543310503, "grad_norm": 3.5183308798130972, "learning_rate": 1.6052937875276481e-07, "loss": 0.9885, "step": 8429 }, { "epoch": 0.8865868247728976, "grad_norm": 1.9475600577969459, "learning_rate": 1.602351831066862e-07, "loss": 0.9556, "step": 8430 }, { "epoch": 0.8866919952147448, "grad_norm": 2.068576517697177, "learning_rate": 1.5994124836255952e-07, "loss": 0.9723, "step": 8431 }, { "epoch": 0.8867971656565922, "grad_norm": 2.0504911635408307, "learning_rate": 1.5964757455316282e-07, "loss": 0.9624, "step": 8432 }, { "epoch": 0.8869023360984395, "grad_norm": 2.384507371414114, "learning_rate": 1.593541617112415e-07, "loss": 0.9625, "step": 8433 }, { "epoch": 0.8870075065402868, "grad_norm": 2.2608056805889936, "learning_rate": 1.5906100986951445e-07, "loss": 0.9405, "step": 8434 }, { "epoch": 0.8871126769821341, "grad_norm": 2.3029169049564775, "learning_rate": 1.5876811906066992e-07, "loss": 1.013, "step": 8435 }, { "epoch": 0.8872178474239815, "grad_norm": 3.240342256666061, "learning_rate": 1.5847548931736678e-07, "loss": 0.9972, "step": 8436 }, { "epoch": 0.8873230178658288, "grad_norm": 2.0368840918231412, "learning_rate": 1.5818312067223673e-07, "loss": 1.0, "step": 8437 }, { "epoch": 0.8874281883076761, "grad_norm": 2.4964437098615773, "learning_rate": 1.5789101315788007e-07, "loss": 0.9554, "step": 8438 }, { "epoch": 0.8875333587495234, "grad_norm": 2.719371843674228, "learning_rate": 1.5759916680687022e-07, "loss": 1.0049, "step": 8439 }, { "epoch": 0.8876385291913708, "grad_norm": 2.545862838080534, "learning_rate": 1.5730758165174914e-07, "loss": 0.979, "step": 8440 }, { "epoch": 0.8877436996332181, "grad_norm": 4.491846091536825, "learning_rate": 1.5701625772503142e-07, "loss": 1.0328, "step": 8441 }, { "epoch": 0.8878488700750654, "grad_norm": 2.9635865459634285, "learning_rate": 1.5672519505920248e-07, "loss": 1.0136, "step": 8442 }, { "epoch": 0.8879540405169127, "grad_norm": 2.5871391440866836, "learning_rate": 1.5643439368671688e-07, "loss": 1.0381, "step": 8443 }, { "epoch": 0.8880592109587601, "grad_norm": 2.367393248660268, "learning_rate": 1.561438536400023e-07, "loss": 0.9617, "step": 8444 }, { "epoch": 0.8881643814006074, "grad_norm": 2.293140538401526, "learning_rate": 1.5585357495145504e-07, "loss": 0.9558, "step": 8445 }, { "epoch": 0.8882695518424547, "grad_norm": 2.217902357821252, "learning_rate": 1.5556355765344477e-07, "loss": 0.9856, "step": 8446 }, { "epoch": 0.888374722284302, "grad_norm": 3.1515334464477927, "learning_rate": 1.552738017783098e-07, "loss": 0.9647, "step": 8447 }, { "epoch": 0.8884798927261494, "grad_norm": 1.9568682729973774, "learning_rate": 1.5498430735836067e-07, "loss": 0.9509, "step": 8448 }, { "epoch": 0.8885850631679967, "grad_norm": 2.243562043893478, "learning_rate": 1.5469507442587766e-07, "loss": 0.9903, "step": 8449 }, { "epoch": 0.888690233609844, "grad_norm": 2.514164405589009, "learning_rate": 1.5440610301311277e-07, "loss": 1.0134, "step": 8450 }, { "epoch": 0.8887954040516912, "grad_norm": 3.3526553400519883, "learning_rate": 1.5411739315228909e-07, "loss": 0.9841, "step": 8451 }, { "epoch": 0.8889005744935385, "grad_norm": 2.7744608538169317, "learning_rate": 1.538289448755989e-07, "loss": 1.0142, "step": 8452 }, { "epoch": 0.8890057449353859, "grad_norm": 2.2860868670728114, "learning_rate": 1.5354075821520765e-07, "loss": 1.0062, "step": 8453 }, { "epoch": 0.8891109153772332, "grad_norm": 3.007217663938393, "learning_rate": 1.5325283320324903e-07, "loss": 0.9644, "step": 8454 }, { "epoch": 0.8892160858190805, "grad_norm": 2.9791595489559435, "learning_rate": 1.5296516987183042e-07, "loss": 0.9896, "step": 8455 }, { "epoch": 0.8893212562609278, "grad_norm": 1.8583784300969213, "learning_rate": 1.5267776825302782e-07, "loss": 1.0152, "step": 8456 }, { "epoch": 0.8894264267027752, "grad_norm": 2.729685552570694, "learning_rate": 1.523906283788884e-07, "loss": 1.0083, "step": 8457 }, { "epoch": 0.8895315971446225, "grad_norm": 3.1558272502401556, "learning_rate": 1.5210375028143097e-07, "loss": 0.9668, "step": 8458 }, { "epoch": 0.8896367675864698, "grad_norm": 2.1971892785045033, "learning_rate": 1.518171339926436e-07, "loss": 1.0038, "step": 8459 }, { "epoch": 0.8897419380283171, "grad_norm": 2.841371147766442, "learning_rate": 1.5153077954448813e-07, "loss": 0.9885, "step": 8460 }, { "epoch": 0.8898471084701645, "grad_norm": 2.691644935448725, "learning_rate": 1.5124468696889383e-07, "loss": 0.9512, "step": 8461 }, { "epoch": 0.8899522789120118, "grad_norm": 3.5217017668869097, "learning_rate": 1.5095885629776319e-07, "loss": 0.9877, "step": 8462 }, { "epoch": 0.8900574493538591, "grad_norm": 1.6034639625291485, "learning_rate": 1.506732875629674e-07, "loss": 1.0055, "step": 8463 }, { "epoch": 0.8901626197957064, "grad_norm": 2.1047634595751026, "learning_rate": 1.5038798079635043e-07, "loss": 0.9644, "step": 8464 }, { "epoch": 0.8902677902375538, "grad_norm": 2.1406693190498256, "learning_rate": 1.5010293602972653e-07, "loss": 0.974, "step": 8465 }, { "epoch": 0.8903729606794011, "grad_norm": 2.042851624189361, "learning_rate": 1.4981815329487948e-07, "loss": 0.9174, "step": 8466 }, { "epoch": 0.8904781311212484, "grad_norm": 2.5483924252229673, "learning_rate": 1.4953363262356552e-07, "loss": 0.9662, "step": 8467 }, { "epoch": 0.8905833015630957, "grad_norm": 3.0624720151851617, "learning_rate": 1.4924937404750954e-07, "loss": 0.9856, "step": 8468 }, { "epoch": 0.890688472004943, "grad_norm": 3.0903489981255654, "learning_rate": 1.4896537759841067e-07, "loss": 1.0083, "step": 8469 }, { "epoch": 0.8907936424467904, "grad_norm": 2.1498114196551823, "learning_rate": 1.486816433079352e-07, "loss": 0.9812, "step": 8470 }, { "epoch": 0.8908988128886377, "grad_norm": 2.15804642211936, "learning_rate": 1.4839817120772288e-07, "loss": 0.9824, "step": 8471 }, { "epoch": 0.8910039833304849, "grad_norm": 2.069762197489603, "learning_rate": 1.4811496132938196e-07, "loss": 0.9699, "step": 8472 }, { "epoch": 0.8911091537723322, "grad_norm": 2.3885493176951296, "learning_rate": 1.4783201370449225e-07, "loss": 0.9592, "step": 8473 }, { "epoch": 0.8912143242141796, "grad_norm": 2.5287347361503936, "learning_rate": 1.4754932836460622e-07, "loss": 0.927, "step": 8474 }, { "epoch": 0.8913194946560269, "grad_norm": 2.477400242672135, "learning_rate": 1.47266905341244e-07, "loss": 0.9984, "step": 8475 }, { "epoch": 0.8914246650978742, "grad_norm": 2.8722700619640027, "learning_rate": 1.4698474466589896e-07, "loss": 0.9341, "step": 8476 }, { "epoch": 0.8915298355397215, "grad_norm": 2.722448788439971, "learning_rate": 1.4670284637003345e-07, "loss": 0.9496, "step": 8477 }, { "epoch": 0.8916350059815689, "grad_norm": 2.8289714203437892, "learning_rate": 1.464212104850815e-07, "loss": 0.9568, "step": 8478 }, { "epoch": 0.8917401764234162, "grad_norm": 2.6306864237152623, "learning_rate": 1.4613983704244826e-07, "loss": 1.0054, "step": 8479 }, { "epoch": 0.8918453468652635, "grad_norm": 2.018496472612239, "learning_rate": 1.4585872607350837e-07, "loss": 0.9911, "step": 8480 }, { "epoch": 0.8919505173071108, "grad_norm": 2.6075018388842577, "learning_rate": 1.4557787760960812e-07, "loss": 0.9536, "step": 8481 }, { "epoch": 0.8920556877489582, "grad_norm": 2.419257762481413, "learning_rate": 1.4529729168206386e-07, "loss": 0.9466, "step": 8482 }, { "epoch": 0.8921608581908055, "grad_norm": 2.726364003607082, "learning_rate": 1.4501696832216445e-07, "loss": 0.9499, "step": 8483 }, { "epoch": 0.8922660286326528, "grad_norm": 2.3855719640701807, "learning_rate": 1.4473690756116654e-07, "loss": 0.9907, "step": 8484 }, { "epoch": 0.8923711990745001, "grad_norm": 2.4566580192018845, "learning_rate": 1.4445710943030018e-07, "loss": 0.9854, "step": 8485 }, { "epoch": 0.8924763695163475, "grad_norm": 2.816644567674129, "learning_rate": 1.4417757396076427e-07, "loss": 1.0299, "step": 8486 }, { "epoch": 0.8925815399581948, "grad_norm": 2.4296406347952884, "learning_rate": 1.4389830118372972e-07, "loss": 0.9775, "step": 8487 }, { "epoch": 0.8926867104000421, "grad_norm": 1.952494054315651, "learning_rate": 1.4361929113033747e-07, "loss": 0.9887, "step": 8488 }, { "epoch": 0.8927918808418894, "grad_norm": 2.203328059269768, "learning_rate": 1.4334054383169897e-07, "loss": 0.9499, "step": 8489 }, { "epoch": 0.8928970512837368, "grad_norm": 1.8322340247823978, "learning_rate": 1.430620593188975e-07, "loss": 0.9407, "step": 8490 }, { "epoch": 0.8930022217255841, "grad_norm": 2.480996419359068, "learning_rate": 1.4278383762298536e-07, "loss": 0.9938, "step": 8491 }, { "epoch": 0.8931073921674313, "grad_norm": 3.1090653197790528, "learning_rate": 1.4250587877498695e-07, "loss": 0.9909, "step": 8492 }, { "epoch": 0.8932125626092786, "grad_norm": 2.720781388178653, "learning_rate": 1.422281828058969e-07, "loss": 1.0105, "step": 8493 }, { "epoch": 0.8933177330511259, "grad_norm": 2.753443908893908, "learning_rate": 1.4195074974667989e-07, "loss": 0.994, "step": 8494 }, { "epoch": 0.8934229034929733, "grad_norm": 1.8341611634515465, "learning_rate": 1.4167357962827283e-07, "loss": 0.9763, "step": 8495 }, { "epoch": 0.8935280739348206, "grad_norm": 2.9427961867698977, "learning_rate": 1.4139667248158072e-07, "loss": 0.9565, "step": 8496 }, { "epoch": 0.8936332443766679, "grad_norm": 2.4027494539397103, "learning_rate": 1.4112002833748279e-07, "loss": 0.9674, "step": 8497 }, { "epoch": 0.8937384148185152, "grad_norm": 3.598778669005095, "learning_rate": 1.4084364722682598e-07, "loss": 0.9917, "step": 8498 }, { "epoch": 0.8938435852603626, "grad_norm": 3.346549402658093, "learning_rate": 1.4056752918042898e-07, "loss": 0.9366, "step": 8499 }, { "epoch": 0.8939487557022099, "grad_norm": 2.5730479469223897, "learning_rate": 1.4029167422908107e-07, "loss": 0.9992, "step": 8500 }, { "epoch": 0.8940539261440572, "grad_norm": 2.7809454904611313, "learning_rate": 1.4001608240354236e-07, "loss": 0.9954, "step": 8501 }, { "epoch": 0.8941590965859045, "grad_norm": 2.063097454423752, "learning_rate": 1.3974075373454383e-07, "loss": 0.9769, "step": 8502 }, { "epoch": 0.8942642670277519, "grad_norm": 1.8225435784652433, "learning_rate": 1.3946568825278618e-07, "loss": 0.9645, "step": 8503 }, { "epoch": 0.8943694374695992, "grad_norm": 2.755707682683513, "learning_rate": 1.3919088598894154e-07, "loss": 0.9932, "step": 8504 }, { "epoch": 0.8944746079114465, "grad_norm": 3.3674579224287773, "learning_rate": 1.3891634697365207e-07, "loss": 1.0107, "step": 8505 }, { "epoch": 0.8945797783532938, "grad_norm": 3.130174869282355, "learning_rate": 1.386420712375322e-07, "loss": 1.0157, "step": 8506 }, { "epoch": 0.8946849487951412, "grad_norm": 2.0725928871095154, "learning_rate": 1.3836805881116495e-07, "loss": 0.9675, "step": 8507 }, { "epoch": 0.8947901192369885, "grad_norm": 2.345929950085412, "learning_rate": 1.3809430972510446e-07, "loss": 0.9821, "step": 8508 }, { "epoch": 0.8948952896788358, "grad_norm": 2.7903627116773, "learning_rate": 1.378208240098766e-07, "loss": 0.9848, "step": 8509 }, { "epoch": 0.8950004601206831, "grad_norm": 2.328892581734959, "learning_rate": 1.3754760169597676e-07, "loss": 1.0026, "step": 8510 }, { "epoch": 0.8951056305625305, "grad_norm": 2.8229279376291934, "learning_rate": 1.3727464281387186e-07, "loss": 0.9352, "step": 8511 }, { "epoch": 0.8952108010043777, "grad_norm": 2.8468987303756284, "learning_rate": 1.3700194739399848e-07, "loss": 1.0079, "step": 8512 }, { "epoch": 0.895315971446225, "grad_norm": 2.015499855514806, "learning_rate": 1.3672951546676444e-07, "loss": 0.9678, "step": 8513 }, { "epoch": 0.8954211418880723, "grad_norm": 1.976934703918547, "learning_rate": 1.364573470625477e-07, "loss": 0.9829, "step": 8514 }, { "epoch": 0.8955263123299196, "grad_norm": 2.31984836197906, "learning_rate": 1.3618544221169732e-07, "loss": 0.9756, "step": 8515 }, { "epoch": 0.895631482771767, "grad_norm": 2.665380278443687, "learning_rate": 1.359138009445335e-07, "loss": 0.9988, "step": 8516 }, { "epoch": 0.8957366532136143, "grad_norm": 1.9964367122956426, "learning_rate": 1.3564242329134502e-07, "loss": 0.944, "step": 8517 }, { "epoch": 0.8958418236554616, "grad_norm": 2.576231474046989, "learning_rate": 1.3537130928239383e-07, "loss": 0.9717, "step": 8518 }, { "epoch": 0.8959469940973089, "grad_norm": 2.5238951016013367, "learning_rate": 1.3510045894791018e-07, "loss": 0.9593, "step": 8519 }, { "epoch": 0.8960521645391563, "grad_norm": 3.0112910269228275, "learning_rate": 1.3482987231809714e-07, "loss": 1.0174, "step": 8520 }, { "epoch": 0.8961573349810036, "grad_norm": 2.728647164800237, "learning_rate": 1.345595494231261e-07, "loss": 1.0167, "step": 8521 }, { "epoch": 0.8962625054228509, "grad_norm": 3.110617555783042, "learning_rate": 1.3428949029314132e-07, "loss": 1.0145, "step": 8522 }, { "epoch": 0.8963676758646982, "grad_norm": 2.1989572332927216, "learning_rate": 1.3401969495825534e-07, "loss": 0.9382, "step": 8523 }, { "epoch": 0.8964728463065456, "grad_norm": 2.530350556427271, "learning_rate": 1.3375016344855302e-07, "loss": 1.0152, "step": 8524 }, { "epoch": 0.8965780167483929, "grad_norm": 3.24507065070509, "learning_rate": 1.3348089579408946e-07, "loss": 1.0011, "step": 8525 }, { "epoch": 0.8966831871902402, "grad_norm": 2.292129085968337, "learning_rate": 1.3321189202488955e-07, "loss": 0.9794, "step": 8526 }, { "epoch": 0.8967883576320875, "grad_norm": 2.5251603922264785, "learning_rate": 1.3294315217094956e-07, "loss": 1.0009, "step": 8527 }, { "epoch": 0.8968935280739349, "grad_norm": 1.9633155096515325, "learning_rate": 1.3267467626223606e-07, "loss": 0.959, "step": 8528 }, { "epoch": 0.8969986985157822, "grad_norm": 3.11725878427488, "learning_rate": 1.32406464328686e-07, "loss": 1.0017, "step": 8529 }, { "epoch": 0.8971038689576295, "grad_norm": 1.9422340766869772, "learning_rate": 1.321385164002076e-07, "loss": 0.9736, "step": 8530 }, { "epoch": 0.8972090393994768, "grad_norm": 2.3021774888473225, "learning_rate": 1.3187083250667865e-07, "loss": 0.9732, "step": 8531 }, { "epoch": 0.8973142098413242, "grad_norm": 2.6462765296135227, "learning_rate": 1.3160341267794778e-07, "loss": 0.9473, "step": 8532 }, { "epoch": 0.8974193802831714, "grad_norm": 2.9968459492356376, "learning_rate": 1.3133625694383506e-07, "loss": 1.0007, "step": 8533 }, { "epoch": 0.8975245507250187, "grad_norm": 2.8871273274281726, "learning_rate": 1.310693653341305e-07, "loss": 0.9901, "step": 8534 }, { "epoch": 0.897629721166866, "grad_norm": 2.4909489473224817, "learning_rate": 1.3080273787859367e-07, "loss": 1.0059, "step": 8535 }, { "epoch": 0.8977348916087133, "grad_norm": 2.775824788943004, "learning_rate": 1.3053637460695655e-07, "loss": 1.0172, "step": 8536 }, { "epoch": 0.8978400620505607, "grad_norm": 2.405665330311999, "learning_rate": 1.3027027554891992e-07, "loss": 1.0053, "step": 8537 }, { "epoch": 0.897945232492408, "grad_norm": 2.194682374380952, "learning_rate": 1.3000444073415637e-07, "loss": 0.9454, "step": 8538 }, { "epoch": 0.8980504029342553, "grad_norm": 2.5244706470088323, "learning_rate": 1.2973887019230885e-07, "loss": 0.9712, "step": 8539 }, { "epoch": 0.8981555733761026, "grad_norm": 2.9834146366006844, "learning_rate": 1.2947356395298956e-07, "loss": 0.9946, "step": 8540 }, { "epoch": 0.89826074381795, "grad_norm": 2.246603088988245, "learning_rate": 1.292085220457834e-07, "loss": 0.9229, "step": 8541 }, { "epoch": 0.8983659142597973, "grad_norm": 2.4711768931739004, "learning_rate": 1.2894374450024338e-07, "loss": 0.9644, "step": 8542 }, { "epoch": 0.8984710847016446, "grad_norm": 1.99224760834784, "learning_rate": 1.2867923134589483e-07, "loss": 0.9821, "step": 8543 }, { "epoch": 0.8985762551434919, "grad_norm": 1.6913963223277704, "learning_rate": 1.2841498261223355e-07, "loss": 0.9516, "step": 8544 }, { "epoch": 0.8986814255853393, "grad_norm": 2.161369459321979, "learning_rate": 1.2815099832872463e-07, "loss": 0.9564, "step": 8545 }, { "epoch": 0.8987865960271866, "grad_norm": 2.1550070381084114, "learning_rate": 1.278872785248042e-07, "loss": 0.9686, "step": 8546 }, { "epoch": 0.8988917664690339, "grad_norm": 2.840197703849838, "learning_rate": 1.276238232298796e-07, "loss": 0.9632, "step": 8547 }, { "epoch": 0.8989969369108812, "grad_norm": 2.5769335426004982, "learning_rate": 1.273606324733284e-07, "loss": 0.9816, "step": 8548 }, { "epoch": 0.8991021073527286, "grad_norm": 2.6707662326181425, "learning_rate": 1.2709770628449776e-07, "loss": 0.9601, "step": 8549 }, { "epoch": 0.8992072777945759, "grad_norm": 1.7310467132195133, "learning_rate": 1.2683504469270636e-07, "loss": 0.9591, "step": 8550 }, { "epoch": 0.8993124482364232, "grad_norm": 2.548589763012564, "learning_rate": 1.2657264772724247e-07, "loss": 0.993, "step": 8551 }, { "epoch": 0.8994176186782705, "grad_norm": 2.2542436406644684, "learning_rate": 1.2631051541736578e-07, "loss": 0.9368, "step": 8552 }, { "epoch": 0.8995227891201177, "grad_norm": 2.5992564379457455, "learning_rate": 1.2604864779230674e-07, "loss": 0.9385, "step": 8553 }, { "epoch": 0.8996279595619651, "grad_norm": 2.3697543945400343, "learning_rate": 1.2578704488126426e-07, "loss": 0.9866, "step": 8554 }, { "epoch": 0.8997331300038124, "grad_norm": 2.31815816883965, "learning_rate": 1.2552570671340998e-07, "loss": 0.974, "step": 8555 }, { "epoch": 0.8998383004456597, "grad_norm": 2.6932418190961913, "learning_rate": 1.2526463331788503e-07, "loss": 1.0144, "step": 8556 }, { "epoch": 0.899943470887507, "grad_norm": 2.969325960064139, "learning_rate": 1.250038247238014e-07, "loss": 1.0272, "step": 8557 }, { "epoch": 0.9000486413293544, "grad_norm": 2.781868699429922, "learning_rate": 1.2474328096024086e-07, "loss": 0.999, "step": 8558 }, { "epoch": 0.9001538117712017, "grad_norm": 3.139713598126397, "learning_rate": 1.244830020562557e-07, "loss": 0.9944, "step": 8559 }, { "epoch": 0.900258982213049, "grad_norm": 2.2804116496124767, "learning_rate": 1.2422298804086963e-07, "loss": 0.9445, "step": 8560 }, { "epoch": 0.9003641526548963, "grad_norm": 2.1291441323106786, "learning_rate": 1.2396323894307587e-07, "loss": 0.9773, "step": 8561 }, { "epoch": 0.9004693230967437, "grad_norm": 2.7682785401579455, "learning_rate": 1.2370375479183905e-07, "loss": 0.9852, "step": 8562 }, { "epoch": 0.900574493538591, "grad_norm": 2.906960990437752, "learning_rate": 1.234445356160932e-07, "loss": 0.958, "step": 8563 }, { "epoch": 0.9006796639804383, "grad_norm": 3.5815863699736807, "learning_rate": 1.2318558144474303e-07, "loss": 1.064, "step": 8564 }, { "epoch": 0.9007848344222856, "grad_norm": 2.7121326512924866, "learning_rate": 1.2292689230666482e-07, "loss": 0.9298, "step": 8565 }, { "epoch": 0.900890004864133, "grad_norm": 2.7103260334531956, "learning_rate": 1.226684682307036e-07, "loss": 0.9481, "step": 8566 }, { "epoch": 0.9009951753059803, "grad_norm": 2.534376466134969, "learning_rate": 1.2241030924567603e-07, "loss": 1.0111, "step": 8567 }, { "epoch": 0.9011003457478276, "grad_norm": 2.1693292560203092, "learning_rate": 1.2215241538036853e-07, "loss": 0.9746, "step": 8568 }, { "epoch": 0.9012055161896749, "grad_norm": 1.8401531315355144, "learning_rate": 1.2189478666353865e-07, "loss": 1.0031, "step": 8569 }, { "epoch": 0.9013106866315223, "grad_norm": 2.575023205233234, "learning_rate": 1.2163742312391342e-07, "loss": 0.9486, "step": 8570 }, { "epoch": 0.9014158570733696, "grad_norm": 1.9758094923843876, "learning_rate": 1.2138032479019206e-07, "loss": 0.9974, "step": 8571 }, { "epoch": 0.9015210275152169, "grad_norm": 2.2170223011196706, "learning_rate": 1.2112349169104172e-07, "loss": 0.9782, "step": 8572 }, { "epoch": 0.9016261979570641, "grad_norm": 1.7838799688859874, "learning_rate": 1.208669238551019e-07, "loss": 0.9755, "step": 8573 }, { "epoch": 0.9017313683989114, "grad_norm": 2.4567274019385765, "learning_rate": 1.2061062131098174e-07, "loss": 0.9455, "step": 8574 }, { "epoch": 0.9018365388407588, "grad_norm": 1.722594664429003, "learning_rate": 1.203545840872611e-07, "loss": 0.9946, "step": 8575 }, { "epoch": 0.9019417092826061, "grad_norm": 2.469843612260426, "learning_rate": 1.2009881221249047e-07, "loss": 0.9817, "step": 8576 }, { "epoch": 0.9020468797244534, "grad_norm": 2.763903318605096, "learning_rate": 1.1984330571518932e-07, "loss": 0.96, "step": 8577 }, { "epoch": 0.9021520501663007, "grad_norm": 2.4544321149500306, "learning_rate": 1.1958806462384953e-07, "loss": 0.9663, "step": 8578 }, { "epoch": 0.9022572206081481, "grad_norm": 2.977813358898067, "learning_rate": 1.1933308896693253e-07, "loss": 1.0148, "step": 8579 }, { "epoch": 0.9023623910499954, "grad_norm": 1.999477072638366, "learning_rate": 1.1907837877286943e-07, "loss": 0.9989, "step": 8580 }, { "epoch": 0.9024675614918427, "grad_norm": 2.2185946512743513, "learning_rate": 1.188239340700631e-07, "loss": 0.9987, "step": 8581 }, { "epoch": 0.90257273193369, "grad_norm": 2.5096878993721856, "learning_rate": 1.1856975488688555e-07, "loss": 0.9858, "step": 8582 }, { "epoch": 0.9026779023755374, "grad_norm": 2.1735177550715403, "learning_rate": 1.1831584125167966e-07, "loss": 1.0257, "step": 8583 }, { "epoch": 0.9027830728173847, "grad_norm": 1.7897935124112625, "learning_rate": 1.1806219319275918e-07, "loss": 0.9778, "step": 8584 }, { "epoch": 0.902888243259232, "grad_norm": 2.15199039194613, "learning_rate": 1.1780881073840816e-07, "loss": 1.0001, "step": 8585 }, { "epoch": 0.9029934137010793, "grad_norm": 3.833073505264171, "learning_rate": 1.1755569391687954e-07, "loss": 1.0042, "step": 8586 }, { "epoch": 0.9030985841429267, "grad_norm": 2.682110463618559, "learning_rate": 1.173028427563988e-07, "loss": 1.0197, "step": 8587 }, { "epoch": 0.903203754584774, "grad_norm": 1.9252316839391137, "learning_rate": 1.1705025728516089e-07, "loss": 0.9597, "step": 8588 }, { "epoch": 0.9033089250266213, "grad_norm": 3.407849681083908, "learning_rate": 1.1679793753133024e-07, "loss": 0.9905, "step": 8589 }, { "epoch": 0.9034140954684686, "grad_norm": 2.514269869679688, "learning_rate": 1.1654588352304347e-07, "loss": 1.0422, "step": 8590 }, { "epoch": 0.903519265910316, "grad_norm": 2.9042596313289244, "learning_rate": 1.1629409528840534e-07, "loss": 0.9899, "step": 8591 }, { "epoch": 0.9036244363521633, "grad_norm": 1.8985121884778924, "learning_rate": 1.1604257285549314e-07, "loss": 0.9963, "step": 8592 }, { "epoch": 0.9037296067940106, "grad_norm": 2.5551184251028602, "learning_rate": 1.1579131625235356e-07, "loss": 0.9615, "step": 8593 }, { "epoch": 0.9038347772358578, "grad_norm": 2.1413999858159354, "learning_rate": 1.1554032550700284e-07, "loss": 0.9406, "step": 8594 }, { "epoch": 0.9039399476777051, "grad_norm": 2.6069475924003704, "learning_rate": 1.1528960064742967e-07, "loss": 0.9775, "step": 8595 }, { "epoch": 0.9040451181195525, "grad_norm": 1.9692405646354523, "learning_rate": 1.1503914170159058e-07, "loss": 0.9734, "step": 8596 }, { "epoch": 0.9041502885613998, "grad_norm": 3.182347862653507, "learning_rate": 1.1478894869741409e-07, "loss": 0.9695, "step": 8597 }, { "epoch": 0.9042554590032471, "grad_norm": 2.393093848945507, "learning_rate": 1.1453902166279895e-07, "loss": 0.9901, "step": 8598 }, { "epoch": 0.9043606294450944, "grad_norm": 1.8954299188375663, "learning_rate": 1.1428936062561402e-07, "loss": 1.0036, "step": 8599 }, { "epoch": 0.9044657998869418, "grad_norm": 2.0529572247299, "learning_rate": 1.1403996561369812e-07, "loss": 0.972, "step": 8600 }, { "epoch": 0.9045709703287891, "grad_norm": 2.3468453698266063, "learning_rate": 1.1379083665486068e-07, "loss": 1.004, "step": 8601 }, { "epoch": 0.9046761407706364, "grad_norm": 1.9568847387652115, "learning_rate": 1.1354197377688198e-07, "loss": 0.9641, "step": 8602 }, { "epoch": 0.9047813112124837, "grad_norm": 2.24292284847942, "learning_rate": 1.1329337700751147e-07, "loss": 0.9581, "step": 8603 }, { "epoch": 0.904886481654331, "grad_norm": 2.5786412520948656, "learning_rate": 1.1304504637447062e-07, "loss": 0.9849, "step": 8604 }, { "epoch": 0.9049916520961784, "grad_norm": 2.6766324984329586, "learning_rate": 1.1279698190544918e-07, "loss": 0.9928, "step": 8605 }, { "epoch": 0.9050968225380257, "grad_norm": 2.4750268462318665, "learning_rate": 1.125491836281084e-07, "loss": 0.9605, "step": 8606 }, { "epoch": 0.905201992979873, "grad_norm": 2.3781929663367936, "learning_rate": 1.1230165157008033e-07, "loss": 1.0048, "step": 8607 }, { "epoch": 0.9053071634217204, "grad_norm": 2.2874545893594553, "learning_rate": 1.1205438575896677e-07, "loss": 0.9634, "step": 8608 }, { "epoch": 0.9054123338635677, "grad_norm": 2.526387831515657, "learning_rate": 1.1180738622233928e-07, "loss": 0.9509, "step": 8609 }, { "epoch": 0.905517504305415, "grad_norm": 2.0896230571312957, "learning_rate": 1.1156065298773972e-07, "loss": 0.9917, "step": 8610 }, { "epoch": 0.9056226747472623, "grad_norm": 2.022491968484562, "learning_rate": 1.113141860826819e-07, "loss": 1.0339, "step": 8611 }, { "epoch": 0.9057278451891096, "grad_norm": 2.1059992344770246, "learning_rate": 1.1106798553464804e-07, "loss": 0.9661, "step": 8612 }, { "epoch": 0.905833015630957, "grad_norm": 2.730011822148298, "learning_rate": 1.1082205137109225e-07, "loss": 0.9576, "step": 8613 }, { "epoch": 0.9059381860728042, "grad_norm": 2.761974033618745, "learning_rate": 1.1057638361943679e-07, "loss": 0.9832, "step": 8614 }, { "epoch": 0.9060433565146515, "grad_norm": 2.9220028143550656, "learning_rate": 1.1033098230707668e-07, "loss": 1.0099, "step": 8615 }, { "epoch": 0.9061485269564988, "grad_norm": 3.1531693192642467, "learning_rate": 1.1008584746137558e-07, "loss": 0.9964, "step": 8616 }, { "epoch": 0.9062536973983462, "grad_norm": 2.190987178982146, "learning_rate": 1.0984097910966802e-07, "loss": 0.9959, "step": 8617 }, { "epoch": 0.9063588678401935, "grad_norm": 3.111299811220905, "learning_rate": 1.0959637727925881e-07, "loss": 1.0023, "step": 8618 }, { "epoch": 0.9064640382820408, "grad_norm": 3.157837969992092, "learning_rate": 1.0935204199742255e-07, "loss": 1.0217, "step": 8619 }, { "epoch": 0.9065692087238881, "grad_norm": 2.4262394538613283, "learning_rate": 1.0910797329140466e-07, "loss": 1.0104, "step": 8620 }, { "epoch": 0.9066743791657355, "grad_norm": 2.2045284669897396, "learning_rate": 1.0886417118842113e-07, "loss": 0.9879, "step": 8621 }, { "epoch": 0.9067795496075828, "grad_norm": 3.4589297463267084, "learning_rate": 1.0862063571565773e-07, "loss": 0.9798, "step": 8622 }, { "epoch": 0.9068847200494301, "grad_norm": 2.8785742456954004, "learning_rate": 1.0837736690026996e-07, "loss": 0.9865, "step": 8623 }, { "epoch": 0.9069898904912774, "grad_norm": 2.194186906971404, "learning_rate": 1.0813436476938444e-07, "loss": 0.9963, "step": 8624 }, { "epoch": 0.9070950609331248, "grad_norm": 2.6444058808730273, "learning_rate": 1.0789162935009839e-07, "loss": 0.991, "step": 8625 }, { "epoch": 0.9072002313749721, "grad_norm": 2.6206618870443417, "learning_rate": 1.0764916066947795e-07, "loss": 0.9771, "step": 8626 }, { "epoch": 0.9073054018168194, "grad_norm": 1.9213999370076098, "learning_rate": 1.0740695875456064e-07, "loss": 0.9315, "step": 8627 }, { "epoch": 0.9074105722586667, "grad_norm": 2.700327806526417, "learning_rate": 1.0716502363235348e-07, "loss": 0.9627, "step": 8628 }, { "epoch": 0.907515742700514, "grad_norm": 2.9427305067667624, "learning_rate": 1.069233553298346e-07, "loss": 0.9787, "step": 8629 }, { "epoch": 0.9076209131423614, "grad_norm": 2.7356277181692445, "learning_rate": 1.0668195387395164e-07, "loss": 1.0438, "step": 8630 }, { "epoch": 0.9077260835842087, "grad_norm": 2.3404693898022777, "learning_rate": 1.0644081929162275e-07, "loss": 0.9924, "step": 8631 }, { "epoch": 0.907831254026056, "grad_norm": 2.1474982707381893, "learning_rate": 1.0619995160973645e-07, "loss": 0.9078, "step": 8632 }, { "epoch": 0.9079364244679033, "grad_norm": 2.233221210407833, "learning_rate": 1.0595935085515069e-07, "loss": 1.0021, "step": 8633 }, { "epoch": 0.9080415949097506, "grad_norm": 2.707217846445669, "learning_rate": 1.0571901705469567e-07, "loss": 1.0373, "step": 8634 }, { "epoch": 0.9081467653515979, "grad_norm": 2.0945684851181987, "learning_rate": 1.0547895023516913e-07, "loss": 0.9887, "step": 8635 }, { "epoch": 0.9082519357934452, "grad_norm": 2.818236365087763, "learning_rate": 1.0523915042334132e-07, "loss": 0.9345, "step": 8636 }, { "epoch": 0.9083571062352925, "grad_norm": 2.724573085747091, "learning_rate": 1.0499961764595112e-07, "loss": 1.036, "step": 8637 }, { "epoch": 0.9084622766771399, "grad_norm": 2.706854585553475, "learning_rate": 1.0476035192970857e-07, "loss": 0.953, "step": 8638 }, { "epoch": 0.9085674471189872, "grad_norm": 1.9228244106745358, "learning_rate": 1.045213533012937e-07, "loss": 0.9746, "step": 8639 }, { "epoch": 0.9086726175608345, "grad_norm": 2.5814153527028973, "learning_rate": 1.042826217873566e-07, "loss": 0.9523, "step": 8640 }, { "epoch": 0.9087777880026818, "grad_norm": 2.9607847333673694, "learning_rate": 1.0404415741451818e-07, "loss": 0.9877, "step": 8641 }, { "epoch": 0.9088829584445292, "grad_norm": 2.9016091841556984, "learning_rate": 1.0380596020936801e-07, "loss": 1.0021, "step": 8642 }, { "epoch": 0.9089881288863765, "grad_norm": 2.633994800512432, "learning_rate": 1.035680301984679e-07, "loss": 0.9496, "step": 8643 }, { "epoch": 0.9090932993282238, "grad_norm": 3.3617291767747357, "learning_rate": 1.0333036740834857e-07, "loss": 0.9403, "step": 8644 }, { "epoch": 0.9091984697700711, "grad_norm": 2.677523368933197, "learning_rate": 1.0309297186551131e-07, "loss": 0.9678, "step": 8645 }, { "epoch": 0.9093036402119185, "grad_norm": 2.373476234340472, "learning_rate": 1.0285584359642747e-07, "loss": 0.9627, "step": 8646 }, { "epoch": 0.9094088106537658, "grad_norm": 2.0766733599038845, "learning_rate": 1.0261898262753811e-07, "loss": 0.9632, "step": 8647 }, { "epoch": 0.9095139810956131, "grad_norm": 2.4397958236252553, "learning_rate": 1.0238238898525654e-07, "loss": 0.9787, "step": 8648 }, { "epoch": 0.9096191515374604, "grad_norm": 2.429982084341817, "learning_rate": 1.0214606269596361e-07, "loss": 0.9983, "step": 8649 }, { "epoch": 0.9097243219793077, "grad_norm": 1.7677248006677606, "learning_rate": 1.0191000378601213e-07, "loss": 0.9348, "step": 8650 }, { "epoch": 0.9098294924211551, "grad_norm": 1.8993802694959563, "learning_rate": 1.0167421228172381e-07, "loss": 1.0176, "step": 8651 }, { "epoch": 0.9099346628630024, "grad_norm": 2.6748517834984593, "learning_rate": 1.0143868820939179e-07, "loss": 0.986, "step": 8652 }, { "epoch": 0.9100398333048497, "grad_norm": 2.622839246677593, "learning_rate": 1.0120343159527923e-07, "loss": 0.9792, "step": 8653 }, { "epoch": 0.910145003746697, "grad_norm": 1.9757068387048435, "learning_rate": 1.0096844246561794e-07, "loss": 0.9901, "step": 8654 }, { "epoch": 0.9102501741885443, "grad_norm": 2.095050670569624, "learning_rate": 1.0073372084661193e-07, "loss": 0.9605, "step": 8655 }, { "epoch": 0.9103553446303916, "grad_norm": 2.750980361243624, "learning_rate": 1.0049926676443361e-07, "loss": 0.9974, "step": 8656 }, { "epoch": 0.9104605150722389, "grad_norm": 2.961100634051163, "learning_rate": 1.0026508024522791e-07, "loss": 0.981, "step": 8657 }, { "epoch": 0.9105656855140862, "grad_norm": 1.8783234007957286, "learning_rate": 1.0003116131510698e-07, "loss": 0.989, "step": 8658 }, { "epoch": 0.9106708559559336, "grad_norm": 2.671085261106148, "learning_rate": 9.979751000015552e-08, "loss": 0.9828, "step": 8659 }, { "epoch": 0.9107760263977809, "grad_norm": 3.0384313224763737, "learning_rate": 9.956412632642715e-08, "loss": 0.9908, "step": 8660 }, { "epoch": 0.9108811968396282, "grad_norm": 2.5512974558431227, "learning_rate": 9.933101031994547e-08, "loss": 0.9434, "step": 8661 }, { "epoch": 0.9109863672814755, "grad_norm": 1.737662256811027, "learning_rate": 9.909816200670552e-08, "loss": 0.9693, "step": 8662 }, { "epoch": 0.9110915377233229, "grad_norm": 3.0073945279323833, "learning_rate": 9.886558141267127e-08, "loss": 0.9521, "step": 8663 }, { "epoch": 0.9111967081651702, "grad_norm": 1.884812573575343, "learning_rate": 9.863326856377753e-08, "loss": 0.9782, "step": 8664 }, { "epoch": 0.9113018786070175, "grad_norm": 2.722404955393905, "learning_rate": 9.840122348592857e-08, "loss": 0.9983, "step": 8665 }, { "epoch": 0.9114070490488648, "grad_norm": 2.5101444576387344, "learning_rate": 9.816944620499952e-08, "loss": 0.9915, "step": 8666 }, { "epoch": 0.9115122194907122, "grad_norm": 2.763819589917786, "learning_rate": 9.793793674683555e-08, "loss": 1.0128, "step": 8667 }, { "epoch": 0.9116173899325595, "grad_norm": 2.690122259083069, "learning_rate": 9.770669513725128e-08, "loss": 1.0093, "step": 8668 }, { "epoch": 0.9117225603744068, "grad_norm": 2.5984553040911003, "learning_rate": 9.747572140203221e-08, "loss": 0.9857, "step": 8669 }, { "epoch": 0.9118277308162541, "grad_norm": 2.3984861025276194, "learning_rate": 9.724501556693327e-08, "loss": 1.0068, "step": 8670 }, { "epoch": 0.9119329012581014, "grad_norm": 3.124841252252845, "learning_rate": 9.701457765768113e-08, "loss": 0.9785, "step": 8671 }, { "epoch": 0.9120380716999488, "grad_norm": 2.377091759025528, "learning_rate": 9.678440769996994e-08, "loss": 0.9923, "step": 8672 }, { "epoch": 0.9121432421417961, "grad_norm": 2.439191969177443, "learning_rate": 9.655450571946667e-08, "loss": 0.9754, "step": 8673 }, { "epoch": 0.9122484125836434, "grad_norm": 2.3851721865392355, "learning_rate": 9.632487174180638e-08, "loss": 0.9982, "step": 8674 }, { "epoch": 0.9123535830254906, "grad_norm": 2.172576222886981, "learning_rate": 9.609550579259497e-08, "loss": 0.9679, "step": 8675 }, { "epoch": 0.912458753467338, "grad_norm": 2.301925417223173, "learning_rate": 9.586640789740948e-08, "loss": 0.9739, "step": 8676 }, { "epoch": 0.9125639239091853, "grad_norm": 2.478887405016733, "learning_rate": 9.563757808179502e-08, "loss": 1.0417, "step": 8677 }, { "epoch": 0.9126690943510326, "grad_norm": 2.7317447487137407, "learning_rate": 9.54090163712687e-08, "loss": 0.9742, "step": 8678 }, { "epoch": 0.9127742647928799, "grad_norm": 2.7528530250768046, "learning_rate": 9.518072279131596e-08, "loss": 0.9546, "step": 8679 }, { "epoch": 0.9128794352347273, "grad_norm": 2.128520981516656, "learning_rate": 9.495269736739448e-08, "loss": 0.9033, "step": 8680 }, { "epoch": 0.9129846056765746, "grad_norm": 2.8260209548553346, "learning_rate": 9.472494012493034e-08, "loss": 0.9833, "step": 8681 }, { "epoch": 0.9130897761184219, "grad_norm": 2.710474999545366, "learning_rate": 9.449745108931985e-08, "loss": 0.9626, "step": 8682 }, { "epoch": 0.9131949465602692, "grad_norm": 2.4800110937909734, "learning_rate": 9.427023028593051e-08, "loss": 0.9755, "step": 8683 }, { "epoch": 0.9133001170021166, "grad_norm": 2.3592614302731683, "learning_rate": 9.404327774009819e-08, "loss": 0.9557, "step": 8684 }, { "epoch": 0.9134052874439639, "grad_norm": 2.9956075352373364, "learning_rate": 9.381659347713123e-08, "loss": 0.9975, "step": 8685 }, { "epoch": 0.9135104578858112, "grad_norm": 2.1831281057025613, "learning_rate": 9.359017752230582e-08, "loss": 0.9784, "step": 8686 }, { "epoch": 0.9136156283276585, "grad_norm": 2.2725775975518148, "learning_rate": 9.336402990086924e-08, "loss": 1.0201, "step": 8687 }, { "epoch": 0.9137207987695058, "grad_norm": 2.794970120659367, "learning_rate": 9.313815063803883e-08, "loss": 0.927, "step": 8688 }, { "epoch": 0.9138259692113532, "grad_norm": 2.447763388249118, "learning_rate": 9.291253975900138e-08, "loss": 0.9516, "step": 8689 }, { "epoch": 0.9139311396532005, "grad_norm": 2.5939870031479773, "learning_rate": 9.268719728891512e-08, "loss": 1.0149, "step": 8690 }, { "epoch": 0.9140363100950478, "grad_norm": 2.5305551362373864, "learning_rate": 9.24621232529066e-08, "loss": 1.0017, "step": 8691 }, { "epoch": 0.9141414805368951, "grad_norm": 2.901219705340327, "learning_rate": 9.223731767607436e-08, "loss": 0.9446, "step": 8692 }, { "epoch": 0.9142466509787425, "grad_norm": 2.353334916914352, "learning_rate": 9.201278058348446e-08, "loss": 0.9655, "step": 8693 }, { "epoch": 0.9143518214205898, "grad_norm": 2.2889584109431094, "learning_rate": 9.17885120001763e-08, "loss": 1.0182, "step": 8694 }, { "epoch": 0.914456991862437, "grad_norm": 2.5187641418281723, "learning_rate": 9.156451195115601e-08, "loss": 0.9578, "step": 8695 }, { "epoch": 0.9145621623042843, "grad_norm": 2.466078538297683, "learning_rate": 9.134078046140249e-08, "loss": 0.949, "step": 8696 }, { "epoch": 0.9146673327461317, "grad_norm": 1.969452495220898, "learning_rate": 9.111731755586329e-08, "loss": 0.9636, "step": 8697 }, { "epoch": 0.914772503187979, "grad_norm": 2.2396101458216187, "learning_rate": 9.089412325945513e-08, "loss": 0.9892, "step": 8698 }, { "epoch": 0.9148776736298263, "grad_norm": 2.3519886141123956, "learning_rate": 9.067119759706755e-08, "loss": 0.9746, "step": 8699 }, { "epoch": 0.9149828440716736, "grad_norm": 2.4208396168966275, "learning_rate": 9.044854059355763e-08, "loss": 1.05, "step": 8700 }, { "epoch": 0.915088014513521, "grad_norm": 3.9523038988472425, "learning_rate": 9.022615227375353e-08, "loss": 1.0266, "step": 8701 }, { "epoch": 0.9151931849553683, "grad_norm": 2.002467211998927, "learning_rate": 9.000403266245294e-08, "loss": 0.9787, "step": 8702 }, { "epoch": 0.9152983553972156, "grad_norm": 2.0671531516467776, "learning_rate": 8.978218178442439e-08, "loss": 0.9251, "step": 8703 }, { "epoch": 0.9154035258390629, "grad_norm": 2.7026551909268046, "learning_rate": 8.956059966440583e-08, "loss": 1.0106, "step": 8704 }, { "epoch": 0.9155086962809103, "grad_norm": 2.3171869376715213, "learning_rate": 8.933928632710531e-08, "loss": 0.9916, "step": 8705 }, { "epoch": 0.9156138667227576, "grad_norm": 2.317933352612227, "learning_rate": 8.911824179720113e-08, "loss": 0.973, "step": 8706 }, { "epoch": 0.9157190371646049, "grad_norm": 2.524849884723814, "learning_rate": 8.889746609934108e-08, "loss": 1.0246, "step": 8707 }, { "epoch": 0.9158242076064522, "grad_norm": 2.8343370527261627, "learning_rate": 8.867695925814407e-08, "loss": 0.9862, "step": 8708 }, { "epoch": 0.9159293780482995, "grad_norm": 2.2205891500846002, "learning_rate": 8.845672129819766e-08, "loss": 0.9454, "step": 8709 }, { "epoch": 0.9160345484901469, "grad_norm": 2.624543266262543, "learning_rate": 8.823675224406052e-08, "loss": 1.0148, "step": 8710 }, { "epoch": 0.9161397189319942, "grad_norm": 1.924274060643869, "learning_rate": 8.801705212026058e-08, "loss": 0.9628, "step": 8711 }, { "epoch": 0.9162448893738415, "grad_norm": 2.6357976887412855, "learning_rate": 8.779762095129623e-08, "loss": 0.9868, "step": 8712 }, { "epoch": 0.9163500598156888, "grad_norm": 2.201860694431399, "learning_rate": 8.757845876163601e-08, "loss": 0.9836, "step": 8713 }, { "epoch": 0.9164552302575362, "grad_norm": 2.6034939502760923, "learning_rate": 8.735956557571785e-08, "loss": 0.9164, "step": 8714 }, { "epoch": 0.9165604006993835, "grad_norm": 2.7455401077210997, "learning_rate": 8.714094141795026e-08, "loss": 1.0081, "step": 8715 }, { "epoch": 0.9166655711412307, "grad_norm": 3.016771299173225, "learning_rate": 8.692258631271127e-08, "loss": 0.928, "step": 8716 }, { "epoch": 0.916770741583078, "grad_norm": 2.072039917472285, "learning_rate": 8.670450028434946e-08, "loss": 0.9588, "step": 8717 }, { "epoch": 0.9168759120249254, "grad_norm": 2.987620512435773, "learning_rate": 8.648668335718313e-08, "loss": 0.9895, "step": 8718 }, { "epoch": 0.9169810824667727, "grad_norm": 2.5553475573583873, "learning_rate": 8.626913555550009e-08, "loss": 0.9904, "step": 8719 }, { "epoch": 0.91708625290862, "grad_norm": 3.2548118763612157, "learning_rate": 8.605185690355927e-08, "loss": 1.0137, "step": 8720 }, { "epoch": 0.9171914233504673, "grad_norm": 2.524991929264644, "learning_rate": 8.583484742558823e-08, "loss": 0.9857, "step": 8721 }, { "epoch": 0.9172965937923147, "grad_norm": 3.326075876639707, "learning_rate": 8.561810714578595e-08, "loss": 0.9922, "step": 8722 }, { "epoch": 0.917401764234162, "grad_norm": 2.06394387394277, "learning_rate": 8.54016360883203e-08, "loss": 0.9517, "step": 8723 }, { "epoch": 0.9175069346760093, "grad_norm": 2.7063497472638707, "learning_rate": 8.518543427732951e-08, "loss": 1.0339, "step": 8724 }, { "epoch": 0.9176121051178566, "grad_norm": 2.6198469236901722, "learning_rate": 8.496950173692147e-08, "loss": 0.9994, "step": 8725 }, { "epoch": 0.917717275559704, "grad_norm": 2.0768494403671203, "learning_rate": 8.475383849117474e-08, "loss": 0.9819, "step": 8726 }, { "epoch": 0.9178224460015513, "grad_norm": 2.0259311339586747, "learning_rate": 8.453844456413729e-08, "loss": 0.9839, "step": 8727 }, { "epoch": 0.9179276164433986, "grad_norm": 3.4906371799996205, "learning_rate": 8.43233199798274e-08, "loss": 0.9509, "step": 8728 }, { "epoch": 0.9180327868852459, "grad_norm": 2.233812559283946, "learning_rate": 8.410846476223283e-08, "loss": 1.001, "step": 8729 }, { "epoch": 0.9181379573270932, "grad_norm": 2.680065549787725, "learning_rate": 8.389387893531137e-08, "loss": 0.9476, "step": 8730 }, { "epoch": 0.9182431277689406, "grad_norm": 1.8569584887135309, "learning_rate": 8.367956252299192e-08, "loss": 0.9769, "step": 8731 }, { "epoch": 0.9183482982107879, "grad_norm": 2.3494913181215695, "learning_rate": 8.346551554917204e-08, "loss": 0.9841, "step": 8732 }, { "epoch": 0.9184534686526352, "grad_norm": 2.450100459990453, "learning_rate": 8.325173803771902e-08, "loss": 0.9682, "step": 8733 }, { "epoch": 0.9185586390944825, "grad_norm": 2.3534886505356836, "learning_rate": 8.303823001247102e-08, "loss": 0.9905, "step": 8734 }, { "epoch": 0.9186638095363299, "grad_norm": 2.0769040702020463, "learning_rate": 8.282499149723622e-08, "loss": 0.9871, "step": 8735 }, { "epoch": 0.9187689799781771, "grad_norm": 2.328949116074991, "learning_rate": 8.261202251579253e-08, "loss": 0.9732, "step": 8736 }, { "epoch": 0.9188741504200244, "grad_norm": 2.3004852069789754, "learning_rate": 8.239932309188681e-08, "loss": 0.9529, "step": 8737 }, { "epoch": 0.9189793208618717, "grad_norm": 2.467224662817621, "learning_rate": 8.218689324923729e-08, "loss": 0.9588, "step": 8738 }, { "epoch": 0.919084491303719, "grad_norm": 2.2727911833425076, "learning_rate": 8.197473301153142e-08, "loss": 1.0025, "step": 8739 }, { "epoch": 0.9191896617455664, "grad_norm": 2.8021991978145455, "learning_rate": 8.176284240242638e-08, "loss": 1.0289, "step": 8740 }, { "epoch": 0.9192948321874137, "grad_norm": 2.5953858991707492, "learning_rate": 8.15512214455505e-08, "loss": 0.9927, "step": 8741 }, { "epoch": 0.919400002629261, "grad_norm": 2.374685117659884, "learning_rate": 8.133987016450018e-08, "loss": 0.9481, "step": 8742 }, { "epoch": 0.9195051730711084, "grad_norm": 2.722786813359639, "learning_rate": 8.112878858284351e-08, "loss": 0.9962, "step": 8743 }, { "epoch": 0.9196103435129557, "grad_norm": 2.843740539019621, "learning_rate": 8.091797672411666e-08, "loss": 0.9894, "step": 8744 }, { "epoch": 0.919715513954803, "grad_norm": 2.462450080595821, "learning_rate": 8.070743461182807e-08, "loss": 0.9424, "step": 8745 }, { "epoch": 0.9198206843966503, "grad_norm": 2.200892646883302, "learning_rate": 8.04971622694542e-08, "loss": 0.9761, "step": 8746 }, { "epoch": 0.9199258548384976, "grad_norm": 1.8444502246648864, "learning_rate": 8.028715972044216e-08, "loss": 0.9718, "step": 8747 }, { "epoch": 0.920031025280345, "grad_norm": 2.6368564963597954, "learning_rate": 8.007742698820848e-08, "loss": 0.9807, "step": 8748 }, { "epoch": 0.9201361957221923, "grad_norm": 2.85801832795167, "learning_rate": 7.986796409614028e-08, "loss": 0.9885, "step": 8749 }, { "epoch": 0.9202413661640396, "grad_norm": 2.2819001687005476, "learning_rate": 7.965877106759473e-08, "loss": 0.9686, "step": 8750 }, { "epoch": 0.920346536605887, "grad_norm": 2.506401312891756, "learning_rate": 7.944984792589788e-08, "loss": 1.021, "step": 8751 }, { "epoch": 0.9204517070477343, "grad_norm": 2.446263624804167, "learning_rate": 7.924119469434666e-08, "loss": 1.0275, "step": 8752 }, { "epoch": 0.9205568774895816, "grad_norm": 2.557300434643533, "learning_rate": 7.90328113962069e-08, "loss": 0.9428, "step": 8753 }, { "epoch": 0.9206620479314289, "grad_norm": 2.3857984916068538, "learning_rate": 7.882469805471582e-08, "loss": 1.0545, "step": 8754 }, { "epoch": 0.9207672183732762, "grad_norm": 2.7202727116635312, "learning_rate": 7.861685469307905e-08, "loss": 1.0019, "step": 8755 }, { "epoch": 0.9208723888151235, "grad_norm": 1.5934819593693805, "learning_rate": 7.840928133447306e-08, "loss": 0.9311, "step": 8756 }, { "epoch": 0.9209775592569708, "grad_norm": 2.9845596688934655, "learning_rate": 7.82019780020435e-08, "loss": 1.0253, "step": 8757 }, { "epoch": 0.9210827296988181, "grad_norm": 2.046747867064963, "learning_rate": 7.799494471890684e-08, "loss": 1.0192, "step": 8758 }, { "epoch": 0.9211879001406654, "grad_norm": 2.5073020601329485, "learning_rate": 7.778818150814854e-08, "loss": 0.9559, "step": 8759 }, { "epoch": 0.9212930705825128, "grad_norm": 2.5720686156196897, "learning_rate": 7.75816883928246e-08, "loss": 1.013, "step": 8760 }, { "epoch": 0.9213982410243601, "grad_norm": 2.4862086127778675, "learning_rate": 7.73754653959602e-08, "loss": 0.9639, "step": 8761 }, { "epoch": 0.9215034114662074, "grad_norm": 2.364000470048889, "learning_rate": 7.716951254055111e-08, "loss": 0.9796, "step": 8762 }, { "epoch": 0.9216085819080547, "grad_norm": 1.8736154182919098, "learning_rate": 7.69638298495623e-08, "loss": 0.9932, "step": 8763 }, { "epoch": 0.921713752349902, "grad_norm": 2.178955434285585, "learning_rate": 7.675841734592987e-08, "loss": 1.0271, "step": 8764 }, { "epoch": 0.9218189227917494, "grad_norm": 3.1224740330325993, "learning_rate": 7.655327505255772e-08, "loss": 0.9938, "step": 8765 }, { "epoch": 0.9219240932335967, "grad_norm": 2.6693925921641553, "learning_rate": 7.634840299232171e-08, "loss": 0.9828, "step": 8766 }, { "epoch": 0.922029263675444, "grad_norm": 2.7013624068102207, "learning_rate": 7.614380118806636e-08, "loss": 0.9895, "step": 8767 }, { "epoch": 0.9221344341172913, "grad_norm": 2.201528972225391, "learning_rate": 7.593946966260618e-08, "loss": 0.9644, "step": 8768 }, { "epoch": 0.9222396045591387, "grad_norm": 2.3707667253047697, "learning_rate": 7.573540843872602e-08, "loss": 1.0226, "step": 8769 }, { "epoch": 0.922344775000986, "grad_norm": 2.1492440827092714, "learning_rate": 7.553161753918015e-08, "loss": 0.9661, "step": 8770 }, { "epoch": 0.9224499454428333, "grad_norm": 2.1183442817355154, "learning_rate": 7.532809698669263e-08, "loss": 0.997, "step": 8771 }, { "epoch": 0.9225551158846806, "grad_norm": 2.0129347965512996, "learning_rate": 7.512484680395782e-08, "loss": 0.9551, "step": 8772 }, { "epoch": 0.922660286326528, "grad_norm": 3.3447687257492644, "learning_rate": 7.492186701364007e-08, "loss": 0.9852, "step": 8773 }, { "epoch": 0.9227654567683753, "grad_norm": 2.5874808124779007, "learning_rate": 7.471915763837268e-08, "loss": 0.9863, "step": 8774 }, { "epoch": 0.9228706272102226, "grad_norm": 2.247477950163746, "learning_rate": 7.451671870075949e-08, "loss": 0.9511, "step": 8775 }, { "epoch": 0.9229757976520699, "grad_norm": 2.349577908913971, "learning_rate": 7.431455022337386e-08, "loss": 0.9971, "step": 8776 }, { "epoch": 0.9230809680939172, "grad_norm": 2.4880434952386934, "learning_rate": 7.411265222875913e-08, "loss": 0.9879, "step": 8777 }, { "epoch": 0.9231861385357645, "grad_norm": 2.6428533811226704, "learning_rate": 7.391102473942897e-08, "loss": 0.9675, "step": 8778 }, { "epoch": 0.9232913089776118, "grad_norm": 2.2938047586552366, "learning_rate": 7.370966777786564e-08, "loss": 0.9626, "step": 8779 }, { "epoch": 0.9233964794194591, "grad_norm": 2.684986649020841, "learning_rate": 7.350858136652262e-08, "loss": 1.0519, "step": 8780 }, { "epoch": 0.9235016498613065, "grad_norm": 3.0768759986996947, "learning_rate": 7.330776552782248e-08, "loss": 0.9541, "step": 8781 }, { "epoch": 0.9236068203031538, "grad_norm": 2.480420721077424, "learning_rate": 7.310722028415762e-08, "loss": 0.9568, "step": 8782 }, { "epoch": 0.9237119907450011, "grad_norm": 2.7358308672297107, "learning_rate": 7.290694565789069e-08, "loss": 0.9606, "step": 8783 }, { "epoch": 0.9238171611868484, "grad_norm": 2.4196600463864177, "learning_rate": 7.270694167135356e-08, "loss": 0.9517, "step": 8784 }, { "epoch": 0.9239223316286957, "grad_norm": 3.033022762994752, "learning_rate": 7.25072083468481e-08, "loss": 1.033, "step": 8785 }, { "epoch": 0.9240275020705431, "grad_norm": 2.710152700629641, "learning_rate": 7.230774570664623e-08, "loss": 0.9857, "step": 8786 }, { "epoch": 0.9241326725123904, "grad_norm": 2.4433124063205027, "learning_rate": 7.210855377299014e-08, "loss": 0.9722, "step": 8787 }, { "epoch": 0.9242378429542377, "grad_norm": 3.0106631380990425, "learning_rate": 7.190963256809069e-08, "loss": 0.9721, "step": 8788 }, { "epoch": 0.924343013396085, "grad_norm": 2.482926124872465, "learning_rate": 7.17109821141293e-08, "loss": 0.9866, "step": 8789 }, { "epoch": 0.9244481838379324, "grad_norm": 2.4915008975300927, "learning_rate": 7.151260243325686e-08, "loss": 1.024, "step": 8790 }, { "epoch": 0.9245533542797797, "grad_norm": 2.9578504303961055, "learning_rate": 7.13144935475943e-08, "loss": 0.9146, "step": 8791 }, { "epoch": 0.924658524721627, "grad_norm": 3.1081419272442856, "learning_rate": 7.111665547923252e-08, "loss": 0.9885, "step": 8792 }, { "epoch": 0.9247636951634743, "grad_norm": 2.3985246667372544, "learning_rate": 7.091908825023197e-08, "loss": 1.0025, "step": 8793 }, { "epoch": 0.9248688656053217, "grad_norm": 2.428656822228575, "learning_rate": 7.072179188262252e-08, "loss": 0.9968, "step": 8794 }, { "epoch": 0.924974036047169, "grad_norm": 2.255455390801362, "learning_rate": 7.052476639840489e-08, "loss": 0.9685, "step": 8795 }, { "epoch": 0.9250792064890163, "grad_norm": 2.3473107950280894, "learning_rate": 7.032801181954873e-08, "loss": 0.9967, "step": 8796 }, { "epoch": 0.9251843769308635, "grad_norm": 2.762465947655461, "learning_rate": 7.013152816799317e-08, "loss": 1.0137, "step": 8797 }, { "epoch": 0.9252895473727109, "grad_norm": 2.082339717782587, "learning_rate": 6.993531546564874e-08, "loss": 0.9553, "step": 8798 }, { "epoch": 0.9253947178145582, "grad_norm": 2.4355695449007193, "learning_rate": 6.973937373439349e-08, "loss": 0.9761, "step": 8799 }, { "epoch": 0.9254998882564055, "grad_norm": 2.3323761744575453, "learning_rate": 6.954370299607715e-08, "loss": 0.9978, "step": 8800 }, { "epoch": 0.9256050586982528, "grad_norm": 2.7276265273799636, "learning_rate": 6.93483032725184e-08, "loss": 1.014, "step": 8801 }, { "epoch": 0.9257102291401001, "grad_norm": 2.451649200404242, "learning_rate": 6.91531745855059e-08, "loss": 0.9806, "step": 8802 }, { "epoch": 0.9258153995819475, "grad_norm": 2.3782834691727186, "learning_rate": 6.895831695679756e-08, "loss": 0.9996, "step": 8803 }, { "epoch": 0.9259205700237948, "grad_norm": 2.725803550156134, "learning_rate": 6.876373040812234e-08, "loss": 0.9837, "step": 8804 }, { "epoch": 0.9260257404656421, "grad_norm": 1.692896666258162, "learning_rate": 6.856941496117736e-08, "loss": 0.9844, "step": 8805 }, { "epoch": 0.9261309109074894, "grad_norm": 2.627960374778733, "learning_rate": 6.837537063763083e-08, "loss": 0.9603, "step": 8806 }, { "epoch": 0.9262360813493368, "grad_norm": 2.4123212586639045, "learning_rate": 6.818159745911989e-08, "loss": 0.9272, "step": 8807 }, { "epoch": 0.9263412517911841, "grad_norm": 2.672047655684633, "learning_rate": 6.798809544725171e-08, "loss": 0.9814, "step": 8808 }, { "epoch": 0.9264464222330314, "grad_norm": 2.8051273113292434, "learning_rate": 6.779486462360346e-08, "loss": 0.9619, "step": 8809 }, { "epoch": 0.9265515926748787, "grad_norm": 1.930413903321851, "learning_rate": 6.760190500972208e-08, "loss": 0.9405, "step": 8810 }, { "epoch": 0.9266567631167261, "grad_norm": 2.0349253679776975, "learning_rate": 6.740921662712368e-08, "loss": 0.9864, "step": 8811 }, { "epoch": 0.9267619335585734, "grad_norm": 2.6944705175063954, "learning_rate": 6.721679949729499e-08, "loss": 0.9921, "step": 8812 }, { "epoch": 0.9268671040004207, "grad_norm": 2.118350315003092, "learning_rate": 6.702465364169103e-08, "loss": 0.9487, "step": 8813 }, { "epoch": 0.926972274442268, "grad_norm": 2.6969741905193363, "learning_rate": 6.683277908173858e-08, "loss": 0.9865, "step": 8814 }, { "epoch": 0.9270774448841154, "grad_norm": 2.557886102693444, "learning_rate": 6.664117583883272e-08, "loss": 1.017, "step": 8815 }, { "epoch": 0.9271826153259627, "grad_norm": 2.0970620367491635, "learning_rate": 6.64498439343389e-08, "loss": 1.0267, "step": 8816 }, { "epoch": 0.9272877857678099, "grad_norm": 2.4865425974749003, "learning_rate": 6.625878338959168e-08, "loss": 1.0033, "step": 8817 }, { "epoch": 0.9273929562096572, "grad_norm": 1.8276518666416652, "learning_rate": 6.606799422589627e-08, "loss": 0.9588, "step": 8818 }, { "epoch": 0.9274981266515046, "grad_norm": 2.3070835376793304, "learning_rate": 6.587747646452675e-08, "loss": 0.966, "step": 8819 }, { "epoch": 0.9276032970933519, "grad_norm": 2.5299309056388752, "learning_rate": 6.568723012672779e-08, "loss": 0.9866, "step": 8820 }, { "epoch": 0.9277084675351992, "grad_norm": 2.547941361140502, "learning_rate": 6.549725523371298e-08, "loss": 0.992, "step": 8821 }, { "epoch": 0.9278136379770465, "grad_norm": 2.417261025425601, "learning_rate": 6.530755180666593e-08, "loss": 0.981, "step": 8822 }, { "epoch": 0.9279188084188938, "grad_norm": 2.9686003899940565, "learning_rate": 6.511811986674028e-08, "loss": 1.0337, "step": 8823 }, { "epoch": 0.9280239788607412, "grad_norm": 2.8340124141461978, "learning_rate": 6.49289594350594e-08, "loss": 0.9824, "step": 8824 }, { "epoch": 0.9281291493025885, "grad_norm": 3.171176281484354, "learning_rate": 6.47400705327153e-08, "loss": 0.9947, "step": 8825 }, { "epoch": 0.9282343197444358, "grad_norm": 2.564414628087072, "learning_rate": 6.455145318077144e-08, "loss": 0.9985, "step": 8826 }, { "epoch": 0.9283394901862831, "grad_norm": 2.4567715942774337, "learning_rate": 6.436310740025986e-08, "loss": 0.9892, "step": 8827 }, { "epoch": 0.9284446606281305, "grad_norm": 2.4634151576295746, "learning_rate": 6.41750332121821e-08, "loss": 0.9948, "step": 8828 }, { "epoch": 0.9285498310699778, "grad_norm": 2.2133318934099724, "learning_rate": 6.398723063751083e-08, "loss": 0.9897, "step": 8829 }, { "epoch": 0.9286550015118251, "grad_norm": 2.215396747584313, "learning_rate": 6.379969969718653e-08, "loss": 0.9914, "step": 8830 }, { "epoch": 0.9287601719536724, "grad_norm": 2.459138737636483, "learning_rate": 6.361244041212078e-08, "loss": 1.0084, "step": 8831 }, { "epoch": 0.9288653423955198, "grad_norm": 1.934406044306015, "learning_rate": 6.342545280319468e-08, "loss": 0.9425, "step": 8832 }, { "epoch": 0.9289705128373671, "grad_norm": 2.496708383635269, "learning_rate": 6.323873689125848e-08, "loss": 0.9742, "step": 8833 }, { "epoch": 0.9290756832792144, "grad_norm": 2.078495436451324, "learning_rate": 6.305229269713276e-08, "loss": 0.9499, "step": 8834 }, { "epoch": 0.9291808537210617, "grad_norm": 2.0553106122159948, "learning_rate": 6.286612024160699e-08, "loss": 0.984, "step": 8835 }, { "epoch": 0.9292860241629091, "grad_norm": 1.9572815789421085, "learning_rate": 6.268021954544095e-08, "loss": 0.9922, "step": 8836 }, { "epoch": 0.9293911946047564, "grad_norm": 2.0721426471435618, "learning_rate": 6.249459062936447e-08, "loss": 0.9806, "step": 8837 }, { "epoch": 0.9294963650466036, "grad_norm": 2.9623950844860585, "learning_rate": 6.230923351407653e-08, "loss": 1.0178, "step": 8838 }, { "epoch": 0.9296015354884509, "grad_norm": 2.736503439361389, "learning_rate": 6.212414822024532e-08, "loss": 1.0119, "step": 8839 }, { "epoch": 0.9297067059302982, "grad_norm": 1.6867869389616887, "learning_rate": 6.193933476850961e-08, "loss": 0.9662, "step": 8840 }, { "epoch": 0.9298118763721456, "grad_norm": 2.572027064275728, "learning_rate": 6.175479317947818e-08, "loss": 0.981, "step": 8841 }, { "epoch": 0.9299170468139929, "grad_norm": 2.7961316524981563, "learning_rate": 6.157052347372767e-08, "loss": 0.978, "step": 8842 }, { "epoch": 0.9300222172558402, "grad_norm": 2.0569779619598885, "learning_rate": 6.138652567180658e-08, "loss": 0.9811, "step": 8843 }, { "epoch": 0.9301273876976875, "grad_norm": 2.1285919020070114, "learning_rate": 6.120279979423133e-08, "loss": 1.0071, "step": 8844 }, { "epoch": 0.9302325581395349, "grad_norm": 2.6903412011111505, "learning_rate": 6.101934586148938e-08, "loss": 0.9554, "step": 8845 }, { "epoch": 0.9303377285813822, "grad_norm": 2.8879413392379063, "learning_rate": 6.083616389403691e-08, "loss": 0.9768, "step": 8846 }, { "epoch": 0.9304428990232295, "grad_norm": 2.7703622961279124, "learning_rate": 6.065325391230032e-08, "loss": 1.0119, "step": 8847 }, { "epoch": 0.9305480694650768, "grad_norm": 2.2151304541476757, "learning_rate": 6.047061593667552e-08, "loss": 1.0256, "step": 8848 }, { "epoch": 0.9306532399069242, "grad_norm": 2.3412586834401186, "learning_rate": 6.028824998752764e-08, "loss": 0.9941, "step": 8849 }, { "epoch": 0.9307584103487715, "grad_norm": 3.0538317688803205, "learning_rate": 6.010615608519261e-08, "loss": 1.0315, "step": 8850 }, { "epoch": 0.9308635807906188, "grad_norm": 2.2047583894385627, "learning_rate": 5.992433424997473e-08, "loss": 0.989, "step": 8851 }, { "epoch": 0.9309687512324661, "grad_norm": 1.922856138628748, "learning_rate": 5.974278450214893e-08, "loss": 0.991, "step": 8852 }, { "epoch": 0.9310739216743135, "grad_norm": 3.0208302634204536, "learning_rate": 5.9561506861958994e-08, "loss": 0.9735, "step": 8853 }, { "epoch": 0.9311790921161608, "grad_norm": 2.8134158677180237, "learning_rate": 5.9380501349619034e-08, "loss": 0.9672, "step": 8854 }, { "epoch": 0.9312842625580081, "grad_norm": 2.048098684977348, "learning_rate": 5.9199767985312905e-08, "loss": 0.993, "step": 8855 }, { "epoch": 0.9313894329998554, "grad_norm": 2.345542378976877, "learning_rate": 5.9019306789193374e-08, "loss": 0.9825, "step": 8856 }, { "epoch": 0.9314946034417028, "grad_norm": 2.3536588929040962, "learning_rate": 5.883911778138324e-08, "loss": 1.0004, "step": 8857 }, { "epoch": 0.93159977388355, "grad_norm": 2.2434011250068533, "learning_rate": 5.865920098197475e-08, "loss": 1.0042, "step": 8858 }, { "epoch": 0.9317049443253973, "grad_norm": 2.5379253604796577, "learning_rate": 5.847955641103076e-08, "loss": 0.9675, "step": 8859 }, { "epoch": 0.9318101147672446, "grad_norm": 2.1102591640587627, "learning_rate": 5.830018408858246e-08, "loss": 0.9685, "step": 8860 }, { "epoch": 0.931915285209092, "grad_norm": 2.0332852212785144, "learning_rate": 5.8121084034631625e-08, "loss": 0.9865, "step": 8861 }, { "epoch": 0.9320204556509393, "grad_norm": 2.243828323356822, "learning_rate": 5.7942256269148675e-08, "loss": 0.9901, "step": 8862 }, { "epoch": 0.9321256260927866, "grad_norm": 3.0058206320010403, "learning_rate": 5.776370081207516e-08, "loss": 1.0299, "step": 8863 }, { "epoch": 0.9322307965346339, "grad_norm": 2.3047446223648373, "learning_rate": 5.758541768332071e-08, "loss": 0.9945, "step": 8864 }, { "epoch": 0.9323359669764812, "grad_norm": 2.4665165924323453, "learning_rate": 5.740740690276553e-08, "loss": 0.994, "step": 8865 }, { "epoch": 0.9324411374183286, "grad_norm": 2.5834709263911715, "learning_rate": 5.722966849025957e-08, "loss": 1.0037, "step": 8866 }, { "epoch": 0.9325463078601759, "grad_norm": 2.2524845705290133, "learning_rate": 5.7052202465621434e-08, "loss": 1.002, "step": 8867 }, { "epoch": 0.9326514783020232, "grad_norm": 2.4353939058546237, "learning_rate": 5.687500884864e-08, "loss": 0.983, "step": 8868 }, { "epoch": 0.9327566487438705, "grad_norm": 2.5223813989202197, "learning_rate": 5.66980876590742e-08, "loss": 0.9392, "step": 8869 }, { "epoch": 0.9328618191857179, "grad_norm": 2.2960675605121583, "learning_rate": 5.652143891665157e-08, "loss": 0.9687, "step": 8870 }, { "epoch": 0.9329669896275652, "grad_norm": 2.10511952467411, "learning_rate": 5.634506264107054e-08, "loss": 0.9637, "step": 8871 }, { "epoch": 0.9330721600694125, "grad_norm": 2.4587695553336513, "learning_rate": 5.616895885199758e-08, "loss": 0.9774, "step": 8872 }, { "epoch": 0.9331773305112598, "grad_norm": 1.965729243927272, "learning_rate": 5.5993127569070325e-08, "loss": 0.9482, "step": 8873 }, { "epoch": 0.9332825009531072, "grad_norm": 3.8358216369631597, "learning_rate": 5.5817568811894763e-08, "loss": 0.9987, "step": 8874 }, { "epoch": 0.9333876713949545, "grad_norm": 1.5484337642536643, "learning_rate": 5.564228260004773e-08, "loss": 0.937, "step": 8875 }, { "epoch": 0.9334928418368018, "grad_norm": 4.054983699134379, "learning_rate": 5.5467268953074414e-08, "loss": 1.0385, "step": 8876 }, { "epoch": 0.9335980122786491, "grad_norm": 2.3147943515098715, "learning_rate": 5.529252789049033e-08, "loss": 1.0191, "step": 8877 }, { "epoch": 0.9337031827204963, "grad_norm": 2.76261922574638, "learning_rate": 5.5118059431781e-08, "loss": 0.9745, "step": 8878 }, { "epoch": 0.9338083531623437, "grad_norm": 3.2491525384795863, "learning_rate": 5.4943863596400026e-08, "loss": 0.9695, "step": 8879 }, { "epoch": 0.933913523604191, "grad_norm": 2.4349584193925042, "learning_rate": 5.476994040377243e-08, "loss": 0.9973, "step": 8880 }, { "epoch": 0.9340186940460383, "grad_norm": 1.57930103970821, "learning_rate": 5.45962898732913e-08, "loss": 0.9405, "step": 8881 }, { "epoch": 0.9341238644878856, "grad_norm": 2.8444833537713805, "learning_rate": 5.442291202432087e-08, "loss": 1.0168, "step": 8882 }, { "epoch": 0.934229034929733, "grad_norm": 2.0406400774301776, "learning_rate": 5.424980687619319e-08, "loss": 0.9987, "step": 8883 }, { "epoch": 0.9343342053715803, "grad_norm": 1.6448616663744222, "learning_rate": 5.407697444821169e-08, "loss": 0.9668, "step": 8884 }, { "epoch": 0.9344393758134276, "grad_norm": 2.320476937675156, "learning_rate": 5.3904414759648195e-08, "loss": 0.9719, "step": 8885 }, { "epoch": 0.9345445462552749, "grad_norm": 2.5020571289750873, "learning_rate": 5.3732127829743964e-08, "loss": 0.9816, "step": 8886 }, { "epoch": 0.9346497166971223, "grad_norm": 2.240359223042951, "learning_rate": 5.356011367771113e-08, "loss": 0.9784, "step": 8887 }, { "epoch": 0.9347548871389696, "grad_norm": 1.9605700811751219, "learning_rate": 5.338837232272992e-08, "loss": 1.0099, "step": 8888 }, { "epoch": 0.9348600575808169, "grad_norm": 2.0597267796298486, "learning_rate": 5.321690378395167e-08, "loss": 0.9846, "step": 8889 }, { "epoch": 0.9349652280226642, "grad_norm": 1.8547486595770128, "learning_rate": 5.304570808049553e-08, "loss": 0.9384, "step": 8890 }, { "epoch": 0.9350703984645116, "grad_norm": 2.197878394312316, "learning_rate": 5.287478523145151e-08, "loss": 0.9896, "step": 8891 }, { "epoch": 0.9351755689063589, "grad_norm": 2.240523235271297, "learning_rate": 5.2704135255879085e-08, "loss": 0.9526, "step": 8892 }, { "epoch": 0.9352807393482062, "grad_norm": 2.8082105695500172, "learning_rate": 5.253375817280665e-08, "loss": 0.9864, "step": 8893 }, { "epoch": 0.9353859097900535, "grad_norm": 2.8371694669918472, "learning_rate": 5.236365400123289e-08, "loss": 0.9613, "step": 8894 }, { "epoch": 0.9354910802319009, "grad_norm": 1.7047869479150661, "learning_rate": 5.219382276012514e-08, "loss": 0.9818, "step": 8895 }, { "epoch": 0.9355962506737482, "grad_norm": 2.6335657856305885, "learning_rate": 5.2024264468422125e-08, "loss": 0.9802, "step": 8896 }, { "epoch": 0.9357014211155955, "grad_norm": 2.1047402552338745, "learning_rate": 5.185497914502957e-08, "loss": 0.9918, "step": 8897 }, { "epoch": 0.9358065915574428, "grad_norm": 2.964283660935744, "learning_rate": 5.168596680882515e-08, "loss": 1.0045, "step": 8898 }, { "epoch": 0.93591176199929, "grad_norm": 2.164852522853516, "learning_rate": 5.151722747865434e-08, "loss": 0.9464, "step": 8899 }, { "epoch": 0.9360169324411374, "grad_norm": 2.1258441873852463, "learning_rate": 5.134876117333321e-08, "loss": 0.9473, "step": 8900 }, { "epoch": 0.9361221028829847, "grad_norm": 2.587963254415204, "learning_rate": 5.1180567911646994e-08, "loss": 1.0034, "step": 8901 }, { "epoch": 0.936227273324832, "grad_norm": 2.6791686319504753, "learning_rate": 5.1012647712350425e-08, "loss": 0.9854, "step": 8902 }, { "epoch": 0.9363324437666793, "grad_norm": 2.4951824060899597, "learning_rate": 5.084500059416852e-08, "loss": 0.9324, "step": 8903 }, { "epoch": 0.9364376142085267, "grad_norm": 2.2357393856839405, "learning_rate": 5.067762657579412e-08, "loss": 0.9492, "step": 8904 }, { "epoch": 0.936542784650374, "grad_norm": 1.9973404560425712, "learning_rate": 5.0510525675891706e-08, "loss": 0.9965, "step": 8905 }, { "epoch": 0.9366479550922213, "grad_norm": 2.64657389622434, "learning_rate": 5.0343697913093904e-08, "loss": 0.982, "step": 8906 }, { "epoch": 0.9367531255340686, "grad_norm": 1.9223409424201854, "learning_rate": 5.017714330600332e-08, "loss": 1.0052, "step": 8907 }, { "epoch": 0.936858295975916, "grad_norm": 2.203306276144651, "learning_rate": 5.0010861873192596e-08, "loss": 1.022, "step": 8908 }, { "epoch": 0.9369634664177633, "grad_norm": 2.4829606009339282, "learning_rate": 4.984485363320218e-08, "loss": 0.9611, "step": 8909 }, { "epoch": 0.9370686368596106, "grad_norm": 1.9321781349119374, "learning_rate": 4.9679118604544496e-08, "loss": 0.9633, "step": 8910 }, { "epoch": 0.9371738073014579, "grad_norm": 2.2454619717290285, "learning_rate": 4.951365680569975e-08, "loss": 0.9923, "step": 8911 }, { "epoch": 0.9372789777433053, "grad_norm": 2.61658321641514, "learning_rate": 4.9348468255118465e-08, "loss": 1.0092, "step": 8912 }, { "epoch": 0.9373841481851526, "grad_norm": 3.2178127868301494, "learning_rate": 4.918355297122035e-08, "loss": 1.0305, "step": 8913 }, { "epoch": 0.9374893186269999, "grad_norm": 2.5158970965320178, "learning_rate": 4.901891097239431e-08, "loss": 0.932, "step": 8914 }, { "epoch": 0.9375944890688472, "grad_norm": 2.853474567380966, "learning_rate": 4.885454227700009e-08, "loss": 0.9817, "step": 8915 }, { "epoch": 0.9376996595106946, "grad_norm": 2.8015618857324105, "learning_rate": 4.8690446903365e-08, "loss": 0.9716, "step": 8916 }, { "epoch": 0.9378048299525419, "grad_norm": 2.801611751147078, "learning_rate": 4.8526624869787985e-08, "loss": 0.9855, "step": 8917 }, { "epoch": 0.9379100003943892, "grad_norm": 2.1624199744457426, "learning_rate": 4.836307619453556e-08, "loss": 0.9891, "step": 8918 }, { "epoch": 0.9380151708362364, "grad_norm": 2.9139791008885907, "learning_rate": 4.819980089584564e-08, "loss": 0.9789, "step": 8919 }, { "epoch": 0.9381203412780837, "grad_norm": 2.923200016146771, "learning_rate": 4.8036798991923925e-08, "loss": 0.9436, "step": 8920 }, { "epoch": 0.9382255117199311, "grad_norm": 2.295229636965682, "learning_rate": 4.7874070500946725e-08, "loss": 0.975, "step": 8921 }, { "epoch": 0.9383306821617784, "grad_norm": 3.0300460033187515, "learning_rate": 4.771161544105951e-08, "loss": 0.9951, "step": 8922 }, { "epoch": 0.9384358526036257, "grad_norm": 2.115432155833796, "learning_rate": 4.754943383037669e-08, "loss": 0.9423, "step": 8923 }, { "epoch": 0.938541023045473, "grad_norm": 2.2142585640978525, "learning_rate": 4.7387525686983793e-08, "loss": 1.0046, "step": 8924 }, { "epoch": 0.9386461934873204, "grad_norm": 2.2974730077916714, "learning_rate": 4.722589102893416e-08, "loss": 0.9384, "step": 8925 }, { "epoch": 0.9387513639291677, "grad_norm": 2.4189018597780216, "learning_rate": 4.70645298742517e-08, "loss": 0.9834, "step": 8926 }, { "epoch": 0.938856534371015, "grad_norm": 2.2427060135015315, "learning_rate": 4.690344224092924e-08, "loss": 0.9165, "step": 8927 }, { "epoch": 0.9389617048128623, "grad_norm": 3.2157140769814667, "learning_rate": 4.674262814692909e-08, "loss": 0.9877, "step": 8928 }, { "epoch": 0.9390668752547097, "grad_norm": 2.2854910606933556, "learning_rate": 4.658208761018357e-08, "loss": 0.9979, "step": 8929 }, { "epoch": 0.939172045696557, "grad_norm": 2.3119573645611307, "learning_rate": 4.6421820648593906e-08, "loss": 0.9796, "step": 8930 }, { "epoch": 0.9392772161384043, "grad_norm": 2.2403463122387914, "learning_rate": 4.626182728003165e-08, "loss": 0.9831, "step": 8931 }, { "epoch": 0.9393823865802516, "grad_norm": 1.9681555317969763, "learning_rate": 4.61021075223364e-08, "loss": 0.9454, "step": 8932 }, { "epoch": 0.939487557022099, "grad_norm": 2.0758545826298844, "learning_rate": 4.594266139331921e-08, "loss": 0.9768, "step": 8933 }, { "epoch": 0.9395927274639463, "grad_norm": 1.836114474356616, "learning_rate": 4.57834889107589e-08, "loss": 1.0333, "step": 8934 }, { "epoch": 0.9396978979057936, "grad_norm": 2.439188983743261, "learning_rate": 4.5624590092404884e-08, "loss": 0.9691, "step": 8935 }, { "epoch": 0.9398030683476409, "grad_norm": 2.093740532951504, "learning_rate": 4.546596495597494e-08, "loss": 0.9777, "step": 8936 }, { "epoch": 0.9399082387894883, "grad_norm": 2.877703863906485, "learning_rate": 4.530761351915741e-08, "loss": 0.9488, "step": 8937 }, { "epoch": 0.9400134092313356, "grad_norm": 2.757214678581196, "learning_rate": 4.5149535799610125e-08, "loss": 1.0115, "step": 8938 }, { "epoch": 0.9401185796731828, "grad_norm": 3.325004102364057, "learning_rate": 4.499173181495897e-08, "loss": 0.9751, "step": 8939 }, { "epoch": 0.9402237501150301, "grad_norm": 2.3523829345278915, "learning_rate": 4.4834201582801275e-08, "loss": 0.9737, "step": 8940 }, { "epoch": 0.9403289205568774, "grad_norm": 3.8621952023589445, "learning_rate": 4.467694512070242e-08, "loss": 1.0262, "step": 8941 }, { "epoch": 0.9404340909987248, "grad_norm": 2.185909471977706, "learning_rate": 4.451996244619755e-08, "loss": 1.009, "step": 8942 }, { "epoch": 0.9405392614405721, "grad_norm": 1.7242270309634, "learning_rate": 4.436325357679211e-08, "loss": 0.9852, "step": 8943 }, { "epoch": 0.9406444318824194, "grad_norm": 2.12009999134058, "learning_rate": 4.420681852995962e-08, "loss": 0.9544, "step": 8944 }, { "epoch": 0.9407496023242667, "grad_norm": 2.7180402269060737, "learning_rate": 4.4050657323144454e-08, "loss": 0.9866, "step": 8945 }, { "epoch": 0.9408547727661141, "grad_norm": 2.3821757021611227, "learning_rate": 4.3894769973759075e-08, "loss": 0.9786, "step": 8946 }, { "epoch": 0.9409599432079614, "grad_norm": 2.4406138087963023, "learning_rate": 4.3739156499186806e-08, "loss": 1.0249, "step": 8947 }, { "epoch": 0.9410651136498087, "grad_norm": 1.8333812495807653, "learning_rate": 4.358381691677932e-08, "loss": 0.9849, "step": 8948 }, { "epoch": 0.941170284091656, "grad_norm": 1.973400633068133, "learning_rate": 4.342875124385859e-08, "loss": 0.9627, "step": 8949 }, { "epoch": 0.9412754545335034, "grad_norm": 2.2827790185249075, "learning_rate": 4.3273959497715234e-08, "loss": 0.9529, "step": 8950 }, { "epoch": 0.9413806249753507, "grad_norm": 1.789449664350765, "learning_rate": 4.311944169560989e-08, "loss": 0.9506, "step": 8951 }, { "epoch": 0.941485795417198, "grad_norm": 2.0245454190254955, "learning_rate": 4.296519785477293e-08, "loss": 0.9788, "step": 8952 }, { "epoch": 0.9415909658590453, "grad_norm": 2.983790376093045, "learning_rate": 4.2811227992402834e-08, "loss": 0.9872, "step": 8953 }, { "epoch": 0.9416961363008927, "grad_norm": 2.448225720897263, "learning_rate": 4.2657532125669196e-08, "loss": 0.9493, "step": 8954 }, { "epoch": 0.94180130674274, "grad_norm": 3.0859410244770693, "learning_rate": 4.25041102717097e-08, "loss": 0.9438, "step": 8955 }, { "epoch": 0.9419064771845873, "grad_norm": 2.3832499552025466, "learning_rate": 4.2350962447632594e-08, "loss": 0.9902, "step": 8956 }, { "epoch": 0.9420116476264346, "grad_norm": 2.251093370418025, "learning_rate": 4.219808867051506e-08, "loss": 0.9439, "step": 8957 }, { "epoch": 0.942116818068282, "grad_norm": 3.0337285007524, "learning_rate": 4.204548895740346e-08, "loss": 0.9715, "step": 8958 }, { "epoch": 0.9422219885101293, "grad_norm": 2.067055271479491, "learning_rate": 4.18931633253139e-08, "loss": 0.9864, "step": 8959 }, { "epoch": 0.9423271589519765, "grad_norm": 2.198046384198305, "learning_rate": 4.174111179123141e-08, "loss": 0.994, "step": 8960 }, { "epoch": 0.9424323293938238, "grad_norm": 2.2696199460704225, "learning_rate": 4.158933437211188e-08, "loss": 0.9611, "step": 8961 }, { "epoch": 0.9425374998356711, "grad_norm": 2.2636911949582124, "learning_rate": 4.1437831084878974e-08, "loss": 0.9585, "step": 8962 }, { "epoch": 0.9426426702775185, "grad_norm": 2.490147217829843, "learning_rate": 4.12866019464267e-08, "loss": 0.9212, "step": 8963 }, { "epoch": 0.9427478407193658, "grad_norm": 2.279237319888199, "learning_rate": 4.1135646973618214e-08, "loss": 0.9686, "step": 8964 }, { "epoch": 0.9428530111612131, "grad_norm": 2.9443355297300187, "learning_rate": 4.098496618328618e-08, "loss": 1.008, "step": 8965 }, { "epoch": 0.9429581816030604, "grad_norm": 2.4101695450847074, "learning_rate": 4.083455959223298e-08, "loss": 0.9955, "step": 8966 }, { "epoch": 0.9430633520449078, "grad_norm": 3.2074191412814916, "learning_rate": 4.068442721722965e-08, "loss": 1.0099, "step": 8967 }, { "epoch": 0.9431685224867551, "grad_norm": 3.174497962789582, "learning_rate": 4.0534569075017516e-08, "loss": 0.9797, "step": 8968 }, { "epoch": 0.9432736929286024, "grad_norm": 2.5583454557550014, "learning_rate": 4.038498518230627e-08, "loss": 1.006, "step": 8969 }, { "epoch": 0.9433788633704497, "grad_norm": 1.8038618775066488, "learning_rate": 4.0235675555776734e-08, "loss": 0.9574, "step": 8970 }, { "epoch": 0.9434840338122971, "grad_norm": 2.514174840873185, "learning_rate": 4.008664021207698e-08, "loss": 0.993, "step": 8971 }, { "epoch": 0.9435892042541444, "grad_norm": 2.1814124733410387, "learning_rate": 3.993787916782649e-08, "loss": 0.9489, "step": 8972 }, { "epoch": 0.9436943746959917, "grad_norm": 2.5226893485355952, "learning_rate": 3.978939243961283e-08, "loss": 1.0078, "step": 8973 }, { "epoch": 0.943799545137839, "grad_norm": 2.256815087313752, "learning_rate": 3.964118004399331e-08, "loss": 0.9775, "step": 8974 }, { "epoch": 0.9439047155796864, "grad_norm": 2.8223010195407947, "learning_rate": 3.949324199749527e-08, "loss": 1.0249, "step": 8975 }, { "epoch": 0.9440098860215337, "grad_norm": 1.391951568158817, "learning_rate": 3.9345578316614396e-08, "loss": 0.9615, "step": 8976 }, { "epoch": 0.944115056463381, "grad_norm": 2.2474764496627717, "learning_rate": 3.919818901781669e-08, "loss": 0.9517, "step": 8977 }, { "epoch": 0.9442202269052283, "grad_norm": 2.472061001047028, "learning_rate": 3.905107411753678e-08, "loss": 0.9883, "step": 8978 }, { "epoch": 0.9443253973470757, "grad_norm": 1.9454273863676264, "learning_rate": 3.89042336321796e-08, "loss": 0.9953, "step": 8979 }, { "epoch": 0.9444305677889229, "grad_norm": 2.5983891669051045, "learning_rate": 3.8757667578119e-08, "loss": 0.9775, "step": 8980 }, { "epoch": 0.9445357382307702, "grad_norm": 2.052082559249859, "learning_rate": 3.8611375971698004e-08, "loss": 1.0009, "step": 8981 }, { "epoch": 0.9446409086726175, "grad_norm": 2.2703611102031402, "learning_rate": 3.8465358829229415e-08, "loss": 0.9423, "step": 8982 }, { "epoch": 0.9447460791144648, "grad_norm": 2.2134942047175814, "learning_rate": 3.831961616699464e-08, "loss": 0.9994, "step": 8983 }, { "epoch": 0.9448512495563122, "grad_norm": 2.9036139692399834, "learning_rate": 3.8174148001246246e-08, "loss": 0.9657, "step": 8984 }, { "epoch": 0.9449564199981595, "grad_norm": 2.1246606320654897, "learning_rate": 3.802895434820431e-08, "loss": 0.9848, "step": 8985 }, { "epoch": 0.9450615904400068, "grad_norm": 2.4282287550373476, "learning_rate": 3.78840352240592e-08, "loss": 0.9852, "step": 8986 }, { "epoch": 0.9451667608818541, "grad_norm": 3.214310427554045, "learning_rate": 3.773939064497051e-08, "loss": 0.9898, "step": 8987 }, { "epoch": 0.9452719313237015, "grad_norm": 2.344091022554252, "learning_rate": 3.759502062706727e-08, "loss": 0.9549, "step": 8988 }, { "epoch": 0.9453771017655488, "grad_norm": 2.4153973988675057, "learning_rate": 3.745092518644827e-08, "loss": 0.9341, "step": 8989 }, { "epoch": 0.9454822722073961, "grad_norm": 2.203918057550135, "learning_rate": 3.730710433918039e-08, "loss": 1.0003, "step": 8990 }, { "epoch": 0.9455874426492434, "grad_norm": 2.265252052511168, "learning_rate": 3.716355810130135e-08, "loss": 1.002, "step": 8991 }, { "epoch": 0.9456926130910908, "grad_norm": 3.0195268989146338, "learning_rate": 3.70202864888175e-08, "loss": 0.9745, "step": 8992 }, { "epoch": 0.9457977835329381, "grad_norm": 2.1813144759793874, "learning_rate": 3.687728951770497e-08, "loss": 0.9996, "step": 8993 }, { "epoch": 0.9459029539747854, "grad_norm": 2.382527317064494, "learning_rate": 3.673456720390878e-08, "loss": 1.0224, "step": 8994 }, { "epoch": 0.9460081244166327, "grad_norm": 2.888448496222066, "learning_rate": 3.659211956334369e-08, "loss": 0.9979, "step": 8995 }, { "epoch": 0.9461132948584801, "grad_norm": 2.428825105062496, "learning_rate": 3.644994661189366e-08, "loss": 0.9911, "step": 8996 }, { "epoch": 0.9462184653003274, "grad_norm": 2.947866269473164, "learning_rate": 3.630804836541213e-08, "loss": 0.9448, "step": 8997 }, { "epoch": 0.9463236357421747, "grad_norm": 2.5597218827599693, "learning_rate": 3.616642483972199e-08, "loss": 0.9859, "step": 8998 }, { "epoch": 0.946428806184022, "grad_norm": 2.158200307069964, "learning_rate": 3.602507605061478e-08, "loss": 0.9825, "step": 8999 }, { "epoch": 0.9465339766258692, "grad_norm": 3.1515542288121883, "learning_rate": 3.588400201385289e-08, "loss": 1.016, "step": 9000 }, { "epoch": 0.9466391470677166, "grad_norm": 2.5215905656201505, "learning_rate": 3.574320274516652e-08, "loss": 0.9547, "step": 9001 }, { "epoch": 0.9467443175095639, "grad_norm": 2.2845409836197135, "learning_rate": 3.560267826025588e-08, "loss": 0.969, "step": 9002 }, { "epoch": 0.9468494879514112, "grad_norm": 2.7215628504061633, "learning_rate": 3.546242857479093e-08, "loss": 0.9303, "step": 9003 }, { "epoch": 0.9469546583932585, "grad_norm": 2.9588772470370475, "learning_rate": 3.5322453704410286e-08, "loss": 1.0371, "step": 9004 }, { "epoch": 0.9470598288351059, "grad_norm": 2.2593667415441954, "learning_rate": 3.518275366472229e-08, "loss": 0.9698, "step": 9005 }, { "epoch": 0.9471649992769532, "grad_norm": 2.042081877507642, "learning_rate": 3.504332847130476e-08, "loss": 1.0017, "step": 9006 }, { "epoch": 0.9472701697188005, "grad_norm": 2.587961130036968, "learning_rate": 3.49041781397047e-08, "loss": 0.9826, "step": 9007 }, { "epoch": 0.9473753401606478, "grad_norm": 3.102076174146032, "learning_rate": 3.4765302685438315e-08, "loss": 0.9855, "step": 9008 }, { "epoch": 0.9474805106024952, "grad_norm": 2.544139468740267, "learning_rate": 3.462670212399099e-08, "loss": 0.9721, "step": 9009 }, { "epoch": 0.9475856810443425, "grad_norm": 2.7727335358217053, "learning_rate": 3.4488376470818153e-08, "loss": 0.9728, "step": 9010 }, { "epoch": 0.9476908514861898, "grad_norm": 2.5294138314705696, "learning_rate": 3.4350325741344114e-08, "loss": 0.9813, "step": 9011 }, { "epoch": 0.9477960219280371, "grad_norm": 2.7089584804818907, "learning_rate": 3.421254995096268e-08, "loss": 1.0279, "step": 9012 }, { "epoch": 0.9479011923698845, "grad_norm": 2.281684751376811, "learning_rate": 3.407504911503684e-08, "loss": 0.9687, "step": 9013 }, { "epoch": 0.9480063628117318, "grad_norm": 2.138661404129903, "learning_rate": 3.3937823248899046e-08, "loss": 0.9265, "step": 9014 }, { "epoch": 0.9481115332535791, "grad_norm": 3.39614476261138, "learning_rate": 3.3800872367850956e-08, "loss": 0.9766, "step": 9015 }, { "epoch": 0.9482167036954264, "grad_norm": 2.2640896136810955, "learning_rate": 3.366419648716368e-08, "loss": 0.967, "step": 9016 }, { "epoch": 0.9483218741372738, "grad_norm": 2.047442890013021, "learning_rate": 3.352779562207753e-08, "loss": 0.9985, "step": 9017 }, { "epoch": 0.9484270445791211, "grad_norm": 1.9785722794844491, "learning_rate": 3.339166978780256e-08, "loss": 0.9326, "step": 9018 }, { "epoch": 0.9485322150209684, "grad_norm": 2.1691668115058733, "learning_rate": 3.3255818999517465e-08, "loss": 1.017, "step": 9019 }, { "epoch": 0.9486373854628157, "grad_norm": 2.4799530161132863, "learning_rate": 3.3120243272371236e-08, "loss": 0.9924, "step": 9020 }, { "epoch": 0.9487425559046629, "grad_norm": 2.353650653525652, "learning_rate": 3.298494262148122e-08, "loss": 1.0099, "step": 9021 }, { "epoch": 0.9488477263465103, "grad_norm": 2.2157690574353905, "learning_rate": 3.2849917061934245e-08, "loss": 1.0099, "step": 9022 }, { "epoch": 0.9489528967883576, "grad_norm": 2.6170941163454726, "learning_rate": 3.2715166608787426e-08, "loss": 1.0147, "step": 9023 }, { "epoch": 0.9490580672302049, "grad_norm": 2.252136933422275, "learning_rate": 3.2580691277065704e-08, "loss": 0.9959, "step": 9024 }, { "epoch": 0.9491632376720522, "grad_norm": 2.0628624156474906, "learning_rate": 3.2446491081764566e-08, "loss": 0.9494, "step": 9025 }, { "epoch": 0.9492684081138996, "grad_norm": 2.8179669861299055, "learning_rate": 3.2312566037848437e-08, "loss": 0.9782, "step": 9026 }, { "epoch": 0.9493735785557469, "grad_norm": 2.4587844515129365, "learning_rate": 3.217891616025065e-08, "loss": 0.9678, "step": 9027 }, { "epoch": 0.9494787489975942, "grad_norm": 2.836843623950099, "learning_rate": 3.204554146387456e-08, "loss": 1.0119, "step": 9028 }, { "epoch": 0.9495839194394415, "grad_norm": 2.6786242065548675, "learning_rate": 3.191244196359244e-08, "loss": 0.9626, "step": 9029 }, { "epoch": 0.9496890898812889, "grad_norm": 1.9157598537055311, "learning_rate": 3.1779617674245754e-08, "loss": 0.9927, "step": 9030 }, { "epoch": 0.9497942603231362, "grad_norm": 2.8959657091729114, "learning_rate": 3.1647068610645706e-08, "loss": 1.0009, "step": 9031 }, { "epoch": 0.9498994307649835, "grad_norm": 2.5929815782861603, "learning_rate": 3.151479478757186e-08, "loss": 1.0032, "step": 9032 }, { "epoch": 0.9500046012068308, "grad_norm": 2.5834124688388633, "learning_rate": 3.1382796219774634e-08, "loss": 1.0217, "step": 9033 }, { "epoch": 0.9501097716486782, "grad_norm": 3.2272054253254363, "learning_rate": 3.125107292197227e-08, "loss": 1.0221, "step": 9034 }, { "epoch": 0.9502149420905255, "grad_norm": 2.6054764436911895, "learning_rate": 3.1119624908853286e-08, "loss": 0.9901, "step": 9035 }, { "epoch": 0.9503201125323728, "grad_norm": 3.169144160863623, "learning_rate": 3.0988452195075127e-08, "loss": 0.9878, "step": 9036 }, { "epoch": 0.9504252829742201, "grad_norm": 2.6138918264045765, "learning_rate": 3.085755479526442e-08, "loss": 1.035, "step": 9037 }, { "epoch": 0.9505304534160675, "grad_norm": 2.2611828427142058, "learning_rate": 3.072693272401756e-08, "loss": 0.9684, "step": 9038 }, { "epoch": 0.9506356238579148, "grad_norm": 2.9501126652696676, "learning_rate": 3.059658599589926e-08, "loss": 0.9749, "step": 9039 }, { "epoch": 0.9507407942997621, "grad_norm": 2.1468682912165087, "learning_rate": 3.046651462544487e-08, "loss": 0.9761, "step": 9040 }, { "epoch": 0.9508459647416093, "grad_norm": 2.4497313864823753, "learning_rate": 3.0336718627158035e-08, "loss": 0.9701, "step": 9041 }, { "epoch": 0.9509511351834566, "grad_norm": 2.153394730671303, "learning_rate": 3.0207198015512195e-08, "loss": 0.9441, "step": 9042 }, { "epoch": 0.951056305625304, "grad_norm": 3.031401492377512, "learning_rate": 3.007795280494996e-08, "loss": 1.0247, "step": 9043 }, { "epoch": 0.9511614760671513, "grad_norm": 2.5059187840965604, "learning_rate": 2.994898300988258e-08, "loss": 0.9365, "step": 9044 }, { "epoch": 0.9512666465089986, "grad_norm": 2.4460236039465615, "learning_rate": 2.9820288644692166e-08, "loss": 0.9831, "step": 9045 }, { "epoch": 0.9513718169508459, "grad_norm": 2.608884100112733, "learning_rate": 2.969186972372806e-08, "loss": 1.0081, "step": 9046 }, { "epoch": 0.9514769873926933, "grad_norm": 2.7708175506380286, "learning_rate": 2.9563726261310767e-08, "loss": 0.9921, "step": 9047 }, { "epoch": 0.9515821578345406, "grad_norm": 2.9910305301759186, "learning_rate": 2.9435858271728845e-08, "loss": 0.9821, "step": 9048 }, { "epoch": 0.9516873282763879, "grad_norm": 2.9234495857772553, "learning_rate": 2.9308265769240894e-08, "loss": 1.0157, "step": 9049 }, { "epoch": 0.9517924987182352, "grad_norm": 1.8321103596533066, "learning_rate": 2.9180948768074424e-08, "loss": 0.9784, "step": 9050 }, { "epoch": 0.9518976691600826, "grad_norm": 2.357717738638271, "learning_rate": 2.905390728242585e-08, "loss": 0.9425, "step": 9051 }, { "epoch": 0.9520028396019299, "grad_norm": 3.189128618030638, "learning_rate": 2.8927141326461903e-08, "loss": 1.0047, "step": 9052 }, { "epoch": 0.9521080100437772, "grad_norm": 2.256843607403256, "learning_rate": 2.8800650914317385e-08, "loss": 0.946, "step": 9053 }, { "epoch": 0.9522131804856245, "grad_norm": 3.062457453580989, "learning_rate": 2.867443606009768e-08, "loss": 1.0034, "step": 9054 }, { "epoch": 0.9523183509274719, "grad_norm": 3.165772898246199, "learning_rate": 2.854849677787569e-08, "loss": 1.0181, "step": 9055 }, { "epoch": 0.9524235213693192, "grad_norm": 2.173606329897896, "learning_rate": 2.8422833081695466e-08, "loss": 1.0107, "step": 9056 }, { "epoch": 0.9525286918111665, "grad_norm": 2.1255256334631976, "learning_rate": 2.8297444985569412e-08, "loss": 1.0023, "step": 9057 }, { "epoch": 0.9526338622530138, "grad_norm": 2.077684783487426, "learning_rate": 2.8172332503479116e-08, "loss": 0.9621, "step": 9058 }, { "epoch": 0.9527390326948612, "grad_norm": 2.1934129457768834, "learning_rate": 2.8047495649375366e-08, "loss": 0.9669, "step": 9059 }, { "epoch": 0.9528442031367085, "grad_norm": 2.8101529667642753, "learning_rate": 2.7922934437178695e-08, "loss": 0.9975, "step": 9060 }, { "epoch": 0.9529493735785557, "grad_norm": 1.9383323625550897, "learning_rate": 2.7798648880778545e-08, "loss": 0.9921, "step": 9061 }, { "epoch": 0.953054544020403, "grad_norm": 1.8036336585586927, "learning_rate": 2.767463899403383e-08, "loss": 0.9686, "step": 9062 }, { "epoch": 0.9531597144622503, "grad_norm": 2.8407059895479696, "learning_rate": 2.755090479077266e-08, "loss": 0.9795, "step": 9063 }, { "epoch": 0.9532648849040977, "grad_norm": 2.758918511717141, "learning_rate": 2.7427446284792324e-08, "loss": 0.9974, "step": 9064 }, { "epoch": 0.953370055345945, "grad_norm": 3.586171387728783, "learning_rate": 2.730426348985904e-08, "loss": 0.9778, "step": 9065 }, { "epoch": 0.9534752257877923, "grad_norm": 2.4256569743524983, "learning_rate": 2.7181356419709313e-08, "loss": 0.9593, "step": 9066 }, { "epoch": 0.9535803962296396, "grad_norm": 2.3740228072151393, "learning_rate": 2.7058725088047466e-08, "loss": 0.9844, "step": 9067 }, { "epoch": 0.953685566671487, "grad_norm": 2.483387574850686, "learning_rate": 2.6936369508548664e-08, "loss": 0.9624, "step": 9068 }, { "epoch": 0.9537907371133343, "grad_norm": 3.1395779842210954, "learning_rate": 2.681428969485589e-08, "loss": 1.0125, "step": 9069 }, { "epoch": 0.9538959075551816, "grad_norm": 3.2864445331020185, "learning_rate": 2.6692485660582133e-08, "loss": 0.9238, "step": 9070 }, { "epoch": 0.9540010779970289, "grad_norm": 2.251033657502406, "learning_rate": 2.6570957419309595e-08, "loss": 0.9286, "step": 9071 }, { "epoch": 0.9541062484388763, "grad_norm": 2.9488738742860527, "learning_rate": 2.6449704984589652e-08, "loss": 1.0007, "step": 9072 }, { "epoch": 0.9542114188807236, "grad_norm": 2.3323515832683945, "learning_rate": 2.632872836994288e-08, "loss": 1.0012, "step": 9073 }, { "epoch": 0.9543165893225709, "grad_norm": 3.032634694644143, "learning_rate": 2.6208027588858765e-08, "loss": 0.9769, "step": 9074 }, { "epoch": 0.9544217597644182, "grad_norm": 1.9718142374783398, "learning_rate": 2.6087602654797097e-08, "loss": 0.9451, "step": 9075 }, { "epoch": 0.9545269302062656, "grad_norm": 2.261250300289189, "learning_rate": 2.5967453581185187e-08, "loss": 1.0058, "step": 9076 }, { "epoch": 0.9546321006481129, "grad_norm": 2.202583197447805, "learning_rate": 2.5847580381421768e-08, "loss": 0.9913, "step": 9077 }, { "epoch": 0.9547372710899602, "grad_norm": 3.0052761549204203, "learning_rate": 2.5727983068872532e-08, "loss": 0.979, "step": 9078 }, { "epoch": 0.9548424415318075, "grad_norm": 2.235135902420106, "learning_rate": 2.560866165687431e-08, "loss": 0.9828, "step": 9079 }, { "epoch": 0.9549476119736549, "grad_norm": 1.8821520458231678, "learning_rate": 2.548961615873202e-08, "loss": 0.9579, "step": 9080 }, { "epoch": 0.9550527824155022, "grad_norm": 2.1799190509368263, "learning_rate": 2.5370846587720044e-08, "loss": 0.9611, "step": 9081 }, { "epoch": 0.9551579528573494, "grad_norm": 2.2231469103632016, "learning_rate": 2.5252352957082505e-08, "loss": 0.9672, "step": 9082 }, { "epoch": 0.9552631232991967, "grad_norm": 2.48946023734881, "learning_rate": 2.513413528003189e-08, "loss": 1.0058, "step": 9083 }, { "epoch": 0.955368293741044, "grad_norm": 2.9722438755007383, "learning_rate": 2.5016193569750712e-08, "loss": 0.9313, "step": 9084 }, { "epoch": 0.9554734641828914, "grad_norm": 2.5433251733042903, "learning_rate": 2.4898527839390118e-08, "loss": 1.0113, "step": 9085 }, { "epoch": 0.9555786346247387, "grad_norm": 1.991419615066436, "learning_rate": 2.4781138102071278e-08, "loss": 0.9612, "step": 9086 }, { "epoch": 0.955683805066586, "grad_norm": 2.4260785139851464, "learning_rate": 2.4664024370883444e-08, "loss": 0.9726, "step": 9087 }, { "epoch": 0.9557889755084333, "grad_norm": 2.623866077425562, "learning_rate": 2.454718665888589e-08, "loss": 0.9465, "step": 9088 }, { "epoch": 0.9558941459502807, "grad_norm": 2.6503139564233282, "learning_rate": 2.4430624979107365e-08, "loss": 1.0072, "step": 9089 }, { "epoch": 0.955999316392128, "grad_norm": 4.298737564898903, "learning_rate": 2.431433934454497e-08, "loss": 0.9838, "step": 9090 }, { "epoch": 0.9561044868339753, "grad_norm": 2.166453454009922, "learning_rate": 2.4198329768165552e-08, "loss": 0.9526, "step": 9091 }, { "epoch": 0.9562096572758226, "grad_norm": 2.262782344611168, "learning_rate": 2.4082596262904877e-08, "loss": 0.909, "step": 9092 }, { "epoch": 0.95631482771767, "grad_norm": 2.4483911651903343, "learning_rate": 2.396713884166818e-08, "loss": 0.9938, "step": 9093 }, { "epoch": 0.9564199981595173, "grad_norm": 3.1826776450887686, "learning_rate": 2.385195751733044e-08, "loss": 0.9652, "step": 9094 }, { "epoch": 0.9565251686013646, "grad_norm": 2.0215236542665114, "learning_rate": 2.3737052302734432e-08, "loss": 0.9577, "step": 9095 }, { "epoch": 0.9566303390432119, "grad_norm": 2.988986889944041, "learning_rate": 2.36224232106938e-08, "loss": 0.9887, "step": 9096 }, { "epoch": 0.9567355094850593, "grad_norm": 2.795418561696307, "learning_rate": 2.3508070253989712e-08, "loss": 0.9974, "step": 9097 }, { "epoch": 0.9568406799269066, "grad_norm": 2.7766248044059876, "learning_rate": 2.3393993445374187e-08, "loss": 1.005, "step": 9098 }, { "epoch": 0.9569458503687539, "grad_norm": 2.559873723889261, "learning_rate": 2.3280192797567046e-08, "loss": 0.9853, "step": 9099 }, { "epoch": 0.9570510208106012, "grad_norm": 1.838309819852872, "learning_rate": 2.31666683232587e-08, "loss": 0.9774, "step": 9100 }, { "epoch": 0.9571561912524486, "grad_norm": 2.3283682967266404, "learning_rate": 2.3053420035107077e-08, "loss": 0.9614, "step": 9101 }, { "epoch": 0.9572613616942958, "grad_norm": 2.0531537931400696, "learning_rate": 2.294044794574096e-08, "loss": 0.996, "step": 9102 }, { "epoch": 0.9573665321361431, "grad_norm": 2.738491776846182, "learning_rate": 2.2827752067757224e-08, "loss": 0.9989, "step": 9103 }, { "epoch": 0.9574717025779904, "grad_norm": 2.8541624802569086, "learning_rate": 2.271533241372248e-08, "loss": 0.9925, "step": 9104 }, { "epoch": 0.9575768730198377, "grad_norm": 2.561463355615611, "learning_rate": 2.260318899617281e-08, "loss": 0.995, "step": 9105 }, { "epoch": 0.9576820434616851, "grad_norm": 2.4009062245111807, "learning_rate": 2.24913218276121e-08, "loss": 1.0175, "step": 9106 }, { "epoch": 0.9577872139035324, "grad_norm": 1.9085583128594759, "learning_rate": 2.2379730920515096e-08, "loss": 0.966, "step": 9107 }, { "epoch": 0.9578923843453797, "grad_norm": 2.3035781917937923, "learning_rate": 2.2268416287325178e-08, "loss": 0.9906, "step": 9108 }, { "epoch": 0.957997554787227, "grad_norm": 2.9253676588000084, "learning_rate": 2.215737794045436e-08, "loss": 0.9913, "step": 9109 }, { "epoch": 0.9581027252290744, "grad_norm": 2.251760542497009, "learning_rate": 2.2046615892284685e-08, "loss": 0.9787, "step": 9110 }, { "epoch": 0.9582078956709217, "grad_norm": 2.6136064885904142, "learning_rate": 2.193613015516627e-08, "loss": 1.0012, "step": 9111 }, { "epoch": 0.958313066112769, "grad_norm": 2.378051255621682, "learning_rate": 2.1825920741420092e-08, "loss": 0.9869, "step": 9112 }, { "epoch": 0.9584182365546163, "grad_norm": 2.420924559361127, "learning_rate": 2.171598766333466e-08, "loss": 0.9531, "step": 9113 }, { "epoch": 0.9585234069964637, "grad_norm": 2.1094674393987125, "learning_rate": 2.1606330933168496e-08, "loss": 0.9684, "step": 9114 }, { "epoch": 0.958628577438311, "grad_norm": 1.7218971943565433, "learning_rate": 2.149695056314932e-08, "loss": 0.9608, "step": 9115 }, { "epoch": 0.9587337478801583, "grad_norm": 2.6145718462197967, "learning_rate": 2.1387846565474047e-08, "loss": 0.9348, "step": 9116 }, { "epoch": 0.9588389183220056, "grad_norm": 2.7778015979594595, "learning_rate": 2.1279018952308218e-08, "loss": 1.015, "step": 9117 }, { "epoch": 0.958944088763853, "grad_norm": 3.616813798965242, "learning_rate": 2.1170467735787124e-08, "loss": 0.9358, "step": 9118 }, { "epoch": 0.9590492592057003, "grad_norm": 3.0938864234470684, "learning_rate": 2.1062192928015536e-08, "loss": 0.9831, "step": 9119 }, { "epoch": 0.9591544296475476, "grad_norm": 2.5032862302480696, "learning_rate": 2.095419454106573e-08, "loss": 1.0055, "step": 9120 }, { "epoch": 0.9592596000893949, "grad_norm": 2.4155226421641234, "learning_rate": 2.084647258698169e-08, "loss": 0.9836, "step": 9121 }, { "epoch": 0.9593647705312421, "grad_norm": 2.2456972979046212, "learning_rate": 2.073902707777464e-08, "loss": 1.0023, "step": 9122 }, { "epoch": 0.9594699409730895, "grad_norm": 3.1418236010714846, "learning_rate": 2.0631858025425554e-08, "loss": 0.9944, "step": 9123 }, { "epoch": 0.9595751114149368, "grad_norm": 2.512431786493916, "learning_rate": 2.052496544188487e-08, "loss": 0.9862, "step": 9124 }, { "epoch": 0.9596802818567841, "grad_norm": 2.4317108069473514, "learning_rate": 2.0418349339071385e-08, "loss": 0.9816, "step": 9125 }, { "epoch": 0.9597854522986314, "grad_norm": 2.5061823875565654, "learning_rate": 2.031200972887448e-08, "loss": 0.9703, "step": 9126 }, { "epoch": 0.9598906227404788, "grad_norm": 1.8971400885405667, "learning_rate": 2.0205946623151063e-08, "loss": 0.9792, "step": 9127 }, { "epoch": 0.9599957931823261, "grad_norm": 2.8512234421263516, "learning_rate": 2.010016003372861e-08, "loss": 0.9609, "step": 9128 }, { "epoch": 0.9601009636241734, "grad_norm": 1.8398551494029969, "learning_rate": 1.9994649972402415e-08, "loss": 0.9747, "step": 9129 }, { "epoch": 0.9602061340660207, "grad_norm": 2.4990349994310415, "learning_rate": 1.9889416450938335e-08, "loss": 0.9787, "step": 9130 }, { "epoch": 0.9603113045078681, "grad_norm": 1.9550532334702702, "learning_rate": 1.9784459481070607e-08, "loss": 0.9957, "step": 9131 }, { "epoch": 0.9604164749497154, "grad_norm": 3.2601942510164252, "learning_rate": 1.9679779074502636e-08, "loss": 0.9309, "step": 9132 }, { "epoch": 0.9605216453915627, "grad_norm": 2.862772288610713, "learning_rate": 1.9575375242907035e-08, "loss": 1.0278, "step": 9133 }, { "epoch": 0.96062681583341, "grad_norm": 2.6535531520322495, "learning_rate": 1.9471247997925324e-08, "loss": 0.9815, "step": 9134 }, { "epoch": 0.9607319862752574, "grad_norm": 2.6376660990295164, "learning_rate": 1.9367397351169326e-08, "loss": 0.9858, "step": 9135 }, { "epoch": 0.9608371567171047, "grad_norm": 3.208400601816702, "learning_rate": 1.9263823314218667e-08, "loss": 0.9999, "step": 9136 }, { "epoch": 0.960942327158952, "grad_norm": 3.117055000265947, "learning_rate": 1.9160525898622716e-08, "loss": 1.0176, "step": 9137 }, { "epoch": 0.9610474976007993, "grad_norm": 2.539642788145546, "learning_rate": 1.9057505115900043e-08, "loss": 0.9976, "step": 9138 }, { "epoch": 0.9611526680426467, "grad_norm": 2.622034495146119, "learning_rate": 1.8954760977538122e-08, "loss": 0.9342, "step": 9139 }, { "epoch": 0.961257838484494, "grad_norm": 2.0976876781379796, "learning_rate": 1.88522934949939e-08, "loss": 0.939, "step": 9140 }, { "epoch": 0.9613630089263413, "grad_norm": 3.6252618212734244, "learning_rate": 1.875010267969296e-08, "loss": 1.0144, "step": 9141 }, { "epoch": 0.9614681793681886, "grad_norm": 2.2004361692134093, "learning_rate": 1.864818854303091e-08, "loss": 0.9742, "step": 9142 }, { "epoch": 0.9615733498100358, "grad_norm": 2.5659276984677013, "learning_rate": 1.8546551096371157e-08, "loss": 0.994, "step": 9143 }, { "epoch": 0.9616785202518832, "grad_norm": 2.2606940570606304, "learning_rate": 1.844519035104797e-08, "loss": 0.9785, "step": 9144 }, { "epoch": 0.9617836906937305, "grad_norm": 3.1995639013446353, "learning_rate": 1.834410631836342e-08, "loss": 1.0375, "step": 9145 }, { "epoch": 0.9618888611355778, "grad_norm": 2.8120072161352883, "learning_rate": 1.8243299009589044e-08, "loss": 0.9765, "step": 9146 }, { "epoch": 0.9619940315774251, "grad_norm": 2.9702132072664504, "learning_rate": 1.814276843596585e-08, "loss": 0.984, "step": 9147 }, { "epoch": 0.9620992020192725, "grad_norm": 2.36435367996225, "learning_rate": 1.8042514608703765e-08, "loss": 0.9905, "step": 9148 }, { "epoch": 0.9622043724611198, "grad_norm": 2.4402173766387647, "learning_rate": 1.7942537538981618e-08, "loss": 0.9907, "step": 9149 }, { "epoch": 0.9623095429029671, "grad_norm": 2.3039341033452048, "learning_rate": 1.7842837237947997e-08, "loss": 0.9937, "step": 9150 }, { "epoch": 0.9624147133448144, "grad_norm": 2.5814133426313397, "learning_rate": 1.7743413716720394e-08, "loss": 0.9995, "step": 9151 }, { "epoch": 0.9625198837866618, "grad_norm": 3.0310519987593323, "learning_rate": 1.764426698638466e-08, "loss": 0.9418, "step": 9152 }, { "epoch": 0.9626250542285091, "grad_norm": 2.414115329844662, "learning_rate": 1.7545397057996683e-08, "loss": 1.0066, "step": 9153 }, { "epoch": 0.9627302246703564, "grad_norm": 2.5905538989812484, "learning_rate": 1.7446803942581524e-08, "loss": 0.9571, "step": 9154 }, { "epoch": 0.9628353951122037, "grad_norm": 2.4397822721298588, "learning_rate": 1.7348487651132895e-08, "loss": 0.9677, "step": 9155 }, { "epoch": 0.962940565554051, "grad_norm": 2.369844437844643, "learning_rate": 1.725044819461369e-08, "loss": 0.9505, "step": 9156 }, { "epoch": 0.9630457359958984, "grad_norm": 2.4593061093551927, "learning_rate": 1.7152685583955995e-08, "loss": 0.9638, "step": 9157 }, { "epoch": 0.9631509064377457, "grad_norm": 2.058831249668448, "learning_rate": 1.7055199830061653e-08, "loss": 0.9734, "step": 9158 }, { "epoch": 0.963256076879593, "grad_norm": 2.5481002332248703, "learning_rate": 1.6957990943800574e-08, "loss": 0.9717, "step": 9159 }, { "epoch": 0.9633612473214404, "grad_norm": 3.1600633484938503, "learning_rate": 1.686105893601242e-08, "loss": 1.0042, "step": 9160 }, { "epoch": 0.9634664177632877, "grad_norm": 3.3311087274523823, "learning_rate": 1.6764403817506047e-08, "loss": 1.0101, "step": 9161 }, { "epoch": 0.963571588205135, "grad_norm": 1.9545499141129272, "learning_rate": 1.6668025599058945e-08, "loss": 0.9977, "step": 9162 }, { "epoch": 0.9636767586469822, "grad_norm": 2.787786032166771, "learning_rate": 1.6571924291418072e-08, "loss": 0.9881, "step": 9163 }, { "epoch": 0.9637819290888295, "grad_norm": 2.7339024071717035, "learning_rate": 1.6476099905299857e-08, "loss": 0.9863, "step": 9164 }, { "epoch": 0.9638870995306769, "grad_norm": 2.3241276868769507, "learning_rate": 1.6380552451389088e-08, "loss": 0.985, "step": 9165 }, { "epoch": 0.9639922699725242, "grad_norm": 2.615775704959889, "learning_rate": 1.6285281940340016e-08, "loss": 1.0107, "step": 9166 }, { "epoch": 0.9640974404143715, "grad_norm": 2.61782809887508, "learning_rate": 1.6190288382776363e-08, "loss": 0.9895, "step": 9167 }, { "epoch": 0.9642026108562188, "grad_norm": 2.9697250302024987, "learning_rate": 1.60955717892905e-08, "loss": 0.9869, "step": 9168 }, { "epoch": 0.9643077812980662, "grad_norm": 2.1659305791507024, "learning_rate": 1.6001132170443968e-08, "loss": 0.9508, "step": 9169 }, { "epoch": 0.9644129517399135, "grad_norm": 2.76396965745972, "learning_rate": 1.5906969536767513e-08, "loss": 0.9662, "step": 9170 }, { "epoch": 0.9645181221817608, "grad_norm": 2.1674061651202434, "learning_rate": 1.5813083898760793e-08, "loss": 0.961, "step": 9171 }, { "epoch": 0.9646232926236081, "grad_norm": 3.3828957028419944, "learning_rate": 1.571947526689349e-08, "loss": 0.9774, "step": 9172 }, { "epoch": 0.9647284630654555, "grad_norm": 2.9496229688779834, "learning_rate": 1.5626143651603087e-08, "loss": 1.0287, "step": 9173 }, { "epoch": 0.9648336335073028, "grad_norm": 2.576235051854255, "learning_rate": 1.553308906329709e-08, "loss": 1.0086, "step": 9174 }, { "epoch": 0.9649388039491501, "grad_norm": 2.6024764773615705, "learning_rate": 1.5440311512351646e-08, "loss": 0.9895, "step": 9175 }, { "epoch": 0.9650439743909974, "grad_norm": 2.7226540157491175, "learning_rate": 1.534781100911209e-08, "loss": 0.9951, "step": 9176 }, { "epoch": 0.9651491448328448, "grad_norm": 2.4235683696464925, "learning_rate": 1.5255587563893227e-08, "loss": 1.0094, "step": 9177 }, { "epoch": 0.9652543152746921, "grad_norm": 1.8039244408062456, "learning_rate": 1.5163641186978216e-08, "loss": 0.9846, "step": 9178 }, { "epoch": 0.9653594857165394, "grad_norm": 2.208004892548754, "learning_rate": 1.507197188862053e-08, "loss": 0.9821, "step": 9179 }, { "epoch": 0.9654646561583867, "grad_norm": 2.686724949615566, "learning_rate": 1.4980579679041153e-08, "loss": 0.9814, "step": 9180 }, { "epoch": 0.965569826600234, "grad_norm": 2.218279129899888, "learning_rate": 1.4889464568431656e-08, "loss": 0.9947, "step": 9181 }, { "epoch": 0.9656749970420814, "grad_norm": 2.619200221829575, "learning_rate": 1.4798626566951968e-08, "loss": 0.9968, "step": 9182 }, { "epoch": 0.9657801674839286, "grad_norm": 2.226366283605976, "learning_rate": 1.4708065684730932e-08, "loss": 0.9942, "step": 9183 }, { "epoch": 0.9658853379257759, "grad_norm": 1.8711672309528635, "learning_rate": 1.4617781931867137e-08, "loss": 0.9866, "step": 9184 }, { "epoch": 0.9659905083676232, "grad_norm": 1.7591848744904266, "learning_rate": 1.4527775318427806e-08, "loss": 0.9644, "step": 9185 }, { "epoch": 0.9660956788094706, "grad_norm": 2.3772749891154312, "learning_rate": 1.4438045854449357e-08, "loss": 0.9752, "step": 9186 }, { "epoch": 0.9662008492513179, "grad_norm": 2.79994419425291, "learning_rate": 1.4348593549937118e-08, "loss": 1.0253, "step": 9187 }, { "epoch": 0.9663060196931652, "grad_norm": 2.9430470064229963, "learning_rate": 1.4259418414866166e-08, "loss": 0.9725, "step": 9188 }, { "epoch": 0.9664111901350125, "grad_norm": 2.844447657650461, "learning_rate": 1.417052045917966e-08, "loss": 1.0171, "step": 9189 }, { "epoch": 0.9665163605768599, "grad_norm": 3.2049646354495067, "learning_rate": 1.4081899692791058e-08, "loss": 0.9617, "step": 9190 }, { "epoch": 0.9666215310187072, "grad_norm": 2.6622301953211487, "learning_rate": 1.399355612558162e-08, "loss": 0.9837, "step": 9191 }, { "epoch": 0.9667267014605545, "grad_norm": 2.122985949724431, "learning_rate": 1.3905489767402913e-08, "loss": 0.9636, "step": 9192 }, { "epoch": 0.9668318719024018, "grad_norm": 2.6769674621149533, "learning_rate": 1.3817700628074582e-08, "loss": 0.9456, "step": 9193 }, { "epoch": 0.9669370423442492, "grad_norm": 2.365588178179128, "learning_rate": 1.3730188717386016e-08, "loss": 0.9796, "step": 9194 }, { "epoch": 0.9670422127860965, "grad_norm": 1.8396720663564547, "learning_rate": 1.3642954045095525e-08, "loss": 0.981, "step": 9195 }, { "epoch": 0.9671473832279438, "grad_norm": 2.3943971715102736, "learning_rate": 1.3555996620930323e-08, "loss": 1.0078, "step": 9196 }, { "epoch": 0.9672525536697911, "grad_norm": 2.419224654846345, "learning_rate": 1.3469316454586823e-08, "loss": 0.9814, "step": 9197 }, { "epoch": 0.9673577241116385, "grad_norm": 2.4530714452657625, "learning_rate": 1.3382913555730626e-08, "loss": 0.9688, "step": 9198 }, { "epoch": 0.9674628945534858, "grad_norm": 2.2680814906299824, "learning_rate": 1.3296787933996246e-08, "loss": 0.9347, "step": 9199 }, { "epoch": 0.9675680649953331, "grad_norm": 2.189230585914418, "learning_rate": 1.3210939598987394e-08, "loss": 0.9303, "step": 9200 }, { "epoch": 0.9676732354371804, "grad_norm": 2.3582843375690214, "learning_rate": 1.3125368560276686e-08, "loss": 1.0175, "step": 9201 }, { "epoch": 0.9677784058790277, "grad_norm": 2.268975158784598, "learning_rate": 1.3040074827406491e-08, "loss": 0.9531, "step": 9202 }, { "epoch": 0.9678835763208751, "grad_norm": 2.5739483459832098, "learning_rate": 1.2955058409886978e-08, "loss": 0.9481, "step": 9203 }, { "epoch": 0.9679887467627223, "grad_norm": 1.8224473421389638, "learning_rate": 1.2870319317198621e-08, "loss": 0.9507, "step": 9204 }, { "epoch": 0.9680939172045696, "grad_norm": 2.8730850996320743, "learning_rate": 1.2785857558790526e-08, "loss": 0.9602, "step": 9205 }, { "epoch": 0.9681990876464169, "grad_norm": 2.724396836974365, "learning_rate": 1.270167314408044e-08, "loss": 0.9638, "step": 9206 }, { "epoch": 0.9683042580882643, "grad_norm": 2.1481024531914046, "learning_rate": 1.261776608245585e-08, "loss": 1.0108, "step": 9207 }, { "epoch": 0.9684094285301116, "grad_norm": 2.528448588522008, "learning_rate": 1.2534136383272888e-08, "loss": 0.9794, "step": 9208 }, { "epoch": 0.9685145989719589, "grad_norm": 3.49084002666298, "learning_rate": 1.2450784055857145e-08, "loss": 0.9712, "step": 9209 }, { "epoch": 0.9686197694138062, "grad_norm": 2.619678363223223, "learning_rate": 1.2367709109503134e-08, "loss": 0.9621, "step": 9210 }, { "epoch": 0.9687249398556536, "grad_norm": 2.8477945371212465, "learning_rate": 1.2284911553474e-08, "loss": 0.9999, "step": 9211 }, { "epoch": 0.9688301102975009, "grad_norm": 2.5699605691953225, "learning_rate": 1.2202391397002355e-08, "loss": 0.9942, "step": 9212 }, { "epoch": 0.9689352807393482, "grad_norm": 2.96718335182804, "learning_rate": 1.2120148649290008e-08, "loss": 0.9763, "step": 9213 }, { "epoch": 0.9690404511811955, "grad_norm": 2.7754708820964806, "learning_rate": 1.2038183319507957e-08, "loss": 0.9626, "step": 9214 }, { "epoch": 0.9691456216230429, "grad_norm": 2.3177867748746466, "learning_rate": 1.1956495416795277e-08, "loss": 1.0024, "step": 9215 }, { "epoch": 0.9692507920648902, "grad_norm": 2.593173481686718, "learning_rate": 1.1875084950261351e-08, "loss": 1.0028, "step": 9216 }, { "epoch": 0.9693559625067375, "grad_norm": 2.9154573832597452, "learning_rate": 1.1793951928983639e-08, "loss": 0.9496, "step": 9217 }, { "epoch": 0.9694611329485848, "grad_norm": 2.4979489626751343, "learning_rate": 1.1713096362009346e-08, "loss": 0.9504, "step": 9218 }, { "epoch": 0.9695663033904321, "grad_norm": 1.9672731194839046, "learning_rate": 1.1632518258354875e-08, "loss": 0.9085, "step": 9219 }, { "epoch": 0.9696714738322795, "grad_norm": 2.481803674239574, "learning_rate": 1.1552217627004426e-08, "loss": 0.966, "step": 9220 }, { "epoch": 0.9697766442741268, "grad_norm": 2.6552953440994282, "learning_rate": 1.1472194476913057e-08, "loss": 1.0244, "step": 9221 }, { "epoch": 0.9698818147159741, "grad_norm": 2.7197091300670135, "learning_rate": 1.1392448817003354e-08, "loss": 0.9968, "step": 9222 }, { "epoch": 0.9699869851578214, "grad_norm": 2.067177962113043, "learning_rate": 1.1312980656167927e-08, "loss": 0.9947, "step": 9223 }, { "epoch": 0.9700921555996687, "grad_norm": 2.347224718073338, "learning_rate": 1.1233790003267741e-08, "loss": 0.9577, "step": 9224 }, { "epoch": 0.970197326041516, "grad_norm": 5.618522310449893, "learning_rate": 1.115487686713379e-08, "loss": 1.0061, "step": 9225 }, { "epoch": 0.9703024964833633, "grad_norm": 1.7054244073511104, "learning_rate": 1.107624125656459e-08, "loss": 0.9655, "step": 9226 }, { "epoch": 0.9704076669252106, "grad_norm": 2.0088677460681734, "learning_rate": 1.0997883180329515e-08, "loss": 0.985, "step": 9227 }, { "epoch": 0.970512837367058, "grad_norm": 3.0153539597861982, "learning_rate": 1.0919802647165467e-08, "loss": 0.9946, "step": 9228 }, { "epoch": 0.9706180078089053, "grad_norm": 3.353254395720258, "learning_rate": 1.0841999665779368e-08, "loss": 1.0188, "step": 9229 }, { "epoch": 0.9707231782507526, "grad_norm": 2.081090994792908, "learning_rate": 1.0764474244846778e-08, "loss": 0.9783, "step": 9230 }, { "epoch": 0.9708283486925999, "grad_norm": 3.243396799607356, "learning_rate": 1.0687226393012173e-08, "loss": 0.9995, "step": 9231 }, { "epoch": 0.9709335191344473, "grad_norm": 1.8699534084309162, "learning_rate": 1.0610256118889772e-08, "loss": 0.9539, "step": 9232 }, { "epoch": 0.9710386895762946, "grad_norm": 2.480474240008781, "learning_rate": 1.0533563431062154e-08, "loss": 0.9863, "step": 9233 }, { "epoch": 0.9711438600181419, "grad_norm": 2.002217087521726, "learning_rate": 1.0457148338080813e-08, "loss": 0.9804, "step": 9234 }, { "epoch": 0.9712490304599892, "grad_norm": 1.5904141400379188, "learning_rate": 1.0381010848466988e-08, "loss": 0.9646, "step": 9235 }, { "epoch": 0.9713542009018366, "grad_norm": 2.4366903231145454, "learning_rate": 1.0305150970710276e-08, "loss": 0.9668, "step": 9236 }, { "epoch": 0.9714593713436839, "grad_norm": 2.822796227094747, "learning_rate": 1.02295687132703e-08, "loss": 1.0271, "step": 9237 }, { "epoch": 0.9715645417855312, "grad_norm": 1.9912340131203807, "learning_rate": 1.0154264084574206e-08, "loss": 0.9428, "step": 9238 }, { "epoch": 0.9716697122273785, "grad_norm": 1.955104338884243, "learning_rate": 1.0079237093019722e-08, "loss": 0.9747, "step": 9239 }, { "epoch": 0.9717748826692258, "grad_norm": 3.2050011615950793, "learning_rate": 1.0004487746972657e-08, "loss": 0.9594, "step": 9240 }, { "epoch": 0.9718800531110732, "grad_norm": 2.526175674743241, "learning_rate": 9.930016054768011e-09, "loss": 0.9817, "step": 9241 }, { "epoch": 0.9719852235529205, "grad_norm": 2.425815712365738, "learning_rate": 9.85582202471025e-09, "loss": 1.0223, "step": 9242 }, { "epoch": 0.9720903939947678, "grad_norm": 2.1478474630955557, "learning_rate": 9.7819056650722e-09, "loss": 0.9611, "step": 9243 }, { "epoch": 0.972195564436615, "grad_norm": 2.63035991433718, "learning_rate": 9.708266984096714e-09, "loss": 0.9549, "step": 9244 }, { "epoch": 0.9723007348784624, "grad_norm": 2.787607325293226, "learning_rate": 9.634905989994448e-09, "loss": 0.9642, "step": 9245 }, { "epoch": 0.9724059053203097, "grad_norm": 2.7011576256397056, "learning_rate": 9.561822690946076e-09, "loss": 0.935, "step": 9246 }, { "epoch": 0.972511075762157, "grad_norm": 3.3865296605768545, "learning_rate": 9.489017095100916e-09, "loss": 0.9845, "step": 9247 }, { "epoch": 0.9726162462040043, "grad_norm": 2.141639825480503, "learning_rate": 9.416489210577473e-09, "loss": 0.9714, "step": 9248 }, { "epoch": 0.9727214166458517, "grad_norm": 2.381480065424657, "learning_rate": 9.344239045462888e-09, "loss": 1.0016, "step": 9249 }, { "epoch": 0.972826587087699, "grad_norm": 3.061957347212088, "learning_rate": 9.272266607813774e-09, "loss": 1.0217, "step": 9250 }, { "epoch": 0.9729317575295463, "grad_norm": 2.5614131075584416, "learning_rate": 9.200571905655376e-09, "loss": 0.9773, "step": 9251 }, { "epoch": 0.9730369279713936, "grad_norm": 2.6614512876627745, "learning_rate": 9.129154946982687e-09, "loss": 1.009, "step": 9252 }, { "epoch": 0.973142098413241, "grad_norm": 2.5591923309907956, "learning_rate": 9.058015739758786e-09, "loss": 0.9382, "step": 9253 }, { "epoch": 0.9732472688550883, "grad_norm": 3.4612772871088255, "learning_rate": 8.98715429191649e-09, "loss": 0.9797, "step": 9254 }, { "epoch": 0.9733524392969356, "grad_norm": 2.161508443645297, "learning_rate": 8.916570611357534e-09, "loss": 1.0028, "step": 9255 }, { "epoch": 0.9734576097387829, "grad_norm": 2.777809434537492, "learning_rate": 8.84626470595229e-09, "loss": 0.9781, "step": 9256 }, { "epoch": 0.9735627801806302, "grad_norm": 3.256651493127484, "learning_rate": 8.776236583540321e-09, "loss": 0.9788, "step": 9257 }, { "epoch": 0.9736679506224776, "grad_norm": 3.137061562152753, "learning_rate": 8.706486251930657e-09, "loss": 0.9969, "step": 9258 }, { "epoch": 0.9737731210643249, "grad_norm": 1.6215241145993706, "learning_rate": 8.637013718900689e-09, "loss": 0.9675, "step": 9259 }, { "epoch": 0.9738782915061722, "grad_norm": 2.1994977784123075, "learning_rate": 8.567818992197274e-09, "loss": 0.9876, "step": 9260 }, { "epoch": 0.9739834619480195, "grad_norm": 2.4037382184819585, "learning_rate": 8.498902079536186e-09, "loss": 0.9774, "step": 9261 }, { "epoch": 0.9740886323898669, "grad_norm": 2.824206149642182, "learning_rate": 8.430262988602389e-09, "loss": 1.0053, "step": 9262 }, { "epoch": 0.9741938028317142, "grad_norm": 2.9847969505243213, "learning_rate": 8.361901727049204e-09, "loss": 0.9833, "step": 9263 }, { "epoch": 0.9742989732735615, "grad_norm": 2.358889620889671, "learning_rate": 8.293818302499701e-09, "loss": 0.9803, "step": 9264 }, { "epoch": 0.9744041437154087, "grad_norm": 2.5381745630996044, "learning_rate": 8.226012722545863e-09, "loss": 0.9937, "step": 9265 }, { "epoch": 0.9745093141572561, "grad_norm": 2.6358498110575583, "learning_rate": 8.158484994748306e-09, "loss": 0.9967, "step": 9266 }, { "epoch": 0.9746144845991034, "grad_norm": 3.2362925404197846, "learning_rate": 8.09123512663712e-09, "loss": 1.0093, "step": 9267 }, { "epoch": 0.9747196550409507, "grad_norm": 2.1160461048243304, "learning_rate": 8.024263125710751e-09, "loss": 1.0194, "step": 9268 }, { "epoch": 0.974824825482798, "grad_norm": 2.1609581262482456, "learning_rate": 7.95756899943767e-09, "loss": 0.9649, "step": 9269 }, { "epoch": 0.9749299959246454, "grad_norm": 3.1491039134025276, "learning_rate": 7.891152755254427e-09, "loss": 0.9911, "step": 9270 }, { "epoch": 0.9750351663664927, "grad_norm": 2.8428124781388378, "learning_rate": 7.825014400567044e-09, "loss": 0.9895, "step": 9271 }, { "epoch": 0.97514033680834, "grad_norm": 2.2055413871514307, "learning_rate": 7.759153942750174e-09, "loss": 0.9915, "step": 9272 }, { "epoch": 0.9752455072501873, "grad_norm": 2.1726712220112545, "learning_rate": 7.693571389148224e-09, "loss": 0.9897, "step": 9273 }, { "epoch": 0.9753506776920347, "grad_norm": 2.720353151960456, "learning_rate": 7.628266747074231e-09, "loss": 0.9897, "step": 9274 }, { "epoch": 0.975455848133882, "grad_norm": 2.7591952094504424, "learning_rate": 7.563240023809593e-09, "loss": 0.9615, "step": 9275 }, { "epoch": 0.9755610185757293, "grad_norm": 2.737597454130178, "learning_rate": 7.498491226605731e-09, "loss": 0.9927, "step": 9276 }, { "epoch": 0.9756661890175766, "grad_norm": 2.5948670808268184, "learning_rate": 7.434020362682703e-09, "loss": 0.9945, "step": 9277 }, { "epoch": 0.975771359459424, "grad_norm": 2.784078394241727, "learning_rate": 7.369827439228927e-09, "loss": 0.96, "step": 9278 }, { "epoch": 0.9758765299012713, "grad_norm": 3.0703548564961394, "learning_rate": 7.30591246340312e-09, "loss": 0.9259, "step": 9279 }, { "epoch": 0.9759817003431186, "grad_norm": 2.8232566736257385, "learning_rate": 7.242275442332081e-09, "loss": 0.9477, "step": 9280 }, { "epoch": 0.9760868707849659, "grad_norm": 2.0272196755850214, "learning_rate": 7.178916383111523e-09, "loss": 0.9936, "step": 9281 }, { "epoch": 0.9761920412268132, "grad_norm": 2.292451442175712, "learning_rate": 7.1158352928066276e-09, "loss": 0.9719, "step": 9282 }, { "epoch": 0.9762972116686606, "grad_norm": 2.830967058321669, "learning_rate": 7.053032178451769e-09, "loss": 0.9359, "step": 9283 }, { "epoch": 0.9764023821105079, "grad_norm": 2.5809673977523286, "learning_rate": 6.990507047049677e-09, "loss": 0.9662, "step": 9284 }, { "epoch": 0.9765075525523551, "grad_norm": 2.6394164418653476, "learning_rate": 6.928259905572277e-09, "loss": 1.0017, "step": 9285 }, { "epoch": 0.9766127229942024, "grad_norm": 2.486791493687076, "learning_rate": 6.866290760960959e-09, "loss": 1.0181, "step": 9286 }, { "epoch": 0.9767178934360498, "grad_norm": 2.2338709539918225, "learning_rate": 6.804599620125196e-09, "loss": 0.9647, "step": 9287 }, { "epoch": 0.9768230638778971, "grad_norm": 2.1691387566361082, "learning_rate": 6.743186489944764e-09, "loss": 0.9912, "step": 9288 }, { "epoch": 0.9769282343197444, "grad_norm": 3.245572611882046, "learning_rate": 6.68205137726724e-09, "loss": 0.9692, "step": 9289 }, { "epoch": 0.9770334047615917, "grad_norm": 3.04733299754532, "learning_rate": 6.621194288909671e-09, "loss": 0.9791, "step": 9290 }, { "epoch": 0.977138575203439, "grad_norm": 1.8878390554596858, "learning_rate": 6.560615231658296e-09, "loss": 0.9996, "step": 9291 }, { "epoch": 0.9772437456452864, "grad_norm": 2.2627442103553017, "learning_rate": 6.500314212267989e-09, "loss": 1.0112, "step": 9292 }, { "epoch": 0.9773489160871337, "grad_norm": 2.218078785102874, "learning_rate": 6.440291237462815e-09, "loss": 0.9641, "step": 9293 }, { "epoch": 0.977454086528981, "grad_norm": 1.8552740152450433, "learning_rate": 6.380546313935754e-09, "loss": 0.9732, "step": 9294 }, { "epoch": 0.9775592569708283, "grad_norm": 2.4608837759222735, "learning_rate": 6.321079448348977e-09, "loss": 0.9696, "step": 9295 }, { "epoch": 0.9776644274126757, "grad_norm": 2.552592842090153, "learning_rate": 6.261890647333568e-09, "loss": 0.9417, "step": 9296 }, { "epoch": 0.977769597854523, "grad_norm": 2.009057625011869, "learning_rate": 6.202979917489249e-09, "loss": 0.9384, "step": 9297 }, { "epoch": 0.9778747682963703, "grad_norm": 1.9808092212897492, "learning_rate": 6.144347265384931e-09, "loss": 0.9631, "step": 9298 }, { "epoch": 0.9779799387382176, "grad_norm": 3.045529761172713, "learning_rate": 6.085992697559273e-09, "loss": 0.9743, "step": 9299 }, { "epoch": 0.978085109180065, "grad_norm": 3.059783975457296, "learning_rate": 6.027916220518459e-09, "loss": 0.9701, "step": 9300 }, { "epoch": 0.9781902796219123, "grad_norm": 3.0504326853392993, "learning_rate": 5.970117840738976e-09, "loss": 0.9683, "step": 9301 }, { "epoch": 0.9782954500637596, "grad_norm": 2.7901710880220065, "learning_rate": 5.912597564665667e-09, "loss": 1.0016, "step": 9302 }, { "epoch": 0.9784006205056069, "grad_norm": 2.4240633625968497, "learning_rate": 5.85535539871257e-09, "loss": 1.0195, "step": 9303 }, { "epoch": 0.9785057909474543, "grad_norm": 2.4233089687632283, "learning_rate": 5.798391349262356e-09, "loss": 0.9825, "step": 9304 }, { "epoch": 0.9786109613893015, "grad_norm": 1.8446993064170987, "learning_rate": 5.741705422667443e-09, "loss": 1.0112, "step": 9305 }, { "epoch": 0.9787161318311488, "grad_norm": 2.2841403028774048, "learning_rate": 5.685297625248054e-09, "loss": 0.9792, "step": 9306 }, { "epoch": 0.9788213022729961, "grad_norm": 2.755338437254536, "learning_rate": 5.62916796329499e-09, "loss": 0.9768, "step": 9307 }, { "epoch": 0.9789264727148435, "grad_norm": 2.7700700654544432, "learning_rate": 5.573316443066301e-09, "loss": 0.9787, "step": 9308 }, { "epoch": 0.9790316431566908, "grad_norm": 2.8359479362123223, "learning_rate": 5.517743070790061e-09, "loss": 0.9891, "step": 9309 }, { "epoch": 0.9791368135985381, "grad_norm": 2.7613280805123583, "learning_rate": 5.462447852663532e-09, "loss": 0.9876, "step": 9310 }, { "epoch": 0.9792419840403854, "grad_norm": 2.4322910758293825, "learning_rate": 5.407430794852342e-09, "loss": 1.0221, "step": 9311 }, { "epoch": 0.9793471544822328, "grad_norm": 1.9160133184659012, "learning_rate": 5.352691903491303e-09, "loss": 1.0014, "step": 9312 }, { "epoch": 0.9794523249240801, "grad_norm": 2.3481712518499207, "learning_rate": 5.2982311846841436e-09, "loss": 0.9383, "step": 9313 }, { "epoch": 0.9795574953659274, "grad_norm": 2.588135744749517, "learning_rate": 5.2440486445037855e-09, "loss": 0.9673, "step": 9314 }, { "epoch": 0.9796626658077747, "grad_norm": 3.1082126273430664, "learning_rate": 5.190144288991783e-09, "loss": 1.0052, "step": 9315 }, { "epoch": 0.979767836249622, "grad_norm": 2.1785968088651773, "learning_rate": 5.136518124159162e-09, "loss": 0.9681, "step": 9316 }, { "epoch": 0.9798730066914694, "grad_norm": 2.5729066271800622, "learning_rate": 5.0831701559855835e-09, "loss": 1.0088, "step": 9317 }, { "epoch": 0.9799781771333167, "grad_norm": 1.9530621342015528, "learning_rate": 5.030100390419623e-09, "loss": 0.9577, "step": 9318 }, { "epoch": 0.980083347575164, "grad_norm": 2.267823738676506, "learning_rate": 4.977308833379324e-09, "loss": 1.018, "step": 9319 }, { "epoch": 0.9801885180170113, "grad_norm": 2.2816893245073584, "learning_rate": 4.924795490750811e-09, "loss": 0.9672, "step": 9320 }, { "epoch": 0.9802936884588587, "grad_norm": 1.9259466407396693, "learning_rate": 4.872560368390233e-09, "loss": 0.9453, "step": 9321 }, { "epoch": 0.980398858900706, "grad_norm": 1.7974805667836606, "learning_rate": 4.820603472121821e-09, "loss": 0.9947, "step": 9322 }, { "epoch": 0.9805040293425533, "grad_norm": 2.3791236198376646, "learning_rate": 4.768924807739273e-09, "loss": 0.967, "step": 9323 }, { "epoch": 0.9806091997844006, "grad_norm": 2.4419036752163743, "learning_rate": 4.717524381005478e-09, "loss": 0.9996, "step": 9324 }, { "epoch": 0.980714370226248, "grad_norm": 2.7332285105011347, "learning_rate": 4.666402197651687e-09, "loss": 0.9636, "step": 9325 }, { "epoch": 0.9808195406680952, "grad_norm": 1.8101579312807496, "learning_rate": 4.615558263378617e-09, "loss": 1.0082, "step": 9326 }, { "epoch": 0.9809247111099425, "grad_norm": 3.2434727994808683, "learning_rate": 4.5649925838553435e-09, "loss": 1.0134, "step": 9327 }, { "epoch": 0.9810298815517898, "grad_norm": 2.391788541909677, "learning_rate": 4.5147051647206895e-09, "loss": 1.0044, "step": 9328 }, { "epoch": 0.9811350519936372, "grad_norm": 2.25620752561833, "learning_rate": 4.464696011582115e-09, "loss": 0.9879, "step": 9329 }, { "epoch": 0.9812402224354845, "grad_norm": 2.4842392384829406, "learning_rate": 4.414965130015991e-09, "loss": 1.0062, "step": 9330 }, { "epoch": 0.9813453928773318, "grad_norm": 2.2203661202851284, "learning_rate": 4.365512525567605e-09, "loss": 0.9546, "step": 9331 }, { "epoch": 0.9814505633191791, "grad_norm": 2.711772293584609, "learning_rate": 4.316338203751158e-09, "loss": 0.9978, "step": 9332 }, { "epoch": 0.9815557337610264, "grad_norm": 2.7700162537251076, "learning_rate": 4.267442170050318e-09, "loss": 0.9552, "step": 9333 }, { "epoch": 0.9816609042028738, "grad_norm": 2.2577456211849176, "learning_rate": 4.2188244299171126e-09, "loss": 0.9859, "step": 9334 }, { "epoch": 0.9817660746447211, "grad_norm": 2.5568997931283657, "learning_rate": 4.170484988773038e-09, "loss": 0.9332, "step": 9335 }, { "epoch": 0.9818712450865684, "grad_norm": 1.9925306382045842, "learning_rate": 4.122423852007951e-09, "loss": 0.9782, "step": 9336 }, { "epoch": 0.9819764155284157, "grad_norm": 2.921254419402326, "learning_rate": 4.0746410249814515e-09, "loss": 0.981, "step": 9337 }, { "epoch": 0.9820815859702631, "grad_norm": 2.267912103577595, "learning_rate": 4.027136513021501e-09, "loss": 0.9974, "step": 9338 }, { "epoch": 0.9821867564121104, "grad_norm": 4.541966161576452, "learning_rate": 3.979910321425251e-09, "loss": 1.0016, "step": 9339 }, { "epoch": 0.9822919268539577, "grad_norm": 3.2761474483878206, "learning_rate": 3.932962455458489e-09, "loss": 0.9835, "step": 9340 }, { "epoch": 0.982397097295805, "grad_norm": 2.228104089247583, "learning_rate": 3.886292920356749e-09, "loss": 1.0091, "step": 9341 }, { "epoch": 0.9825022677376524, "grad_norm": 5.889922296145182, "learning_rate": 3.839901721323925e-09, "loss": 0.9941, "step": 9342 }, { "epoch": 0.9826074381794997, "grad_norm": 1.9237694450938765, "learning_rate": 3.793788863532822e-09, "loss": 0.9619, "step": 9343 }, { "epoch": 0.982712608621347, "grad_norm": 2.5296195092756477, "learning_rate": 3.747954352125438e-09, "loss": 0.958, "step": 9344 }, { "epoch": 0.9828177790631943, "grad_norm": 2.864791152152932, "learning_rate": 3.702398192212686e-09, "loss": 0.9767, "step": 9345 }, { "epoch": 0.9829229495050416, "grad_norm": 2.4375684665326727, "learning_rate": 3.6571203888746685e-09, "loss": 0.9733, "step": 9346 }, { "epoch": 0.9830281199468889, "grad_norm": 2.0742623744580695, "learning_rate": 3.6121209471595697e-09, "loss": 1.0078, "step": 9347 }, { "epoch": 0.9831332903887362, "grad_norm": 2.4063986427828614, "learning_rate": 3.567399872085875e-09, "loss": 0.998, "step": 9348 }, { "epoch": 0.9832384608305835, "grad_norm": 2.383631755468102, "learning_rate": 3.522957168640151e-09, "loss": 0.9822, "step": 9349 }, { "epoch": 0.9833436312724309, "grad_norm": 2.393452033459606, "learning_rate": 3.4787928417778783e-09, "loss": 0.9491, "step": 9350 }, { "epoch": 0.9834488017142782, "grad_norm": 2.1318023663228094, "learning_rate": 3.4349068964240063e-09, "loss": 1.0221, "step": 9351 }, { "epoch": 0.9835539721561255, "grad_norm": 2.126532018705832, "learning_rate": 3.391299337471843e-09, "loss": 1.0536, "step": 9352 }, { "epoch": 0.9836591425979728, "grad_norm": 2.4227691688428643, "learning_rate": 3.3479701697841647e-09, "loss": 0.9667, "step": 9353 }, { "epoch": 0.9837643130398201, "grad_norm": 2.472893734369122, "learning_rate": 3.304919398192663e-09, "loss": 0.961, "step": 9354 }, { "epoch": 0.9838694834816675, "grad_norm": 2.646191353766477, "learning_rate": 3.2621470274973867e-09, "loss": 1.0272, "step": 9355 }, { "epoch": 0.9839746539235148, "grad_norm": 3.2371826662759817, "learning_rate": 3.219653062468131e-09, "loss": 0.9586, "step": 9356 }, { "epoch": 0.9840798243653621, "grad_norm": 2.6252562394371273, "learning_rate": 3.1774375078433284e-09, "loss": 1.0218, "step": 9357 }, { "epoch": 0.9841849948072094, "grad_norm": 2.883626529975225, "learning_rate": 3.1355003683303243e-09, "loss": 1.0251, "step": 9358 }, { "epoch": 0.9842901652490568, "grad_norm": 2.101061184091713, "learning_rate": 3.0938416486051004e-09, "loss": 0.9871, "step": 9359 }, { "epoch": 0.9843953356909041, "grad_norm": 2.825052483693705, "learning_rate": 3.0524613533133853e-09, "loss": 1.0467, "step": 9360 }, { "epoch": 0.9845005061327514, "grad_norm": 2.1917761304189898, "learning_rate": 3.011359487068988e-09, "loss": 0.9531, "step": 9361 }, { "epoch": 0.9846056765745987, "grad_norm": 2.087827643664742, "learning_rate": 2.970536054455464e-09, "loss": 0.9927, "step": 9362 }, { "epoch": 0.9847108470164461, "grad_norm": 2.849510889632073, "learning_rate": 2.9299910600247285e-09, "loss": 0.9576, "step": 9363 }, { "epoch": 0.9848160174582934, "grad_norm": 2.3562660790190324, "learning_rate": 2.8897245082978863e-09, "loss": 0.9732, "step": 9364 }, { "epoch": 0.9849211879001407, "grad_norm": 2.3002496067135803, "learning_rate": 2.849736403764958e-09, "loss": 0.9836, "step": 9365 }, { "epoch": 0.985026358341988, "grad_norm": 1.726852714144181, "learning_rate": 2.810026750885153e-09, "loss": 0.9782, "step": 9366 }, { "epoch": 0.9851315287838353, "grad_norm": 2.796815170845847, "learning_rate": 2.770595554086042e-09, "loss": 0.9365, "step": 9367 }, { "epoch": 0.9852366992256826, "grad_norm": 2.6158136518772763, "learning_rate": 2.7314428177646622e-09, "loss": 0.9473, "step": 9368 }, { "epoch": 0.9853418696675299, "grad_norm": 2.2921022351525555, "learning_rate": 2.6925685462869664e-09, "loss": 0.9774, "step": 9369 }, { "epoch": 0.9854470401093772, "grad_norm": 1.8665257532281978, "learning_rate": 2.6539727439875427e-09, "loss": 0.9579, "step": 9370 }, { "epoch": 0.9855522105512246, "grad_norm": 2.0803248457969206, "learning_rate": 2.6156554151704484e-09, "loss": 0.9765, "step": 9371 }, { "epoch": 0.9856573809930719, "grad_norm": 3.028285191313156, "learning_rate": 2.5776165641080986e-09, "loss": 0.9999, "step": 9372 }, { "epoch": 0.9857625514349192, "grad_norm": 3.576679364872881, "learning_rate": 2.539856195041823e-09, "loss": 0.977, "step": 9373 }, { "epoch": 0.9858677218767665, "grad_norm": 2.4305778370700826, "learning_rate": 2.5023743121826983e-09, "loss": 1.0111, "step": 9374 }, { "epoch": 0.9859728923186138, "grad_norm": 2.47884229852146, "learning_rate": 2.465170919710158e-09, "loss": 0.9983, "step": 9375 }, { "epoch": 0.9860780627604612, "grad_norm": 2.7349159526364994, "learning_rate": 2.428246021772551e-09, "loss": 0.991, "step": 9376 }, { "epoch": 0.9861832332023085, "grad_norm": 2.637560167507425, "learning_rate": 2.391599622487417e-09, "loss": 0.9667, "step": 9377 }, { "epoch": 0.9862884036441558, "grad_norm": 1.9300449879076964, "learning_rate": 2.3552317259409317e-09, "loss": 1.0041, "step": 9378 }, { "epoch": 0.9863935740860031, "grad_norm": 2.6765740954561363, "learning_rate": 2.3191423361884626e-09, "loss": 1.0011, "step": 9379 }, { "epoch": 0.9864987445278505, "grad_norm": 2.616883579256923, "learning_rate": 2.28333145725429e-09, "loss": 1.0176, "step": 9380 }, { "epoch": 0.9866039149696978, "grad_norm": 1.8573269651299802, "learning_rate": 2.247799093131886e-09, "loss": 0.969, "step": 9381 }, { "epoch": 0.9867090854115451, "grad_norm": 2.60301904370189, "learning_rate": 2.2125452477828047e-09, "loss": 0.9769, "step": 9382 }, { "epoch": 0.9868142558533924, "grad_norm": 1.9682961895311704, "learning_rate": 2.1775699251383455e-09, "loss": 0.9771, "step": 9383 }, { "epoch": 0.9869194262952398, "grad_norm": 3.191223466664238, "learning_rate": 2.1428731290987215e-09, "loss": 1.046, "step": 9384 }, { "epoch": 0.9870245967370871, "grad_norm": 2.892416445308266, "learning_rate": 2.1084548635327828e-09, "loss": 0.9734, "step": 9385 }, { "epoch": 0.9871297671789344, "grad_norm": 3.095226990473517, "learning_rate": 2.0743151322785703e-09, "loss": 0.9719, "step": 9386 }, { "epoch": 0.9872349376207816, "grad_norm": 2.7327344515972545, "learning_rate": 2.0404539391427614e-09, "loss": 0.9671, "step": 9387 }, { "epoch": 0.987340108062629, "grad_norm": 2.9017278864296263, "learning_rate": 2.0068712879009468e-09, "loss": 1.0231, "step": 9388 }, { "epoch": 0.9874452785044763, "grad_norm": 3.392301107777531, "learning_rate": 1.973567182298186e-09, "loss": 0.9928, "step": 9389 }, { "epoch": 0.9875504489463236, "grad_norm": 2.3644487612910856, "learning_rate": 1.9405416260481757e-09, "loss": 0.9683, "step": 9390 }, { "epoch": 0.9876556193881709, "grad_norm": 2.6559023017178194, "learning_rate": 1.907794622833248e-09, "loss": 0.9537, "step": 9391 }, { "epoch": 0.9877607898300182, "grad_norm": 2.6251794536087494, "learning_rate": 1.875326176304926e-09, "loss": 1.0005, "step": 9392 }, { "epoch": 0.9878659602718656, "grad_norm": 2.1131946840820253, "learning_rate": 1.8431362900839244e-09, "loss": 0.9866, "step": 9393 }, { "epoch": 0.9879711307137129, "grad_norm": 3.060384152042341, "learning_rate": 1.8112249677598726e-09, "loss": 1.0004, "step": 9394 }, { "epoch": 0.9880763011555602, "grad_norm": 2.7092939576558366, "learning_rate": 1.7795922128904796e-09, "loss": 0.9733, "step": 9395 }, { "epoch": 0.9881814715974075, "grad_norm": 3.15026244560097, "learning_rate": 1.7482380290034795e-09, "loss": 0.9553, "step": 9396 }, { "epoch": 0.9882866420392549, "grad_norm": 2.3143889813143637, "learning_rate": 1.7171624195952418e-09, "loss": 0.9775, "step": 9397 }, { "epoch": 0.9883918124811022, "grad_norm": 1.749676378937021, "learning_rate": 1.6863653881307728e-09, "loss": 1.0136, "step": 9398 }, { "epoch": 0.9884969829229495, "grad_norm": 2.4561058588577636, "learning_rate": 1.6558469380439923e-09, "loss": 0.9778, "step": 9399 }, { "epoch": 0.9886021533647968, "grad_norm": 2.869598988661229, "learning_rate": 1.6256070727380114e-09, "loss": 0.9961, "step": 9400 }, { "epoch": 0.9887073238066442, "grad_norm": 1.3519472395346488, "learning_rate": 1.5956457955848547e-09, "loss": 0.9565, "step": 9401 }, { "epoch": 0.9888124942484915, "grad_norm": 2.550759243468463, "learning_rate": 1.5659631099257389e-09, "loss": 0.9986, "step": 9402 }, { "epoch": 0.9889176646903388, "grad_norm": 2.2780299064704668, "learning_rate": 1.5365590190699608e-09, "loss": 0.9818, "step": 9403 }, { "epoch": 0.9890228351321861, "grad_norm": 2.5188559087529003, "learning_rate": 1.507433526296842e-09, "loss": 0.9911, "step": 9404 }, { "epoch": 0.9891280055740335, "grad_norm": 2.188706310697837, "learning_rate": 1.4785866348537848e-09, "loss": 0.9693, "step": 9405 }, { "epoch": 0.9892331760158808, "grad_norm": 1.809924536593071, "learning_rate": 1.4500183479573825e-09, "loss": 0.9738, "step": 9406 }, { "epoch": 0.989338346457728, "grad_norm": 3.093352924429147, "learning_rate": 1.4217286687936982e-09, "loss": 1.0348, "step": 9407 }, { "epoch": 0.9894435168995753, "grad_norm": 2.7413192203962407, "learning_rate": 1.3937176005165975e-09, "loss": 1.0094, "step": 9408 }, { "epoch": 0.9895486873414227, "grad_norm": 1.9967362712782808, "learning_rate": 1.3659851462499708e-09, "loss": 0.9604, "step": 9409 }, { "epoch": 0.98965385778327, "grad_norm": 3.023690617394417, "learning_rate": 1.3385313090857888e-09, "loss": 0.9499, "step": 9410 }, { "epoch": 0.9897590282251173, "grad_norm": 3.0257951189785106, "learning_rate": 1.3113560920860468e-09, "loss": 0.9351, "step": 9411 }, { "epoch": 0.9898641986669646, "grad_norm": 2.413833812119721, "learning_rate": 1.284459498280266e-09, "loss": 1.0105, "step": 9412 }, { "epoch": 0.989969369108812, "grad_norm": 2.037001468545561, "learning_rate": 1.257841530668269e-09, "loss": 0.9782, "step": 9413 }, { "epoch": 0.9900745395506593, "grad_norm": 1.6511342738858803, "learning_rate": 1.2315021922176817e-09, "loss": 0.9412, "step": 9414 }, { "epoch": 0.9901797099925066, "grad_norm": 2.8902872997925804, "learning_rate": 1.2054414858655995e-09, "loss": 0.9708, "step": 9415 }, { "epoch": 0.9902848804343539, "grad_norm": 4.027061186967938, "learning_rate": 1.1796594145183081e-09, "loss": 1.0044, "step": 9416 }, { "epoch": 0.9903900508762012, "grad_norm": 2.0376936451035483, "learning_rate": 1.1541559810504532e-09, "loss": 0.9808, "step": 9417 }, { "epoch": 0.9904952213180486, "grad_norm": 2.5382357729640037, "learning_rate": 1.1289311883058707e-09, "loss": 0.9738, "step": 9418 }, { "epoch": 0.9906003917598959, "grad_norm": 1.7785026493838247, "learning_rate": 1.1039850390973107e-09, "loss": 0.9819, "step": 9419 }, { "epoch": 0.9907055622017432, "grad_norm": 2.6469976920281453, "learning_rate": 1.0793175362067143e-09, "loss": 0.9929, "step": 9420 }, { "epoch": 0.9908107326435905, "grad_norm": 2.0316413833430387, "learning_rate": 1.054928682384382e-09, "loss": 0.9638, "step": 9421 }, { "epoch": 0.9909159030854379, "grad_norm": 2.6241976883897, "learning_rate": 1.0308184803498046e-09, "loss": 0.9819, "step": 9422 }, { "epoch": 0.9910210735272852, "grad_norm": 2.229629028463766, "learning_rate": 1.0069869327919424e-09, "loss": 0.9947, "step": 9423 }, { "epoch": 0.9911262439691325, "grad_norm": 2.788186432126167, "learning_rate": 9.834340423678368e-10, "loss": 0.9522, "step": 9424 }, { "epoch": 0.9912314144109798, "grad_norm": 2.698863781368917, "learning_rate": 9.601598117037202e-10, "loss": 0.9553, "step": 9425 }, { "epoch": 0.9913365848528272, "grad_norm": 2.3066739529849736, "learning_rate": 9.371642433952943e-10, "loss": 0.9726, "step": 9426 }, { "epoch": 0.9914417552946745, "grad_norm": 2.7776673360748387, "learning_rate": 9.144473400060639e-10, "loss": 1.005, "step": 9427 }, { "epoch": 0.9915469257365217, "grad_norm": 2.033776373402885, "learning_rate": 8.920091040698353e-10, "loss": 0.9883, "step": 9428 }, { "epoch": 0.991652096178369, "grad_norm": 2.9626974448308045, "learning_rate": 8.698495380882188e-10, "loss": 1.0091, "step": 9429 }, { "epoch": 0.9917572666202163, "grad_norm": 2.412674687043095, "learning_rate": 8.479686445322932e-10, "loss": 0.9696, "step": 9430 }, { "epoch": 0.9918624370620637, "grad_norm": 2.280018692073066, "learning_rate": 8.263664258420512e-10, "loss": 0.952, "step": 9431 }, { "epoch": 0.991967607503911, "grad_norm": 2.536989287684638, "learning_rate": 8.050428844261215e-10, "loss": 1.0399, "step": 9432 }, { "epoch": 0.9920727779457583, "grad_norm": 2.4868353766120803, "learning_rate": 7.839980226623245e-10, "loss": 0.9822, "step": 9433 }, { "epoch": 0.9921779483876056, "grad_norm": 2.2345599750935303, "learning_rate": 7.632318428976715e-10, "loss": 0.9763, "step": 9434 }, { "epoch": 0.992283118829453, "grad_norm": 2.7320164282700006, "learning_rate": 7.427443474469776e-10, "loss": 0.9614, "step": 9435 }, { "epoch": 0.9923882892713003, "grad_norm": 2.693307353444875, "learning_rate": 7.225355385953592e-10, "loss": 0.9834, "step": 9436 }, { "epoch": 0.9924934597131476, "grad_norm": 2.7375269267620266, "learning_rate": 7.02605418596014e-10, "loss": 1.0151, "step": 9437 }, { "epoch": 0.9925986301549949, "grad_norm": 1.9154701081163854, "learning_rate": 6.829539896716087e-10, "loss": 0.9614, "step": 9438 }, { "epoch": 0.9927038005968423, "grad_norm": 2.7318506358078727, "learning_rate": 6.635812540131681e-10, "loss": 0.9716, "step": 9439 }, { "epoch": 0.9928089710386896, "grad_norm": 2.35587299097919, "learning_rate": 6.444872137806313e-10, "loss": 0.9932, "step": 9440 }, { "epoch": 0.9929141414805369, "grad_norm": 2.7986557506186425, "learning_rate": 6.256718711036835e-10, "loss": 0.9715, "step": 9441 }, { "epoch": 0.9930193119223842, "grad_norm": 2.958571293660485, "learning_rate": 6.071352280800912e-10, "loss": 0.988, "step": 9442 }, { "epoch": 0.9931244823642316, "grad_norm": 3.0504234064777522, "learning_rate": 5.888772867770897e-10, "loss": 0.9765, "step": 9443 }, { "epoch": 0.9932296528060789, "grad_norm": 2.2184835377963266, "learning_rate": 5.708980492302729e-10, "loss": 0.9987, "step": 9444 }, { "epoch": 0.9933348232479262, "grad_norm": 1.9829209945604436, "learning_rate": 5.531975174444259e-10, "loss": 0.9188, "step": 9445 }, { "epoch": 0.9934399936897735, "grad_norm": 2.833298967270071, "learning_rate": 5.357756933935254e-10, "loss": 1.0, "step": 9446 }, { "epoch": 0.9935451641316209, "grad_norm": 2.299238400231606, "learning_rate": 5.186325790199065e-10, "loss": 0.9838, "step": 9447 }, { "epoch": 0.9936503345734681, "grad_norm": 2.1636055939503707, "learning_rate": 5.017681762356507e-10, "loss": 0.9973, "step": 9448 }, { "epoch": 0.9937555050153154, "grad_norm": 2.607403264070489, "learning_rate": 4.851824869211985e-10, "loss": 1.0123, "step": 9449 }, { "epoch": 0.9938606754571627, "grad_norm": 2.921227734664202, "learning_rate": 4.688755129256262e-10, "loss": 1.0149, "step": 9450 }, { "epoch": 0.99396584589901, "grad_norm": 2.106489666904462, "learning_rate": 4.5284725606747903e-10, "loss": 0.9528, "step": 9451 }, { "epoch": 0.9940710163408574, "grad_norm": 2.2948653311351146, "learning_rate": 4.3709771813393864e-10, "loss": 0.9819, "step": 9452 }, { "epoch": 0.9941761867827047, "grad_norm": 2.5397350328464, "learning_rate": 4.216269008813778e-10, "loss": 1.0424, "step": 9453 }, { "epoch": 0.994281357224552, "grad_norm": 2.6859348273107098, "learning_rate": 4.0643480603480554e-10, "loss": 0.9152, "step": 9454 }, { "epoch": 0.9943865276663993, "grad_norm": 2.305716938487438, "learning_rate": 3.9152143528842226e-10, "loss": 0.9636, "step": 9455 }, { "epoch": 0.9944916981082467, "grad_norm": 2.317768982172829, "learning_rate": 3.768867903047868e-10, "loss": 0.9895, "step": 9456 }, { "epoch": 0.994596868550094, "grad_norm": 1.9307647936153862, "learning_rate": 3.625308727162047e-10, "loss": 0.9515, "step": 9457 }, { "epoch": 0.9947020389919413, "grad_norm": 2.9486289730118234, "learning_rate": 3.4845368412306235e-10, "loss": 0.9608, "step": 9458 }, { "epoch": 0.9948072094337886, "grad_norm": 2.821432702813321, "learning_rate": 3.346552260954927e-10, "loss": 1.0182, "step": 9459 }, { "epoch": 0.994912379875636, "grad_norm": 2.0682478422118526, "learning_rate": 3.2113550017198734e-10, "loss": 0.9518, "step": 9460 }, { "epoch": 0.9950175503174833, "grad_norm": 2.4469821699306205, "learning_rate": 3.0789450785995155e-10, "loss": 1.0303, "step": 9461 }, { "epoch": 0.9951227207593306, "grad_norm": 2.318744276916468, "learning_rate": 2.94932250635982e-10, "loss": 0.9428, "step": 9462 }, { "epoch": 0.9952278912011779, "grad_norm": 1.9099911442636568, "learning_rate": 2.8224872994558895e-10, "loss": 0.9649, "step": 9463 }, { "epoch": 0.9953330616430253, "grad_norm": 2.244161598411751, "learning_rate": 2.6984394720291904e-10, "loss": 0.9282, "step": 9464 }, { "epoch": 0.9954382320848726, "grad_norm": 2.0228783659285217, "learning_rate": 2.5771790379103267e-10, "loss": 1.0379, "step": 9465 }, { "epoch": 0.9955434025267199, "grad_norm": 2.7663617676626044, "learning_rate": 2.45870601062459e-10, "loss": 0.9732, "step": 9466 }, { "epoch": 0.9956485729685672, "grad_norm": 2.783584442780796, "learning_rate": 2.3430204033808577e-10, "loss": 0.981, "step": 9467 }, { "epoch": 0.9957537434104144, "grad_norm": 2.2784586979725168, "learning_rate": 2.2301222290771473e-10, "loss": 0.9731, "step": 9468 }, { "epoch": 0.9958589138522618, "grad_norm": 2.474615759545288, "learning_rate": 2.1200115003061628e-10, "loss": 0.9718, "step": 9469 }, { "epoch": 0.9959640842941091, "grad_norm": 2.7375505869626546, "learning_rate": 2.012688229344195e-10, "loss": 0.9472, "step": 9470 }, { "epoch": 0.9960692547359564, "grad_norm": 3.181469509796547, "learning_rate": 1.9081524281566732e-10, "loss": 0.9785, "step": 9471 }, { "epoch": 0.9961744251778037, "grad_norm": 3.8615566778110924, "learning_rate": 1.8064041084037143e-10, "loss": 0.9676, "step": 9472 }, { "epoch": 0.9962795956196511, "grad_norm": 3.381624272619375, "learning_rate": 1.707443281429022e-10, "loss": 1.0194, "step": 9473 }, { "epoch": 0.9963847660614984, "grad_norm": 3.1413976288468852, "learning_rate": 1.6112699582682135e-10, "loss": 1.0142, "step": 9474 }, { "epoch": 0.9964899365033457, "grad_norm": 2.6439768248020177, "learning_rate": 1.517884149646043e-10, "loss": 0.9825, "step": 9475 }, { "epoch": 0.996595106945193, "grad_norm": 2.47459179696731, "learning_rate": 1.4272858659736265e-10, "loss": 0.9735, "step": 9476 }, { "epoch": 0.9967002773870404, "grad_norm": 1.6572520116151759, "learning_rate": 1.33947511735677e-10, "loss": 0.967, "step": 9477 }, { "epoch": 0.9968054478288877, "grad_norm": 2.1650133863954624, "learning_rate": 1.2544519135820887e-10, "loss": 0.9935, "step": 9478 }, { "epoch": 0.996910618270735, "grad_norm": 2.6395311326751396, "learning_rate": 1.1722162641336633e-10, "loss": 0.9942, "step": 9479 }, { "epoch": 0.9970157887125823, "grad_norm": 2.3369728830368817, "learning_rate": 1.0927681781819354e-10, "loss": 0.9496, "step": 9480 }, { "epoch": 0.9971209591544297, "grad_norm": 2.9099781993657454, "learning_rate": 1.0161076645837098e-10, "loss": 0.9617, "step": 9481 }, { "epoch": 0.997226129596277, "grad_norm": 1.9322457713584655, "learning_rate": 9.422347318877034e-11, "loss": 1.0036, "step": 9482 }, { "epoch": 0.9973313000381243, "grad_norm": 3.2635257365367516, "learning_rate": 8.711493883317712e-11, "loss": 0.9886, "step": 9483 }, { "epoch": 0.9974364704799716, "grad_norm": 2.220418457360787, "learning_rate": 8.028516418456811e-11, "loss": 0.9676, "step": 9484 }, { "epoch": 0.997541640921819, "grad_norm": 2.3115695199846384, "learning_rate": 7.37341500040012e-11, "loss": 0.9517, "step": 9485 }, { "epoch": 0.9976468113636663, "grad_norm": 2.013983684435446, "learning_rate": 6.746189702228067e-11, "loss": 0.9492, "step": 9486 }, { "epoch": 0.9977519818055136, "grad_norm": 2.816746556589395, "learning_rate": 6.14684059385695e-11, "loss": 0.9404, "step": 9487 }, { "epoch": 0.9978571522473609, "grad_norm": 2.0503969665852066, "learning_rate": 5.575367742149951e-11, "loss": 0.9634, "step": 9488 }, { "epoch": 0.9979623226892081, "grad_norm": 1.724401911640861, "learning_rate": 5.0317712108061135e-11, "loss": 0.9739, "step": 9489 }, { "epoch": 0.9980674931310555, "grad_norm": 2.1346980648205705, "learning_rate": 4.5160510604436156e-11, "loss": 0.9721, "step": 9490 }, { "epoch": 0.9981726635729028, "grad_norm": 2.090678339510078, "learning_rate": 4.028207348572011e-11, "loss": 0.9147, "step": 9491 }, { "epoch": 0.9982778340147501, "grad_norm": 2.392059438797523, "learning_rate": 3.568240129619982e-11, "loss": 1.0114, "step": 9492 }, { "epoch": 0.9983830044565974, "grad_norm": 2.506048349048106, "learning_rate": 3.1361494548243224e-11, "loss": 0.9004, "step": 9493 }, { "epoch": 0.9984881748984448, "grad_norm": 2.7270228934874545, "learning_rate": 2.7319353723964657e-11, "loss": 0.9964, "step": 9494 }, { "epoch": 0.9985933453402921, "grad_norm": 2.3101031627179864, "learning_rate": 2.355597927411468e-11, "loss": 0.9873, "step": 9495 }, { "epoch": 0.9986985157821394, "grad_norm": 1.8549205557474275, "learning_rate": 2.0071371618080037e-11, "loss": 1.0167, "step": 9496 }, { "epoch": 0.9988036862239867, "grad_norm": 2.4247474783252194, "learning_rate": 1.686553114471634e-11, "loss": 1.0092, "step": 9497 }, { "epoch": 0.9989088566658341, "grad_norm": 2.5711740916551227, "learning_rate": 1.3938458211515404e-11, "loss": 0.996, "step": 9498 }, { "epoch": 0.9990140271076814, "grad_norm": 2.6142784134493415, "learning_rate": 1.1290153144605243e-11, "loss": 0.936, "step": 9499 }, { "epoch": 0.9991191975495287, "grad_norm": 2.4638656000993873, "learning_rate": 8.920616239582735e-12, "loss": 0.9754, "step": 9500 }, { "epoch": 0.999224367991376, "grad_norm": 2.5496610952882843, "learning_rate": 6.829847760403408e-12, "loss": 0.9836, "step": 9501 }, { "epoch": 0.9993295384332234, "grad_norm": 2.000881646427515, "learning_rate": 5.017847940214093e-12, "loss": 0.988, "step": 9502 }, { "epoch": 0.9994347088750707, "grad_norm": 2.4163049388267765, "learning_rate": 3.4846169813529394e-12, "loss": 0.9964, "step": 9503 }, { "epoch": 0.999539879316918, "grad_norm": 2.5790803945058864, "learning_rate": 2.230155054516736e-12, "loss": 0.9758, "step": 9504 }, { "epoch": 0.9996450497587653, "grad_norm": 2.555299051963278, "learning_rate": 1.2544622998711398e-12, "loss": 1.0216, "step": 9505 }, { "epoch": 0.9997502202006127, "grad_norm": 2.506038816699829, "learning_rate": 5.575388259404512e-13, "loss": 0.9635, "step": 9506 }, { "epoch": 0.99985539064246, "grad_norm": 2.9286325043640606, "learning_rate": 1.3938471044028234e-13, "loss": 0.9874, "step": 9507 }, { "epoch": 0.9999605610843073, "grad_norm": 1.8297745470230433, "learning_rate": 0.0, "loss": 0.948, "step": 9508 }, { "epoch": 0.9999605610843073, "step": 9508, "total_flos": 7610494285643776.0, "train_loss": 1.0062551115383658, "train_runtime": 85567.0625, "train_samples_per_second": 7.112, "train_steps_per_second": 0.111 } ], "logging_steps": 1.0, "max_steps": 9508, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 7610494285643776.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }