diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -3,21859 +3,608 @@ "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, - "global_step": 15590, + "global_step": 406, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 6.414368184733804e-05, - "grad_norm": 2.122934421744886, - "learning_rate": 1.2828736369467608e-07, - "loss": 1.1536, + "epoch": 0.0024630541871921183, + "grad_norm": 0.16909417510032654, + "learning_rate": 4.8780487804878055e-06, + "loss": 0.8252, "step": 1 }, { - "epoch": 0.00032071840923669016, - "grad_norm": 2.6120313710646594, - "learning_rate": 6.414368184733804e-07, - "loss": 0.8767, + "epoch": 0.012315270935960592, + "grad_norm": 0.15486516058444977, + "learning_rate": 2.4390243902439026e-05, + "loss": 0.7899, "step": 5 }, { - "epoch": 0.0006414368184733803, - "grad_norm": 1.9036320491341159, - "learning_rate": 1.2828736369467608e-06, - "loss": 1.1617, + "epoch": 0.024630541871921183, + "grad_norm": 0.16919347643852234, + "learning_rate": 4.878048780487805e-05, + "loss": 0.8156, "step": 10 }, { - "epoch": 0.0009621552277100705, - "grad_norm": 2.2675442011112223, - "learning_rate": 1.9243104554201413e-06, - "loss": 0.9348, + "epoch": 0.03694581280788178, + "grad_norm": 0.18892544507980347, + "learning_rate": 7.317073170731707e-05, + "loss": 0.7756, "step": 15 }, { - "epoch": 0.0012828736369467607, - "grad_norm": 1.8668180685612314, - "learning_rate": 2.5657472738935216e-06, - "loss": 0.9887, + "epoch": 0.04926108374384237, + "grad_norm": 0.15618345141410828, + "learning_rate": 9.75609756097561e-05, + "loss": 0.7755, "step": 20 }, { - "epoch": 0.001603592046183451, - "grad_norm": 3.7598222149067344, - "learning_rate": 3.2071840923669024e-06, - "loss": 0.9933, + "epoch": 0.06157635467980296, + "grad_norm": 0.11770500987768173, + "learning_rate": 0.00012195121951219512, + "loss": 0.7548, "step": 25 }, { - "epoch": 0.001924310455420141, - "grad_norm": 0.7497789448952673, - "learning_rate": 3.848620910840283e-06, - "loss": 0.8951, + "epoch": 0.07389162561576355, + "grad_norm": 0.10984448343515396, + "learning_rate": 0.00014634146341463414, + "loss": 0.7463, "step": 30 }, { - "epoch": 0.0022450288646568314, - "grad_norm": 0.7497466999070768, - "learning_rate": 4.490057729313663e-06, - "loss": 1.1075, + "epoch": 0.08620689655172414, + "grad_norm": 0.10522622615098953, + "learning_rate": 0.0001707317073170732, + "loss": 0.7447, "step": 35 }, { - "epoch": 0.0025657472738935213, - "grad_norm": 0.6269808680953072, - "learning_rate": 5.131494547787043e-06, - "loss": 0.9902, + "epoch": 0.09852216748768473, + "grad_norm": 0.11389700323343277, + "learning_rate": 0.0001951219512195122, + "loss": 0.7476, "step": 40 }, { - "epoch": 0.0028864656831302116, - "grad_norm": 0.7670239135600633, - "learning_rate": 5.7729313662604236e-06, - "loss": 0.8813, + "epoch": 0.11083743842364532, + "grad_norm": 0.11819540709257126, + "learning_rate": 0.0001999407400739705, + "loss": 0.7548, "step": 45 }, { - "epoch": 0.003207184092366902, - "grad_norm": 2.7386818812948612, - "learning_rate": 6.414368184733805e-06, - "loss": 1.0156, + "epoch": 0.12315270935960591, + "grad_norm": 0.11218090355396271, + "learning_rate": 0.00019970011699250152, + "loss": 0.7347, "step": 50 }, { - "epoch": 0.003527902501603592, - "grad_norm": 0.6074882325288049, - "learning_rate": 7.055805003207184e-06, - "loss": 0.8396, + "epoch": 0.1354679802955665, + "grad_norm": 0.11884038895368576, + "learning_rate": 0.00019927487224577402, + "loss": 0.7485, "step": 55 }, { - "epoch": 0.003848620910840282, - "grad_norm": 0.5127156771589034, - "learning_rate": 7.697241821680565e-06, - "loss": 0.8716, + "epoch": 0.1477832512315271, + "grad_norm": 0.10330455005168915, + "learning_rate": 0.0001986657932891657, + "loss": 0.731, "step": 60 }, { - "epoch": 0.004169339320076972, - "grad_norm": 1.9056440284966447, - "learning_rate": 8.338678640153946e-06, - "loss": 0.854, + "epoch": 0.16009852216748768, + "grad_norm": 0.10771363228559494, + "learning_rate": 0.00019787400799669154, + "loss": 0.7517, "step": 65 }, { - "epoch": 0.004490057729313663, - "grad_norm": 0.5220731546600494, - "learning_rate": 8.980115458627326e-06, - "loss": 0.7033, + "epoch": 0.1724137931034483, + "grad_norm": 0.10541368275880814, + "learning_rate": 0.00019690098257244064, + "loss": 0.7342, "step": 70 }, { - "epoch": 0.004810776138550353, - "grad_norm": 0.8528246103677898, - "learning_rate": 9.621552277100706e-06, - "loss": 0.7803, + "epoch": 0.18472906403940886, + "grad_norm": 0.10425801575183868, + "learning_rate": 0.00019574851883550395, + "loss": 0.748, "step": 75 }, { - "epoch": 0.005131494547787043, - "grad_norm": 0.9553069537717978, - "learning_rate": 1.0262989095574087e-05, - "loss": 0.8577, + "epoch": 0.19704433497536947, + "grad_norm": 0.10108363628387451, + "learning_rate": 0.00019441875088341997, + "loss": 0.7404, "step": 80 }, { - "epoch": 0.005452212957023733, - "grad_norm": 1.089966886886208, - "learning_rate": 1.0904425914047467e-05, - "loss": 0.5766, + "epoch": 0.20935960591133004, + "grad_norm": 0.09765681624412537, + "learning_rate": 0.00019291414114031743, + "loss": 0.7454, "step": 85 }, { - "epoch": 0.005772931366260423, - "grad_norm": 1.3259601452666736, - "learning_rate": 1.1545862732520847e-05, - "loss": 0.6605, + "epoch": 0.22167487684729065, + "grad_norm": 0.10201067477464676, + "learning_rate": 0.00019123747579707275, + "loss": 0.7398, "step": 90 }, { - "epoch": 0.006093649775497113, - "grad_norm": 0.5501705053457395, - "learning_rate": 1.2187299550994227e-05, - "loss": 0.622, + "epoch": 0.23399014778325122, + "grad_norm": 0.1033327504992485, + "learning_rate": 0.0001893918596519257, + "loss": 0.7284, "step": 95 }, { - "epoch": 0.006414368184733804, - "grad_norm": 0.565393913728341, - "learning_rate": 1.282873636946761e-05, - "loss": 0.6598, + "epoch": 0.24630541871921183, + "grad_norm": 0.10564754903316498, + "learning_rate": 0.00018738071036110808, + "loss": 0.7164, "step": 100 }, { - "epoch": 0.006735086593970494, - "grad_norm": 0.8355349528605185, - "learning_rate": 1.3470173187940988e-05, - "loss": 0.57, + "epoch": 0.25862068965517243, + "grad_norm": 0.09552671015262604, + "learning_rate": 0.00018520775211013093, + "loss": 0.7197, "step": 105 }, { - "epoch": 0.007055805003207184, - "grad_norm": 0.7311107470147664, - "learning_rate": 1.4111610006414368e-05, - "loss": 0.7012, + "epoch": 0.270935960591133, + "grad_norm": 0.10289793461561203, + "learning_rate": 0.00018287700871745036, + "loss": 0.7342, "step": 110 }, { - "epoch": 0.0073765234124438745, - "grad_norm": 0.7172503923642882, - "learning_rate": 1.4753046824887749e-05, - "loss": 0.6794, + "epoch": 0.2832512315270936, + "grad_norm": 0.11379728466272354, + "learning_rate": 0.00018039279618328212, + "loss": 0.7542, "step": 115 }, { - "epoch": 0.007697241821680564, - "grad_norm": 0.6517343625027339, - "learning_rate": 1.539448364336113e-05, - "loss": 0.7512, + "epoch": 0.2955665024630542, + "grad_norm": 0.12156806141138077, + "learning_rate": 0.0001777597146973627, + "loss": 0.7591, "step": 120 }, { - "epoch": 0.008017960230917255, - "grad_norm": 0.7506138412539792, - "learning_rate": 1.603592046183451e-05, - "loss": 0.6939, + "epoch": 0.3078817733990148, + "grad_norm": 0.10124150663614273, + "learning_rate": 0.00017498264012045687, + "loss": 0.7414, "step": 125 }, { - "epoch": 0.008338678640153944, - "grad_norm": 0.6731898912128177, - "learning_rate": 1.667735728030789e-05, - "loss": 0.6853, + "epoch": 0.32019704433497537, + "grad_norm": 0.10434222966432571, + "learning_rate": 0.00017206671495538612, + "loss": 0.7414, "step": 130 }, { - "epoch": 0.008659397049390635, - "grad_norm": 0.5526935784877048, - "learning_rate": 1.731879409878127e-05, - "loss": 0.7614, + "epoch": 0.33251231527093594, + "grad_norm": 0.09697817265987396, + "learning_rate": 0.0001690173388242972, + "loss": 0.7355, "step": 135 }, { - "epoch": 0.008980115458627326, - "grad_norm": 0.7407967682593112, - "learning_rate": 1.7960230917254652e-05, - "loss": 0.6847, + "epoch": 0.3448275862068966, + "grad_norm": 0.10179964452981949, + "learning_rate": 0.0001658401584698049, + "loss": 0.7308, "step": 140 }, { - "epoch": 0.009300833867864015, - "grad_norm": 1.0558900513241394, - "learning_rate": 1.8601667735728032e-05, - "loss": 0.6291, + "epoch": 0.35714285714285715, + "grad_norm": 0.10015404224395752, + "learning_rate": 0.00016254105729852464, + "loss": 0.7561, "step": 145 }, { - "epoch": 0.009621552277100705, - "grad_norm": 0.534720549756236, - "learning_rate": 1.9243104554201412e-05, - "loss": 0.6933, + "epoch": 0.3694581280788177, + "grad_norm": 0.09731245785951614, + "learning_rate": 0.00015912614448635782, + "loss": 0.7329, "step": 150 }, { - "epoch": 0.009942270686337396, - "grad_norm": 0.8533880407106053, - "learning_rate": 1.9884541372674793e-05, - "loss": 0.7405, + "epoch": 0.3817733990147783, + "grad_norm": 0.10137064009904861, + "learning_rate": 0.00015560174366570446, + "loss": 0.7234, "step": 155 }, { - "epoch": 0.010262989095574085, - "grad_norm": 2.2157811263492633, - "learning_rate": 2.0525978191148173e-05, - "loss": 0.6605, + "epoch": 0.39408866995073893, + "grad_norm": 0.09750425070524216, + "learning_rate": 0.0001519743812155516, + "loss": 0.7477, "step": 160 }, { - "epoch": 0.010583707504810776, - "grad_norm": 0.741553600188979, - "learning_rate": 2.1167415009621553e-05, - "loss": 0.6929, + "epoch": 0.4064039408866995, + "grad_norm": 0.09764320403337479, + "learning_rate": 0.00014825077417612186, + "loss": 0.7428, "step": 165 }, { - "epoch": 0.010904425914047467, - "grad_norm": 0.592672329081525, - "learning_rate": 2.1808851828094934e-05, - "loss": 0.7712, + "epoch": 0.4187192118226601, + "grad_norm": 0.0995442345738411, + "learning_rate": 0.00014443781781046136, + "loss": 0.7341, "step": 170 }, { - "epoch": 0.011225144323284156, - "grad_norm": 0.7143661642401767, - "learning_rate": 2.2450288646568314e-05, - "loss": 0.7264, + "epoch": 0.43103448275862066, + "grad_norm": 0.10272233188152313, + "learning_rate": 0.00014054257283599973, + "loss": 0.7409, "step": 175 }, { - "epoch": 0.011545862732520847, - "grad_norm": 0.7168820160805862, - "learning_rate": 2.3091725465041694e-05, - "loss": 0.7147, + "epoch": 0.4433497536945813, + "grad_norm": 0.09888844192028046, + "learning_rate": 0.00013657225234972695, + "loss": 0.7315, "step": 180 }, { - "epoch": 0.011866581141757537, - "grad_norm": 0.8106566714421187, - "learning_rate": 2.3733162283515075e-05, - "loss": 0.7091, + "epoch": 0.45566502463054187, + "grad_norm": 0.09412329643964767, + "learning_rate": 0.00013253420847119803, + "loss": 0.7506, "step": 185 }, { - "epoch": 0.012187299550994226, - "grad_norm": 1.131984585130431, - "learning_rate": 2.4374599101988455e-05, - "loss": 0.6725, + "epoch": 0.46798029556650245, + "grad_norm": 0.10598532110452652, + "learning_rate": 0.0001284359187281004, + "loss": 0.7325, "step": 190 }, { - "epoch": 0.012508017960230917, - "grad_norm": 0.5991057607118903, - "learning_rate": 2.5016035920461832e-05, - "loss": 0.5288, + "epoch": 0.4802955665024631, + "grad_norm": 0.1129438728094101, + "learning_rate": 0.0001242849722095936, + "loss": 0.7306, "step": 195 }, { - "epoch": 0.012828736369467608, - "grad_norm": 0.7441333776346593, - "learning_rate": 2.565747273893522e-05, - "loss": 0.6001, + "epoch": 0.49261083743842365, + "grad_norm": 0.11556894332170486, + "learning_rate": 0.00012008905551306356, + "loss": 0.7082, "step": 200 }, { - "epoch": 0.013149454778704297, - "grad_norm": 0.7177668887803592, - "learning_rate": 2.6298909557408596e-05, - "loss": 0.729, + "epoch": 0.5049261083743842, + "grad_norm": 0.10456952452659607, + "learning_rate": 0.00011585593851031347, + "loss": 0.7293, "step": 205 }, { - "epoch": 0.013470173187940988, - "grad_norm": 1.152356658408425, - "learning_rate": 2.6940346375881976e-05, - "loss": 0.649, + "epoch": 0.5172413793103449, + "grad_norm": 0.1134599968791008, + "learning_rate": 0.00011159345995955006, + "loss": 0.7379, "step": 210 }, { - "epoch": 0.013790891597177678, - "grad_norm": 0.8692844040434968, - "learning_rate": 2.758178319435536e-05, - "loss": 0.7514, + "epoch": 0.5295566502463054, + "grad_norm": 0.09614575654268265, + "learning_rate": 0.00010730951298980776, + "loss": 0.7418, "step": 215 }, { - "epoch": 0.014111610006414367, - "grad_norm": 0.7731506164196528, - "learning_rate": 2.8223220012828737e-05, - "loss": 0.7303, + "epoch": 0.541871921182266, + "grad_norm": 0.10285273939371109, + "learning_rate": 0.00010301203048469083, + "loss": 0.74, "step": 220 }, { - "epoch": 0.014432328415651058, - "grad_norm": 0.6675669855403799, - "learning_rate": 2.8864656831302117e-05, - "loss": 0.5974, + "epoch": 0.5541871921182266, + "grad_norm": 0.10371249169111252, + "learning_rate": 9.870897039249911e-05, + "loss": 0.7269, "step": 225 }, { - "epoch": 0.014753046824887749, - "grad_norm": 0.6511258667141646, - "learning_rate": 2.9506093649775497e-05, - "loss": 0.6502, + "epoch": 0.5665024630541872, + "grad_norm": 0.1026730090379715, + "learning_rate": 9.440830098993969e-05, + "loss": 0.734, "step": 230 }, { - "epoch": 0.015073765234124438, - "grad_norm": 0.8153736796805081, - "learning_rate": 3.014753046824888e-05, - "loss": 0.7187, + "epoch": 0.5788177339901478, + "grad_norm": 0.10388628393411636, + "learning_rate": 9.011798612671286e-05, + "loss": 0.7362, "step": 235 }, { - "epoch": 0.015394483643361129, - "grad_norm": 0.682020511101791, - "learning_rate": 3.078896728672226e-05, - "loss": 0.7687, + "epoch": 0.5911330049261084, + "grad_norm": 0.09820030629634857, + "learning_rate": 8.58459704782957e-05, + "loss": 0.7388, "step": 240 }, { - "epoch": 0.01571520205259782, - "grad_norm": 0.9723518475601368, - "learning_rate": 3.143040410519564e-05, - "loss": 0.6333, + "epoch": 0.603448275862069, + "grad_norm": 0.09999756515026093, + "learning_rate": 8.160016483423199e-05, + "loss": 0.7435, "step": 245 }, { - "epoch": 0.01603592046183451, - "grad_norm": 0.6642430373016617, - "learning_rate": 3.207184092366902e-05, - "loss": 0.7503, + "epoch": 0.6157635467980296, + "grad_norm": 0.09983257204294205, + "learning_rate": 7.738843144917119e-05, + "loss": 0.7298, "step": 250 }, { - "epoch": 0.0163566388710712, - "grad_norm": 1.0604072659225818, - "learning_rate": 3.27132777421424e-05, - "loss": 0.7296, + "epoch": 0.6280788177339901, + "grad_norm": 0.10523591935634613, + "learning_rate": 7.321856948378259e-05, + "loss": 0.7246, "step": 255 }, { - "epoch": 0.01667735728030789, - "grad_norm": 0.5389238146909613, - "learning_rate": 3.335471456061578e-05, - "loss": 0.6449, + "epoch": 0.6403940886699507, + "grad_norm": 0.09543181210756302, + "learning_rate": 6.909830056250527e-05, + "loss": 0.7361, "step": 260 }, { - "epoch": 0.01699807568954458, - "grad_norm": 1.0886777633244675, - "learning_rate": 3.3996151379089166e-05, - "loss": 0.6087, + "epoch": 0.6527093596059114, + "grad_norm": 0.09938356280326843, + "learning_rate": 6.503525447487715e-05, + "loss": 0.7129, "step": 265 }, { - "epoch": 0.01731879409878127, - "grad_norm": 0.7740455363235514, - "learning_rate": 3.463758819756254e-05, - "loss": 0.7716, + "epoch": 0.6650246305418719, + "grad_norm": 0.10975624620914459, + "learning_rate": 6.103695504692122e-05, + "loss": 0.7132, "step": 270 }, { - "epoch": 0.01763951250801796, - "grad_norm": 0.7842668340726671, - "learning_rate": 3.527902501603592e-05, - "loss": 0.6184, + "epoch": 0.6773399014778325, + "grad_norm": 0.09858807176351547, + "learning_rate": 5.7110806208751655e-05, + "loss": 0.7369, "step": 275 }, { - "epoch": 0.01796023091725465, - "grad_norm": 0.8724306321758412, - "learning_rate": 3.5920461834509304e-05, - "loss": 0.5647, + "epoch": 0.6896551724137931, + "grad_norm": 0.10205443948507309, + "learning_rate": 5.326407828419979e-05, + "loss": 0.7263, "step": 280 }, { - "epoch": 0.018280949326491342, - "grad_norm": 0.6108159651722537, - "learning_rate": 3.656189865298269e-05, - "loss": 0.7748, + "epoch": 0.7019704433497537, + "grad_norm": 0.09989740699529648, + "learning_rate": 4.9503894527847964e-05, + "loss": 0.7247, "step": 285 }, { - "epoch": 0.01860166773572803, - "grad_norm": 0.554729905784846, - "learning_rate": 3.7203335471456064e-05, - "loss": 0.6969, + "epoch": 0.7142857142857143, + "grad_norm": 0.10319098085165024, + "learning_rate": 4.583721793440188e-05, + "loss": 0.7618, "step": 290 }, { - "epoch": 0.01892238614496472, - "grad_norm": 0.5263100377774543, - "learning_rate": 3.784477228992944e-05, - "loss": 0.6331, + "epoch": 0.7266009852216748, + "grad_norm": 0.09355153143405914, + "learning_rate": 4.227083834482728e-05, + "loss": 0.728, "step": 295 }, { - "epoch": 0.01924310455420141, - "grad_norm": 0.7458575860438468, - "learning_rate": 3.8486209108402825e-05, - "loss": 0.7178, + "epoch": 0.7389162561576355, + "grad_norm": 0.10289464145898819, + "learning_rate": 3.881135987312757e-05, + "loss": 0.7315, "step": 300 }, { - "epoch": 0.0195638229634381, - "grad_norm": 0.7209749688824592, - "learning_rate": 3.912764592687621e-05, - "loss": 0.7774, + "epoch": 0.7512315270935961, + "grad_norm": 0.10275447368621826, + "learning_rate": 3.546518867704499e-05, + "loss": 0.7417, "step": 305 }, { - "epoch": 0.019884541372674792, - "grad_norm": 0.8894616503150261, - "learning_rate": 3.9769082745349585e-05, - "loss": 0.8354, + "epoch": 0.7635467980295566, + "grad_norm": 0.1038035973906517, + "learning_rate": 3.223852109533112e-05, + "loss": 0.7328, "step": 310 }, { - "epoch": 0.020205259781911483, - "grad_norm": 0.6322923436990817, - "learning_rate": 4.041051956382296e-05, - "loss": 0.6009, + "epoch": 0.7758620689655172, + "grad_norm": 0.10236402601003647, + "learning_rate": 2.9137332173554043e-05, + "loss": 0.7185, "step": 315 }, { - "epoch": 0.02052597819114817, - "grad_norm": 0.9519419320088668, - "learning_rate": 4.1051956382296346e-05, - "loss": 0.61, + "epoch": 0.7881773399014779, + "grad_norm": 0.10697459429502487, + "learning_rate": 2.616736459968936e-05, + "loss": 0.721, "step": 320 }, { - "epoch": 0.02084669660038486, - "grad_norm": 0.654969001631436, - "learning_rate": 4.169339320076972e-05, - "loss": 0.602, + "epoch": 0.8004926108374384, + "grad_norm": 0.09831953048706055, + "learning_rate": 2.33341180699841e-05, + "loss": 0.7252, "step": 325 }, { - "epoch": 0.021167415009621552, - "grad_norm": 0.6250956091655624, - "learning_rate": 4.233483001924311e-05, - "loss": 0.6451, + "epoch": 0.812807881773399, + "grad_norm": 0.0965956300497055, + "learning_rate": 2.0642839104785272e-05, + "loss": 0.7289, "step": 330 }, { - "epoch": 0.021488133418858243, - "grad_norm": 0.7392153639819625, - "learning_rate": 4.297626683771649e-05, - "loss": 0.7724, + "epoch": 0.8251231527093597, + "grad_norm": 0.10319492220878601, + "learning_rate": 1.8098511333192024e-05, + "loss": 0.7465, "step": 335 }, { - "epoch": 0.021808851828094934, - "grad_norm": 0.7914340872699686, - "learning_rate": 4.361770365618987e-05, - "loss": 0.7245, + "epoch": 0.8374384236453202, + "grad_norm": 0.10018905252218246, + "learning_rate": 1.570584626452173e-05, + "loss": 0.7323, "step": 340 }, { - "epoch": 0.022129570237331624, - "grad_norm": 0.5688389882467555, - "learning_rate": 4.4259140474663244e-05, - "loss": 0.5756, + "epoch": 0.8497536945812808, + "grad_norm": 0.09453442692756653, + "learning_rate": 1.3469274563679402e-05, + "loss": 0.716, "step": 345 }, { - "epoch": 0.02245028864656831, - "grad_norm": 0.6860675746425041, - "learning_rate": 4.490057729313663e-05, - "loss": 0.6515, + "epoch": 0.8620689655172413, + "grad_norm": 0.10382165759801865, + "learning_rate": 1.1392937846586215e-05, + "loss": 0.7309, "step": 350 }, { - "epoch": 0.022771007055805002, - "grad_norm": 0.8497624484329163, - "learning_rate": 4.554201411161001e-05, - "loss": 0.742, + "epoch": 0.874384236453202, + "grad_norm": 0.10078588873147964, + "learning_rate": 9.48068101086026e-06, + "loss": 0.7286, "step": 355 }, { - "epoch": 0.023091725465041693, - "grad_norm": 0.9589070592978919, - "learning_rate": 4.618345093008339e-05, - "loss": 0.7261, + "epoch": 0.8866995073891626, + "grad_norm": 0.09680734574794769, + "learning_rate": 7.736045115951251e-06, + "loss": 0.7392, "step": 360 }, { - "epoch": 0.023412443874278384, - "grad_norm": 0.5397605849852198, - "learning_rate": 4.6824887748556765e-05, - "loss": 0.721, + "epoch": 0.8990147783251231, + "grad_norm": 0.1009003072977066, + "learning_rate": 6.16226082591359e-06, + "loss": 0.7203, "step": 365 }, { - "epoch": 0.023733162283515075, - "grad_norm": 0.4218758453965537, - "learning_rate": 4.746632456703015e-05, - "loss": 0.7008, + "epoch": 0.9113300492610837, + "grad_norm": 0.10036424547433853, + "learning_rate": 4.762242426960262e-06, + "loss": 0.7332, "step": 370 }, { - "epoch": 0.024053880692751765, - "grad_norm": 0.4660237223228576, - "learning_rate": 4.810776138550353e-05, - "loss": 0.5954, + "epoch": 0.9236453201970444, + "grad_norm": 0.1032857745885849, + "learning_rate": 3.5385824308756587e-06, + "loss": 0.7102, "step": 375 }, { - "epoch": 0.024374599101988453, - "grad_norm": 1.1414044523272346, - "learning_rate": 4.874919820397691e-05, - "loss": 0.7092, + "epoch": 0.9359605911330049, + "grad_norm": 0.10901583731174469, + "learning_rate": 2.493546774280531e-06, + "loss": 0.7397, "step": 380 }, { - "epoch": 0.024695317511225143, - "grad_norm": 0.7794538849217394, - "learning_rate": 4.939063502245029e-05, - "loss": 0.6556, + "epoch": 0.9482758620689655, + "grad_norm": 0.10311324894428253, + "learning_rate": 1.6290706226390285e-06, + "loss": 0.7263, "step": 385 }, { - "epoch": 0.025016035920461834, - "grad_norm": 0.6784254428885176, - "learning_rate": 5.0032071840923663e-05, - "loss": 0.6523, + "epoch": 0.9605911330049262, + "grad_norm": 0.10355194658041, + "learning_rate": 9.46754786777726e-07, + "loss": 0.7135, "step": 390 }, { - "epoch": 0.025336754329698525, - "grad_norm": 0.5550050199692612, - "learning_rate": 5.0673508659397054e-05, - "loss": 0.7065, + "epoch": 0.9729064039408867, + "grad_norm": 0.09248184412717819, + "learning_rate": 4.4786275855247527e-07, + "loss": 0.7232, "step": 395 }, { - "epoch": 0.025657472738935216, - "grad_norm": 1.3489642897531091, - "learning_rate": 5.131494547787044e-05, - "loss": 0.657, + "epoch": 0.9852216748768473, + "grad_norm": 0.10015437006950378, + "learning_rate": 1.333183711524133e-07, + "loss": 0.7208, "step": 400 }, { - "epoch": 0.025978191148171906, - "grad_norm": 0.8799442657849393, - "learning_rate": 5.195638229634381e-05, - "loss": 0.7712, + "epoch": 0.9975369458128078, + "grad_norm": 0.09519725292921066, + "learning_rate": 3.7040883734462683e-09, + "loss": 0.702, "step": 405 }, - { - "epoch": 0.026298909557408594, - "grad_norm": 0.6211518086394292, - "learning_rate": 5.259781911481719e-05, - "loss": 0.6556, - "step": 410 - }, - { - "epoch": 0.026619627966645285, - "grad_norm": 0.527786179579098, - "learning_rate": 5.3239255933290575e-05, - "loss": 0.6304, - "step": 415 - }, - { - "epoch": 0.026940346375881975, - "grad_norm": 0.6225940856068456, - "learning_rate": 5.388069275176395e-05, - "loss": 0.7504, - "step": 420 - }, - { - "epoch": 0.027261064785118666, - "grad_norm": 0.7472577597094603, - "learning_rate": 5.4522129570237336e-05, - "loss": 0.5737, - "step": 425 - }, - { - "epoch": 0.027581783194355357, - "grad_norm": 0.9003123884674169, - "learning_rate": 5.516356638871072e-05, - "loss": 0.6751, - "step": 430 - }, - { - "epoch": 0.027902501603592048, - "grad_norm": 1.193348964937134, - "learning_rate": 5.580500320718409e-05, - "loss": 0.6685, - "step": 435 - }, - { - "epoch": 0.028223220012828735, - "grad_norm": 0.8207452374854483, - "learning_rate": 5.644644002565747e-05, - "loss": 0.5606, - "step": 440 - }, - { - "epoch": 0.028543938422065426, - "grad_norm": 0.6253317338492933, - "learning_rate": 5.7087876844130864e-05, - "loss": 0.6848, - "step": 445 - }, - { - "epoch": 0.028864656831302116, - "grad_norm": 0.5089340890778841, - "learning_rate": 5.7729313662604234e-05, - "loss": 0.5969, - "step": 450 - }, - { - "epoch": 0.029185375240538807, - "grad_norm": 0.6403611822232731, - "learning_rate": 5.837075048107762e-05, - "loss": 0.6663, - "step": 455 - }, - { - "epoch": 0.029506093649775498, - "grad_norm": 0.9017481128452324, - "learning_rate": 5.9012187299550994e-05, - "loss": 0.6253, - "step": 460 - }, - { - "epoch": 0.02982681205901219, - "grad_norm": 0.7102935907261797, - "learning_rate": 5.965362411802438e-05, - "loss": 0.6032, - "step": 465 - }, - { - "epoch": 0.030147530468248876, - "grad_norm": 0.572528044090495, - "learning_rate": 6.029506093649776e-05, - "loss": 0.7059, - "step": 470 - }, - { - "epoch": 0.030468248877485567, - "grad_norm": 0.6507630672872388, - "learning_rate": 6.093649775497113e-05, - "loss": 0.551, - "step": 475 - }, - { - "epoch": 0.030788967286722257, - "grad_norm": 0.4787872258590136, - "learning_rate": 6.157793457344452e-05, - "loss": 0.4953, - "step": 480 - }, - { - "epoch": 0.031109685695958948, - "grad_norm": 0.6446626662145857, - "learning_rate": 6.22193713919179e-05, - "loss": 0.7073, - "step": 485 - }, - { - "epoch": 0.03143040410519564, - "grad_norm": 0.46176975999305003, - "learning_rate": 6.286080821039128e-05, - "loss": 0.7031, - "step": 490 - }, - { - "epoch": 0.03175112251443233, - "grad_norm": 0.6364571216466376, - "learning_rate": 6.350224502886466e-05, - "loss": 0.7208, - "step": 495 - }, - { - "epoch": 0.03207184092366902, - "grad_norm": 0.6441271299481783, - "learning_rate": 6.414368184733804e-05, - "loss": 0.651, - "step": 500 - }, - { - "epoch": 0.03239255933290571, - "grad_norm": 0.5277240516380076, - "learning_rate": 6.478511866581141e-05, - "loss": 0.7596, - "step": 505 - }, - { - "epoch": 0.0327132777421424, - "grad_norm": 0.6102741778617242, - "learning_rate": 6.54265554842848e-05, - "loss": 0.8127, - "step": 510 - }, - { - "epoch": 0.033033996151379086, - "grad_norm": 1.2909493866489476, - "learning_rate": 6.606799230275818e-05, - "loss": 0.6172, - "step": 515 - }, - { - "epoch": 0.03335471456061578, - "grad_norm": 0.8290001510292774, - "learning_rate": 6.670942912123156e-05, - "loss": 0.7024, - "step": 520 - }, - { - "epoch": 0.03367543296985247, - "grad_norm": 0.5082074367378367, - "learning_rate": 6.735086593970495e-05, - "loss": 0.5993, - "step": 525 - }, - { - "epoch": 0.03399615137908916, - "grad_norm": 0.8948141239538124, - "learning_rate": 6.799230275817833e-05, - "loss": 0.6288, - "step": 530 - }, - { - "epoch": 0.03431686978832585, - "grad_norm": 0.704188041016483, - "learning_rate": 6.86337395766517e-05, - "loss": 0.6173, - "step": 535 - }, - { - "epoch": 0.03463758819756254, - "grad_norm": 0.8493617205406083, - "learning_rate": 6.927517639512509e-05, - "loss": 0.6472, - "step": 540 - }, - { - "epoch": 0.03495830660679923, - "grad_norm": 0.6071336551640186, - "learning_rate": 6.991661321359846e-05, - "loss": 0.7066, - "step": 545 - }, - { - "epoch": 0.03527902501603592, - "grad_norm": 0.6299761061285323, - "learning_rate": 7.055805003207184e-05, - "loss": 0.5004, - "step": 550 - }, - { - "epoch": 0.03559974342527261, - "grad_norm": 0.36030076856010784, - "learning_rate": 7.119948685054522e-05, - "loss": 0.5939, - "step": 555 - }, - { - "epoch": 0.0359204618345093, - "grad_norm": 0.5657747344505833, - "learning_rate": 7.184092366901861e-05, - "loss": 0.6394, - "step": 560 - }, - { - "epoch": 0.03624118024374599, - "grad_norm": 0.5512464769253931, - "learning_rate": 7.248236048749199e-05, - "loss": 0.7496, - "step": 565 - }, - { - "epoch": 0.036561898652982684, - "grad_norm": 1.6012481016769327, - "learning_rate": 7.312379730596537e-05, - "loss": 0.74, - "step": 570 - }, - { - "epoch": 0.03688261706221937, - "grad_norm": 0.539931431422469, - "learning_rate": 7.376523412443874e-05, - "loss": 0.655, - "step": 575 - }, - { - "epoch": 0.03720333547145606, - "grad_norm": 0.5792692922947517, - "learning_rate": 7.440667094291213e-05, - "loss": 0.6268, - "step": 580 - }, - { - "epoch": 0.03752405388069275, - "grad_norm": 0.44904646394711184, - "learning_rate": 7.504810776138551e-05, - "loss": 0.621, - "step": 585 - }, - { - "epoch": 0.03784477228992944, - "grad_norm": 0.38995414700568637, - "learning_rate": 7.568954457985888e-05, - "loss": 0.479, - "step": 590 - }, - { - "epoch": 0.03816549069916613, - "grad_norm": 0.6100292909911376, - "learning_rate": 7.633098139833227e-05, - "loss": 0.5087, - "step": 595 - }, - { - "epoch": 0.03848620910840282, - "grad_norm": 0.767232067956154, - "learning_rate": 7.697241821680565e-05, - "loss": 0.7094, - "step": 600 - }, - { - "epoch": 0.03880692751763951, - "grad_norm": 0.5093223662182627, - "learning_rate": 7.761385503527902e-05, - "loss": 0.6216, - "step": 605 - }, - { - "epoch": 0.0391276459268762, - "grad_norm": 0.604996949026468, - "learning_rate": 7.825529185375242e-05, - "loss": 0.6343, - "step": 610 - }, - { - "epoch": 0.039448364336112894, - "grad_norm": 0.6313031887029451, - "learning_rate": 7.88967286722258e-05, - "loss": 0.6814, - "step": 615 - }, - { - "epoch": 0.039769082745349585, - "grad_norm": 0.5515684818028812, - "learning_rate": 7.953816549069917e-05, - "loss": 0.6319, - "step": 620 - }, - { - "epoch": 0.040089801154586276, - "grad_norm": 0.9067875561472081, - "learning_rate": 8.017960230917255e-05, - "loss": 0.626, - "step": 625 - }, - { - "epoch": 0.040410519563822966, - "grad_norm": 0.4402348046376401, - "learning_rate": 8.082103912764592e-05, - "loss": 0.6581, - "step": 630 - }, - { - "epoch": 0.04073123797305965, - "grad_norm": 0.6653624732467279, - "learning_rate": 8.146247594611931e-05, - "loss": 0.6266, - "step": 635 - }, - { - "epoch": 0.04105195638229634, - "grad_norm": 0.7506028416479603, - "learning_rate": 8.210391276459269e-05, - "loss": 0.7304, - "step": 640 - }, - { - "epoch": 0.04137267479153303, - "grad_norm": 0.43305772472870374, - "learning_rate": 8.274534958306606e-05, - "loss": 0.6272, - "step": 645 - }, - { - "epoch": 0.04169339320076972, - "grad_norm": 0.7883927079167802, - "learning_rate": 8.338678640153945e-05, - "loss": 0.564, - "step": 650 - }, - { - "epoch": 0.04201411161000641, - "grad_norm": 0.6406069976891953, - "learning_rate": 8.402822322001283e-05, - "loss": 0.6594, - "step": 655 - }, - { - "epoch": 0.042334830019243104, - "grad_norm": 0.6650787540082842, - "learning_rate": 8.466966003848621e-05, - "loss": 0.6086, - "step": 660 - }, - { - "epoch": 0.042655548428479795, - "grad_norm": 0.6280025445964529, - "learning_rate": 8.53110968569596e-05, - "loss": 0.6188, - "step": 665 - }, - { - "epoch": 0.042976266837716486, - "grad_norm": 0.6181001304138187, - "learning_rate": 8.595253367543298e-05, - "loss": 0.6454, - "step": 670 - }, - { - "epoch": 0.043296985246953176, - "grad_norm": 0.9164302121431295, - "learning_rate": 8.659397049390635e-05, - "loss": 0.7409, - "step": 675 - }, - { - "epoch": 0.04361770365618987, - "grad_norm": 0.5146934352157929, - "learning_rate": 8.723540731237973e-05, - "loss": 0.7961, - "step": 680 - }, - { - "epoch": 0.04393842206542656, - "grad_norm": 0.8884783771604745, - "learning_rate": 8.787684413085312e-05, - "loss": 0.7023, - "step": 685 - }, - { - "epoch": 0.04425914047466325, - "grad_norm": 0.5972459928844025, - "learning_rate": 8.851828094932649e-05, - "loss": 0.6437, - "step": 690 - }, - { - "epoch": 0.04457985888389993, - "grad_norm": 1.027137591537084, - "learning_rate": 8.915971776779987e-05, - "loss": 0.6461, - "step": 695 - }, - { - "epoch": 0.04490057729313662, - "grad_norm": 0.684561126713197, - "learning_rate": 8.980115458627326e-05, - "loss": 0.6417, - "step": 700 - }, - { - "epoch": 0.045221295702373314, - "grad_norm": 0.5791897637489775, - "learning_rate": 9.044259140474664e-05, - "loss": 0.6545, - "step": 705 - }, - { - "epoch": 0.045542014111610005, - "grad_norm": 0.6093322265483176, - "learning_rate": 9.108402822322002e-05, - "loss": 0.5431, - "step": 710 - }, - { - "epoch": 0.045862732520846695, - "grad_norm": 1.20412780035678, - "learning_rate": 9.172546504169339e-05, - "loss": 0.6122, - "step": 715 - }, - { - "epoch": 0.046183450930083386, - "grad_norm": 0.4344736289735069, - "learning_rate": 9.236690186016678e-05, - "loss": 0.6896, - "step": 720 - }, - { - "epoch": 0.04650416933932008, - "grad_norm": 0.479553471093618, - "learning_rate": 9.300833867864016e-05, - "loss": 0.7446, - "step": 725 - }, - { - "epoch": 0.04682488774855677, - "grad_norm": 0.4175717995477323, - "learning_rate": 9.364977549711353e-05, - "loss": 0.5635, - "step": 730 - }, - { - "epoch": 0.04714560615779346, - "grad_norm": 0.43527442203162864, - "learning_rate": 9.429121231558691e-05, - "loss": 0.5984, - "step": 735 - }, - { - "epoch": 0.04746632456703015, - "grad_norm": 0.6764034597420034, - "learning_rate": 9.49326491340603e-05, - "loss": 0.6575, - "step": 740 - }, - { - "epoch": 0.04778704297626684, - "grad_norm": 0.6994297524226791, - "learning_rate": 9.557408595253368e-05, - "loss": 0.6381, - "step": 745 - }, - { - "epoch": 0.04810776138550353, - "grad_norm": 0.5924112864276749, - "learning_rate": 9.621552277100707e-05, - "loss": 0.6273, - "step": 750 - }, - { - "epoch": 0.04842847979474022, - "grad_norm": 0.529839489096258, - "learning_rate": 9.685695958948045e-05, - "loss": 0.5524, - "step": 755 - }, - { - "epoch": 0.048749198203976905, - "grad_norm": 0.5412474092793377, - "learning_rate": 9.749839640795382e-05, - "loss": 0.6584, - "step": 760 - }, - { - "epoch": 0.049069916613213596, - "grad_norm": 0.62325178443721, - "learning_rate": 9.81398332264272e-05, - "loss": 0.7556, - "step": 765 - }, - { - "epoch": 0.04939063502245029, - "grad_norm": 0.6185109985068113, - "learning_rate": 9.878127004490059e-05, - "loss": 0.6396, - "step": 770 - }, - { - "epoch": 0.04971135343168698, - "grad_norm": 0.5650081284141024, - "learning_rate": 9.942270686337396e-05, - "loss": 0.6761, - "step": 775 - }, - { - "epoch": 0.05003207184092367, - "grad_norm": 0.6838574740900004, - "learning_rate": 0.00010006414368184733, - "loss": 0.6228, - "step": 780 - }, - { - "epoch": 0.05035279025016036, - "grad_norm": 0.6196830613093786, - "learning_rate": 0.00010070558050032072, - "loss": 0.6648, - "step": 785 - }, - { - "epoch": 0.05067350865939705, - "grad_norm": 0.5504649558203162, - "learning_rate": 0.00010134701731879411, - "loss": 0.697, - "step": 790 - }, - { - "epoch": 0.05099422706863374, - "grad_norm": 0.654837344932131, - "learning_rate": 0.00010198845413726748, - "loss": 0.6986, - "step": 795 - }, - { - "epoch": 0.05131494547787043, - "grad_norm": 0.7011329232246133, - "learning_rate": 0.00010262989095574088, - "loss": 0.7206, - "step": 800 - }, - { - "epoch": 0.05163566388710712, - "grad_norm": 0.6807528459174979, - "learning_rate": 0.00010327132777421425, - "loss": 0.6834, - "step": 805 - }, - { - "epoch": 0.05195638229634381, - "grad_norm": 0.8856217259425705, - "learning_rate": 0.00010391276459268762, - "loss": 0.7028, - "step": 810 - }, - { - "epoch": 0.052277100705580504, - "grad_norm": 0.5962908888781525, - "learning_rate": 0.00010455420141116101, - "loss": 0.5113, - "step": 815 - }, - { - "epoch": 0.05259781911481719, - "grad_norm": 0.9014177998142, - "learning_rate": 0.00010519563822963438, - "loss": 0.6129, - "step": 820 - }, - { - "epoch": 0.05291853752405388, - "grad_norm": 0.6753791164158136, - "learning_rate": 0.00010583707504810775, - "loss": 0.756, - "step": 825 - }, - { - "epoch": 0.05323925593329057, - "grad_norm": 0.48791891735015575, - "learning_rate": 0.00010647851186658115, - "loss": 0.5352, - "step": 830 - }, - { - "epoch": 0.05355997434252726, - "grad_norm": 0.7373582383544524, - "learning_rate": 0.00010711994868505453, - "loss": 0.7345, - "step": 835 - }, - { - "epoch": 0.05388069275176395, - "grad_norm": 0.49964472362766127, - "learning_rate": 0.0001077613855035279, - "loss": 0.7314, - "step": 840 - }, - { - "epoch": 0.05420141116100064, - "grad_norm": 0.48415921267506284, - "learning_rate": 0.0001084028223220013, - "loss": 0.5548, - "step": 845 - }, - { - "epoch": 0.05452212957023733, - "grad_norm": 0.6197607704084165, - "learning_rate": 0.00010904425914047467, - "loss": 0.6271, - "step": 850 - }, - { - "epoch": 0.05484284797947402, - "grad_norm": 0.677683386452661, - "learning_rate": 0.00010968569595894804, - "loss": 0.7739, - "step": 855 - }, - { - "epoch": 0.055163566388710714, - "grad_norm": 0.7298215600744931, - "learning_rate": 0.00011032713277742144, - "loss": 0.6813, - "step": 860 - }, - { - "epoch": 0.055484284797947404, - "grad_norm": 0.49556474863687744, - "learning_rate": 0.00011096856959589481, - "loss": 0.7165, - "step": 865 - }, - { - "epoch": 0.055805003207184095, - "grad_norm": 0.4755941527376833, - "learning_rate": 0.00011161000641436818, - "loss": 0.7439, - "step": 870 - }, - { - "epoch": 0.056125721616420786, - "grad_norm": 0.8183131489420952, - "learning_rate": 0.00011225144323284158, - "loss": 0.7741, - "step": 875 - }, - { - "epoch": 0.05644644002565747, - "grad_norm": 0.577588746397813, - "learning_rate": 0.00011289288005131495, - "loss": 0.6951, - "step": 880 - }, - { - "epoch": 0.05676715843489416, - "grad_norm": 0.3104626766912227, - "learning_rate": 0.00011353431686978833, - "loss": 0.6068, - "step": 885 - }, - { - "epoch": 0.05708787684413085, - "grad_norm": 0.6364607751424182, - "learning_rate": 0.00011417575368826173, - "loss": 0.6601, - "step": 890 - }, - { - "epoch": 0.05740859525336754, - "grad_norm": 0.5489548053878326, - "learning_rate": 0.0001148171905067351, - "loss": 0.6498, - "step": 895 - }, - { - "epoch": 0.05772931366260423, - "grad_norm": 0.8290809901584166, - "learning_rate": 0.00011545862732520847, - "loss": 0.7598, - "step": 900 - }, - { - "epoch": 0.058050032071840924, - "grad_norm": 0.9889805070312973, - "learning_rate": 0.00011610006414368186, - "loss": 0.6528, - "step": 905 - }, - { - "epoch": 0.058370750481077614, - "grad_norm": 0.5034027315098741, - "learning_rate": 0.00011674150096215524, - "loss": 0.6916, - "step": 910 - }, - { - "epoch": 0.058691468890314305, - "grad_norm": 0.5211514737547632, - "learning_rate": 0.0001173829377806286, - "loss": 0.6455, - "step": 915 - }, - { - "epoch": 0.059012187299550996, - "grad_norm": 0.5915915443611912, - "learning_rate": 0.00011802437459910199, - "loss": 0.619, - "step": 920 - }, - { - "epoch": 0.05933290570878769, - "grad_norm": 0.6356669965403786, - "learning_rate": 0.00011866581141757537, - "loss": 0.6339, - "step": 925 - }, - { - "epoch": 0.05965362411802438, - "grad_norm": 0.5203747383599147, - "learning_rate": 0.00011930724823604876, - "loss": 0.7174, - "step": 930 - }, - { - "epoch": 0.05997434252726107, - "grad_norm": 0.4400681567105204, - "learning_rate": 0.00011994868505452213, - "loss": 0.757, - "step": 935 - }, - { - "epoch": 0.06029506093649775, - "grad_norm": 0.5134463977576896, - "learning_rate": 0.00012059012187299552, - "loss": 0.5941, - "step": 940 - }, - { - "epoch": 0.06061577934573444, - "grad_norm": 2.1514572255404563, - "learning_rate": 0.0001212315586914689, - "loss": 0.5872, - "step": 945 - }, - { - "epoch": 0.06093649775497113, - "grad_norm": 0.5533804918183362, - "learning_rate": 0.00012187299550994226, - "loss": 0.681, - "step": 950 - }, - { - "epoch": 0.061257216164207824, - "grad_norm": 0.43736512301454394, - "learning_rate": 0.00012251443232841566, - "loss": 0.6241, - "step": 955 - }, - { - "epoch": 0.061577934573444515, - "grad_norm": 0.7036625039029036, - "learning_rate": 0.00012315586914688904, - "loss": 0.7528, - "step": 960 - }, - { - "epoch": 0.061898652982681206, - "grad_norm": 0.5883952786255479, - "learning_rate": 0.0001237973059653624, - "loss": 0.6132, - "step": 965 - }, - { - "epoch": 0.062219371391917896, - "grad_norm": 0.593687347482467, - "learning_rate": 0.0001244387427838358, - "loss": 0.6453, - "step": 970 - }, - { - "epoch": 0.06254008980115458, - "grad_norm": 0.8797836564455341, - "learning_rate": 0.00012508017960230917, - "loss": 0.6658, - "step": 975 - }, - { - "epoch": 0.06286080821039128, - "grad_norm": 0.8231331839998992, - "learning_rate": 0.00012572161642078255, - "loss": 0.6615, - "step": 980 - }, - { - "epoch": 0.06318152661962796, - "grad_norm": 0.5202568995405973, - "learning_rate": 0.00012636305323925594, - "loss": 0.8156, - "step": 985 - }, - { - "epoch": 0.06350224502886466, - "grad_norm": 0.623580493806845, - "learning_rate": 0.00012700449005772932, - "loss": 0.6959, - "step": 990 - }, - { - "epoch": 0.06382296343810134, - "grad_norm": 0.5798575607273242, - "learning_rate": 0.0001276459268762027, - "loss": 0.5538, - "step": 995 - }, - { - "epoch": 0.06414368184733804, - "grad_norm": 0.6970653558425355, - "learning_rate": 0.0001282873636946761, - "loss": 0.7063, - "step": 1000 - }, - { - "epoch": 0.06446440025657472, - "grad_norm": 0.8241115273976609, - "learning_rate": 0.00012892880051314947, - "loss": 0.6371, - "step": 1005 - }, - { - "epoch": 0.06478511866581142, - "grad_norm": 0.7769868872755683, - "learning_rate": 0.00012957023733162283, - "loss": 0.6202, - "step": 1010 - }, - { - "epoch": 0.0651058370750481, - "grad_norm": 0.4974832858382039, - "learning_rate": 0.00013021167415009624, - "loss": 0.652, - "step": 1015 - }, - { - "epoch": 0.0654265554842848, - "grad_norm": 0.7988613498086312, - "learning_rate": 0.0001308531109685696, - "loss": 0.6179, - "step": 1020 - }, - { - "epoch": 0.06574727389352149, - "grad_norm": 0.5975032929676001, - "learning_rate": 0.00013149454778704298, - "loss": 0.7551, - "step": 1025 - }, - { - "epoch": 0.06606799230275817, - "grad_norm": 0.46478481189365806, - "learning_rate": 0.00013213598460551636, - "loss": 0.6643, - "step": 1030 - }, - { - "epoch": 0.06638871071199487, - "grad_norm": 0.5467473022741837, - "learning_rate": 0.00013277742142398975, - "loss": 0.6786, - "step": 1035 - }, - { - "epoch": 0.06670942912123155, - "grad_norm": 0.788511157965346, - "learning_rate": 0.00013341885824246313, - "loss": 0.699, - "step": 1040 - }, - { - "epoch": 0.06703014753046825, - "grad_norm": 0.7378591658959022, - "learning_rate": 0.0001340602950609365, - "loss": 0.5498, - "step": 1045 - }, - { - "epoch": 0.06735086593970493, - "grad_norm": 0.524580967213953, - "learning_rate": 0.0001347017318794099, - "loss": 0.7092, - "step": 1050 - }, - { - "epoch": 0.06767158434894163, - "grad_norm": 10.11033461685559, - "learning_rate": 0.00013534316869788325, - "loss": 0.6694, - "step": 1055 - }, - { - "epoch": 0.06799230275817832, - "grad_norm": 0.6039061177211199, - "learning_rate": 0.00013598460551635666, - "loss": 0.6105, - "step": 1060 - }, - { - "epoch": 0.06831302116741501, - "grad_norm": 0.7863303522868051, - "learning_rate": 0.00013662604233483002, - "loss": 0.6867, - "step": 1065 - }, - { - "epoch": 0.0686337395766517, - "grad_norm": 0.6197712573428509, - "learning_rate": 0.0001372674791533034, - "loss": 0.6893, - "step": 1070 - }, - { - "epoch": 0.0689544579858884, - "grad_norm": 0.43888192750291055, - "learning_rate": 0.0001379089159717768, - "loss": 0.7157, - "step": 1075 - }, - { - "epoch": 0.06927517639512508, - "grad_norm": 0.7306535592576365, - "learning_rate": 0.00013855035279025017, - "loss": 0.7648, - "step": 1080 - }, - { - "epoch": 0.06959589480436178, - "grad_norm": 0.5833095869044383, - "learning_rate": 0.00013919178960872356, - "loss": 0.6655, - "step": 1085 - }, - { - "epoch": 0.06991661321359846, - "grad_norm": 0.3330431009666685, - "learning_rate": 0.0001398332264271969, - "loss": 0.5681, - "step": 1090 - }, - { - "epoch": 0.07023733162283514, - "grad_norm": 0.8485768964159431, - "learning_rate": 0.00014047466324567032, - "loss": 0.6139, - "step": 1095 - }, - { - "epoch": 0.07055805003207184, - "grad_norm": 0.48935398848123357, - "learning_rate": 0.00014111610006414368, - "loss": 0.6591, - "step": 1100 - }, - { - "epoch": 0.07087876844130853, - "grad_norm": 0.6694840056428312, - "learning_rate": 0.00014175753688261706, - "loss": 0.5986, - "step": 1105 - }, - { - "epoch": 0.07119948685054522, - "grad_norm": 0.7907065672480846, - "learning_rate": 0.00014239897370109045, - "loss": 0.8477, - "step": 1110 - }, - { - "epoch": 0.07152020525978191, - "grad_norm": 0.45721463553494507, - "learning_rate": 0.00014304041051956383, - "loss": 0.6511, - "step": 1115 - }, - { - "epoch": 0.0718409236690186, - "grad_norm": 0.5932773719713492, - "learning_rate": 0.00014368184733803721, - "loss": 0.6205, - "step": 1120 - }, - { - "epoch": 0.07216164207825529, - "grad_norm": 0.7933284443225256, - "learning_rate": 0.0001443232841565106, - "loss": 0.524, - "step": 1125 - }, - { - "epoch": 0.07248236048749199, - "grad_norm": 0.4677884329123659, - "learning_rate": 0.00014496472097498398, - "loss": 0.555, - "step": 1130 - }, - { - "epoch": 0.07280307889672867, - "grad_norm": 0.850254756515873, - "learning_rate": 0.00014560615779345734, - "loss": 0.7627, - "step": 1135 - }, - { - "epoch": 0.07312379730596537, - "grad_norm": 0.522103651356661, - "learning_rate": 0.00014624759461193075, - "loss": 0.7255, - "step": 1140 - }, - { - "epoch": 0.07344451571520205, - "grad_norm": 0.6063292373713933, - "learning_rate": 0.0001468890314304041, - "loss": 0.6222, - "step": 1145 - }, - { - "epoch": 0.07376523412443874, - "grad_norm": 0.9713303841273095, - "learning_rate": 0.0001475304682488775, - "loss": 0.7341, - "step": 1150 - }, - { - "epoch": 0.07408595253367543, - "grad_norm": 0.837884018201796, - "learning_rate": 0.00014817190506735087, - "loss": 0.6822, - "step": 1155 - }, - { - "epoch": 0.07440667094291212, - "grad_norm": 0.39437246960153793, - "learning_rate": 0.00014881334188582426, - "loss": 0.7116, - "step": 1160 - }, - { - "epoch": 0.07472738935214882, - "grad_norm": 0.6202094758512229, - "learning_rate": 0.0001494547787042976, - "loss": 0.6015, - "step": 1165 - }, - { - "epoch": 0.0750481077613855, - "grad_norm": 0.8135054592447762, - "learning_rate": 0.00015009621552277102, - "loss": 0.6487, - "step": 1170 - }, - { - "epoch": 0.0753688261706222, - "grad_norm": 0.5507524560111344, - "learning_rate": 0.0001507376523412444, - "loss": 0.5846, - "step": 1175 - }, - { - "epoch": 0.07568954457985888, - "grad_norm": 0.5961939171868111, - "learning_rate": 0.00015137908915971776, - "loss": 0.6111, - "step": 1180 - }, - { - "epoch": 0.07601026298909558, - "grad_norm": 0.5352884760699661, - "learning_rate": 0.00015202052597819118, - "loss": 0.6401, - "step": 1185 - }, - { - "epoch": 0.07633098139833226, - "grad_norm": 0.6620834657515849, - "learning_rate": 0.00015266196279666453, - "loss": 0.7108, - "step": 1190 - }, - { - "epoch": 0.07665169980756896, - "grad_norm": 0.24886726646481336, - "learning_rate": 0.00015330339961513792, - "loss": 0.465, - "step": 1195 - }, - { - "epoch": 0.07697241821680564, - "grad_norm": 0.5949618384904851, - "learning_rate": 0.0001539448364336113, - "loss": 0.6872, - "step": 1200 - }, - { - "epoch": 0.07729313662604234, - "grad_norm": 0.7888477619326826, - "learning_rate": 0.00015458627325208468, - "loss": 0.5609, - "step": 1205 - }, - { - "epoch": 0.07761385503527903, - "grad_norm": 1.0669700966748508, - "learning_rate": 0.00015522771007055804, - "loss": 0.7743, - "step": 1210 - }, - { - "epoch": 0.07793457344451571, - "grad_norm": 0.7068283314311553, - "learning_rate": 0.00015586914688903145, - "loss": 0.6263, - "step": 1215 - }, - { - "epoch": 0.0782552918537524, - "grad_norm": 0.5841407337661559, - "learning_rate": 0.00015651058370750483, - "loss": 0.6187, - "step": 1220 - }, - { - "epoch": 0.07857601026298909, - "grad_norm": 0.6229227132294815, - "learning_rate": 0.0001571520205259782, - "loss": 0.7183, - "step": 1225 - }, - { - "epoch": 0.07889672867222579, - "grad_norm": 0.6002586833079545, - "learning_rate": 0.0001577934573444516, - "loss": 0.7077, - "step": 1230 - }, - { - "epoch": 0.07921744708146247, - "grad_norm": 0.5383734940611982, - "learning_rate": 0.00015843489416292496, - "loss": 0.6251, - "step": 1235 - }, - { - "epoch": 0.07953816549069917, - "grad_norm": 0.6051581628244698, - "learning_rate": 0.00015907633098139834, - "loss": 0.6742, - "step": 1240 - }, - { - "epoch": 0.07985888389993585, - "grad_norm": 0.6524111511727346, - "learning_rate": 0.0001597177677998717, - "loss": 0.6258, - "step": 1245 - }, - { - "epoch": 0.08017960230917255, - "grad_norm": 0.8452071724294624, - "learning_rate": 0.0001603592046183451, - "loss": 0.6583, - "step": 1250 - }, - { - "epoch": 0.08050032071840924, - "grad_norm": 0.5380526459581828, - "learning_rate": 0.00016100064143681847, - "loss": 0.7976, - "step": 1255 - }, - { - "epoch": 0.08082103912764593, - "grad_norm": 0.846065125270878, - "learning_rate": 0.00016164207825529185, - "loss": 0.5684, - "step": 1260 - }, - { - "epoch": 0.08114175753688262, - "grad_norm": 1.2668855662638892, - "learning_rate": 0.00016228351507376523, - "loss": 0.6079, - "step": 1265 - }, - { - "epoch": 0.0814624759461193, - "grad_norm": 0.7795964267281216, - "learning_rate": 0.00016292495189223862, - "loss": 0.646, - "step": 1270 - }, - { - "epoch": 0.081783194355356, - "grad_norm": 0.7027735707273621, - "learning_rate": 0.000163566388710712, - "loss": 0.7358, - "step": 1275 - }, - { - "epoch": 0.08210391276459268, - "grad_norm": 0.6792816013615487, - "learning_rate": 0.00016420782552918538, - "loss": 0.6695, - "step": 1280 - }, - { - "epoch": 0.08242463117382938, - "grad_norm": 0.6182179483058359, - "learning_rate": 0.00016484926234765877, - "loss": 0.6096, - "step": 1285 - }, - { - "epoch": 0.08274534958306606, - "grad_norm": 0.7701573171054498, - "learning_rate": 0.00016549069916613212, - "loss": 0.6467, - "step": 1290 - }, - { - "epoch": 0.08306606799230276, - "grad_norm": 0.8699247842006342, - "learning_rate": 0.00016613213598460554, - "loss": 0.5635, - "step": 1295 - }, - { - "epoch": 0.08338678640153944, - "grad_norm": 1.5815526952211336, - "learning_rate": 0.0001667735728030789, - "loss": 0.7091, - "step": 1300 - }, - { - "epoch": 0.08370750481077614, - "grad_norm": 1.1184328365345817, - "learning_rate": 0.00016741500962155228, - "loss": 0.6598, - "step": 1305 - }, - { - "epoch": 0.08402822322001283, - "grad_norm": 0.5795213958251844, - "learning_rate": 0.00016805644644002566, - "loss": 0.6638, - "step": 1310 - }, - { - "epoch": 0.08434894162924952, - "grad_norm": 0.9373149156332843, - "learning_rate": 0.00016869788325849904, - "loss": 0.6091, - "step": 1315 - }, - { - "epoch": 0.08466966003848621, - "grad_norm": 1.581754110063961, - "learning_rate": 0.00016933932007697243, - "loss": 0.6641, - "step": 1320 - }, - { - "epoch": 0.0849903784477229, - "grad_norm": 0.8932544552326179, - "learning_rate": 0.0001699807568954458, - "loss": 0.7052, - "step": 1325 - }, - { - "epoch": 0.08531109685695959, - "grad_norm": 0.7663989496912428, - "learning_rate": 0.0001706221937139192, - "loss": 0.6517, - "step": 1330 - }, - { - "epoch": 0.08563181526619627, - "grad_norm": 0.46405474836741084, - "learning_rate": 0.00017126363053239255, - "loss": 0.6357, - "step": 1335 - }, - { - "epoch": 0.08595253367543297, - "grad_norm": 0.6382018266002509, - "learning_rate": 0.00017190506735086596, - "loss": 0.5614, - "step": 1340 - }, - { - "epoch": 0.08627325208466965, - "grad_norm": 0.43085923514031815, - "learning_rate": 0.00017254650416933932, - "loss": 0.6499, - "step": 1345 - }, - { - "epoch": 0.08659397049390635, - "grad_norm": 0.8362450675258178, - "learning_rate": 0.0001731879409878127, - "loss": 0.7166, - "step": 1350 - }, - { - "epoch": 0.08691468890314304, - "grad_norm": 0.6383324045212788, - "learning_rate": 0.00017382937780628609, - "loss": 0.6072, - "step": 1355 - }, - { - "epoch": 0.08723540731237973, - "grad_norm": 0.8935601954358443, - "learning_rate": 0.00017447081462475947, - "loss": 0.6483, - "step": 1360 - }, - { - "epoch": 0.08755612572161642, - "grad_norm": 0.7202566228037989, - "learning_rate": 0.00017511225144323285, - "loss": 0.5967, - "step": 1365 - }, - { - "epoch": 0.08787684413085312, - "grad_norm": 0.5230697995372986, - "learning_rate": 0.00017575368826170624, - "loss": 0.7513, - "step": 1370 - }, - { - "epoch": 0.0881975625400898, - "grad_norm": 0.816705171178794, - "learning_rate": 0.00017639512508017962, - "loss": 0.6651, - "step": 1375 - }, - { - "epoch": 0.0885182809493265, - "grad_norm": 0.5342326962298032, - "learning_rate": 0.00017703656189865298, - "loss": 0.5963, - "step": 1380 - }, - { - "epoch": 0.08883899935856318, - "grad_norm": 0.5984545509333057, - "learning_rate": 0.0001776779987171264, - "loss": 0.6455, - "step": 1385 - }, - { - "epoch": 0.08915971776779986, - "grad_norm": 0.6477898184624558, - "learning_rate": 0.00017831943553559974, - "loss": 0.7328, - "step": 1390 - }, - { - "epoch": 0.08948043617703656, - "grad_norm": 0.5092110599627088, - "learning_rate": 0.00017896087235407313, - "loss": 0.6279, - "step": 1395 - }, - { - "epoch": 0.08980115458627325, - "grad_norm": 0.7029802255673286, - "learning_rate": 0.0001796023091725465, - "loss": 0.7776, - "step": 1400 - }, - { - "epoch": 0.09012187299550994, - "grad_norm": 0.8832343335799728, - "learning_rate": 0.0001802437459910199, - "loss": 0.6111, - "step": 1405 - }, - { - "epoch": 0.09044259140474663, - "grad_norm": 0.8016746694750925, - "learning_rate": 0.00018088518280949328, - "loss": 0.6695, - "step": 1410 - }, - { - "epoch": 0.09076330981398333, - "grad_norm": 1.1634306884211862, - "learning_rate": 0.00018152661962796664, - "loss": 0.7114, - "step": 1415 - }, - { - "epoch": 0.09108402822322001, - "grad_norm": 0.6624473023568856, - "learning_rate": 0.00018216805644644005, - "loss": 0.7559, - "step": 1420 - }, - { - "epoch": 0.09140474663245671, - "grad_norm": 0.8042759336949421, - "learning_rate": 0.0001828094932649134, - "loss": 0.7583, - "step": 1425 - }, - { - "epoch": 0.09172546504169339, - "grad_norm": 0.9772784468918035, - "learning_rate": 0.00018345093008338679, - "loss": 0.6853, - "step": 1430 - }, - { - "epoch": 0.09204618345093009, - "grad_norm": 0.5803428867246113, - "learning_rate": 0.00018409236690186017, - "loss": 0.6201, - "step": 1435 - }, - { - "epoch": 0.09236690186016677, - "grad_norm": 1.0135605254585267, - "learning_rate": 0.00018473380372033355, - "loss": 0.5897, - "step": 1440 - }, - { - "epoch": 0.09268762026940347, - "grad_norm": 1.0146665351265378, - "learning_rate": 0.00018537524053880694, - "loss": 0.7739, - "step": 1445 - }, - { - "epoch": 0.09300833867864015, - "grad_norm": 0.6409550994368336, - "learning_rate": 0.00018601667735728032, - "loss": 0.652, - "step": 1450 - }, - { - "epoch": 0.09332905708787684, - "grad_norm": 0.9063409381829404, - "learning_rate": 0.0001866581141757537, - "loss": 0.5091, - "step": 1455 - }, - { - "epoch": 0.09364977549711354, - "grad_norm": 0.6035311851346433, - "learning_rate": 0.00018729955099422706, - "loss": 0.5951, - "step": 1460 - }, - { - "epoch": 0.09397049390635022, - "grad_norm": 0.4305914555852047, - "learning_rate": 0.00018794098781270047, - "loss": 0.6979, - "step": 1465 - }, - { - "epoch": 0.09429121231558692, - "grad_norm": 0.592322337116948, - "learning_rate": 0.00018858242463117383, - "loss": 0.7894, - "step": 1470 - }, - { - "epoch": 0.0946119307248236, - "grad_norm": 0.7019994823024447, - "learning_rate": 0.0001892238614496472, - "loss": 0.6685, - "step": 1475 - }, - { - "epoch": 0.0949326491340603, - "grad_norm": 0.6511984672543305, - "learning_rate": 0.0001898652982681206, - "loss": 0.7993, - "step": 1480 - }, - { - "epoch": 0.09525336754329698, - "grad_norm": 0.7220123377652353, - "learning_rate": 0.00019050673508659398, - "loss": 0.6424, - "step": 1485 - }, - { - "epoch": 0.09557408595253368, - "grad_norm": 0.569165004645741, - "learning_rate": 0.00019114817190506736, - "loss": 0.5879, - "step": 1490 - }, - { - "epoch": 0.09589480436177036, - "grad_norm": 0.6841283140830406, - "learning_rate": 0.00019178960872354075, - "loss": 0.6944, - "step": 1495 - }, - { - "epoch": 0.09621552277100706, - "grad_norm": 0.5806780565962407, - "learning_rate": 0.00019243104554201413, - "loss": 0.7039, - "step": 1500 - }, - { - "epoch": 0.09653624118024375, - "grad_norm": 1.0231588558162683, - "learning_rate": 0.0001930724823604875, - "loss": 0.7447, - "step": 1505 - }, - { - "epoch": 0.09685695958948044, - "grad_norm": 0.6513202839027658, - "learning_rate": 0.0001937139191789609, - "loss": 0.6013, - "step": 1510 - }, - { - "epoch": 0.09717767799871713, - "grad_norm": 0.7845659853361092, - "learning_rate": 0.00019435535599743425, - "loss": 0.6069, - "step": 1515 - }, - { - "epoch": 0.09749839640795381, - "grad_norm": 0.7194048768316849, - "learning_rate": 0.00019499679281590764, - "loss": 0.7641, - "step": 1520 - }, - { - "epoch": 0.09781911481719051, - "grad_norm": 0.6191788469641755, - "learning_rate": 0.00019563822963438102, - "loss": 0.7448, - "step": 1525 - }, - { - "epoch": 0.09813983322642719, - "grad_norm": 0.7426546495672112, - "learning_rate": 0.0001962796664528544, - "loss": 0.7761, - "step": 1530 - }, - { - "epoch": 0.09846055163566389, - "grad_norm": 0.7572762314827131, - "learning_rate": 0.0001969211032713278, - "loss": 0.8618, - "step": 1535 - }, - { - "epoch": 0.09878127004490057, - "grad_norm": 0.6372317781767599, - "learning_rate": 0.00019756254008980117, - "loss": 0.6666, - "step": 1540 - }, - { - "epoch": 0.09910198845413727, - "grad_norm": 0.6326871836739665, - "learning_rate": 0.00019820397690827456, - "loss": 0.6547, - "step": 1545 - }, - { - "epoch": 0.09942270686337396, - "grad_norm": 1.1563371750862326, - "learning_rate": 0.0001988454137267479, - "loss": 0.6453, - "step": 1550 - }, - { - "epoch": 0.09974342527261065, - "grad_norm": 0.9479492008239019, - "learning_rate": 0.00019948685054522132, - "loss": 0.651, - "step": 1555 - }, - { - "epoch": 0.10006414368184734, - "grad_norm": 0.7535422723224012, - "learning_rate": 0.00019999999749335695, - "loss": 0.7093, - "step": 1560 - }, - { - "epoch": 0.10038486209108403, - "grad_norm": 0.5932112107729582, - "learning_rate": 0.00019999990976086248, - "loss": 0.7182, - "step": 1565 - }, - { - "epoch": 0.10070558050032072, - "grad_norm": 0.6993778331415806, - "learning_rate": 0.00019999969669633985, - "loss": 0.6146, - "step": 1570 - }, - { - "epoch": 0.1010262989095574, - "grad_norm": 0.7283971397341802, - "learning_rate": 0.00019999935830005615, - "loss": 0.6496, - "step": 1575 - }, - { - "epoch": 0.1013470173187941, - "grad_norm": 1.0242803435192598, - "learning_rate": 0.00019999889457243545, - "loss": 0.8042, - "step": 1580 - }, - { - "epoch": 0.10166773572803078, - "grad_norm": 0.6322290861624766, - "learning_rate": 0.000199998305514059, - "loss": 0.7667, - "step": 1585 - }, - { - "epoch": 0.10198845413726748, - "grad_norm": 0.6155965395909687, - "learning_rate": 0.00019999759112566498, - "loss": 0.6363, - "step": 1590 - }, - { - "epoch": 0.10230917254650417, - "grad_norm": 0.4557230080410517, - "learning_rate": 0.00019999675140814887, - "loss": 0.5606, - "step": 1595 - }, - { - "epoch": 0.10262989095574086, - "grad_norm": 0.6477761450960091, - "learning_rate": 0.00019999578636256302, - "loss": 0.6693, - "step": 1600 - }, - { - "epoch": 0.10295060936497755, - "grad_norm": 0.8654904236010101, - "learning_rate": 0.000199994695990117, - "loss": 0.6314, - "step": 1605 - }, - { - "epoch": 0.10327132777421424, - "grad_norm": 0.6903326653951578, - "learning_rate": 0.00019999348029217732, - "loss": 0.6179, - "step": 1610 - }, - { - "epoch": 0.10359204618345093, - "grad_norm": 0.8840202106741641, - "learning_rate": 0.00019999213927026775, - "loss": 0.6985, - "step": 1615 - }, - { - "epoch": 0.10391276459268763, - "grad_norm": 0.668673191642802, - "learning_rate": 0.00019999067292606894, - "loss": 0.6218, - "step": 1620 - }, - { - "epoch": 0.10423348300192431, - "grad_norm": 0.8731375253636731, - "learning_rate": 0.00019998908126141868, - "loss": 0.6898, - "step": 1625 - }, - { - "epoch": 0.10455420141116101, - "grad_norm": 1.0983344411460778, - "learning_rate": 0.00019998736427831194, - "loss": 0.7532, - "step": 1630 - }, - { - "epoch": 0.10487491982039769, - "grad_norm": 0.5721731282366914, - "learning_rate": 0.00019998552197890052, - "loss": 0.6003, - "step": 1635 - }, - { - "epoch": 0.10519563822963438, - "grad_norm": 0.679795611170959, - "learning_rate": 0.0001999835543654935, - "loss": 0.7003, - "step": 1640 - }, - { - "epoch": 0.10551635663887107, - "grad_norm": 0.7659746917304108, - "learning_rate": 0.0001999814614405569, - "loss": 0.6359, - "step": 1645 - }, - { - "epoch": 0.10583707504810776, - "grad_norm": 1.1962756283471876, - "learning_rate": 0.00019997924320671383, - "loss": 0.6308, - "step": 1650 - }, - { - "epoch": 0.10615779345734445, - "grad_norm": 0.7621683185763631, - "learning_rate": 0.00019997689966674446, - "loss": 0.7957, - "step": 1655 - }, - { - "epoch": 0.10647851186658114, - "grad_norm": 0.7338531701197929, - "learning_rate": 0.00019997443082358601, - "loss": 0.6757, - "step": 1660 - }, - { - "epoch": 0.10679923027581784, - "grad_norm": 0.7150664806057576, - "learning_rate": 0.00019997183668033267, - "loss": 0.694, - "step": 1665 - }, - { - "epoch": 0.10711994868505452, - "grad_norm": 0.7869356473972234, - "learning_rate": 0.0001999691172402358, - "loss": 0.719, - "step": 1670 - }, - { - "epoch": 0.10744066709429122, - "grad_norm": 0.611503667039071, - "learning_rate": 0.00019996627250670374, - "loss": 0.6343, - "step": 1675 - }, - { - "epoch": 0.1077613855035279, - "grad_norm": 0.7766135920581687, - "learning_rate": 0.00019996330248330183, - "loss": 0.693, - "step": 1680 - }, - { - "epoch": 0.1080821039127646, - "grad_norm": 0.4786388847248821, - "learning_rate": 0.00019996020717375247, - "loss": 0.6194, - "step": 1685 - }, - { - "epoch": 0.10840282232200128, - "grad_norm": 0.6991936018277035, - "learning_rate": 0.000199956986581935, - "loss": 0.7263, - "step": 1690 - }, - { - "epoch": 0.10872354073123797, - "grad_norm": 0.7205841321201338, - "learning_rate": 0.000199953640711886, - "loss": 0.4831, - "step": 1695 - }, - { - "epoch": 0.10904425914047466, - "grad_norm": 0.9131191032401795, - "learning_rate": 0.00019995016956779886, - "loss": 0.5177, - "step": 1700 - }, - { - "epoch": 0.10936497754971135, - "grad_norm": 0.5536147800325968, - "learning_rate": 0.000199946573154024, - "loss": 0.6789, - "step": 1705 - }, - { - "epoch": 0.10968569595894805, - "grad_norm": 0.6451976876558219, - "learning_rate": 0.00019994285147506888, - "loss": 0.7275, - "step": 1710 - }, - { - "epoch": 0.11000641436818473, - "grad_norm": 0.9579506214333907, - "learning_rate": 0.00019993900453559805, - "loss": 0.6589, - "step": 1715 - }, - { - "epoch": 0.11032713277742143, - "grad_norm": 0.9260040237199151, - "learning_rate": 0.00019993503234043284, - "loss": 0.6823, - "step": 1720 - }, - { - "epoch": 0.11064785118665811, - "grad_norm": 0.9505358223036796, - "learning_rate": 0.00019993093489455182, - "loss": 0.7616, - "step": 1725 - }, - { - "epoch": 0.11096856959589481, - "grad_norm": 0.7825553328319829, - "learning_rate": 0.0001999267122030903, - "loss": 0.6443, - "step": 1730 - }, - { - "epoch": 0.11128928800513149, - "grad_norm": 1.277608679789176, - "learning_rate": 0.00019992236427134069, - "loss": 0.6155, - "step": 1735 - }, - { - "epoch": 0.11161000641436819, - "grad_norm": 0.5889261013180431, - "learning_rate": 0.00019991789110475238, - "loss": 0.6994, - "step": 1740 - }, - { - "epoch": 0.11193072482360487, - "grad_norm": 0.8029959511201281, - "learning_rate": 0.00019991329270893163, - "loss": 0.5902, - "step": 1745 - }, - { - "epoch": 0.11225144323284157, - "grad_norm": 0.8303612970994603, - "learning_rate": 0.00019990856908964178, - "loss": 0.783, - "step": 1750 - }, - { - "epoch": 0.11257216164207826, - "grad_norm": 0.7054559375502497, - "learning_rate": 0.00019990372025280304, - "loss": 0.6792, - "step": 1755 - }, - { - "epoch": 0.11289288005131494, - "grad_norm": 0.7420987703476908, - "learning_rate": 0.0001998987462044925, - "loss": 0.6013, - "step": 1760 - }, - { - "epoch": 0.11321359846055164, - "grad_norm": 0.7094425366646243, - "learning_rate": 0.00019989364695094426, - "loss": 0.5688, - "step": 1765 - }, - { - "epoch": 0.11353431686978832, - "grad_norm": 0.569373653159604, - "learning_rate": 0.00019988842249854934, - "loss": 0.58, - "step": 1770 - }, - { - "epoch": 0.11385503527902502, - "grad_norm": 0.46978550262066865, - "learning_rate": 0.00019988307285385566, - "loss": 0.7256, - "step": 1775 - }, - { - "epoch": 0.1141757536882617, - "grad_norm": 0.6612438373633108, - "learning_rate": 0.00019987759802356803, - "loss": 0.7488, - "step": 1780 - }, - { - "epoch": 0.1144964720974984, - "grad_norm": 0.7309333682103005, - "learning_rate": 0.00019987199801454816, - "loss": 0.7284, - "step": 1785 - }, - { - "epoch": 0.11481719050673508, - "grad_norm": 0.9460563497076551, - "learning_rate": 0.00019986627283381472, - "loss": 0.6057, - "step": 1790 - }, - { - "epoch": 0.11513790891597178, - "grad_norm": 0.6266870049609108, - "learning_rate": 0.00019986042248854312, - "loss": 0.6476, - "step": 1795 - }, - { - "epoch": 0.11545862732520847, - "grad_norm": 0.7739200309128734, - "learning_rate": 0.0001998544469860658, - "loss": 0.6622, - "step": 1800 - }, - { - "epoch": 0.11577934573444516, - "grad_norm": 0.7469556806210228, - "learning_rate": 0.00019984834633387193, - "loss": 0.5747, - "step": 1805 - }, - { - "epoch": 0.11610006414368185, - "grad_norm": 0.5367955199234249, - "learning_rate": 0.00019984212053960763, - "loss": 0.671, - "step": 1810 - }, - { - "epoch": 0.11642078255291853, - "grad_norm": 1.0704497861003814, - "learning_rate": 0.00019983576961107576, - "loss": 0.6748, - "step": 1815 - }, - { - "epoch": 0.11674150096215523, - "grad_norm": 0.6669764759339204, - "learning_rate": 0.00019982929355623615, - "loss": 0.7167, - "step": 1820 - }, - { - "epoch": 0.11706221937139191, - "grad_norm": 0.6039796198063991, - "learning_rate": 0.00019982269238320532, - "loss": 0.6067, - "step": 1825 - }, - { - "epoch": 0.11738293778062861, - "grad_norm": 1.7098001118613075, - "learning_rate": 0.00019981596610025668, - "loss": 0.7805, - "step": 1830 - }, - { - "epoch": 0.1177036561898653, - "grad_norm": 0.7398855694010563, - "learning_rate": 0.00019980911471582043, - "loss": 0.6427, - "step": 1835 - }, - { - "epoch": 0.11802437459910199, - "grad_norm": 0.8354800121875872, - "learning_rate": 0.0001998021382384836, - "loss": 0.7408, - "step": 1840 - }, - { - "epoch": 0.11834509300833868, - "grad_norm": 0.6722235019789473, - "learning_rate": 0.00019979503667698985, - "loss": 0.6435, - "step": 1845 - }, - { - "epoch": 0.11866581141757537, - "grad_norm": 0.717593721397057, - "learning_rate": 0.00019978781004023982, - "loss": 0.6241, - "step": 1850 - }, - { - "epoch": 0.11898652982681206, - "grad_norm": 0.7195515776738803, - "learning_rate": 0.00019978045833729074, - "loss": 0.5947, - "step": 1855 - }, - { - "epoch": 0.11930724823604875, - "grad_norm": 0.8882886022840869, - "learning_rate": 0.00019977298157735672, - "loss": 0.7388, - "step": 1860 - }, - { - "epoch": 0.11962796664528544, - "grad_norm": 0.989988319302347, - "learning_rate": 0.0001997653797698085, - "loss": 0.7599, - "step": 1865 - }, - { - "epoch": 0.11994868505452214, - "grad_norm": 0.8403633651058144, - "learning_rate": 0.00019975765292417358, - "loss": 0.6432, - "step": 1870 - }, - { - "epoch": 0.12026940346375882, - "grad_norm": 1.2049771636877937, - "learning_rate": 0.00019974980105013623, - "loss": 0.7333, - "step": 1875 - }, - { - "epoch": 0.1205901218729955, - "grad_norm": 0.8525983520687547, - "learning_rate": 0.00019974182415753732, - "loss": 0.6699, - "step": 1880 - }, - { - "epoch": 0.1209108402822322, - "grad_norm": 0.5716659731530915, - "learning_rate": 0.00019973372225637453, - "loss": 0.5793, - "step": 1885 - }, - { - "epoch": 0.12123155869146889, - "grad_norm": 0.6060632420377923, - "learning_rate": 0.00019972549535680206, - "loss": 0.671, - "step": 1890 - }, - { - "epoch": 0.12155227710070558, - "grad_norm": 0.7561918292328402, - "learning_rate": 0.00019971714346913086, - "loss": 0.5316, - "step": 1895 - }, - { - "epoch": 0.12187299550994227, - "grad_norm": 0.9824211285333242, - "learning_rate": 0.00019970866660382863, - "loss": 0.5868, - "step": 1900 - }, - { - "epoch": 0.12219371391917896, - "grad_norm": 0.7951038927386893, - "learning_rate": 0.00019970006477151953, - "loss": 0.7, - "step": 1905 - }, - { - "epoch": 0.12251443232841565, - "grad_norm": 0.747912075117886, - "learning_rate": 0.0001996913379829844, - "loss": 0.5798, - "step": 1910 - }, - { - "epoch": 0.12283515073765235, - "grad_norm": 1.2254454430699995, - "learning_rate": 0.00019968248624916077, - "loss": 0.6667, - "step": 1915 - }, - { - "epoch": 0.12315586914688903, - "grad_norm": 1.1768102485963885, - "learning_rate": 0.00019967350958114267, - "loss": 0.5774, - "step": 1920 - }, - { - "epoch": 0.12347658755612573, - "grad_norm": 0.6310183951664794, - "learning_rate": 0.0001996644079901808, - "loss": 0.4486, - "step": 1925 - }, - { - "epoch": 0.12379730596536241, - "grad_norm": 0.8260925792950813, - "learning_rate": 0.00019965518148768233, - "loss": 0.5623, - "step": 1930 - }, - { - "epoch": 0.1241180243745991, - "grad_norm": 0.9150306074218141, - "learning_rate": 0.000199645830085211, - "loss": 0.83, - "step": 1935 - }, - { - "epoch": 0.12443874278383579, - "grad_norm": 0.9369210275043979, - "learning_rate": 0.00019963635379448722, - "loss": 0.7223, - "step": 1940 - }, - { - "epoch": 0.12475946119307248, - "grad_norm": 0.748894355642791, - "learning_rate": 0.00019962675262738774, - "loss": 0.6919, - "step": 1945 - }, - { - "epoch": 0.12508017960230916, - "grad_norm": 1.1961745083017192, - "learning_rate": 0.00019961702659594598, - "loss": 0.5536, - "step": 1950 - }, - { - "epoch": 0.12540089801154586, - "grad_norm": 0.568991855421978, - "learning_rate": 0.00019960717571235173, - "loss": 0.639, - "step": 1955 - }, - { - "epoch": 0.12572161642078256, - "grad_norm": 1.0900526061976745, - "learning_rate": 0.00019959719998895135, - "loss": 0.6571, - "step": 1960 - }, - { - "epoch": 0.12604233483001925, - "grad_norm": 0.7953938211319622, - "learning_rate": 0.00019958709943824758, - "loss": 0.7077, - "step": 1965 - }, - { - "epoch": 0.12636305323925592, - "grad_norm": 1.0090362549424627, - "learning_rate": 0.0001995768740728997, - "loss": 0.629, - "step": 1970 - }, - { - "epoch": 0.12668377164849262, - "grad_norm": 0.7822194115921188, - "learning_rate": 0.0001995665239057234, - "loss": 0.7948, - "step": 1975 - }, - { - "epoch": 0.12700449005772932, - "grad_norm": 0.82569207599097, - "learning_rate": 0.00019955604894969067, - "loss": 0.6823, - "step": 1980 - }, - { - "epoch": 0.12732520846696602, - "grad_norm": 0.5455388809406508, - "learning_rate": 0.0001995454492179301, - "loss": 0.5594, - "step": 1985 - }, - { - "epoch": 0.1276459268762027, - "grad_norm": 0.7695218529222057, - "learning_rate": 0.00019953472472372647, - "loss": 0.7198, - "step": 1990 - }, - { - "epoch": 0.12796664528543938, - "grad_norm": 0.8673513110262479, - "learning_rate": 0.00019952387548052112, - "loss": 0.7148, - "step": 1995 - }, - { - "epoch": 0.12828736369467608, - "grad_norm": 0.919881076337375, - "learning_rate": 0.00019951290150191158, - "loss": 0.6439, - "step": 2000 - }, - { - "epoch": 0.12860808210391275, - "grad_norm": 0.9262998866124367, - "learning_rate": 0.00019950180280165175, - "loss": 0.5764, - "step": 2005 - }, - { - "epoch": 0.12892880051314945, - "grad_norm": 0.6765034342263078, - "learning_rate": 0.00019949057939365193, - "loss": 0.4096, - "step": 2010 - }, - { - "epoch": 0.12924951892238615, - "grad_norm": 0.7219277816800387, - "learning_rate": 0.00019947923129197862, - "loss": 0.7127, - "step": 2015 - }, - { - "epoch": 0.12957023733162285, - "grad_norm": 0.8406570776216719, - "learning_rate": 0.0001994677585108546, - "loss": 0.6191, - "step": 2020 - }, - { - "epoch": 0.12989095574085952, - "grad_norm": 0.7458490203268737, - "learning_rate": 0.00019945616106465904, - "loss": 0.5701, - "step": 2025 - }, - { - "epoch": 0.1302116741500962, - "grad_norm": 1.293735176011679, - "learning_rate": 0.0001994444389679272, - "loss": 0.6852, - "step": 2030 - }, - { - "epoch": 0.1305323925593329, - "grad_norm": 0.9148850105541353, - "learning_rate": 0.00019943259223535067, - "loss": 0.7057, - "step": 2035 - }, - { - "epoch": 0.1308531109685696, - "grad_norm": 0.6641079479178653, - "learning_rate": 0.0001994206208817772, - "loss": 0.7629, - "step": 2040 - }, - { - "epoch": 0.13117382937780628, - "grad_norm": 0.791984066260629, - "learning_rate": 0.00019940852492221075, - "loss": 0.6992, - "step": 2045 - }, - { - "epoch": 0.13149454778704298, - "grad_norm": 0.849479398893481, - "learning_rate": 0.00019939630437181143, - "loss": 0.6966, - "step": 2050 - }, - { - "epoch": 0.13181526619627967, - "grad_norm": 0.8367106501858504, - "learning_rate": 0.00019938395924589552, - "loss": 0.5852, - "step": 2055 - }, - { - "epoch": 0.13213598460551634, - "grad_norm": 0.6790358847768235, - "learning_rate": 0.00019937148955993545, - "loss": 0.6393, - "step": 2060 - }, - { - "epoch": 0.13245670301475304, - "grad_norm": 0.9502499514885022, - "learning_rate": 0.00019935889532955977, - "loss": 0.6777, - "step": 2065 - }, - { - "epoch": 0.13277742142398974, - "grad_norm": 0.8134631960781032, - "learning_rate": 0.000199346176570553, - "loss": 0.6862, - "step": 2070 - }, - { - "epoch": 0.13309813983322644, - "grad_norm": 0.6366664689319048, - "learning_rate": 0.00019933333329885593, - "loss": 0.604, - "step": 2075 - }, - { - "epoch": 0.1334188582424631, - "grad_norm": 0.8465634973529981, - "learning_rate": 0.00019932036553056524, - "loss": 0.7162, - "step": 2080 - }, - { - "epoch": 0.1337395766516998, - "grad_norm": 0.8425039370601171, - "learning_rate": 0.00019930727328193378, - "loss": 0.6855, - "step": 2085 - }, - { - "epoch": 0.1340602950609365, - "grad_norm": 1.14970228361299, - "learning_rate": 0.00019929405656937032, - "loss": 0.7191, - "step": 2090 - }, - { - "epoch": 0.1343810134701732, - "grad_norm": 1.0969227215850126, - "learning_rate": 0.0001992807154094396, - "loss": 0.728, - "step": 2095 - }, - { - "epoch": 0.13470173187940987, - "grad_norm": 0.5634883710558874, - "learning_rate": 0.00019926724981886244, - "loss": 0.6794, - "step": 2100 - }, - { - "epoch": 0.13502245028864657, - "grad_norm": 0.9532151941436401, - "learning_rate": 0.0001992536598145155, - "loss": 0.6422, - "step": 2105 - }, - { - "epoch": 0.13534316869788326, - "grad_norm": 0.8529397357920244, - "learning_rate": 0.0001992399454134315, - "loss": 0.8323, - "step": 2110 - }, - { - "epoch": 0.13566388710711993, - "grad_norm": 0.5995161683553816, - "learning_rate": 0.00019922610663279894, - "loss": 0.6443, - "step": 2115 - }, - { - "epoch": 0.13598460551635663, - "grad_norm": 1.1645114047730085, - "learning_rate": 0.00019921214348996228, - "loss": 0.638, - "step": 2120 - }, - { - "epoch": 0.13630532392559333, - "grad_norm": 0.7254426735765782, - "learning_rate": 0.00019919805600242176, - "loss": 0.6233, - "step": 2125 - }, - { - "epoch": 0.13662604233483003, - "grad_norm": 1.2630556570142795, - "learning_rate": 0.00019918384418783362, - "loss": 0.7941, - "step": 2130 - }, - { - "epoch": 0.1369467607440667, - "grad_norm": 0.5842349667453849, - "learning_rate": 0.00019916950806400983, - "loss": 0.7714, - "step": 2135 - }, - { - "epoch": 0.1372674791533034, - "grad_norm": 0.5918400976970277, - "learning_rate": 0.00019915504764891808, - "loss": 0.7118, - "step": 2140 - }, - { - "epoch": 0.1375881975625401, - "grad_norm": 0.8666504796220831, - "learning_rate": 0.000199140462960682, - "loss": 0.7462, - "step": 2145 - }, - { - "epoch": 0.1379089159717768, - "grad_norm": 0.7764199666330917, - "learning_rate": 0.00019912575401758082, - "loss": 0.6395, - "step": 2150 - }, - { - "epoch": 0.13822963438101346, - "grad_norm": 0.9186504138753783, - "learning_rate": 0.00019911092083804962, - "loss": 0.6289, - "step": 2155 - }, - { - "epoch": 0.13855035279025016, - "grad_norm": 0.8035713423211853, - "learning_rate": 0.00019909596344067914, - "loss": 0.7541, - "step": 2160 - }, - { - "epoch": 0.13887107119948686, - "grad_norm": 0.7189520752077799, - "learning_rate": 0.00019908088184421578, - "loss": 0.6826, - "step": 2165 - }, - { - "epoch": 0.13919178960872355, - "grad_norm": 0.6655350088157191, - "learning_rate": 0.00019906567606756167, - "loss": 0.7409, - "step": 2170 - }, - { - "epoch": 0.13951250801796022, - "grad_norm": 0.3224597929224718, - "learning_rate": 0.0001990503461297745, - "loss": 0.5904, - "step": 2175 - }, - { - "epoch": 0.13983322642719692, - "grad_norm": 0.8267424045917116, - "learning_rate": 0.00019903489205006764, - "loss": 0.6894, - "step": 2180 - }, - { - "epoch": 0.14015394483643362, - "grad_norm": 0.6123341217762982, - "learning_rate": 0.00019901931384780995, - "loss": 0.703, - "step": 2185 - }, - { - "epoch": 0.1404746632456703, - "grad_norm": 0.45163827780119753, - "learning_rate": 0.00019900361154252602, - "loss": 0.59, - "step": 2190 - }, - { - "epoch": 0.140795381654907, - "grad_norm": 0.9556170145817368, - "learning_rate": 0.00019898778515389584, - "loss": 0.5857, - "step": 2195 - }, - { - "epoch": 0.14111610006414368, - "grad_norm": 0.7479105122087544, - "learning_rate": 0.00019897183470175494, - "loss": 0.6585, - "step": 2200 - }, - { - "epoch": 0.14143681847338038, - "grad_norm": 1.0326719597420064, - "learning_rate": 0.0001989557602060944, - "loss": 0.7534, - "step": 2205 - }, - { - "epoch": 0.14175753688261705, - "grad_norm": 0.8658293920784573, - "learning_rate": 0.00019893956168706066, - "loss": 0.7002, - "step": 2210 - }, - { - "epoch": 0.14207825529185375, - "grad_norm": 0.8622344203075765, - "learning_rate": 0.00019892323916495582, - "loss": 0.7086, - "step": 2215 - }, - { - "epoch": 0.14239897370109045, - "grad_norm": 0.7259813554322444, - "learning_rate": 0.00019890679266023709, - "loss": 0.4999, - "step": 2220 - }, - { - "epoch": 0.14271969211032715, - "grad_norm": 0.6647794000879613, - "learning_rate": 0.0001988902221935173, - "loss": 0.7005, - "step": 2225 - }, - { - "epoch": 0.14304041051956382, - "grad_norm": 0.8451399712054074, - "learning_rate": 0.00019887352778556454, - "loss": 0.6435, - "step": 2230 - }, - { - "epoch": 0.1433611289288005, - "grad_norm": 0.7567525634116421, - "learning_rate": 0.0001988567094573023, - "loss": 0.7609, - "step": 2235 - }, - { - "epoch": 0.1436818473380372, - "grad_norm": 0.8106441964345322, - "learning_rate": 0.00019883976722980936, - "loss": 0.6969, - "step": 2240 - }, - { - "epoch": 0.14400256574727388, - "grad_norm": 0.6312440700944748, - "learning_rate": 0.00019882270112431974, - "loss": 0.6787, - "step": 2245 - }, - { - "epoch": 0.14432328415651058, - "grad_norm": 0.8698670635315567, - "learning_rate": 0.00019880551116222277, - "loss": 0.79, - "step": 2250 - }, - { - "epoch": 0.14464400256574728, - "grad_norm": 0.5675337075202405, - "learning_rate": 0.00019878819736506297, - "loss": 0.6922, - "step": 2255 - }, - { - "epoch": 0.14496472097498397, - "grad_norm": 0.8080748220001619, - "learning_rate": 0.00019877075975454015, - "loss": 0.6723, - "step": 2260 - }, - { - "epoch": 0.14528543938422064, - "grad_norm": 1.18598966284805, - "learning_rate": 0.00019875319835250922, - "loss": 0.6078, - "step": 2265 - }, - { - "epoch": 0.14560615779345734, - "grad_norm": 0.7396735588781944, - "learning_rate": 0.00019873551318098026, - "loss": 0.6555, - "step": 2270 - }, - { - "epoch": 0.14592687620269404, - "grad_norm": 0.9421384978371221, - "learning_rate": 0.00019871770426211843, - "loss": 0.6763, - "step": 2275 - }, - { - "epoch": 0.14624759461193074, - "grad_norm": 1.3557865695262534, - "learning_rate": 0.0001986997716182441, - "loss": 0.6517, - "step": 2280 - }, - { - "epoch": 0.1465683130211674, - "grad_norm": 0.7620628179190014, - "learning_rate": 0.0001986817152718326, - "loss": 0.8213, - "step": 2285 - }, - { - "epoch": 0.1468890314304041, - "grad_norm": 1.1665229535256436, - "learning_rate": 0.0001986635352455143, - "loss": 0.6593, - "step": 2290 - }, - { - "epoch": 0.1472097498396408, - "grad_norm": 0.549262325529975, - "learning_rate": 0.0001986452315620747, - "loss": 0.5682, - "step": 2295 - }, - { - "epoch": 0.14753046824887747, - "grad_norm": 0.6290840720109729, - "learning_rate": 0.00019862680424445413, - "loss": 0.5891, - "step": 2300 - }, - { - "epoch": 0.14785118665811417, - "grad_norm": 0.6806013181414412, - "learning_rate": 0.00019860825331574798, - "loss": 0.7814, - "step": 2305 - }, - { - "epoch": 0.14817190506735087, - "grad_norm": 0.9105112621167852, - "learning_rate": 0.00019858957879920647, - "loss": 0.6707, - "step": 2310 - }, - { - "epoch": 0.14849262347658757, - "grad_norm": 0.8528821816779594, - "learning_rate": 0.00019857078071823484, - "loss": 0.664, - "step": 2315 - }, - { - "epoch": 0.14881334188582424, - "grad_norm": 0.7181914153458827, - "learning_rate": 0.0001985518590963931, - "loss": 0.6854, - "step": 2320 - }, - { - "epoch": 0.14913406029506093, - "grad_norm": 0.7397278453494517, - "learning_rate": 0.00019853281395739613, - "loss": 0.6665, - "step": 2325 - }, - { - "epoch": 0.14945477870429763, - "grad_norm": 0.8745968398949746, - "learning_rate": 0.00019851364532511362, - "loss": 0.5766, - "step": 2330 - }, - { - "epoch": 0.14977549711353433, - "grad_norm": 1.2088886679730004, - "learning_rate": 0.00019849435322356995, - "loss": 0.7018, - "step": 2335 - }, - { - "epoch": 0.150096215522771, - "grad_norm": 1.0443479254100274, - "learning_rate": 0.00019847493767694444, - "loss": 0.5986, - "step": 2340 - }, - { - "epoch": 0.1504169339320077, - "grad_norm": 1.0921241128817574, - "learning_rate": 0.00019845539870957092, - "loss": 0.5923, - "step": 2345 - }, - { - "epoch": 0.1507376523412444, - "grad_norm": 0.9646802917631114, - "learning_rate": 0.00019843573634593806, - "loss": 0.7926, - "step": 2350 - }, - { - "epoch": 0.1510583707504811, - "grad_norm": 0.7656847484095911, - "learning_rate": 0.00019841595061068906, - "loss": 0.7207, - "step": 2355 - }, - { - "epoch": 0.15137908915971776, - "grad_norm": 0.5049528849051477, - "learning_rate": 0.0001983960415286219, - "loss": 0.6228, - "step": 2360 - }, - { - "epoch": 0.15169980756895446, - "grad_norm": 0.9068993192806996, - "learning_rate": 0.00019837600912468893, - "loss": 0.5693, - "step": 2365 - }, - { - "epoch": 0.15202052597819116, - "grad_norm": 0.8676250105736654, - "learning_rate": 0.00019835585342399732, - "loss": 0.5743, - "step": 2370 - }, - { - "epoch": 0.15234124438742783, - "grad_norm": 0.5246385631697503, - "learning_rate": 0.00019833557445180855, - "loss": 0.7401, - "step": 2375 - }, - { - "epoch": 0.15266196279666452, - "grad_norm": 0.7016792226152242, - "learning_rate": 0.0001983151722335387, - "loss": 0.6403, - "step": 2380 - }, - { - "epoch": 0.15298268120590122, - "grad_norm": 0.7722496289657441, - "learning_rate": 0.00019829464679475836, - "loss": 0.5484, - "step": 2385 - }, - { - "epoch": 0.15330339961513792, - "grad_norm": 1.2298123662291214, - "learning_rate": 0.00019827399816119243, - "loss": 0.7674, - "step": 2390 - }, - { - "epoch": 0.1536241180243746, - "grad_norm": 0.7861238282945989, - "learning_rate": 0.00019825322635872036, - "loss": 0.619, - "step": 2395 - }, - { - "epoch": 0.1539448364336113, - "grad_norm": 0.9211911752664865, - "learning_rate": 0.00019823233141337584, - "loss": 0.6211, - "step": 2400 - }, - { - "epoch": 0.15426555484284799, - "grad_norm": 0.7151255909037463, - "learning_rate": 0.00019821131335134696, - "loss": 0.548, - "step": 2405 - }, - { - "epoch": 0.15458627325208468, - "grad_norm": 0.9458426635711992, - "learning_rate": 0.00019819017219897613, - "loss": 0.6482, - "step": 2410 - }, - { - "epoch": 0.15490699166132135, - "grad_norm": 1.0258204800171964, - "learning_rate": 0.00019816890798276, - "loss": 0.6717, - "step": 2415 - }, - { - "epoch": 0.15522771007055805, - "grad_norm": 1.2116376507078799, - "learning_rate": 0.00019814752072934945, - "loss": 0.6242, - "step": 2420 - }, - { - "epoch": 0.15554842847979475, - "grad_norm": 0.7799968415850017, - "learning_rate": 0.00019812601046554962, - "loss": 0.6257, - "step": 2425 - }, - { - "epoch": 0.15586914688903142, - "grad_norm": 0.4916761578519649, - "learning_rate": 0.00019810437721831976, - "loss": 0.7221, - "step": 2430 - }, - { - "epoch": 0.15618986529826812, - "grad_norm": 0.9089669003206741, - "learning_rate": 0.00019808262101477328, - "loss": 0.6457, - "step": 2435 - }, - { - "epoch": 0.1565105837075048, - "grad_norm": 0.5752941624716728, - "learning_rate": 0.00019806074188217766, - "loss": 0.5367, - "step": 2440 - }, - { - "epoch": 0.1568313021167415, - "grad_norm": 0.7889396413468497, - "learning_rate": 0.0001980387398479546, - "loss": 0.5704, - "step": 2445 - }, - { - "epoch": 0.15715202052597818, - "grad_norm": 0.7974301152247996, - "learning_rate": 0.00019801661493967955, - "loss": 0.7438, - "step": 2450 - }, - { - "epoch": 0.15747273893521488, - "grad_norm": 0.9099718674001662, - "learning_rate": 0.00019799436718508228, - "loss": 0.7057, - "step": 2455 - }, - { - "epoch": 0.15779345734445158, - "grad_norm": 0.7460789907183486, - "learning_rate": 0.0001979719966120463, - "loss": 0.6769, - "step": 2460 - }, - { - "epoch": 0.15811417575368827, - "grad_norm": 0.9026682063218279, - "learning_rate": 0.00019794950324860918, - "loss": 0.6677, - "step": 2465 - }, - { - "epoch": 0.15843489416292494, - "grad_norm": 0.706813388972018, - "learning_rate": 0.0001979268871229623, - "loss": 0.652, - "step": 2470 - }, - { - "epoch": 0.15875561257216164, - "grad_norm": 0.7951893501420781, - "learning_rate": 0.00019790414826345094, - "loss": 0.7231, - "step": 2475 - }, - { - "epoch": 0.15907633098139834, - "grad_norm": 0.9695064104615378, - "learning_rate": 0.0001978812866985742, - "loss": 0.6308, - "step": 2480 - }, - { - "epoch": 0.159397049390635, - "grad_norm": 0.5344509876021667, - "learning_rate": 0.00019785830245698497, - "loss": 0.6997, - "step": 2485 - }, - { - "epoch": 0.1597177677998717, - "grad_norm": 0.834051661967047, - "learning_rate": 0.00019783519556748987, - "loss": 0.6783, - "step": 2490 - }, - { - "epoch": 0.1600384862091084, - "grad_norm": 0.9723305146917021, - "learning_rate": 0.0001978119660590493, - "loss": 0.7798, - "step": 2495 - }, - { - "epoch": 0.1603592046183451, - "grad_norm": 0.8859242414039744, - "learning_rate": 0.00019778861396077725, - "loss": 0.793, - "step": 2500 - }, - { - "epoch": 0.16067992302758177, - "grad_norm": 0.7241777810857887, - "learning_rate": 0.00019776513930194148, - "loss": 0.504, - "step": 2505 - }, - { - "epoch": 0.16100064143681847, - "grad_norm": 1.054121315907452, - "learning_rate": 0.00019774154211196318, - "loss": 0.7509, - "step": 2510 - }, - { - "epoch": 0.16132135984605517, - "grad_norm": 0.8701449793412597, - "learning_rate": 0.0001977178224204173, - "loss": 0.7875, - "step": 2515 - }, - { - "epoch": 0.16164207825529187, - "grad_norm": 0.7757819809049686, - "learning_rate": 0.00019769398025703224, - "loss": 0.6047, - "step": 2520 - }, - { - "epoch": 0.16196279666452854, - "grad_norm": 1.0713357367053484, - "learning_rate": 0.00019767001565168982, - "loss": 0.7384, - "step": 2525 - }, - { - "epoch": 0.16228351507376523, - "grad_norm": 0.43793306094407475, - "learning_rate": 0.00019764592863442544, - "loss": 0.6156, - "step": 2530 - }, - { - "epoch": 0.16260423348300193, - "grad_norm": 0.9034469617213254, - "learning_rate": 0.0001976217192354279, - "loss": 0.6383, - "step": 2535 - }, - { - "epoch": 0.1629249518922386, - "grad_norm": 0.7090465404578327, - "learning_rate": 0.0001975973874850393, - "loss": 0.59, - "step": 2540 - }, - { - "epoch": 0.1632456703014753, - "grad_norm": 0.7781025944113742, - "learning_rate": 0.00019757293341375517, - "loss": 0.6829, - "step": 2545 - }, - { - "epoch": 0.163566388710712, - "grad_norm": 0.701765797555506, - "learning_rate": 0.00019754835705222435, - "loss": 0.6682, - "step": 2550 - }, - { - "epoch": 0.1638871071199487, - "grad_norm": 0.8486110822681391, - "learning_rate": 0.00019752365843124885, - "loss": 0.7107, - "step": 2555 - }, - { - "epoch": 0.16420782552918536, - "grad_norm": 1.2183183484648679, - "learning_rate": 0.00019749883758178404, - "loss": 0.7092, - "step": 2560 - }, - { - "epoch": 0.16452854393842206, - "grad_norm": 0.5747438190450085, - "learning_rate": 0.0001974738945349384, - "loss": 0.5635, - "step": 2565 - }, - { - "epoch": 0.16484926234765876, - "grad_norm": 0.754766366798954, - "learning_rate": 0.0001974488293219736, - "loss": 0.7561, - "step": 2570 - }, - { - "epoch": 0.16516998075689546, - "grad_norm": 0.9579439740753294, - "learning_rate": 0.00019742364197430443, - "loss": 0.6015, - "step": 2575 - }, - { - "epoch": 0.16549069916613213, - "grad_norm": 0.6786544154968012, - "learning_rate": 0.00019739833252349867, - "loss": 0.5112, - "step": 2580 - }, - { - "epoch": 0.16581141757536882, - "grad_norm": 0.7934214823629537, - "learning_rate": 0.00019737290100127722, - "loss": 0.7203, - "step": 2585 - }, - { - "epoch": 0.16613213598460552, - "grad_norm": 1.33220621050734, - "learning_rate": 0.00019734734743951396, - "loss": 0.6863, - "step": 2590 - }, - { - "epoch": 0.16645285439384222, - "grad_norm": 0.8267900862256077, - "learning_rate": 0.00019732167187023572, - "loss": 0.6449, - "step": 2595 - }, - { - "epoch": 0.1667735728030789, - "grad_norm": 0.7287938245757929, - "learning_rate": 0.0001972958743256222, - "loss": 0.7308, - "step": 2600 - }, - { - "epoch": 0.1670942912123156, - "grad_norm": 0.5363094807734924, - "learning_rate": 0.00019726995483800613, - "loss": 0.6403, - "step": 2605 - }, - { - "epoch": 0.16741500962155229, - "grad_norm": 0.7277617239159246, - "learning_rate": 0.00019724391343987284, - "loss": 0.6777, - "step": 2610 - }, - { - "epoch": 0.16773572803078896, - "grad_norm": 0.9462519719607535, - "learning_rate": 0.00019721775016386057, - "loss": 0.6895, - "step": 2615 - }, - { - "epoch": 0.16805644644002565, - "grad_norm": 0.8528897030121969, - "learning_rate": 0.0001971914650427604, - "loss": 0.5536, - "step": 2620 - }, - { - "epoch": 0.16837716484926235, - "grad_norm": 0.9319172497451002, - "learning_rate": 0.000197165058109516, - "loss": 0.5724, - "step": 2625 - }, - { - "epoch": 0.16869788325849905, - "grad_norm": 0.7410196474628663, - "learning_rate": 0.0001971385293972237, - "loss": 0.6785, - "step": 2630 - }, - { - "epoch": 0.16901860166773572, - "grad_norm": 0.9192207798068145, - "learning_rate": 0.00019711187893913255, - "loss": 0.7219, - "step": 2635 - }, - { - "epoch": 0.16933932007697242, - "grad_norm": 0.5750937169325536, - "learning_rate": 0.00019708510676864414, - "loss": 0.482, - "step": 2640 - }, - { - "epoch": 0.1696600384862091, - "grad_norm": 0.7158603995106417, - "learning_rate": 0.0001970582129193126, - "loss": 0.577, - "step": 2645 - }, - { - "epoch": 0.1699807568954458, - "grad_norm": 0.9152254783119084, - "learning_rate": 0.00019703119742484453, - "loss": 0.6657, - "step": 2650 - }, - { - "epoch": 0.17030147530468248, - "grad_norm": 0.7435319188039847, - "learning_rate": 0.00019700406031909905, - "loss": 0.6779, - "step": 2655 - }, - { - "epoch": 0.17062219371391918, - "grad_norm": 1.504228508241335, - "learning_rate": 0.0001969768016360877, - "loss": 0.7278, - "step": 2660 - }, - { - "epoch": 0.17094291212315588, - "grad_norm": 1.2092049917834673, - "learning_rate": 0.00019694942140997435, - "loss": 0.7341, - "step": 2665 - }, - { - "epoch": 0.17126363053239255, - "grad_norm": 0.6080302726719192, - "learning_rate": 0.00019692191967507524, - "loss": 0.6543, - "step": 2670 - }, - { - "epoch": 0.17158434894162924, - "grad_norm": 0.7373008700852878, - "learning_rate": 0.0001968942964658589, - "loss": 0.6152, - "step": 2675 - }, - { - "epoch": 0.17190506735086594, - "grad_norm": 0.9214476765346659, - "learning_rate": 0.000196866551816946, - "loss": 0.6878, - "step": 2680 - }, - { - "epoch": 0.17222578576010264, - "grad_norm": 0.7450194855735123, - "learning_rate": 0.0001968386857631096, - "loss": 0.6173, - "step": 2685 - }, - { - "epoch": 0.1725465041693393, - "grad_norm": 0.6242054305521421, - "learning_rate": 0.00019681069833927476, - "loss": 0.6746, - "step": 2690 - }, - { - "epoch": 0.172867222578576, - "grad_norm": 0.711220248168634, - "learning_rate": 0.00019678258958051877, - "loss": 0.6821, - "step": 2695 - }, - { - "epoch": 0.1731879409878127, - "grad_norm": 0.7496584977206721, - "learning_rate": 0.00019675435952207088, - "loss": 0.5238, - "step": 2700 - }, - { - "epoch": 0.1735086593970494, - "grad_norm": 0.7084413643635924, - "learning_rate": 0.00019672600819931247, - "loss": 0.7056, - "step": 2705 - }, - { - "epoch": 0.17382937780628607, - "grad_norm": 1.0439027628488613, - "learning_rate": 0.00019669753564777688, - "loss": 0.6513, - "step": 2710 - }, - { - "epoch": 0.17415009621552277, - "grad_norm": 0.71498067288977, - "learning_rate": 0.0001966689419031493, - "loss": 0.7406, - "step": 2715 - }, - { - "epoch": 0.17447081462475947, - "grad_norm": 0.7033452927937216, - "learning_rate": 0.00019664022700126695, - "loss": 0.6923, - "step": 2720 - }, - { - "epoch": 0.17479153303399614, - "grad_norm": 0.8919976779446186, - "learning_rate": 0.00019661139097811877, - "loss": 0.6326, - "step": 2725 - }, - { - "epoch": 0.17511225144323284, - "grad_norm": 0.9493437873661492, - "learning_rate": 0.00019658243386984562, - "loss": 0.5783, - "step": 2730 - }, - { - "epoch": 0.17543296985246953, - "grad_norm": 0.9860728443591087, - "learning_rate": 0.00019655335571274003, - "loss": 0.7279, - "step": 2735 - }, - { - "epoch": 0.17575368826170623, - "grad_norm": 0.6352021684421743, - "learning_rate": 0.0001965241565432463, - "loss": 0.6397, - "step": 2740 - }, - { - "epoch": 0.1760744066709429, - "grad_norm": 1.099016920497353, - "learning_rate": 0.00019649483639796032, - "loss": 0.6756, - "step": 2745 - }, - { - "epoch": 0.1763951250801796, - "grad_norm": 0.7058834343210731, - "learning_rate": 0.00019646539531362973, - "loss": 0.7218, - "step": 2750 - }, - { - "epoch": 0.1767158434894163, - "grad_norm": 0.8020832284905198, - "learning_rate": 0.00019643583332715366, - "loss": 0.5708, - "step": 2755 - }, - { - "epoch": 0.177036561898653, - "grad_norm": 0.8014855578510585, - "learning_rate": 0.0001964061504755827, - "loss": 0.7843, - "step": 2760 - }, - { - "epoch": 0.17735728030788966, - "grad_norm": 1.0134184586337234, - "learning_rate": 0.0001963763467961191, - "loss": 0.6599, - "step": 2765 - }, - { - "epoch": 0.17767799871712636, - "grad_norm": 0.6050193347531744, - "learning_rate": 0.0001963464223261164, - "loss": 0.7984, - "step": 2770 - }, - { - "epoch": 0.17799871712636306, - "grad_norm": 0.7479913165773774, - "learning_rate": 0.0001963163771030796, - "loss": 0.7469, - "step": 2775 - }, - { - "epoch": 0.17831943553559973, - "grad_norm": 1.091278392341476, - "learning_rate": 0.00019628621116466502, - "loss": 0.6991, - "step": 2780 - }, - { - "epoch": 0.17864015394483643, - "grad_norm": 1.0105012542968526, - "learning_rate": 0.00019625592454868026, - "loss": 0.6867, - "step": 2785 - }, - { - "epoch": 0.17896087235407312, - "grad_norm": 0.8032083651463552, - "learning_rate": 0.0001962255172930842, - "loss": 0.7184, - "step": 2790 - }, - { - "epoch": 0.17928159076330982, - "grad_norm": 0.8193497605449357, - "learning_rate": 0.00019619498943598688, - "loss": 0.5785, - "step": 2795 - }, - { - "epoch": 0.1796023091725465, - "grad_norm": 0.7772046040254091, - "learning_rate": 0.00019616434101564956, - "loss": 0.7471, - "step": 2800 - }, - { - "epoch": 0.1799230275817832, - "grad_norm": 1.224565960941351, - "learning_rate": 0.00019613357207048452, - "loss": 0.856, - "step": 2805 - }, - { - "epoch": 0.1802437459910199, - "grad_norm": 0.6591412427417273, - "learning_rate": 0.00019610268263905515, - "loss": 0.5893, - "step": 2810 - }, - { - "epoch": 0.18056446440025659, - "grad_norm": 0.8875976837711199, - "learning_rate": 0.00019607167276007587, - "loss": 0.7161, - "step": 2815 - }, - { - "epoch": 0.18088518280949326, - "grad_norm": 0.8225479052301773, - "learning_rate": 0.00019604054247241193, - "loss": 0.5873, - "step": 2820 - }, - { - "epoch": 0.18120590121872995, - "grad_norm": 1.2087539785527361, - "learning_rate": 0.00019600929181507972, - "loss": 0.6542, - "step": 2825 - }, - { - "epoch": 0.18152661962796665, - "grad_norm": 0.8050140113302814, - "learning_rate": 0.00019597792082724625, - "loss": 0.5778, - "step": 2830 - }, - { - "epoch": 0.18184733803720335, - "grad_norm": 1.321288241534433, - "learning_rate": 0.00019594642954822952, - "loss": 0.5994, - "step": 2835 - }, - { - "epoch": 0.18216805644644002, - "grad_norm": 0.9376939681240336, - "learning_rate": 0.00019591481801749816, - "loss": 0.5046, - "step": 2840 - }, - { - "epoch": 0.18248877485567672, - "grad_norm": 0.6185458970009285, - "learning_rate": 0.00019588308627467162, - "loss": 0.6859, - "step": 2845 - }, - { - "epoch": 0.18280949326491341, - "grad_norm": 0.7801762201714135, - "learning_rate": 0.00019585123435952, - "loss": 0.7015, - "step": 2850 - }, - { - "epoch": 0.18313021167415008, - "grad_norm": 0.7265831165052501, - "learning_rate": 0.00019581926231196391, - "loss": 0.823, - "step": 2855 - }, - { - "epoch": 0.18345093008338678, - "grad_norm": 0.8151220320154888, - "learning_rate": 0.00019578717017207467, - "loss": 0.689, - "step": 2860 - }, - { - "epoch": 0.18377164849262348, - "grad_norm": 0.9213195972340709, - "learning_rate": 0.000195754957980074, - "loss": 0.7382, - "step": 2865 - }, - { - "epoch": 0.18409236690186018, - "grad_norm": 0.782822592817081, - "learning_rate": 0.0001957226257763342, - "loss": 0.6929, - "step": 2870 - }, - { - "epoch": 0.18441308531109685, - "grad_norm": 0.980335474676683, - "learning_rate": 0.0001956901736013778, - "loss": 0.6156, - "step": 2875 - }, - { - "epoch": 0.18473380372033354, - "grad_norm": 0.9039810035947186, - "learning_rate": 0.00019565760149587794, - "loss": 0.7664, - "step": 2880 - }, - { - "epoch": 0.18505452212957024, - "grad_norm": 0.000701834979829147, - "learning_rate": 0.0001956249095006578, - "loss": 0.5249, - "step": 2885 - }, - { - "epoch": 0.18537524053880694, - "grad_norm": 1.0237955976436885, - "learning_rate": 0.00019559209765669105, - "loss": 0.6839, - "step": 2890 - }, - { - "epoch": 0.1856959589480436, - "grad_norm": 0.6769833810242086, - "learning_rate": 0.00019555916600510145, - "loss": 0.6537, - "step": 2895 - }, - { - "epoch": 0.1860166773572803, - "grad_norm": 0.6462485885713231, - "learning_rate": 0.00019552611458716296, - "loss": 0.723, - "step": 2900 - }, - { - "epoch": 0.186337395766517, - "grad_norm": 0.8722147531755802, - "learning_rate": 0.0001954929434442996, - "loss": 0.6837, - "step": 2905 - }, - { - "epoch": 0.18665811417575368, - "grad_norm": 0.6906487731551919, - "learning_rate": 0.0001954596526180855, - "loss": 0.6678, - "step": 2910 - }, - { - "epoch": 0.18697883258499037, - "grad_norm": 0.8754536117451718, - "learning_rate": 0.00019542624215024474, - "loss": 0.7607, - "step": 2915 - }, - { - "epoch": 0.18729955099422707, - "grad_norm": 0.7481215119155424, - "learning_rate": 0.0001953927120826514, - "loss": 0.7354, - "step": 2920 - }, - { - "epoch": 0.18762026940346377, - "grad_norm": 0.7173045174318763, - "learning_rate": 0.0001953590624573294, - "loss": 0.6889, - "step": 2925 - }, - { - "epoch": 0.18794098781270044, - "grad_norm": 0.688657494500447, - "learning_rate": 0.00019532529331645258, - "loss": 0.7716, - "step": 2930 - }, - { - "epoch": 0.18826170622193714, - "grad_norm": 0.8542179699315836, - "learning_rate": 0.0001952914047023445, - "loss": 0.6846, - "step": 2935 - }, - { - "epoch": 0.18858242463117383, - "grad_norm": 0.6693936334963977, - "learning_rate": 0.0001952573966574785, - "loss": 0.6893, - "step": 2940 - }, - { - "epoch": 0.18890314304041053, - "grad_norm": 1.1047249058364512, - "learning_rate": 0.00019522326922447755, - "loss": 0.7203, - "step": 2945 - }, - { - "epoch": 0.1892238614496472, - "grad_norm": 0.6082855408476369, - "learning_rate": 0.00019518902244611435, - "loss": 0.7069, - "step": 2950 - }, - { - "epoch": 0.1895445798588839, - "grad_norm": 0.5867678432004605, - "learning_rate": 0.00019515465636531107, - "loss": 0.7485, - "step": 2955 - }, - { - "epoch": 0.1898652982681206, - "grad_norm": 0.6389524482986783, - "learning_rate": 0.0001951201710251395, - "loss": 0.6291, - "step": 2960 - }, - { - "epoch": 0.19018601667735727, - "grad_norm": 0.40852828777296263, - "learning_rate": 0.00019508556646882083, - "loss": 0.6572, - "step": 2965 - }, - { - "epoch": 0.19050673508659396, - "grad_norm": 0.6625359401782684, - "learning_rate": 0.00019505084273972568, - "loss": 0.6905, - "step": 2970 - }, - { - "epoch": 0.19082745349583066, - "grad_norm": 0.6733266631590418, - "learning_rate": 0.00019501599988137406, - "loss": 0.6065, - "step": 2975 - }, - { - "epoch": 0.19114817190506736, - "grad_norm": 0.8217762217578838, - "learning_rate": 0.00019498103793743528, - "loss": 0.6843, - "step": 2980 - }, - { - "epoch": 0.19146889031430403, - "grad_norm": 1.220514466724885, - "learning_rate": 0.00019494595695172787, - "loss": 0.604, - "step": 2985 - }, - { - "epoch": 0.19178960872354073, - "grad_norm": 0.792446196427873, - "learning_rate": 0.00019491075696821962, - "loss": 0.6326, - "step": 2990 - }, - { - "epoch": 0.19211032713277743, - "grad_norm": 0.8158356531364367, - "learning_rate": 0.00019487543803102736, - "loss": 0.7795, - "step": 2995 - }, - { - "epoch": 0.19243104554201412, - "grad_norm": 1.3297681323714916, - "learning_rate": 0.00019484000018441715, - "loss": 0.6776, - "step": 3000 - }, - { - "epoch": 0.1927517639512508, - "grad_norm": 1.1206878255004398, - "learning_rate": 0.00019480444347280392, - "loss": 0.7425, - "step": 3005 - }, - { - "epoch": 0.1930724823604875, - "grad_norm": 0.5668482553685025, - "learning_rate": 0.00019476876794075168, - "loss": 0.6463, - "step": 3010 - }, - { - "epoch": 0.1933932007697242, - "grad_norm": 0.9274228876056752, - "learning_rate": 0.0001947329736329734, - "loss": 0.7253, - "step": 3015 - }, - { - "epoch": 0.19371391917896089, - "grad_norm": 0.8934110376365801, - "learning_rate": 0.0001946970605943308, - "loss": 0.8008, - "step": 3020 - }, - { - "epoch": 0.19403463758819756, - "grad_norm": 0.7054346176332205, - "learning_rate": 0.00019466102886983445, - "loss": 0.6421, - "step": 3025 - }, - { - "epoch": 0.19435535599743425, - "grad_norm": 1.112312708275422, - "learning_rate": 0.0001946248785046437, - "loss": 0.5448, - "step": 3030 - }, - { - "epoch": 0.19467607440667095, - "grad_norm": 0.9514480454813623, - "learning_rate": 0.00019458860954406655, - "loss": 0.8921, - "step": 3035 - }, - { - "epoch": 0.19499679281590762, - "grad_norm": 0.8289559763958162, - "learning_rate": 0.00019455222203355974, - "loss": 0.6384, - "step": 3040 - }, - { - "epoch": 0.19531751122514432, - "grad_norm": 1.6772904982725059, - "learning_rate": 0.00019451571601872842, - "loss": 0.593, - "step": 3045 - }, - { - "epoch": 0.19563822963438102, - "grad_norm": 0.933959150583705, - "learning_rate": 0.00019447909154532642, - "loss": 0.7033, - "step": 3050 - }, - { - "epoch": 0.19595894804361771, - "grad_norm": 0.9836848697506737, - "learning_rate": 0.00019444234865925597, - "loss": 0.694, - "step": 3055 - }, - { - "epoch": 0.19627966645285438, - "grad_norm": 0.752058149609346, - "learning_rate": 0.00019440548740656772, - "loss": 0.8419, - "step": 3060 - }, - { - "epoch": 0.19660038486209108, - "grad_norm": 0.5564595991041628, - "learning_rate": 0.00019436850783346063, - "loss": 0.5868, - "step": 3065 - }, - { - "epoch": 0.19692110327132778, - "grad_norm": 1.1233031900082198, - "learning_rate": 0.00019433140998628202, - "loss": 0.7213, - "step": 3070 - }, - { - "epoch": 0.19724182168056448, - "grad_norm": 0.9846847511141703, - "learning_rate": 0.00019429419391152743, - "loss": 0.6083, - "step": 3075 - }, - { - "epoch": 0.19756254008980115, - "grad_norm": 0.9133697850179805, - "learning_rate": 0.00019425685965584056, - "loss": 0.7509, - "step": 3080 - }, - { - "epoch": 0.19788325849903785, - "grad_norm": 1.1268873349974773, - "learning_rate": 0.0001942194072660132, - "loss": 0.6734, - "step": 3085 - }, - { - "epoch": 0.19820397690827454, - "grad_norm": 0.663450697814864, - "learning_rate": 0.00019418183678898525, - "loss": 0.7093, - "step": 3090 - }, - { - "epoch": 0.1985246953175112, - "grad_norm": 0.6245075928754343, - "learning_rate": 0.0001941441482718446, - "loss": 0.7194, - "step": 3095 - }, - { - "epoch": 0.1988454137267479, - "grad_norm": 0.9587885835266485, - "learning_rate": 0.00019410634176182705, - "loss": 0.6995, - "step": 3100 - }, - { - "epoch": 0.1991661321359846, - "grad_norm": 0.8163502504890695, - "learning_rate": 0.00019406841730631636, - "loss": 0.7503, - "step": 3105 - }, - { - "epoch": 0.1994868505452213, - "grad_norm": 0.9426439782405206, - "learning_rate": 0.00019403037495284398, - "loss": 0.7404, - "step": 3110 - }, - { - "epoch": 0.19980756895445798, - "grad_norm": 0.8220300785309613, - "learning_rate": 0.00019399221474908932, - "loss": 0.6744, - "step": 3115 - }, - { - "epoch": 0.20012828736369467, - "grad_norm": 0.9955681688037235, - "learning_rate": 0.00019395393674287927, - "loss": 0.6852, - "step": 3120 - }, - { - "epoch": 0.20044900577293137, - "grad_norm": 1.1278721654085937, - "learning_rate": 0.00019391554098218853, - "loss": 0.8426, - "step": 3125 - }, - { - "epoch": 0.20076972418216807, - "grad_norm": 1.289322139002122, - "learning_rate": 0.00019387702751513932, - "loss": 0.7352, - "step": 3130 - }, - { - "epoch": 0.20109044259140474, - "grad_norm": 1.4969951218148942, - "learning_rate": 0.0001938383963900014, - "loss": 0.7202, - "step": 3135 - }, - { - "epoch": 0.20141116100064144, - "grad_norm": 0.8939306827167222, - "learning_rate": 0.000193799647655192, - "loss": 0.6326, - "step": 3140 - }, - { - "epoch": 0.20173187940987813, - "grad_norm": 1.038193039895127, - "learning_rate": 0.00019376078135927566, - "loss": 0.5945, - "step": 3145 - }, - { - "epoch": 0.2020525978191148, - "grad_norm": 0.8466700431352269, - "learning_rate": 0.00019372179755096448, - "loss": 0.4709, - "step": 3150 - }, - { - "epoch": 0.2023733162283515, - "grad_norm": 0.8353167491615692, - "learning_rate": 0.00019368269627911757, - "loss": 0.6145, - "step": 3155 - }, - { - "epoch": 0.2026940346375882, - "grad_norm": 0.5826569638112876, - "learning_rate": 0.00019364347759274144, - "loss": 0.6798, - "step": 3160 - }, - { - "epoch": 0.2030147530468249, - "grad_norm": 0.6596971126256945, - "learning_rate": 0.0001936041415409897, - "loss": 0.7164, - "step": 3165 - }, - { - "epoch": 0.20333547145606157, - "grad_norm": 1.1459761657771013, - "learning_rate": 0.00019356468817316311, - "loss": 0.6503, - "step": 3170 - }, - { - "epoch": 0.20365618986529826, - "grad_norm": 0.6795054057142108, - "learning_rate": 0.0001935251175387094, - "loss": 0.624, - "step": 3175 - }, - { - "epoch": 0.20397690827453496, - "grad_norm": 0.740763733162126, - "learning_rate": 0.00019348542968722324, - "loss": 0.6297, - "step": 3180 - }, - { - "epoch": 0.20429762668377166, - "grad_norm": 0.7064796503029271, - "learning_rate": 0.00019344562466844635, - "loss": 0.6003, - "step": 3185 - }, - { - "epoch": 0.20461834509300833, - "grad_norm": 1.6506358182547065, - "learning_rate": 0.00019340570253226712, - "loss": 0.4787, - "step": 3190 - }, - { - "epoch": 0.20493906350224503, - "grad_norm": 1.1332295207671033, - "learning_rate": 0.0001933656633287209, - "loss": 0.7126, - "step": 3195 - }, - { - "epoch": 0.20525978191148173, - "grad_norm": 0.617200353783866, - "learning_rate": 0.00019332550710798966, - "loss": 0.598, - "step": 3200 - }, - { - "epoch": 0.2055805003207184, - "grad_norm": 0.868513802069887, - "learning_rate": 0.000193285233920402, - "loss": 0.7152, - "step": 3205 - }, - { - "epoch": 0.2059012187299551, - "grad_norm": 1.1852925025104672, - "learning_rate": 0.00019324484381643325, - "loss": 0.7774, - "step": 3210 - }, - { - "epoch": 0.2062219371391918, - "grad_norm": 1.0280680170586727, - "learning_rate": 0.00019320433684670514, - "loss": 0.7043, - "step": 3215 - }, - { - "epoch": 0.2065426555484285, - "grad_norm": 0.6987881012001924, - "learning_rate": 0.00019316371306198592, - "loss": 0.7619, - "step": 3220 - }, - { - "epoch": 0.20686337395766516, - "grad_norm": 0.8392027535004901, - "learning_rate": 0.00019312297251319026, - "loss": 0.6781, - "step": 3225 - }, - { - "epoch": 0.20718409236690186, - "grad_norm": 1.2842078269698645, - "learning_rate": 0.00019308211525137915, - "loss": 0.7145, - "step": 3230 - }, - { - "epoch": 0.20750481077613855, - "grad_norm": 0.6603411917591546, - "learning_rate": 0.0001930411413277599, - "loss": 0.6411, - "step": 3235 - }, - { - "epoch": 0.20782552918537525, - "grad_norm": 1.3159150838945801, - "learning_rate": 0.000193000050793686, - "loss": 0.7067, - "step": 3240 - }, - { - "epoch": 0.20814624759461192, - "grad_norm": 1.2826837962016335, - "learning_rate": 0.0001929588437006571, - "loss": 0.657, - "step": 3245 - }, - { - "epoch": 0.20846696600384862, - "grad_norm": 0.7429467281992763, - "learning_rate": 0.00019291752010031887, - "loss": 0.6783, - "step": 3250 - }, - { - "epoch": 0.20878768441308532, - "grad_norm": 0.9388767995389723, - "learning_rate": 0.00019287608004446314, - "loss": 0.6873, - "step": 3255 - }, - { - "epoch": 0.20910840282232201, - "grad_norm": 0.8840070141339184, - "learning_rate": 0.0001928345235850276, - "loss": 0.6159, - "step": 3260 - }, - { - "epoch": 0.20942912123155868, - "grad_norm": 1.0732885802726535, - "learning_rate": 0.00019279285077409582, - "loss": 0.6713, - "step": 3265 - }, - { - "epoch": 0.20974983964079538, - "grad_norm": 0.7289657532988314, - "learning_rate": 0.00019275106166389725, - "loss": 0.6831, - "step": 3270 - }, - { - "epoch": 0.21007055805003208, - "grad_norm": 0.6492856906135663, - "learning_rate": 0.00019270915630680707, - "loss": 0.7126, - "step": 3275 - }, - { - "epoch": 0.21039127645926875, - "grad_norm": 0.8073736143636202, - "learning_rate": 0.0001926671347553462, - "loss": 0.7527, - "step": 3280 - }, - { - "epoch": 0.21071199486850545, - "grad_norm": 0.8682418292741673, - "learning_rate": 0.0001926249970621811, - "loss": 0.5924, - "step": 3285 - }, - { - "epoch": 0.21103271327774215, - "grad_norm": 0.553914766273313, - "learning_rate": 0.00019258274328012384, - "loss": 0.5456, - "step": 3290 - }, - { - "epoch": 0.21135343168697884, - "grad_norm": 0.9718939215705609, - "learning_rate": 0.00019254037346213204, - "loss": 0.5976, - "step": 3295 - }, - { - "epoch": 0.2116741500962155, - "grad_norm": 0.9064065621099515, - "learning_rate": 0.00019249788766130863, - "loss": 0.7424, - "step": 3300 - }, - { - "epoch": 0.2119948685054522, - "grad_norm": 0.6693670165919959, - "learning_rate": 0.00019245528593090204, - "loss": 0.7834, - "step": 3305 - }, - { - "epoch": 0.2123155869146889, - "grad_norm": 0.68000110275399, - "learning_rate": 0.0001924125683243059, - "loss": 0.8261, - "step": 3310 - }, - { - "epoch": 0.2126363053239256, - "grad_norm": 0.8936655552945705, - "learning_rate": 0.0001923697348950591, - "loss": 0.7315, - "step": 3315 - }, - { - "epoch": 0.21295702373316228, - "grad_norm": 0.9370537429273521, - "learning_rate": 0.0001923267856968457, - "loss": 0.6054, - "step": 3320 - }, - { - "epoch": 0.21327774214239897, - "grad_norm": 1.5321045176308976, - "learning_rate": 0.00019228372078349486, - "loss": 0.6995, - "step": 3325 - }, - { - "epoch": 0.21359846055163567, - "grad_norm": 0.8164083897600656, - "learning_rate": 0.00019224054020898073, - "loss": 0.7217, - "step": 3330 - }, - { - "epoch": 0.21391917896087234, - "grad_norm": 0.9360751302506096, - "learning_rate": 0.00019219724402742247, - "loss": 0.7071, - "step": 3335 - }, - { - "epoch": 0.21423989737010904, - "grad_norm": 1.1474158049320227, - "learning_rate": 0.00019215383229308412, - "loss": 0.696, - "step": 3340 - }, - { - "epoch": 0.21456061577934574, - "grad_norm": 0.6286443687036616, - "learning_rate": 0.0001921103050603745, - "loss": 0.6582, - "step": 3345 - }, - { - "epoch": 0.21488133418858243, - "grad_norm": 0.930008180786893, - "learning_rate": 0.00019206666238384728, - "loss": 0.7267, - "step": 3350 - }, - { - "epoch": 0.2152020525978191, - "grad_norm": 0.8966235538817937, - "learning_rate": 0.0001920229043182007, - "loss": 0.7461, - "step": 3355 - }, - { - "epoch": 0.2155227710070558, - "grad_norm": 0.6075118442836386, - "learning_rate": 0.0001919790309182777, - "loss": 0.6218, - "step": 3360 - }, - { - "epoch": 0.2158434894162925, - "grad_norm": 1.120521483944113, - "learning_rate": 0.00019193504223906577, - "loss": 0.7854, - "step": 3365 - }, - { - "epoch": 0.2161642078255292, - "grad_norm": 0.7536443555714086, - "learning_rate": 0.00019189093833569686, - "loss": 0.6665, - "step": 3370 - }, - { - "epoch": 0.21648492623476587, - "grad_norm": 0.7306155955546904, - "learning_rate": 0.00019184671926344732, - "loss": 0.5562, - "step": 3375 - }, - { - "epoch": 0.21680564464400257, - "grad_norm": 1.4066089443224215, - "learning_rate": 0.00019180238507773788, - "loss": 0.7206, - "step": 3380 - }, - { - "epoch": 0.21712636305323926, - "grad_norm": 1.0420087314885336, - "learning_rate": 0.0001917579358341335, - "loss": 0.8488, - "step": 3385 - }, - { - "epoch": 0.21744708146247593, - "grad_norm": 1.24092779077047, - "learning_rate": 0.0001917133715883434, - "loss": 0.7737, - "step": 3390 - }, - { - "epoch": 0.21776779987171263, - "grad_norm": 1.2683256948043233, - "learning_rate": 0.00019166869239622085, - "loss": 0.5991, - "step": 3395 - }, - { - "epoch": 0.21808851828094933, - "grad_norm": 1.0154708506536307, - "learning_rate": 0.0001916238983137633, - "loss": 0.6921, - "step": 3400 - }, - { - "epoch": 0.21840923669018603, - "grad_norm": 1.250860867590444, - "learning_rate": 0.00019157898939711212, - "loss": 0.772, - "step": 3405 - }, - { - "epoch": 0.2187299550994227, - "grad_norm": 1.0205976247637063, - "learning_rate": 0.0001915339657025526, - "loss": 0.6262, - "step": 3410 - }, - { - "epoch": 0.2190506735086594, - "grad_norm": 0.6808470166264919, - "learning_rate": 0.0001914888272865139, - "loss": 0.5628, - "step": 3415 - }, - { - "epoch": 0.2193713919178961, - "grad_norm": 1.0460679318245396, - "learning_rate": 0.00019144357420556893, - "loss": 0.6497, - "step": 3420 - }, - { - "epoch": 0.2196921103271328, - "grad_norm": 0.8912439646989759, - "learning_rate": 0.00019139820651643442, - "loss": 0.5868, - "step": 3425 - }, - { - "epoch": 0.22001282873636946, - "grad_norm": 0.6690277429678054, - "learning_rate": 0.00019135272427597063, - "loss": 0.6833, - "step": 3430 - }, - { - "epoch": 0.22033354714560616, - "grad_norm": 1.0200781753500376, - "learning_rate": 0.00019130712754118138, - "loss": 0.6225, - "step": 3435 - }, - { - "epoch": 0.22065426555484285, - "grad_norm": 1.0186432727769665, - "learning_rate": 0.00019126141636921414, - "loss": 0.769, - "step": 3440 - }, - { - "epoch": 0.22097498396407952, - "grad_norm": 0.671761473616358, - "learning_rate": 0.0001912155908173596, - "loss": 0.6917, - "step": 3445 - }, - { - "epoch": 0.22129570237331622, - "grad_norm": 0.7493482108843831, - "learning_rate": 0.00019116965094305197, - "loss": 0.7762, - "step": 3450 - }, - { - "epoch": 0.22161642078255292, - "grad_norm": 0.9676529237022933, - "learning_rate": 0.00019112359680386863, - "loss": 0.6426, - "step": 3455 - }, - { - "epoch": 0.22193713919178962, - "grad_norm": 0.7117654744699794, - "learning_rate": 0.00019107742845753025, - "loss": 0.6968, - "step": 3460 - }, - { - "epoch": 0.2222578576010263, - "grad_norm": 1.0489562483489054, - "learning_rate": 0.0001910311459619006, - "loss": 0.7852, - "step": 3465 - }, - { - "epoch": 0.22257857601026299, - "grad_norm": 0.7103830582474117, - "learning_rate": 0.00019098474937498652, - "loss": 0.6496, - "step": 3470 - }, - { - "epoch": 0.22289929441949968, - "grad_norm": 1.1088261693908699, - "learning_rate": 0.00019093823875493784, - "loss": 0.7313, - "step": 3475 - }, - { - "epoch": 0.22322001282873638, - "grad_norm": 1.1659589438084368, - "learning_rate": 0.00019089161416004733, - "loss": 0.6526, - "step": 3480 - }, - { - "epoch": 0.22354073123797305, - "grad_norm": 0.7493230462026259, - "learning_rate": 0.0001908448756487506, - "loss": 0.6629, - "step": 3485 - }, - { - "epoch": 0.22386144964720975, - "grad_norm": 0.8650060759204274, - "learning_rate": 0.000190798023279626, - "loss": 0.7321, - "step": 3490 - }, - { - "epoch": 0.22418216805644645, - "grad_norm": 0.8002336983221607, - "learning_rate": 0.0001907510571113946, - "loss": 0.7816, - "step": 3495 - }, - { - "epoch": 0.22450288646568314, - "grad_norm": 0.6840069838552998, - "learning_rate": 0.00019070397720292014, - "loss": 0.6472, - "step": 3500 - }, - { - "epoch": 0.2248236048749198, - "grad_norm": 0.9253534124109082, - "learning_rate": 0.0001906567836132089, - "loss": 0.7952, - "step": 3505 - }, - { - "epoch": 0.2251443232841565, - "grad_norm": 0.8707427934510977, - "learning_rate": 0.0001906094764014095, - "loss": 0.6403, - "step": 3510 - }, - { - "epoch": 0.2254650416933932, - "grad_norm": 0.8952137846177877, - "learning_rate": 0.00019056205562681324, - "loss": 0.7713, - "step": 3515 - }, - { - "epoch": 0.22578576010262988, - "grad_norm": 1.2157321282590767, - "learning_rate": 0.00019051452134885346, - "loss": 0.7791, - "step": 3520 - }, - { - "epoch": 0.22610647851186658, - "grad_norm": 1.1942747630269164, - "learning_rate": 0.000190466873627106, - "loss": 0.7107, - "step": 3525 - }, - { - "epoch": 0.22642719692110327, - "grad_norm": 0.7534228887260359, - "learning_rate": 0.00019041911252128864, - "loss": 0.7748, - "step": 3530 - }, - { - "epoch": 0.22674791533033997, - "grad_norm": 0.7020738108193582, - "learning_rate": 0.0001903712380912615, - "loss": 0.641, - "step": 3535 - }, - { - "epoch": 0.22706863373957664, - "grad_norm": 0.8822584692031392, - "learning_rate": 0.0001903232503970266, - "loss": 0.7302, - "step": 3540 - }, - { - "epoch": 0.22738935214881334, - "grad_norm": 0.7669563154301963, - "learning_rate": 0.00019027514949872794, - "loss": 0.6305, - "step": 3545 - }, - { - "epoch": 0.22771007055805004, - "grad_norm": 0.75341665833547, - "learning_rate": 0.0001902269354566514, - "loss": 0.5966, - "step": 3550 - }, - { - "epoch": 0.22803078896728673, - "grad_norm": 1.3621102982113154, - "learning_rate": 0.00019017860833122466, - "loss": 0.7256, - "step": 3555 - }, - { - "epoch": 0.2283515073765234, - "grad_norm": 0.6413371506739955, - "learning_rate": 0.00019013016818301718, - "loss": 0.7576, - "step": 3560 - }, - { - "epoch": 0.2286722257857601, - "grad_norm": 0.9240762303756279, - "learning_rate": 0.00019008161507274004, - "loss": 0.6412, - "step": 3565 - }, - { - "epoch": 0.2289929441949968, - "grad_norm": 0.600216888507175, - "learning_rate": 0.0001900329490612458, - "loss": 0.6077, - "step": 3570 - }, - { - "epoch": 0.22931366260423347, - "grad_norm": 0.7764633127488129, - "learning_rate": 0.0001899841702095287, - "loss": 0.7296, - "step": 3575 - }, - { - "epoch": 0.22963438101347017, - "grad_norm": 0.8982484209272996, - "learning_rate": 0.00018993527857872437, - "loss": 0.7016, - "step": 3580 - }, - { - "epoch": 0.22995509942270687, - "grad_norm": 1.0720659350142319, - "learning_rate": 0.0001898862742301096, - "loss": 0.7538, - "step": 3585 - }, - { - "epoch": 0.23027581783194356, - "grad_norm": 1.1146855770453603, - "learning_rate": 0.00018983715722510267, - "loss": 0.7336, - "step": 3590 - }, - { - "epoch": 0.23059653624118023, - "grad_norm": 1.0183157286000422, - "learning_rate": 0.00018978792762526297, - "loss": 0.7608, - "step": 3595 - }, - { - "epoch": 0.23091725465041693, - "grad_norm": 0.5987067875621542, - "learning_rate": 0.000189738585492291, - "loss": 0.7482, - "step": 3600 - }, - { - "epoch": 0.23123797305965363, - "grad_norm": 1.2051854914953493, - "learning_rate": 0.0001896891308880283, - "loss": 0.6866, - "step": 3605 - }, - { - "epoch": 0.23155869146889033, - "grad_norm": 0.6469997389423526, - "learning_rate": 0.00018963956387445743, - "loss": 0.5533, - "step": 3610 - }, - { - "epoch": 0.231879409878127, - "grad_norm": 0.751435050187464, - "learning_rate": 0.00018958988451370172, - "loss": 0.5345, - "step": 3615 - }, - { - "epoch": 0.2322001282873637, - "grad_norm": 0.9296699512717883, - "learning_rate": 0.00018954009286802545, - "loss": 0.6395, - "step": 3620 - }, - { - "epoch": 0.2325208466966004, - "grad_norm": 0.8523320100136826, - "learning_rate": 0.0001894901889998335, - "loss": 0.6699, - "step": 3625 - }, - { - "epoch": 0.23284156510583706, - "grad_norm": 0.8927205659717501, - "learning_rate": 0.0001894401729716715, - "loss": 0.7016, - "step": 3630 - }, - { - "epoch": 0.23316228351507376, - "grad_norm": 0.9773519130062428, - "learning_rate": 0.00018939004484622556, - "loss": 0.5938, - "step": 3635 - }, - { - "epoch": 0.23348300192431046, - "grad_norm": 1.205672119851859, - "learning_rate": 0.00018933980468632236, - "loss": 0.6659, - "step": 3640 - }, - { - "epoch": 0.23380372033354715, - "grad_norm": 0.7579640404532227, - "learning_rate": 0.00018928945255492898, - "loss": 0.6189, - "step": 3645 - }, - { - "epoch": 0.23412443874278382, - "grad_norm": 0.7167559954703847, - "learning_rate": 0.0001892389885151528, - "loss": 0.7174, - "step": 3650 - }, - { - "epoch": 0.23444515715202052, - "grad_norm": 0.9211676236510546, - "learning_rate": 0.0001891884126302415, - "loss": 0.7194, - "step": 3655 - }, - { - "epoch": 0.23476587556125722, - "grad_norm": 1.0264289808335763, - "learning_rate": 0.00018913772496358293, - "loss": 0.7518, - "step": 3660 - }, - { - "epoch": 0.23508659397049392, - "grad_norm": 0.7037785727516465, - "learning_rate": 0.000189086925578705, - "loss": 0.6463, - "step": 3665 - }, - { - "epoch": 0.2354073123797306, - "grad_norm": 0.7939519982595736, - "learning_rate": 0.0001890360145392757, - "loss": 0.6679, - "step": 3670 - }, - { - "epoch": 0.23572803078896729, - "grad_norm": 0.9346634485226615, - "learning_rate": 0.00018898499190910285, - "loss": 0.6707, - "step": 3675 - }, - { - "epoch": 0.23604874919820398, - "grad_norm": 0.9205144038862676, - "learning_rate": 0.00018893385775213428, - "loss": 0.5932, - "step": 3680 - }, - { - "epoch": 0.23636946760744068, - "grad_norm": 0.7662986014450179, - "learning_rate": 0.00018888261213245751, - "loss": 0.626, - "step": 3685 - }, - { - "epoch": 0.23669018601667735, - "grad_norm": 0.9540864146877855, - "learning_rate": 0.00018883125511429976, - "loss": 0.6775, - "step": 3690 - }, - { - "epoch": 0.23701090442591405, - "grad_norm": 0.8236472390358622, - "learning_rate": 0.0001887797867620279, - "loss": 0.5783, - "step": 3695 - }, - { - "epoch": 0.23733162283515075, - "grad_norm": 1.1046319576589374, - "learning_rate": 0.00018872820714014828, - "loss": 0.7178, - "step": 3700 - }, - { - "epoch": 0.23765234124438742, - "grad_norm": 0.8687058181792315, - "learning_rate": 0.0001886765163133068, - "loss": 0.7188, - "step": 3705 - }, - { - "epoch": 0.2379730596536241, - "grad_norm": 0.8074055463421766, - "learning_rate": 0.0001886247143462886, - "loss": 0.6839, - "step": 3710 - }, - { - "epoch": 0.2382937780628608, - "grad_norm": 0.9477091526553252, - "learning_rate": 0.0001885728013040183, - "loss": 0.694, - "step": 3715 - }, - { - "epoch": 0.2386144964720975, - "grad_norm": 1.4070444194213776, - "learning_rate": 0.00018852077725155955, - "loss": 0.6443, - "step": 3720 - }, - { - "epoch": 0.23893521488133418, - "grad_norm": 0.7885481772614231, - "learning_rate": 0.00018846864225411522, - "loss": 0.6975, - "step": 3725 - }, - { - "epoch": 0.23925593329057088, - "grad_norm": 1.416662073982706, - "learning_rate": 0.0001884163963770272, - "loss": 0.5101, - "step": 3730 - }, - { - "epoch": 0.23957665169980757, - "grad_norm": 1.1458969994696415, - "learning_rate": 0.00018836403968577642, - "loss": 0.6615, - "step": 3735 - }, - { - "epoch": 0.23989737010904427, - "grad_norm": 0.8353107592687541, - "learning_rate": 0.00018831157224598265, - "loss": 0.6361, - "step": 3740 - }, - { - "epoch": 0.24021808851828094, - "grad_norm": 0.9588837283118316, - "learning_rate": 0.0001882589941234044, - "loss": 0.6013, - "step": 3745 - }, - { - "epoch": 0.24053880692751764, - "grad_norm": 0.9378372320194371, - "learning_rate": 0.00018820630538393896, - "loss": 0.6638, - "step": 3750 - }, - { - "epoch": 0.24085952533675434, - "grad_norm": 0.657630819098, - "learning_rate": 0.0001881535060936223, - "loss": 0.6291, - "step": 3755 - }, - { - "epoch": 0.241180243745991, - "grad_norm": 0.8483718480641205, - "learning_rate": 0.00018810059631862885, - "loss": 0.7489, - "step": 3760 - }, - { - "epoch": 0.2415009621552277, - "grad_norm": 0.6502718844446955, - "learning_rate": 0.0001880475761252716, - "loss": 0.7414, - "step": 3765 - }, - { - "epoch": 0.2418216805644644, - "grad_norm": 1.1168778404379636, - "learning_rate": 0.00018799444558000188, - "loss": 0.5148, - "step": 3770 - }, - { - "epoch": 0.2421423989737011, - "grad_norm": 0.7913864245267141, - "learning_rate": 0.00018794120474940936, - "loss": 0.7854, - "step": 3775 - }, - { - "epoch": 0.24246311738293777, - "grad_norm": 0.6448828952136001, - "learning_rate": 0.00018788785370022187, - "loss": 0.7078, - "step": 3780 - }, - { - "epoch": 0.24278383579217447, - "grad_norm": 1.5060141096885609, - "learning_rate": 0.00018783439249930544, - "loss": 0.6149, - "step": 3785 - }, - { - "epoch": 0.24310455420141117, - "grad_norm": 1.1449759900992198, - "learning_rate": 0.00018778082121366415, - "loss": 0.6848, - "step": 3790 - }, - { - "epoch": 0.24342527261064786, - "grad_norm": 0.8978384550293506, - "learning_rate": 0.00018772713991044006, - "loss": 0.5786, - "step": 3795 - }, - { - "epoch": 0.24374599101988453, - "grad_norm": 1.0307173194583823, - "learning_rate": 0.0001876733486569131, - "loss": 0.6089, - "step": 3800 - }, - { - "epoch": 0.24406670942912123, - "grad_norm": 1.0460496173819018, - "learning_rate": 0.00018761944752050092, - "loss": 0.7205, - "step": 3805 - }, - { - "epoch": 0.24438742783835793, - "grad_norm": 0.7905784500183457, - "learning_rate": 0.00018756543656875903, - "loss": 0.6866, - "step": 3810 - }, - { - "epoch": 0.2447081462475946, - "grad_norm": 0.8146037687112702, - "learning_rate": 0.0001875113158693805, - "loss": 0.6722, - "step": 3815 - }, - { - "epoch": 0.2450288646568313, - "grad_norm": 0.6700527883378358, - "learning_rate": 0.00018745708549019598, - "loss": 0.69, - "step": 3820 - }, - { - "epoch": 0.245349583066068, - "grad_norm": 0.86059539710882, - "learning_rate": 0.00018740274549917355, - "loss": 0.6951, - "step": 3825 - }, - { - "epoch": 0.2456703014753047, - "grad_norm": 0.754486021920581, - "learning_rate": 0.00018734829596441869, - "loss": 0.669, - "step": 3830 - }, - { - "epoch": 0.24599101988454136, - "grad_norm": 1.2671234138000913, - "learning_rate": 0.00018729373695417411, - "loss": 0.53, - "step": 3835 - }, - { - "epoch": 0.24631173829377806, - "grad_norm": 0.6932982987761634, - "learning_rate": 0.0001872390685368199, - "loss": 0.6588, - "step": 3840 - }, - { - "epoch": 0.24663245670301476, - "grad_norm": 0.8973942648351731, - "learning_rate": 0.00018718429078087306, - "loss": 0.759, - "step": 3845 - }, - { - "epoch": 0.24695317511225146, - "grad_norm": 0.8232879633687452, - "learning_rate": 0.00018712940375498777, - "loss": 0.7228, - "step": 3850 - }, - { - "epoch": 0.24727389352148813, - "grad_norm": 0.6326649992249508, - "learning_rate": 0.0001870744075279551, - "loss": 0.7392, - "step": 3855 - }, - { - "epoch": 0.24759461193072482, - "grad_norm": 1.097141467166474, - "learning_rate": 0.000187019302168703, - "loss": 0.6787, - "step": 3860 - }, - { - "epoch": 0.24791533033996152, - "grad_norm": 0.3009107744843191, - "learning_rate": 0.00018696408774629623, - "loss": 0.5101, - "step": 3865 - }, - { - "epoch": 0.2482360487491982, - "grad_norm": 0.8763665765416497, - "learning_rate": 0.00018690876432993616, - "loss": 0.6693, - "step": 3870 - }, - { - "epoch": 0.2485567671584349, - "grad_norm": 0.8358957515633696, - "learning_rate": 0.00018685333198896085, - "loss": 0.4624, - "step": 3875 - }, - { - "epoch": 0.24887748556767159, - "grad_norm": 0.7954157351888587, - "learning_rate": 0.00018679779079284478, - "loss": 0.6448, - "step": 3880 - }, - { - "epoch": 0.24919820397690828, - "grad_norm": 0.8015671945298257, - "learning_rate": 0.00018674214081119899, - "loss": 0.7378, - "step": 3885 - }, - { - "epoch": 0.24951892238614495, - "grad_norm": 0.4176253877935304, - "learning_rate": 0.00018668638211377075, - "loss": 0.6243, - "step": 3890 - }, - { - "epoch": 0.24983964079538165, - "grad_norm": 0.9442754652275936, - "learning_rate": 0.00018663051477044363, - "loss": 0.7179, - "step": 3895 - }, - { - "epoch": 0.2501603592046183, - "grad_norm": 0.4823245844586911, - "learning_rate": 0.00018657453885123743, - "loss": 0.6911, - "step": 3900 - }, - { - "epoch": 0.250481077613855, - "grad_norm": 1.2379921804802545, - "learning_rate": 0.00018651845442630788, - "loss": 0.7287, - "step": 3905 - }, - { - "epoch": 0.2508017960230917, - "grad_norm": 0.8025900155844875, - "learning_rate": 0.00018646226156594683, - "loss": 0.6996, - "step": 3910 - }, - { - "epoch": 0.2511225144323284, - "grad_norm": 0.7107570481507937, - "learning_rate": 0.00018640596034058202, - "loss": 0.6547, - "step": 3915 - }, - { - "epoch": 0.2514432328415651, - "grad_norm": 1.0641358272949475, - "learning_rate": 0.00018634955082077694, - "loss": 0.6644, - "step": 3920 - }, - { - "epoch": 0.2517639512508018, - "grad_norm": 0.47480734009901776, - "learning_rate": 0.00018629303307723087, - "loss": 0.573, - "step": 3925 - }, - { - "epoch": 0.2520846696600385, - "grad_norm": 0.793188561410365, - "learning_rate": 0.0001862364071807787, - "loss": 0.5214, - "step": 3930 - }, - { - "epoch": 0.25240538806927515, - "grad_norm": 1.0592935580458442, - "learning_rate": 0.00018617967320239088, - "loss": 0.7271, - "step": 3935 - }, - { - "epoch": 0.25272610647851185, - "grad_norm": 1.2256726599433683, - "learning_rate": 0.00018612283121317334, - "loss": 0.6422, - "step": 3940 - }, - { - "epoch": 0.25304682488774854, - "grad_norm": 0.7519903384129473, - "learning_rate": 0.00018606588128436733, - "loss": 0.5867, - "step": 3945 - }, - { - "epoch": 0.25336754329698524, - "grad_norm": 0.7245403184900441, - "learning_rate": 0.00018600882348734942, - "loss": 0.595, - "step": 3950 - }, - { - "epoch": 0.25368826170622194, - "grad_norm": 0.8118238034713691, - "learning_rate": 0.0001859516578936314, - "loss": 0.6789, - "step": 3955 - }, - { - "epoch": 0.25400898011545864, - "grad_norm": 0.94671989401086, - "learning_rate": 0.0001858943845748601, - "loss": 0.5563, - "step": 3960 - }, - { - "epoch": 0.25432969852469534, - "grad_norm": 1.2366250568429358, - "learning_rate": 0.00018583700360281743, - "loss": 0.7508, - "step": 3965 - }, - { - "epoch": 0.25465041693393203, - "grad_norm": 0.79253106009907, - "learning_rate": 0.00018577951504942014, - "loss": 0.8067, - "step": 3970 - }, - { - "epoch": 0.2549711353431687, - "grad_norm": 0.8702530726486416, - "learning_rate": 0.0001857219189867199, - "loss": 0.617, - "step": 3975 - }, - { - "epoch": 0.2552918537524054, - "grad_norm": 1.0941049074741396, - "learning_rate": 0.0001856642154869031, - "loss": 0.6722, - "step": 3980 - }, - { - "epoch": 0.25561257216164207, - "grad_norm": 0.8439431895631772, - "learning_rate": 0.00018560640462229072, - "loss": 0.4939, - "step": 3985 - }, - { - "epoch": 0.25593329057087877, - "grad_norm": 0.6351905484581176, - "learning_rate": 0.00018554848646533842, - "loss": 0.6447, - "step": 3990 - }, - { - "epoch": 0.25625400898011547, - "grad_norm": 0.5405523691592523, - "learning_rate": 0.00018549046108863623, - "loss": 0.619, - "step": 3995 - }, - { - "epoch": 0.25657472738935216, - "grad_norm": 0.9663208760661458, - "learning_rate": 0.00018543232856490857, - "loss": 0.7077, - "step": 4000 - }, - { - "epoch": 0.25689544579858886, - "grad_norm": 1.1847646315539586, - "learning_rate": 0.00018537408896701426, - "loss": 0.645, - "step": 4005 - }, - { - "epoch": 0.2572161642078255, - "grad_norm": 0.9615403982388305, - "learning_rate": 0.00018531574236794614, - "loss": 0.6811, - "step": 4010 - }, - { - "epoch": 0.2575368826170622, - "grad_norm": 0.8358212875135942, - "learning_rate": 0.0001852572888408313, - "loss": 0.7614, - "step": 4015 - }, - { - "epoch": 0.2578576010262989, - "grad_norm": 0.654849517944886, - "learning_rate": 0.00018519872845893084, - "loss": 0.7217, - "step": 4020 - }, - { - "epoch": 0.2581783194355356, - "grad_norm": 1.2575079996892056, - "learning_rate": 0.00018514006129563966, - "loss": 0.6607, - "step": 4025 - }, - { - "epoch": 0.2584990378447723, - "grad_norm": 0.9922068320402926, - "learning_rate": 0.00018508128742448664, - "loss": 0.837, - "step": 4030 - }, - { - "epoch": 0.258819756254009, - "grad_norm": 0.6769732353504583, - "learning_rate": 0.00018502240691913423, - "loss": 0.5391, - "step": 4035 - }, - { - "epoch": 0.2591404746632457, - "grad_norm": 1.0085400425349142, - "learning_rate": 0.00018496341985337872, - "loss": 0.6348, - "step": 4040 - }, - { - "epoch": 0.2594611930724824, - "grad_norm": 1.0848700957447277, - "learning_rate": 0.00018490432630114987, - "loss": 0.6778, - "step": 4045 - }, - { - "epoch": 0.25978191148171903, - "grad_norm": 2.0271957707532953, - "learning_rate": 0.00018484512633651083, - "loss": 0.654, - "step": 4050 - }, - { - "epoch": 0.2601026298909557, - "grad_norm": 0.7805695373329654, - "learning_rate": 0.00018478582003365822, - "loss": 0.7096, - "step": 4055 - }, - { - "epoch": 0.2604233483001924, - "grad_norm": 0.9870035129297559, - "learning_rate": 0.0001847264074669219, - "loss": 0.6384, - "step": 4060 - }, - { - "epoch": 0.2607440667094291, - "grad_norm": 1.4231275295206969, - "learning_rate": 0.00018466688871076492, - "loss": 0.7516, - "step": 4065 - }, - { - "epoch": 0.2610647851186658, - "grad_norm": 0.9526984436593213, - "learning_rate": 0.00018460726383978337, - "loss": 0.7593, - "step": 4070 - }, - { - "epoch": 0.2613855035279025, - "grad_norm": 0.8092373561884175, - "learning_rate": 0.00018454753292870645, - "loss": 0.7056, - "step": 4075 - }, - { - "epoch": 0.2617062219371392, - "grad_norm": 1.0372403017182314, - "learning_rate": 0.0001844876960523961, - "loss": 0.7301, - "step": 4080 - }, - { - "epoch": 0.26202694034637586, - "grad_norm": 1.0864230414581424, - "learning_rate": 0.0001844277532858472, - "loss": 0.7108, - "step": 4085 - }, - { - "epoch": 0.26234765875561256, - "grad_norm": 1.1180610427980169, - "learning_rate": 0.00018436770470418734, - "loss": 0.6945, - "step": 4090 - }, - { - "epoch": 0.26266837716484925, - "grad_norm": 0.7213205274182185, - "learning_rate": 0.00018430755038267664, - "loss": 0.5532, - "step": 4095 - }, - { - "epoch": 0.26298909557408595, - "grad_norm": 1.1163686122257008, - "learning_rate": 0.00018424729039670786, - "loss": 0.6516, - "step": 4100 - }, - { - "epoch": 0.26330981398332265, - "grad_norm": 1.2583036183921432, - "learning_rate": 0.00018418692482180605, - "loss": 0.6414, - "step": 4105 - }, - { - "epoch": 0.26363053239255935, - "grad_norm": 0.9930140372439703, - "learning_rate": 0.0001841264537336287, - "loss": 0.6207, - "step": 4110 - }, - { - "epoch": 0.26395125080179604, - "grad_norm": 1.0089622154428168, - "learning_rate": 0.00018406587720796555, - "loss": 0.584, - "step": 4115 - }, - { - "epoch": 0.2642719692110327, - "grad_norm": 0.7458841041229098, - "learning_rate": 0.00018400519532073845, - "loss": 0.5883, - "step": 4120 - }, - { - "epoch": 0.2645926876202694, - "grad_norm": 0.8089823917563255, - "learning_rate": 0.0001839444081480013, - "loss": 0.7034, - "step": 4125 - }, - { - "epoch": 0.2649134060295061, - "grad_norm": 0.6692062310802624, - "learning_rate": 0.00018388351576594, - "loss": 0.6344, - "step": 4130 - }, - { - "epoch": 0.2652341244387428, - "grad_norm": 1.1933403776576017, - "learning_rate": 0.0001838225182508722, - "loss": 0.6661, - "step": 4135 - }, - { - "epoch": 0.2655548428479795, - "grad_norm": 0.8440572180162611, - "learning_rate": 0.00018376141567924746, - "loss": 0.748, - "step": 4140 - }, - { - "epoch": 0.2658755612572162, - "grad_norm": 0.8186841087339073, - "learning_rate": 0.0001837002081276469, - "loss": 0.7713, - "step": 4145 - }, - { - "epoch": 0.2661962796664529, - "grad_norm": 1.0666433490645642, - "learning_rate": 0.0001836388956727833, - "loss": 0.8609, - "step": 4150 - }, - { - "epoch": 0.26651699807568957, - "grad_norm": 1.1355241254608384, - "learning_rate": 0.00018357747839150082, - "loss": 0.6469, - "step": 4155 - }, - { - "epoch": 0.2668377164849262, - "grad_norm": 0.7464964673319473, - "learning_rate": 0.00018351595636077509, - "loss": 0.5979, - "step": 4160 - }, - { - "epoch": 0.2671584348941629, - "grad_norm": 0.8983502422541593, - "learning_rate": 0.00018345432965771296, - "loss": 0.6956, - "step": 4165 - }, - { - "epoch": 0.2674791533033996, - "grad_norm": 1.0667530685360391, - "learning_rate": 0.00018339259835955252, - "loss": 0.613, - "step": 4170 - }, - { - "epoch": 0.2677998717126363, - "grad_norm": 0.9132017699113576, - "learning_rate": 0.00018333076254366292, - "loss": 0.7377, - "step": 4175 - }, - { - "epoch": 0.268120590121873, - "grad_norm": 0.820877622590415, - "learning_rate": 0.0001832688222875443, - "loss": 0.6287, - "step": 4180 - }, - { - "epoch": 0.2684413085311097, - "grad_norm": 1.118619920969021, - "learning_rate": 0.00018320677766882777, - "loss": 0.6384, - "step": 4185 - }, - { - "epoch": 0.2687620269403464, - "grad_norm": 1.4366554572404993, - "learning_rate": 0.00018314462876527508, - "loss": 0.6833, - "step": 4190 - }, - { - "epoch": 0.26908274534958304, - "grad_norm": 1.0835964639148083, - "learning_rate": 0.00018308237565477887, - "loss": 0.5727, - "step": 4195 - }, - { - "epoch": 0.26940346375881974, - "grad_norm": 0.9256686315486947, - "learning_rate": 0.00018302001841536222, - "loss": 0.6766, - "step": 4200 - }, - { - "epoch": 0.26972418216805644, - "grad_norm": 0.9133924374197757, - "learning_rate": 0.00018295755712517887, - "loss": 0.6114, - "step": 4205 - }, - { - "epoch": 0.27004490057729313, - "grad_norm": 0.9886601065235708, - "learning_rate": 0.00018289499186251282, - "loss": 0.6487, - "step": 4210 - }, - { - "epoch": 0.27036561898652983, - "grad_norm": 0.7921503565458989, - "learning_rate": 0.00018283232270577854, - "loss": 0.5979, - "step": 4215 - }, - { - "epoch": 0.27068633739576653, - "grad_norm": 0.6150099468882971, - "learning_rate": 0.00018276954973352053, - "loss": 0.6981, - "step": 4220 - }, - { - "epoch": 0.2710070558050032, - "grad_norm": 1.0834800425960802, - "learning_rate": 0.00018270667302441355, - "loss": 0.5754, - "step": 4225 - }, - { - "epoch": 0.27132777421423987, - "grad_norm": 1.6569395813805736, - "learning_rate": 0.00018264369265726232, - "loss": 0.6754, - "step": 4230 - }, - { - "epoch": 0.27164849262347657, - "grad_norm": 1.1904706994873762, - "learning_rate": 0.0001825806087110015, - "loss": 0.6955, - "step": 4235 - }, - { - "epoch": 0.27196921103271327, - "grad_norm": 0.9036845887010689, - "learning_rate": 0.00018251742126469553, - "loss": 0.6245, - "step": 4240 - }, - { - "epoch": 0.27228992944194996, - "grad_norm": 1.2154289806047023, - "learning_rate": 0.00018245413039753858, - "loss": 0.6966, - "step": 4245 - }, - { - "epoch": 0.27261064785118666, - "grad_norm": 0.7781670764658554, - "learning_rate": 0.00018239073618885447, - "loss": 0.5014, - "step": 4250 - }, - { - "epoch": 0.27293136626042336, - "grad_norm": 0.9312674308580604, - "learning_rate": 0.00018232723871809654, - "loss": 0.7177, - "step": 4255 - }, - { - "epoch": 0.27325208466966006, - "grad_norm": 0.7997579086131462, - "learning_rate": 0.00018226363806484749, - "loss": 0.6622, - "step": 4260 - }, - { - "epoch": 0.27357280307889675, - "grad_norm": 1.1414064891921076, - "learning_rate": 0.00018219993430881935, - "loss": 0.7326, - "step": 4265 - }, - { - "epoch": 0.2738935214881334, - "grad_norm": 0.8488220516302005, - "learning_rate": 0.00018213612752985346, - "loss": 0.6111, - "step": 4270 - }, - { - "epoch": 0.2742142398973701, - "grad_norm": 0.6785943182404776, - "learning_rate": 0.00018207221780792022, - "loss": 0.568, - "step": 4275 - }, - { - "epoch": 0.2745349583066068, - "grad_norm": 0.7407135493281501, - "learning_rate": 0.00018200820522311907, - "loss": 0.9428, - "step": 4280 - }, - { - "epoch": 0.2748556767158435, - "grad_norm": 0.7785838981084623, - "learning_rate": 0.00018194408985567826, - "loss": 0.6602, - "step": 4285 - }, - { - "epoch": 0.2751763951250802, - "grad_norm": 1.3274741440702664, - "learning_rate": 0.00018187987178595506, - "loss": 0.6326, - "step": 4290 - }, - { - "epoch": 0.2754971135343169, - "grad_norm": 0.7698326162883183, - "learning_rate": 0.00018181555109443527, - "loss": 0.7828, - "step": 4295 - }, - { - "epoch": 0.2758178319435536, - "grad_norm": 0.9874438661020553, - "learning_rate": 0.00018175112786173345, - "loss": 0.6177, - "step": 4300 - }, - { - "epoch": 0.2761385503527902, - "grad_norm": 1.2983806783457539, - "learning_rate": 0.0001816866021685926, - "loss": 0.5931, - "step": 4305 - }, - { - "epoch": 0.2764592687620269, - "grad_norm": 0.6650133276949847, - "learning_rate": 0.00018162197409588414, - "loss": 0.6065, - "step": 4310 - }, - { - "epoch": 0.2767799871712636, - "grad_norm": 0.6615532414642794, - "learning_rate": 0.0001815572437246078, - "loss": 0.6777, - "step": 4315 - }, - { - "epoch": 0.2771007055805003, - "grad_norm": 0.9856674878658384, - "learning_rate": 0.00018149241113589158, - "loss": 0.7992, - "step": 4320 - }, - { - "epoch": 0.277421423989737, - "grad_norm": 0.9736624117716728, - "learning_rate": 0.00018142747641099156, - "loss": 0.6433, - "step": 4325 - }, - { - "epoch": 0.2777421423989737, - "grad_norm": 0.6411826659070557, - "learning_rate": 0.00018136243963129176, - "loss": 0.6934, - "step": 4330 - }, - { - "epoch": 0.2780628608082104, - "grad_norm": 1.1535749419623087, - "learning_rate": 0.00018129730087830423, - "loss": 0.6763, - "step": 4335 - }, - { - "epoch": 0.2783835792174471, - "grad_norm": 0.9545043501616219, - "learning_rate": 0.00018123206023366875, - "loss": 0.6913, - "step": 4340 - }, - { - "epoch": 0.27870429762668375, - "grad_norm": 0.8726709507710128, - "learning_rate": 0.00018116671777915279, - "loss": 0.6719, - "step": 4345 - }, - { - "epoch": 0.27902501603592045, - "grad_norm": 0.8365717106126314, - "learning_rate": 0.00018110127359665144, - "loss": 0.8124, - "step": 4350 - }, - { - "epoch": 0.27934573444515715, - "grad_norm": 1.2549482014888076, - "learning_rate": 0.00018103572776818734, - "loss": 0.6818, - "step": 4355 - }, - { - "epoch": 0.27966645285439384, - "grad_norm": 1.0842835676700455, - "learning_rate": 0.00018097008037591046, - "loss": 0.6671, - "step": 4360 - }, - { - "epoch": 0.27998717126363054, - "grad_norm": 0.9380406537541407, - "learning_rate": 0.00018090433150209809, - "loss": 0.6949, - "step": 4365 - }, - { - "epoch": 0.28030788967286724, - "grad_norm": 1.150794578223368, - "learning_rate": 0.00018083848122915468, - "loss": 0.7515, - "step": 4370 - }, - { - "epoch": 0.28062860808210394, - "grad_norm": 0.8083227750174746, - "learning_rate": 0.0001807725296396118, - "loss": 0.7616, - "step": 4375 - }, - { - "epoch": 0.2809493264913406, - "grad_norm": 0.7534176713677331, - "learning_rate": 0.000180706476816128, - "loss": 0.7793, - "step": 4380 - }, - { - "epoch": 0.2812700449005773, - "grad_norm": 0.8339195487244033, - "learning_rate": 0.00018064032284148868, - "loss": 0.6498, - "step": 4385 - }, - { - "epoch": 0.281590763309814, - "grad_norm": 1.0737472499663367, - "learning_rate": 0.00018057406779860603, - "loss": 0.717, - "step": 4390 - }, - { - "epoch": 0.28191148171905067, - "grad_norm": 0.9978477560799941, - "learning_rate": 0.00018050771177051896, - "loss": 0.5892, - "step": 4395 - }, - { - "epoch": 0.28223220012828737, - "grad_norm": 1.3027101386742324, - "learning_rate": 0.00018044125484039284, - "loss": 0.7084, - "step": 4400 - }, - { - "epoch": 0.28255291853752407, - "grad_norm": 0.930029771124351, - "learning_rate": 0.0001803746970915196, - "loss": 0.6916, - "step": 4405 - }, - { - "epoch": 0.28287363694676076, - "grad_norm": 0.7778850969886842, - "learning_rate": 0.00018030803860731744, - "loss": 0.7685, - "step": 4410 - }, - { - "epoch": 0.2831943553559974, - "grad_norm": 0.7650986542927773, - "learning_rate": 0.00018024127947133096, - "loss": 0.6537, - "step": 4415 - }, - { - "epoch": 0.2835150737652341, - "grad_norm": 1.5408988991120984, - "learning_rate": 0.00018017441976723073, - "loss": 0.7775, - "step": 4420 - }, - { - "epoch": 0.2838357921744708, - "grad_norm": 1.2912216339714508, - "learning_rate": 0.0001801074595788135, - "loss": 0.6968, - "step": 4425 - }, - { - "epoch": 0.2841565105837075, - "grad_norm": 1.0528277674684878, - "learning_rate": 0.00018004039899000186, - "loss": 0.6352, - "step": 4430 - }, - { - "epoch": 0.2844772289929442, - "grad_norm": 0.9968577641995723, - "learning_rate": 0.00017997323808484434, - "loss": 0.681, - "step": 4435 - }, - { - "epoch": 0.2847979474021809, - "grad_norm": 0.7048566927661232, - "learning_rate": 0.0001799059769475151, - "loss": 0.589, - "step": 4440 - }, - { - "epoch": 0.2851186658114176, - "grad_norm": 1.2752536855080614, - "learning_rate": 0.00017983861566231397, - "loss": 0.6021, - "step": 4445 - }, - { - "epoch": 0.2854393842206543, - "grad_norm": 0.6838772733375945, - "learning_rate": 0.0001797711543136663, - "loss": 0.62, - "step": 4450 - }, - { - "epoch": 0.28576010262989093, - "grad_norm": 1.0992940781905054, - "learning_rate": 0.00017970359298612282, - "loss": 0.7695, - "step": 4455 - }, - { - "epoch": 0.28608082103912763, - "grad_norm": 0.9891320713998334, - "learning_rate": 0.00017963593176435964, - "loss": 0.7417, - "step": 4460 - }, - { - "epoch": 0.28640153944836433, - "grad_norm": 1.0219509493165506, - "learning_rate": 0.00017956817073317793, - "loss": 0.8078, - "step": 4465 - }, - { - "epoch": 0.286722257857601, - "grad_norm": 0.601838514745307, - "learning_rate": 0.00017950030997750414, - "loss": 0.6521, - "step": 4470 - }, - { - "epoch": 0.2870429762668377, - "grad_norm": 0.6658616403524804, - "learning_rate": 0.00017943234958238952, - "loss": 0.4757, - "step": 4475 - }, - { - "epoch": 0.2873636946760744, - "grad_norm": 1.007316511383742, - "learning_rate": 0.00017936428963301036, - "loss": 0.7311, - "step": 4480 - }, - { - "epoch": 0.2876844130853111, - "grad_norm": 1.1189936485732135, - "learning_rate": 0.00017929613021466765, - "loss": 0.6303, - "step": 4485 - }, - { - "epoch": 0.28800513149454776, - "grad_norm": 0.7720709103171642, - "learning_rate": 0.000179227871412787, - "loss": 0.5517, - "step": 4490 - }, - { - "epoch": 0.28832584990378446, - "grad_norm": 0.840259961080622, - "learning_rate": 0.00017915951331291864, - "loss": 0.7003, - "step": 4495 - }, - { - "epoch": 0.28864656831302116, - "grad_norm": 0.7950998217641071, - "learning_rate": 0.00017909105600073726, - "loss": 0.6693, - "step": 4500 - }, - { - "epoch": 0.28896728672225785, - "grad_norm": 0.8828219239731676, - "learning_rate": 0.00017902249956204183, - "loss": 0.613, - "step": 4505 - }, - { - "epoch": 0.28928800513149455, - "grad_norm": 0.8050366826668545, - "learning_rate": 0.0001789538440827557, - "loss": 0.5657, - "step": 4510 - }, - { - "epoch": 0.28960872354073125, - "grad_norm": 1.0967164706749888, - "learning_rate": 0.00017888508964892616, - "loss": 0.8128, - "step": 4515 - }, - { - "epoch": 0.28992944194996795, - "grad_norm": 0.9150715640614145, - "learning_rate": 0.00017881623634672465, - "loss": 0.7572, - "step": 4520 - }, - { - "epoch": 0.29025016035920465, - "grad_norm": 1.2602671775870735, - "learning_rate": 0.00017874728426244647, - "loss": 0.6905, - "step": 4525 - }, - { - "epoch": 0.2905708787684413, - "grad_norm": 0.9346668957570068, - "learning_rate": 0.00017867823348251076, - "loss": 0.7051, - "step": 4530 - }, - { - "epoch": 0.290891597177678, - "grad_norm": 0.7910849436025686, - "learning_rate": 0.00017860908409346034, - "loss": 0.709, - "step": 4535 - }, - { - "epoch": 0.2912123155869147, - "grad_norm": 0.8218374279342303, - "learning_rate": 0.0001785398361819616, - "loss": 0.5839, - "step": 4540 - }, - { - "epoch": 0.2915330339961514, - "grad_norm": 0.8511332345341893, - "learning_rate": 0.0001784704898348045, - "loss": 0.7218, - "step": 4545 - }, - { - "epoch": 0.2918537524053881, - "grad_norm": 1.2396495867604176, - "learning_rate": 0.0001784010451389022, - "loss": 0.5707, - "step": 4550 - }, - { - "epoch": 0.2921744708146248, - "grad_norm": 0.5453795713818735, - "learning_rate": 0.00017833150218129129, - "loss": 0.7248, - "step": 4555 - }, - { - "epoch": 0.2924951892238615, - "grad_norm": 0.8544441259057197, - "learning_rate": 0.00017826186104913142, - "loss": 0.6706, - "step": 4560 - }, - { - "epoch": 0.2928159076330981, - "grad_norm": 0.7078874543955929, - "learning_rate": 0.00017819212182970535, - "loss": 0.6732, - "step": 4565 - }, - { - "epoch": 0.2931366260423348, - "grad_norm": 1.1258864806353122, - "learning_rate": 0.0001781222846104187, - "loss": 0.696, - "step": 4570 - }, - { - "epoch": 0.2934573444515715, - "grad_norm": 0.8952983146425741, - "learning_rate": 0.00017805234947879993, - "loss": 0.6778, - "step": 4575 - }, - { - "epoch": 0.2937780628608082, - "grad_norm": 1.078013753440664, - "learning_rate": 0.0001779823165225003, - "loss": 0.6494, - "step": 4580 - }, - { - "epoch": 0.2940987812700449, - "grad_norm": 1.2457998074637708, - "learning_rate": 0.0001779121858292936, - "loss": 0.6356, - "step": 4585 - }, - { - "epoch": 0.2944194996792816, - "grad_norm": 0.9452414867290724, - "learning_rate": 0.0001778419574870761, - "loss": 0.7049, - "step": 4590 - }, - { - "epoch": 0.2947402180885183, - "grad_norm": 1.0903318911783695, - "learning_rate": 0.00017777163158386647, - "loss": 0.653, - "step": 4595 - }, - { - "epoch": 0.29506093649775494, - "grad_norm": 1.172298521370259, - "learning_rate": 0.00017770120820780573, - "loss": 0.7285, - "step": 4600 - }, - { - "epoch": 0.29538165490699164, - "grad_norm": 0.6583420678299451, - "learning_rate": 0.00017763068744715697, - "loss": 0.6031, - "step": 4605 - }, - { - "epoch": 0.29570237331622834, - "grad_norm": 0.8591774180151724, - "learning_rate": 0.00017756006939030535, - "loss": 0.7409, - "step": 4610 - }, - { - "epoch": 0.29602309172546504, - "grad_norm": 0.6898541329818539, - "learning_rate": 0.00017748935412575804, - "loss": 0.589, - "step": 4615 - }, - { - "epoch": 0.29634381013470174, - "grad_norm": 0.5395272492697519, - "learning_rate": 0.000177418541742144, - "loss": 0.708, - "step": 4620 - }, - { - "epoch": 0.29666452854393843, - "grad_norm": 1.0169898045901036, - "learning_rate": 0.0001773476323282138, - "loss": 0.6948, - "step": 4625 - }, - { - "epoch": 0.29698524695317513, - "grad_norm": 1.0000948614259928, - "learning_rate": 0.00017727662597283986, - "loss": 0.7215, - "step": 4630 - }, - { - "epoch": 0.29730596536241183, - "grad_norm": 0.9689865733719959, - "learning_rate": 0.00017720552276501592, - "loss": 0.6701, - "step": 4635 - }, - { - "epoch": 0.29762668377164847, - "grad_norm": 0.6557948134140331, - "learning_rate": 0.00017713432279385712, - "loss": 0.6235, - "step": 4640 - }, - { - "epoch": 0.29794740218088517, - "grad_norm": 1.1877573091679572, - "learning_rate": 0.00017706302614859992, - "loss": 0.7863, - "step": 4645 - }, - { - "epoch": 0.29826812059012187, - "grad_norm": 0.8462973100804213, - "learning_rate": 0.00017699163291860198, - "loss": 0.5724, - "step": 4650 - }, - { - "epoch": 0.29858883899935856, - "grad_norm": 0.9236445624740109, - "learning_rate": 0.0001769201431933419, - "loss": 0.5787, - "step": 4655 - }, - { - "epoch": 0.29890955740859526, - "grad_norm": 1.0716376234952218, - "learning_rate": 0.00017684855706241934, - "loss": 0.7401, - "step": 4660 - }, - { - "epoch": 0.29923027581783196, - "grad_norm": 1.1600311786248418, - "learning_rate": 0.00017677687461555467, - "loss": 0.708, - "step": 4665 - }, - { - "epoch": 0.29955099422706866, - "grad_norm": 0.7413385734559219, - "learning_rate": 0.00017670509594258912, - "loss": 0.5718, - "step": 4670 - }, - { - "epoch": 0.2998717126363053, - "grad_norm": 0.9348593211146833, - "learning_rate": 0.00017663322113348434, - "loss": 0.7492, - "step": 4675 - }, - { - "epoch": 0.300192431045542, - "grad_norm": 1.5696315279326167, - "learning_rate": 0.0001765612502783226, - "loss": 0.6552, - "step": 4680 - }, - { - "epoch": 0.3005131494547787, - "grad_norm": 1.0990775256909542, - "learning_rate": 0.00017648918346730653, - "loss": 0.582, - "step": 4685 - }, - { - "epoch": 0.3008338678640154, - "grad_norm": 0.7467674097224691, - "learning_rate": 0.00017641702079075904, - "loss": 0.6326, - "step": 4690 - }, - { - "epoch": 0.3011545862732521, - "grad_norm": 0.7256436706311058, - "learning_rate": 0.00017634476233912308, - "loss": 0.7717, - "step": 4695 - }, - { - "epoch": 0.3014753046824888, - "grad_norm": 0.754840650778496, - "learning_rate": 0.00017627240820296177, - "loss": 0.6896, - "step": 4700 - }, - { - "epoch": 0.3017960230917255, - "grad_norm": 0.7072150395545665, - "learning_rate": 0.0001761999584729581, - "loss": 0.6332, - "step": 4705 - }, - { - "epoch": 0.3021167415009622, - "grad_norm": 1.2009873604762311, - "learning_rate": 0.00017612741323991488, - "loss": 0.6393, - "step": 4710 - }, - { - "epoch": 0.3024374599101988, - "grad_norm": 0.6086745243060716, - "learning_rate": 0.0001760547725947545, - "loss": 0.6681, - "step": 4715 - }, - { - "epoch": 0.3027581783194355, - "grad_norm": 0.9853085984018423, - "learning_rate": 0.0001759820366285192, - "loss": 0.5961, - "step": 4720 - }, - { - "epoch": 0.3030788967286722, - "grad_norm": 1.0109466174974706, - "learning_rate": 0.00017590920543237036, - "loss": 0.7225, - "step": 4725 - }, - { - "epoch": 0.3033996151379089, - "grad_norm": 1.2139597067132748, - "learning_rate": 0.00017583627909758902, - "loss": 0.6542, - "step": 4730 - }, - { - "epoch": 0.3037203335471456, - "grad_norm": 0.9478885183065455, - "learning_rate": 0.00017576325771557518, - "loss": 0.6881, - "step": 4735 - }, - { - "epoch": 0.3040410519563823, - "grad_norm": 0.8539507613861936, - "learning_rate": 0.00017569014137784822, - "loss": 0.6331, - "step": 4740 - }, - { - "epoch": 0.304361770365619, - "grad_norm": 0.9679885840401695, - "learning_rate": 0.00017561693017604637, - "loss": 0.7997, - "step": 4745 - }, - { - "epoch": 0.30468248877485565, - "grad_norm": 0.9422216475894025, - "learning_rate": 0.00017554362420192676, - "loss": 0.6769, - "step": 4750 - }, - { - "epoch": 0.30500320718409235, - "grad_norm": 1.0998446041770769, - "learning_rate": 0.00017547022354736538, - "loss": 0.6072, - "step": 4755 - }, - { - "epoch": 0.30532392559332905, - "grad_norm": 1.0857238442878236, - "learning_rate": 0.00017539672830435682, - "loss": 0.7689, - "step": 4760 - }, - { - "epoch": 0.30564464400256575, - "grad_norm": 0.7440444931879342, - "learning_rate": 0.00017532313856501427, - "loss": 0.5841, - "step": 4765 - }, - { - "epoch": 0.30596536241180244, - "grad_norm": 0.7172978744287396, - "learning_rate": 0.0001752494544215693, - "loss": 0.6583, - "step": 4770 - }, - { - "epoch": 0.30628608082103914, - "grad_norm": 1.2045039512423583, - "learning_rate": 0.00017517567596637184, - "loss": 0.6052, - "step": 4775 - }, - { - "epoch": 0.30660679923027584, - "grad_norm": 0.6334336485782317, - "learning_rate": 0.00017510180329189, - "loss": 0.6194, - "step": 4780 - }, - { - "epoch": 0.3069275176395125, - "grad_norm": 1.3899325242838065, - "learning_rate": 0.00017502783649070994, - "loss": 0.7102, - "step": 4785 - }, - { - "epoch": 0.3072482360487492, - "grad_norm": 1.1877009077958471, - "learning_rate": 0.00017495377565553594, - "loss": 0.683, - "step": 4790 - }, - { - "epoch": 0.3075689544579859, - "grad_norm": 1.1043105680832985, - "learning_rate": 0.00017487962087918993, - "loss": 0.6165, - "step": 4795 - }, - { - "epoch": 0.3078896728672226, - "grad_norm": 0.9571802341999754, - "learning_rate": 0.00017480537225461178, - "loss": 0.499, - "step": 4800 - }, - { - "epoch": 0.3082103912764593, - "grad_norm": 1.0846077393930171, - "learning_rate": 0.00017473102987485876, - "loss": 0.7685, - "step": 4805 - }, - { - "epoch": 0.30853110968569597, - "grad_norm": 0.9095961738585777, - "learning_rate": 0.00017465659383310587, - "loss": 0.6373, - "step": 4810 - }, - { - "epoch": 0.30885182809493267, - "grad_norm": 1.1872255037042634, - "learning_rate": 0.00017458206422264533, - "loss": 0.6564, - "step": 4815 - }, - { - "epoch": 0.30917254650416937, - "grad_norm": 1.0600317447426089, - "learning_rate": 0.00017450744113688672, - "loss": 0.6103, - "step": 4820 - }, - { - "epoch": 0.309493264913406, - "grad_norm": 0.89956531270657, - "learning_rate": 0.00017443272466935675, - "loss": 0.7056, - "step": 4825 - }, - { - "epoch": 0.3098139833226427, - "grad_norm": 0.6138048573378617, - "learning_rate": 0.00017435791491369917, - "loss": 0.6437, - "step": 4830 - }, - { - "epoch": 0.3101347017318794, - "grad_norm": 0.6479672204769544, - "learning_rate": 0.00017428301196367464, - "loss": 0.7149, - "step": 4835 - }, - { - "epoch": 0.3104554201411161, - "grad_norm": 0.9059240016877552, - "learning_rate": 0.00017420801591316062, - "loss": 0.6641, - "step": 4840 - }, - { - "epoch": 0.3107761385503528, - "grad_norm": 0.7000331742442105, - "learning_rate": 0.00017413292685615134, - "loss": 0.6227, - "step": 4845 - }, - { - "epoch": 0.3110968569595895, - "grad_norm": 0.8706735159170973, - "learning_rate": 0.00017405774488675742, - "loss": 0.6191, - "step": 4850 - }, - { - "epoch": 0.3114175753688262, - "grad_norm": 0.9657278531523165, - "learning_rate": 0.0001739824700992061, - "loss": 0.5956, - "step": 4855 - }, - { - "epoch": 0.31173829377806284, - "grad_norm": 0.9553637466697323, - "learning_rate": 0.0001739071025878409, - "loss": 0.7627, - "step": 4860 - }, - { - "epoch": 0.31205901218729953, - "grad_norm": 1.1595347795694808, - "learning_rate": 0.00017383164244712146, - "loss": 0.6432, - "step": 4865 - }, - { - "epoch": 0.31237973059653623, - "grad_norm": 1.3557930665103466, - "learning_rate": 0.0001737560897716236, - "loss": 0.6965, - "step": 4870 - }, - { - "epoch": 0.31270044900577293, - "grad_norm": 0.919377290874929, - "learning_rate": 0.00017368044465603915, - "loss": 0.6913, - "step": 4875 - }, - { - "epoch": 0.3130211674150096, - "grad_norm": 0.9179711638304333, - "learning_rate": 0.00017360470719517577, - "loss": 0.5516, - "step": 4880 - }, - { - "epoch": 0.3133418858242463, - "grad_norm": 0.8074363475177312, - "learning_rate": 0.00017352887748395678, - "loss": 0.6421, - "step": 4885 - }, - { - "epoch": 0.313662604233483, - "grad_norm": 1.3217851235374773, - "learning_rate": 0.00017345295561742123, - "loss": 0.7387, - "step": 4890 - }, - { - "epoch": 0.31398332264271966, - "grad_norm": 0.8100107368582629, - "learning_rate": 0.0001733769416907236, - "loss": 0.6104, - "step": 4895 - }, - { - "epoch": 0.31430404105195636, - "grad_norm": 1.0974582938152775, - "learning_rate": 0.0001733008357991338, - "loss": 0.649, - "step": 4900 - }, - { - "epoch": 0.31462475946119306, - "grad_norm": 1.233711986487123, - "learning_rate": 0.00017322463803803688, - "loss": 0.5448, - "step": 4905 - }, - { - "epoch": 0.31494547787042976, - "grad_norm": 0.8777266459889339, - "learning_rate": 0.00017314834850293325, - "loss": 0.7512, - "step": 4910 - }, - { - "epoch": 0.31526619627966646, - "grad_norm": 0.8794148401176598, - "learning_rate": 0.00017307196728943812, - "loss": 0.6314, - "step": 4915 - }, - { - "epoch": 0.31558691468890315, - "grad_norm": 0.7021113325319495, - "learning_rate": 0.00017299549449328175, - "loss": 0.5404, - "step": 4920 - }, - { - "epoch": 0.31590763309813985, - "grad_norm": 0.76819009517203, - "learning_rate": 0.00017291893021030913, - "loss": 0.7646, - "step": 4925 - }, - { - "epoch": 0.31622835150737655, - "grad_norm": 1.3281150753972946, - "learning_rate": 0.00017284227453647993, - "loss": 0.6404, - "step": 4930 - }, - { - "epoch": 0.3165490699166132, - "grad_norm": 0.8777792257027988, - "learning_rate": 0.00017276552756786831, - "loss": 0.7211, - "step": 4935 - }, - { - "epoch": 0.3168697883258499, - "grad_norm": 0.9522765071117524, - "learning_rate": 0.00017268868940066288, - "loss": 0.7659, - "step": 4940 - }, - { - "epoch": 0.3171905067350866, - "grad_norm": 0.7347381221386469, - "learning_rate": 0.0001726117601311666, - "loss": 0.7521, - "step": 4945 - }, - { - "epoch": 0.3175112251443233, - "grad_norm": 0.947686463596072, - "learning_rate": 0.00017253473985579657, - "loss": 0.6981, - "step": 4950 - }, - { - "epoch": 0.31783194355356, - "grad_norm": 0.9948270615790568, - "learning_rate": 0.0001724576286710839, - "loss": 0.5347, - "step": 4955 - }, - { - "epoch": 0.3181526619627967, - "grad_norm": 0.7412951434019396, - "learning_rate": 0.00017238042667367377, - "loss": 0.6563, - "step": 4960 - }, - { - "epoch": 0.3184733803720334, - "grad_norm": 0.9060455966464537, - "learning_rate": 0.00017230313396032504, - "loss": 0.8452, - "step": 4965 - }, - { - "epoch": 0.31879409878127, - "grad_norm": 0.7926379737171755, - "learning_rate": 0.00017222575062791033, - "loss": 0.6834, - "step": 4970 - }, - { - "epoch": 0.3191148171905067, - "grad_norm": 1.1978749811848812, - "learning_rate": 0.00017214827677341582, - "loss": 0.5959, - "step": 4975 - }, - { - "epoch": 0.3194355355997434, - "grad_norm": 1.1382243993856835, - "learning_rate": 0.00017207071249394118, - "loss": 0.8144, - "step": 4980 - }, - { - "epoch": 0.3197562540089801, - "grad_norm": 0.9207041310652729, - "learning_rate": 0.00017199305788669937, - "loss": 0.7515, - "step": 4985 - }, - { - "epoch": 0.3200769724182168, - "grad_norm": 0.7762438521118743, - "learning_rate": 0.00017191531304901653, - "loss": 0.7128, - "step": 4990 - }, - { - "epoch": 0.3203976908274535, - "grad_norm": 1.0657161158728048, - "learning_rate": 0.000171837478078332, - "loss": 0.7206, - "step": 4995 - }, - { - "epoch": 0.3207184092366902, - "grad_norm": 0.8853471042976426, - "learning_rate": 0.00017175955307219796, - "loss": 0.6661, - "step": 5000 - }, - { - "epoch": 0.3210391276459269, - "grad_norm": 0.730931049927295, - "learning_rate": 0.00017168153812827957, - "loss": 0.7177, - "step": 5005 - }, - { - "epoch": 0.32135984605516354, - "grad_norm": 1.24238938271146, - "learning_rate": 0.0001716034333443545, - "loss": 0.7264, - "step": 5010 - }, - { - "epoch": 0.32168056446440024, - "grad_norm": 1.0598509644567646, - "learning_rate": 0.00017152523881831325, - "loss": 0.5868, - "step": 5015 - }, - { - "epoch": 0.32200128287363694, - "grad_norm": 1.142674205123222, - "learning_rate": 0.00017144695464815866, - "loss": 0.7652, - "step": 5020 - }, - { - "epoch": 0.32232200128287364, - "grad_norm": 1.2248444413302872, - "learning_rate": 0.00017136858093200593, - "loss": 0.6078, - "step": 5025 - }, - { - "epoch": 0.32264271969211034, - "grad_norm": 0.9090404485944782, - "learning_rate": 0.00017129011776808258, - "loss": 0.6921, - "step": 5030 - }, - { - "epoch": 0.32296343810134703, - "grad_norm": 1.0978730524660503, - "learning_rate": 0.00017121156525472814, - "loss": 0.7593, - "step": 5035 - }, - { - "epoch": 0.32328415651058373, - "grad_norm": 1.8023280272488704, - "learning_rate": 0.00017113292349039413, - "loss": 0.7583, - "step": 5040 - }, - { - "epoch": 0.3236048749198204, - "grad_norm": 1.0487723489551213, - "learning_rate": 0.000171054192573644, - "loss": 0.7754, - "step": 5045 - }, - { - "epoch": 0.32392559332905707, - "grad_norm": 0.7931120571928945, - "learning_rate": 0.0001709753726031529, - "loss": 0.7182, - "step": 5050 - }, - { - "epoch": 0.32424631173829377, - "grad_norm": 1.3448284362405596, - "learning_rate": 0.00017089646367770756, - "loss": 0.6391, - "step": 5055 - }, - { - "epoch": 0.32456703014753047, - "grad_norm": 0.9771883061194023, - "learning_rate": 0.0001708174658962062, - "loss": 0.632, - "step": 5060 - }, - { - "epoch": 0.32488774855676716, - "grad_norm": 0.944625885099161, - "learning_rate": 0.00017073837935765846, - "loss": 0.6235, - "step": 5065 - }, - { - "epoch": 0.32520846696600386, - "grad_norm": 0.9899695819556337, - "learning_rate": 0.00017065920416118522, - "loss": 0.7345, - "step": 5070 - }, - { - "epoch": 0.32552918537524056, - "grad_norm": 0.5815153267452241, - "learning_rate": 0.00017057994040601838, - "loss": 0.5988, - "step": 5075 - }, - { - "epoch": 0.3258499037844772, - "grad_norm": 0.7182304509869034, - "learning_rate": 0.00017050058819150098, - "loss": 0.5962, - "step": 5080 - }, - { - "epoch": 0.3261706221937139, - "grad_norm": 0.7916342652857238, - "learning_rate": 0.0001704211476170868, - "loss": 0.5903, - "step": 5085 - }, - { - "epoch": 0.3264913406029506, - "grad_norm": 1.186592480709318, - "learning_rate": 0.00017034161878234043, - "loss": 0.7071, - "step": 5090 - }, - { - "epoch": 0.3268120590121873, - "grad_norm": 1.4501384859209354, - "learning_rate": 0.00017026200178693704, - "loss": 0.5699, - "step": 5095 - }, - { - "epoch": 0.327132777421424, - "grad_norm": 0.4770414244602479, - "learning_rate": 0.0001701822967306624, - "loss": 0.6942, - "step": 5100 - }, - { - "epoch": 0.3274534958306607, - "grad_norm": 1.2188679878291713, - "learning_rate": 0.00017010250371341244, - "loss": 0.6633, - "step": 5105 - }, - { - "epoch": 0.3277742142398974, - "grad_norm": 1.0813857287425748, - "learning_rate": 0.0001700226228351935, - "loss": 0.6257, - "step": 5110 - }, - { - "epoch": 0.3280949326491341, - "grad_norm": 0.8540165463861037, - "learning_rate": 0.00016994265419612205, - "loss": 0.5918, - "step": 5115 - }, - { - "epoch": 0.32841565105837073, - "grad_norm": 1.1642007608342173, - "learning_rate": 0.00016986259789642444, - "loss": 0.6911, - "step": 5120 - }, - { - "epoch": 0.3287363694676074, - "grad_norm": 0.8539433327300491, - "learning_rate": 0.00016978245403643694, - "loss": 0.7732, - "step": 5125 - }, - { - "epoch": 0.3290570878768441, - "grad_norm": 1.0202618411725253, - "learning_rate": 0.0001697022227166056, - "loss": 0.7798, - "step": 5130 - }, - { - "epoch": 0.3293778062860808, - "grad_norm": 0.8876324268732894, - "learning_rate": 0.00016962190403748605, - "loss": 0.714, - "step": 5135 - }, - { - "epoch": 0.3296985246953175, - "grad_norm": 0.7783501191713772, - "learning_rate": 0.0001695414980997434, - "loss": 0.7987, - "step": 5140 - }, - { - "epoch": 0.3300192431045542, - "grad_norm": 1.204240570280653, - "learning_rate": 0.00016946100500415213, - "loss": 0.6914, - "step": 5145 - }, - { - "epoch": 0.3303399615137909, - "grad_norm": 0.7152048301163425, - "learning_rate": 0.00016938042485159594, - "loss": 0.6703, - "step": 5150 - }, - { - "epoch": 0.33066067992302756, - "grad_norm": 1.191922058294469, - "learning_rate": 0.0001692997577430677, - "loss": 0.6539, - "step": 5155 - }, - { - "epoch": 0.33098139833226425, - "grad_norm": 0.8187793173057333, - "learning_rate": 0.00016921900377966923, - "loss": 0.7468, - "step": 5160 - }, - { - "epoch": 0.33130211674150095, - "grad_norm": 0.9381392106872509, - "learning_rate": 0.00016913816306261112, - "loss": 0.766, - "step": 5165 - }, - { - "epoch": 0.33162283515073765, - "grad_norm": 0.7128118797176758, - "learning_rate": 0.00016905723569321288, - "loss": 0.6719, - "step": 5170 - }, - { - "epoch": 0.33194355355997435, - "grad_norm": 1.500297575057347, - "learning_rate": 0.00016897622177290244, - "loss": 0.7072, - "step": 5175 - }, - { - "epoch": 0.33226427196921104, - "grad_norm": 0.9800774031498481, - "learning_rate": 0.0001688951214032163, - "loss": 0.6549, - "step": 5180 - }, - { - "epoch": 0.33258499037844774, - "grad_norm": 0.8808790723791357, - "learning_rate": 0.00016881393468579932, - "loss": 0.6955, - "step": 5185 - }, - { - "epoch": 0.33290570878768444, - "grad_norm": 0.8920914860291771, - "learning_rate": 0.00016873266172240452, - "loss": 0.5649, - "step": 5190 - }, - { - "epoch": 0.3332264271969211, - "grad_norm": 0.6851960157071083, - "learning_rate": 0.00016865130261489305, - "loss": 0.6897, - "step": 5195 - }, - { - "epoch": 0.3335471456061578, - "grad_norm": 0.8407283098592762, - "learning_rate": 0.00016856985746523405, - "loss": 0.6559, - "step": 5200 - }, - { - "epoch": 0.3338678640153945, - "grad_norm": 0.9215186470532375, - "learning_rate": 0.00016848832637550437, - "loss": 0.7664, - "step": 5205 - }, - { - "epoch": 0.3341885824246312, - "grad_norm": 0.7299164606010856, - "learning_rate": 0.00016840670944788882, - "loss": 0.5981, - "step": 5210 - }, - { - "epoch": 0.3345093008338679, - "grad_norm": 0.8732424966610127, - "learning_rate": 0.00016832500678467952, - "loss": 0.7035, - "step": 5215 - }, - { - "epoch": 0.33483001924310457, - "grad_norm": 0.9750167638289885, - "learning_rate": 0.00016824321848827624, - "loss": 0.5995, - "step": 5220 - }, - { - "epoch": 0.33515073765234127, - "grad_norm": 1.0976388995980935, - "learning_rate": 0.00016816134466118596, - "loss": 0.7107, - "step": 5225 - }, - { - "epoch": 0.3354714560615779, - "grad_norm": 1.0135781126967063, - "learning_rate": 0.00016807938540602292, - "loss": 0.7174, - "step": 5230 - }, - { - "epoch": 0.3357921744708146, - "grad_norm": 0.8189118457664761, - "learning_rate": 0.00016799734082550844, - "loss": 0.6645, - "step": 5235 - }, - { - "epoch": 0.3361128928800513, - "grad_norm": 0.6996919391876488, - "learning_rate": 0.0001679152110224707, - "loss": 0.6629, - "step": 5240 - }, - { - "epoch": 0.336433611289288, - "grad_norm": 0.7381428623848976, - "learning_rate": 0.00016783299609984478, - "loss": 0.6016, - "step": 5245 - }, - { - "epoch": 0.3367543296985247, - "grad_norm": 0.9095764087290898, - "learning_rate": 0.00016775069616067233, - "loss": 0.8577, - "step": 5250 - }, - { - "epoch": 0.3370750481077614, - "grad_norm": 0.7032412366347235, - "learning_rate": 0.00016766831130810171, - "loss": 0.7342, - "step": 5255 - }, - { - "epoch": 0.3373957665169981, - "grad_norm": 0.9697869860649856, - "learning_rate": 0.00016758584164538757, - "loss": 0.6338, - "step": 5260 - }, - { - "epoch": 0.33771648492623474, - "grad_norm": 0.7784503288752077, - "learning_rate": 0.00016750328727589095, - "loss": 0.6666, - "step": 5265 - }, - { - "epoch": 0.33803720333547144, - "grad_norm": 0.5156266401874552, - "learning_rate": 0.00016742064830307897, - "loss": 0.7699, - "step": 5270 - }, - { - "epoch": 0.33835792174470813, - "grad_norm": 1.0003590365934907, - "learning_rate": 0.0001673379248305248, - "loss": 0.6751, - "step": 5275 - }, - { - "epoch": 0.33867864015394483, - "grad_norm": 0.8026066074245787, - "learning_rate": 0.0001672551169619076, - "loss": 0.7573, - "step": 5280 - }, - { - "epoch": 0.33899935856318153, - "grad_norm": 1.0369937352211243, - "learning_rate": 0.00016717222480101221, - "loss": 0.667, - "step": 5285 - }, - { - "epoch": 0.3393200769724182, - "grad_norm": 0.9644006720446381, - "learning_rate": 0.0001670892484517292, - "loss": 0.6383, - "step": 5290 - }, - { - "epoch": 0.3396407953816549, - "grad_norm": 1.0076204289252497, - "learning_rate": 0.00016700618801805453, - "loss": 0.7178, - "step": 5295 - }, - { - "epoch": 0.3399615137908916, - "grad_norm": 0.5579579624666732, - "learning_rate": 0.00016692304360408966, - "loss": 0.6665, - "step": 5300 - }, - { - "epoch": 0.34028223220012827, - "grad_norm": 0.8064350566112853, - "learning_rate": 0.00016683981531404125, - "loss": 0.5122, - "step": 5305 - }, - { - "epoch": 0.34060295060936496, - "grad_norm": 0.9816255727453933, - "learning_rate": 0.0001667565032522211, - "loss": 0.6926, - "step": 5310 - }, - { - "epoch": 0.34092366901860166, - "grad_norm": 0.817929460216783, - "learning_rate": 0.00016667310752304602, - "loss": 0.5491, - "step": 5315 - }, - { - "epoch": 0.34124438742783836, - "grad_norm": 0.9215347160545883, - "learning_rate": 0.00016658962823103764, - "loss": 0.6835, - "step": 5320 - }, - { - "epoch": 0.34156510583707506, - "grad_norm": 1.1290419292904414, - "learning_rate": 0.00016650606548082236, - "loss": 0.735, - "step": 5325 - }, - { - "epoch": 0.34188582424631175, - "grad_norm": 1.1930691902617288, - "learning_rate": 0.0001664224193771312, - "loss": 0.5138, - "step": 5330 - }, - { - "epoch": 0.34220654265554845, - "grad_norm": 0.8088938421114102, - "learning_rate": 0.0001663386900247995, - "loss": 0.6654, - "step": 5335 - }, - { - "epoch": 0.3425272610647851, - "grad_norm": 0.5514542526950761, - "learning_rate": 0.0001662548775287672, - "loss": 0.6456, - "step": 5340 - }, - { - "epoch": 0.3428479794740218, - "grad_norm": 0.8205842308107273, - "learning_rate": 0.00016617098199407814, - "loss": 0.7144, - "step": 5345 - }, - { - "epoch": 0.3431686978832585, - "grad_norm": 0.9295493105678805, - "learning_rate": 0.00016608700352588053, - "loss": 0.6876, - "step": 5350 - }, - { - "epoch": 0.3434894162924952, - "grad_norm": 0.7296614219020304, - "learning_rate": 0.00016600294222942626, - "loss": 0.6785, - "step": 5355 - }, - { - "epoch": 0.3438101347017319, - "grad_norm": 0.6002339895362847, - "learning_rate": 0.00016591879821007126, - "loss": 0.5796, - "step": 5360 - }, - { - "epoch": 0.3441308531109686, - "grad_norm": 1.6160052086574104, - "learning_rate": 0.00016583457157327497, - "loss": 0.7118, - "step": 5365 - }, - { - "epoch": 0.3444515715202053, - "grad_norm": 1.2282552121625845, - "learning_rate": 0.00016575026242460046, - "loss": 0.6564, - "step": 5370 - }, - { - "epoch": 0.344772289929442, - "grad_norm": 0.9643175110463178, - "learning_rate": 0.00016566587086971416, - "loss": 0.669, - "step": 5375 - }, - { - "epoch": 0.3450930083386786, - "grad_norm": 0.9607772443483632, - "learning_rate": 0.00016558139701438584, - "loss": 0.6276, - "step": 5380 - }, - { - "epoch": 0.3454137267479153, - "grad_norm": 0.9147875672042459, - "learning_rate": 0.0001654968409644884, - "loss": 0.5905, - "step": 5385 - }, - { - "epoch": 0.345734445157152, - "grad_norm": 0.7334238812099275, - "learning_rate": 0.00016541220282599773, - "loss": 0.6261, - "step": 5390 - }, - { - "epoch": 0.3460551635663887, - "grad_norm": 1.1742953273617749, - "learning_rate": 0.00016532748270499262, - "loss": 0.7, - "step": 5395 - }, - { - "epoch": 0.3463758819756254, - "grad_norm": 1.1387016781633938, - "learning_rate": 0.00016524268070765465, - "loss": 0.7061, - "step": 5400 - }, - { - "epoch": 0.3466966003848621, - "grad_norm": 0.9794060869341327, - "learning_rate": 0.0001651577969402679, - "loss": 0.7031, - "step": 5405 - }, - { - "epoch": 0.3470173187940988, - "grad_norm": 0.9732807122694793, - "learning_rate": 0.0001650728315092191, - "loss": 0.6588, - "step": 5410 - }, - { - "epoch": 0.34733803720333545, - "grad_norm": 1.2045887990242425, - "learning_rate": 0.0001649877845209972, - "loss": 0.5635, - "step": 5415 - }, - { - "epoch": 0.34765875561257215, - "grad_norm": 0.9098967972234847, - "learning_rate": 0.0001649026560821934, - "loss": 0.6877, - "step": 5420 - }, - { - "epoch": 0.34797947402180884, - "grad_norm": 0.8919518792507914, - "learning_rate": 0.000164817446299501, - "loss": 0.852, - "step": 5425 - }, - { - "epoch": 0.34830019243104554, - "grad_norm": 1.082286394388753, - "learning_rate": 0.00016473215527971528, - "loss": 0.6497, - "step": 5430 - }, - { - "epoch": 0.34862091084028224, - "grad_norm": 0.7681820908697059, - "learning_rate": 0.00016464678312973327, - "loss": 0.7075, - "step": 5435 - }, - { - "epoch": 0.34894162924951894, - "grad_norm": 0.8577629521944062, - "learning_rate": 0.00016456132995655372, - "loss": 0.6942, - "step": 5440 - }, - { - "epoch": 0.34926234765875563, - "grad_norm": 0.7981749008936162, - "learning_rate": 0.00016447579586727692, - "loss": 0.6658, - "step": 5445 - }, - { - "epoch": 0.3495830660679923, - "grad_norm": 0.6566080494812765, - "learning_rate": 0.0001643901809691046, - "loss": 0.6325, - "step": 5450 - }, - { - "epoch": 0.349903784477229, - "grad_norm": 0.7729498372329889, - "learning_rate": 0.00016430448536933965, - "loss": 0.5609, - "step": 5455 - }, - { - "epoch": 0.35022450288646567, - "grad_norm": 1.0464507162443157, - "learning_rate": 0.00016421870917538635, - "loss": 0.6353, - "step": 5460 - }, - { - "epoch": 0.35054522129570237, - "grad_norm": 1.3013839685098925, - "learning_rate": 0.00016413285249474975, - "loss": 0.5724, - "step": 5465 - }, - { - "epoch": 0.35086593970493907, - "grad_norm": 0.813558813259816, - "learning_rate": 0.00016404691543503588, - "loss": 0.7074, - "step": 5470 - }, - { - "epoch": 0.35118665811417576, - "grad_norm": 1.001748370098994, - "learning_rate": 0.0001639608981039515, - "loss": 0.7945, - "step": 5475 - }, - { - "epoch": 0.35150737652341246, - "grad_norm": 0.870149957049954, - "learning_rate": 0.00016387480060930395, - "loss": 0.689, - "step": 5480 - }, - { - "epoch": 0.35182809493264916, - "grad_norm": 0.8680578535676656, - "learning_rate": 0.00016378862305900112, - "loss": 0.6239, - "step": 5485 - }, - { - "epoch": 0.3521488133418858, - "grad_norm": 0.8274627515878666, - "learning_rate": 0.0001637023655610511, - "loss": 0.6437, - "step": 5490 - }, - { - "epoch": 0.3524695317511225, - "grad_norm": 0.8836905220838523, - "learning_rate": 0.00016361602822356232, - "loss": 0.581, - "step": 5495 - }, - { - "epoch": 0.3527902501603592, - "grad_norm": 0.645087928333498, - "learning_rate": 0.0001635296111547432, - "loss": 0.65, - "step": 5500 - }, - { - "epoch": 0.3531109685695959, - "grad_norm": 0.9138176884852274, - "learning_rate": 0.00016344311446290212, - "loss": 0.6039, - "step": 5505 - }, - { - "epoch": 0.3534316869788326, - "grad_norm": 0.8932196439321753, - "learning_rate": 0.00016335653825644717, - "loss": 0.6447, - "step": 5510 - }, - { - "epoch": 0.3537524053880693, - "grad_norm": 0.700814257534255, - "learning_rate": 0.00016326988264388624, - "loss": 0.634, - "step": 5515 - }, - { - "epoch": 0.354073123797306, - "grad_norm": 0.8079984489578869, - "learning_rate": 0.0001631831477338266, - "loss": 0.5378, - "step": 5520 - }, - { - "epoch": 0.35439384220654263, - "grad_norm": 1.0368102707808613, - "learning_rate": 0.00016309633363497503, - "loss": 0.6121, - "step": 5525 - }, - { - "epoch": 0.35471456061577933, - "grad_norm": 1.0720279870828384, - "learning_rate": 0.00016300944045613745, - "loss": 0.615, - "step": 5530 - }, - { - "epoch": 0.355035279025016, - "grad_norm": 0.6936759908598535, - "learning_rate": 0.00016292246830621897, - "loss": 0.7186, - "step": 5535 - }, - { - "epoch": 0.3553559974342527, - "grad_norm": 0.8578757956070833, - "learning_rate": 0.00016283541729422368, - "loss": 0.6859, - "step": 5540 - }, - { - "epoch": 0.3556767158434894, - "grad_norm": 0.6299846194893505, - "learning_rate": 0.0001627482875292544, - "loss": 0.7011, - "step": 5545 - }, - { - "epoch": 0.3559974342527261, - "grad_norm": 2.8465820906119697, - "learning_rate": 0.00016266107912051275, - "loss": 0.6824, - "step": 5550 - }, - { - "epoch": 0.3563181526619628, - "grad_norm": 0.8212652492805361, - "learning_rate": 0.00016257379217729897, - "loss": 0.7353, - "step": 5555 - }, - { - "epoch": 0.35663887107119946, - "grad_norm": 0.8592127708286107, - "learning_rate": 0.00016248642680901157, - "loss": 0.7493, - "step": 5560 - }, - { - "epoch": 0.35695958948043616, - "grad_norm": 1.5401896960046906, - "learning_rate": 0.00016239898312514747, - "loss": 0.6233, - "step": 5565 - }, - { - "epoch": 0.35728030788967285, - "grad_norm": 0.9880669672357292, - "learning_rate": 0.00016231146123530169, - "loss": 0.7483, - "step": 5570 - }, - { - "epoch": 0.35760102629890955, - "grad_norm": 1.0054106975653296, - "learning_rate": 0.00016222386124916733, - "loss": 0.7477, - "step": 5575 - }, - { - "epoch": 0.35792174470814625, - "grad_norm": 0.8851121102484797, - "learning_rate": 0.0001621361832765353, - "loss": 0.7338, - "step": 5580 - }, - { - "epoch": 0.35824246311738295, - "grad_norm": 0.7868381457390292, - "learning_rate": 0.0001620484274272943, - "loss": 0.8315, - "step": 5585 - }, - { - "epoch": 0.35856318152661965, - "grad_norm": 2.2302567668996907, - "learning_rate": 0.00016196059381143056, - "loss": 0.6057, - "step": 5590 - }, - { - "epoch": 0.35888389993585634, - "grad_norm": 0.8632558537630518, - "learning_rate": 0.0001618726825390279, - "loss": 0.6017, - "step": 5595 - }, - { - "epoch": 0.359204618345093, - "grad_norm": 0.9301897471057365, - "learning_rate": 0.0001617846937202674, - "loss": 0.7127, - "step": 5600 - }, - { - "epoch": 0.3595253367543297, - "grad_norm": 1.0314386924705863, - "learning_rate": 0.00016169662746542724, - "loss": 0.6471, - "step": 5605 - }, - { - "epoch": 0.3598460551635664, - "grad_norm": 0.7527220509268685, - "learning_rate": 0.00016160848388488283, - "loss": 0.5149, - "step": 5610 - }, - { - "epoch": 0.3601667735728031, - "grad_norm": 0.9964259981347259, - "learning_rate": 0.0001615202630891064, - "loss": 0.7551, - "step": 5615 - }, - { - "epoch": 0.3604874919820398, - "grad_norm": 0.9534877288363439, - "learning_rate": 0.0001614319651886669, - "loss": 0.7869, - "step": 5620 - }, - { - "epoch": 0.3608082103912765, - "grad_norm": 0.6624325233415048, - "learning_rate": 0.00016134359029423004, - "loss": 0.6187, - "step": 5625 - }, - { - "epoch": 0.36112892880051317, - "grad_norm": 1.1438885759745019, - "learning_rate": 0.000161255138516558, - "loss": 0.6818, - "step": 5630 - }, - { - "epoch": 0.3614496472097498, - "grad_norm": 1.0060076302436596, - "learning_rate": 0.00016116660996650918, - "loss": 0.7134, - "step": 5635 - }, - { - "epoch": 0.3617703656189865, - "grad_norm": 0.824054815580278, - "learning_rate": 0.0001610780047550384, - "loss": 0.6322, - "step": 5640 - }, - { - "epoch": 0.3620910840282232, - "grad_norm": 1.1593592610393137, - "learning_rate": 0.00016098932299319642, - "loss": 0.6549, - "step": 5645 - }, - { - "epoch": 0.3624118024374599, - "grad_norm": 1.3453462014445998, - "learning_rate": 0.00016090056479213, - "loss": 0.6626, - "step": 5650 - }, - { - "epoch": 0.3627325208466966, - "grad_norm": 0.6303823430985745, - "learning_rate": 0.00016081173026308168, - "loss": 0.6129, - "step": 5655 - }, - { - "epoch": 0.3630532392559333, - "grad_norm": 0.9682139214042652, - "learning_rate": 0.00016072281951738974, - "loss": 0.5327, - "step": 5660 - }, - { - "epoch": 0.36337395766517, - "grad_norm": 0.6265113009833752, - "learning_rate": 0.00016063383266648788, - "loss": 0.7972, - "step": 5665 - }, - { - "epoch": 0.3636946760744067, - "grad_norm": 1.0602611989591288, - "learning_rate": 0.0001605447698219052, - "loss": 0.7568, - "step": 5670 - }, - { - "epoch": 0.36401539448364334, - "grad_norm": 0.8085898565934937, - "learning_rate": 0.0001604556310952661, - "loss": 0.7088, - "step": 5675 - }, - { - "epoch": 0.36433611289288004, - "grad_norm": 0.9259612439090465, - "learning_rate": 0.00016036641659829005, - "loss": 0.6433, - "step": 5680 - }, - { - "epoch": 0.36465683130211674, - "grad_norm": 1.0560925548902709, - "learning_rate": 0.00016027712644279147, - "loss": 0.6389, - "step": 5685 - }, - { - "epoch": 0.36497754971135343, - "grad_norm": 0.9202003497456687, - "learning_rate": 0.00016018776074067965, - "loss": 0.6588, - "step": 5690 - }, - { - "epoch": 0.36529826812059013, - "grad_norm": 0.7606894269431724, - "learning_rate": 0.00016009831960395854, - "loss": 0.6249, - "step": 5695 - }, - { - "epoch": 0.36561898652982683, - "grad_norm": 1.0194051743569745, - "learning_rate": 0.00016000880314472662, - "loss": 0.7063, - "step": 5700 - }, - { - "epoch": 0.3659397049390635, - "grad_norm": 0.8971345599358044, - "learning_rate": 0.0001599192114751768, - "loss": 0.7758, - "step": 5705 - }, - { - "epoch": 0.36626042334830017, - "grad_norm": 0.8114509690004853, - "learning_rate": 0.0001598295447075962, - "loss": 0.687, - "step": 5710 - }, - { - "epoch": 0.36658114175753687, - "grad_norm": 1.1086821486366683, - "learning_rate": 0.00015973980295436613, - "loss": 0.7663, - "step": 5715 - }, - { - "epoch": 0.36690186016677356, - "grad_norm": 0.8305079494288046, - "learning_rate": 0.00015964998632796187, - "loss": 0.7841, - "step": 5720 - }, - { - "epoch": 0.36722257857601026, - "grad_norm": 0.9332565471912556, - "learning_rate": 0.00015956009494095245, - "loss": 0.7629, - "step": 5725 - }, - { - "epoch": 0.36754329698524696, - "grad_norm": 1.2026329331281138, - "learning_rate": 0.00015947012890600072, - "loss": 0.6034, - "step": 5730 - }, - { - "epoch": 0.36786401539448366, - "grad_norm": 0.8890367793012931, - "learning_rate": 0.00015938008833586307, - "loss": 0.673, - "step": 5735 - }, - { - "epoch": 0.36818473380372035, - "grad_norm": 1.1168519576569294, - "learning_rate": 0.00015928997334338924, - "loss": 0.7265, - "step": 5740 - }, - { - "epoch": 0.368505452212957, - "grad_norm": 0.7323689106049717, - "learning_rate": 0.00015919978404152225, - "loss": 0.5286, - "step": 5745 - }, - { - "epoch": 0.3688261706221937, - "grad_norm": 0.7491408637491445, - "learning_rate": 0.00015910952054329832, - "loss": 0.6603, - "step": 5750 - }, - { - "epoch": 0.3691468890314304, - "grad_norm": 0.5720787370255552, - "learning_rate": 0.00015901918296184664, - "loss": 0.7637, - "step": 5755 - }, - { - "epoch": 0.3694676074406671, - "grad_norm": 1.247050118094861, - "learning_rate": 0.00015892877141038917, - "loss": 0.6643, - "step": 5760 - }, - { - "epoch": 0.3697883258499038, - "grad_norm": 0.8428619170851901, - "learning_rate": 0.00015883828600224073, - "loss": 0.603, - "step": 5765 - }, - { - "epoch": 0.3701090442591405, - "grad_norm": 0.6414166600611392, - "learning_rate": 0.00015874772685080853, - "loss": 0.6775, - "step": 5770 - }, - { - "epoch": 0.3704297626683772, - "grad_norm": 1.39629472630112, - "learning_rate": 0.0001586570940695924, - "loss": 0.7512, - "step": 5775 - }, - { - "epoch": 0.3707504810776139, - "grad_norm": 1.0547557813661854, - "learning_rate": 0.00015856638777218422, - "loss": 0.7574, - "step": 5780 - }, - { - "epoch": 0.3710711994868505, - "grad_norm": 0.8689805862522758, - "learning_rate": 0.00015847560807226823, - "loss": 0.6427, - "step": 5785 - }, - { - "epoch": 0.3713919178960872, - "grad_norm": 1.068120678282078, - "learning_rate": 0.00015838475508362051, - "loss": 0.7343, - "step": 5790 - }, - { - "epoch": 0.3717126363053239, - "grad_norm": 0.8164191154263224, - "learning_rate": 0.00015829382892010912, - "loss": 0.7685, - "step": 5795 - }, - { - "epoch": 0.3720333547145606, - "grad_norm": 0.9769245060606544, - "learning_rate": 0.00015820282969569374, - "loss": 0.6804, - "step": 5800 - }, - { - "epoch": 0.3723540731237973, - "grad_norm": 0.676619842133273, - "learning_rate": 0.00015811175752442562, - "loss": 0.7244, - "step": 5805 - }, - { - "epoch": 0.372674791533034, - "grad_norm": 3.577185251797483, - "learning_rate": 0.00015802061252044748, - "loss": 0.7426, - "step": 5810 - }, - { - "epoch": 0.3729955099422707, - "grad_norm": 0.5176738358349613, - "learning_rate": 0.00015792939479799333, - "loss": 0.6545, - "step": 5815 - }, - { - "epoch": 0.37331622835150735, - "grad_norm": 0.9510093482774353, - "learning_rate": 0.00015783810447138826, - "loss": 0.6358, - "step": 5820 - }, - { - "epoch": 0.37363694676074405, - "grad_norm": 0.8940071414235186, - "learning_rate": 0.0001577467416550484, - "loss": 0.7573, - "step": 5825 - }, - { - "epoch": 0.37395766516998075, - "grad_norm": 0.8502887517010003, - "learning_rate": 0.0001576553064634807, - "loss": 0.6371, - "step": 5830 - }, - { - "epoch": 0.37427838357921744, - "grad_norm": 0.7260357322627535, - "learning_rate": 0.00015756379901128294, - "loss": 0.6106, - "step": 5835 - }, - { - "epoch": 0.37459910198845414, - "grad_norm": 0.5018237254264993, - "learning_rate": 0.00015747221941314325, - "loss": 0.6329, - "step": 5840 - }, - { - "epoch": 0.37491982039769084, - "grad_norm": 0.9130075924966622, - "learning_rate": 0.00015738056778384038, - "loss": 0.6868, - "step": 5845 - }, - { - "epoch": 0.37524053880692754, - "grad_norm": 0.803836499340597, - "learning_rate": 0.00015728884423824323, - "loss": 0.5845, - "step": 5850 - }, - { - "epoch": 0.37556125721616423, - "grad_norm": 0.7604942646833414, - "learning_rate": 0.0001571970488913109, - "loss": 0.6911, - "step": 5855 - }, - { - "epoch": 0.3758819756254009, - "grad_norm": 0.6458258737911328, - "learning_rate": 0.00015710518185809246, - "loss": 0.5681, - "step": 5860 - }, - { - "epoch": 0.3762026940346376, - "grad_norm": 1.4247194077938075, - "learning_rate": 0.00015701324325372688, - "loss": 0.7889, - "step": 5865 - }, - { - "epoch": 0.3765234124438743, - "grad_norm": 0.9972586435085499, - "learning_rate": 0.00015692123319344272, - "loss": 0.5962, - "step": 5870 - }, - { - "epoch": 0.37684413085311097, - "grad_norm": 0.8022131053222762, - "learning_rate": 0.0001568291517925582, - "loss": 0.7065, - "step": 5875 - }, - { - "epoch": 0.37716484926234767, - "grad_norm": 1.0767684802416355, - "learning_rate": 0.00015673699916648085, - "loss": 0.5781, - "step": 5880 - }, - { - "epoch": 0.37748556767158437, - "grad_norm": 0.9496229847137114, - "learning_rate": 0.00015664477543070757, - "loss": 0.7056, - "step": 5885 - }, - { - "epoch": 0.37780628608082106, - "grad_norm": 0.9633177655644503, - "learning_rate": 0.00015655248070082438, - "loss": 0.6939, - "step": 5890 - }, - { - "epoch": 0.3781270044900577, - "grad_norm": 1.115479048562113, - "learning_rate": 0.00015646011509250617, - "loss": 0.7378, - "step": 5895 - }, - { - "epoch": 0.3784477228992944, - "grad_norm": 0.7941062299537334, - "learning_rate": 0.0001563676787215168, - "loss": 0.5145, - "step": 5900 - }, - { - "epoch": 0.3787684413085311, - "grad_norm": 0.7169731124858206, - "learning_rate": 0.0001562751717037087, - "loss": 0.5164, - "step": 5905 - }, - { - "epoch": 0.3790891597177678, - "grad_norm": 0.844339179385339, - "learning_rate": 0.00015618259415502291, - "loss": 0.7001, - "step": 5910 - }, - { - "epoch": 0.3794098781270045, - "grad_norm": 0.8954099088632127, - "learning_rate": 0.00015608994619148886, - "loss": 0.7601, - "step": 5915 - }, - { - "epoch": 0.3797305965362412, - "grad_norm": 0.9177657222066289, - "learning_rate": 0.00015599722792922425, - "loss": 0.6568, - "step": 5920 - }, - { - "epoch": 0.3800513149454779, - "grad_norm": 0.6243318997688123, - "learning_rate": 0.00015590443948443482, - "loss": 0.696, - "step": 5925 - }, - { - "epoch": 0.38037203335471453, - "grad_norm": 1.4596235194468596, - "learning_rate": 0.00015581158097341435, - "loss": 0.5778, - "step": 5930 - }, - { - "epoch": 0.38069275176395123, - "grad_norm": 1.2871148477384242, - "learning_rate": 0.0001557186525125444, - "loss": 0.6818, - "step": 5935 - }, - { - "epoch": 0.38101347017318793, - "grad_norm": 0.999208910914117, - "learning_rate": 0.00015562565421829415, - "loss": 0.763, - "step": 5940 - }, - { - "epoch": 0.3813341885824246, - "grad_norm": 0.7364320418930576, - "learning_rate": 0.0001555325862072204, - "loss": 0.5347, - "step": 5945 - }, - { - "epoch": 0.3816549069916613, - "grad_norm": 1.2608428725949008, - "learning_rate": 0.0001554394485959673, - "loss": 0.7863, - "step": 5950 - }, - { - "epoch": 0.381975625400898, - "grad_norm": 1.1072984033964586, - "learning_rate": 0.00015534624150126617, - "loss": 0.6498, - "step": 5955 - }, - { - "epoch": 0.3822963438101347, - "grad_norm": 1.058590608018293, - "learning_rate": 0.00015525296503993548, - "loss": 0.5703, - "step": 5960 - }, - { - "epoch": 0.3826170622193714, - "grad_norm": 1.0908171799744935, - "learning_rate": 0.0001551596193288806, - "loss": 0.7091, - "step": 5965 - }, - { - "epoch": 0.38293778062860806, - "grad_norm": 0.947173201252904, - "learning_rate": 0.0001550662044850937, - "loss": 0.7283, - "step": 5970 - }, - { - "epoch": 0.38325849903784476, - "grad_norm": 1.3607073347278296, - "learning_rate": 0.00015497272062565362, - "loss": 0.6388, - "step": 5975 - }, - { - "epoch": 0.38357921744708146, - "grad_norm": 0.6419239829629664, - "learning_rate": 0.0001548791678677257, - "loss": 0.6622, - "step": 5980 - }, - { - "epoch": 0.38389993585631815, - "grad_norm": 0.7871877023929343, - "learning_rate": 0.0001547855463285616, - "loss": 0.6371, - "step": 5985 - }, - { - "epoch": 0.38422065426555485, - "grad_norm": 0.8230439216037291, - "learning_rate": 0.00015469185612549917, - "loss": 0.6582, - "step": 5990 - }, - { - "epoch": 0.38454137267479155, - "grad_norm": 0.9173584864551694, - "learning_rate": 0.00015459809737596237, - "loss": 0.6135, - "step": 5995 - }, - { - "epoch": 0.38486209108402825, - "grad_norm": 0.8582018157822806, - "learning_rate": 0.0001545042701974611, - "loss": 0.7084, - "step": 6000 - }, - { - "epoch": 0.3851828094932649, - "grad_norm": 1.2042820074202123, - "learning_rate": 0.0001544103747075909, - "loss": 0.8395, - "step": 6005 - }, - { - "epoch": 0.3855035279025016, - "grad_norm": 1.1522620963550567, - "learning_rate": 0.00015431641102403302, - "loss": 0.7, - "step": 6010 - }, - { - "epoch": 0.3858242463117383, - "grad_norm": 1.2293665111038254, - "learning_rate": 0.00015422237926455417, - "loss": 0.8011, - "step": 6015 - }, - { - "epoch": 0.386144964720975, - "grad_norm": 0.8550912373038257, - "learning_rate": 0.00015412827954700632, - "loss": 0.7712, - "step": 6020 - }, - { - "epoch": 0.3864656831302117, - "grad_norm": 1.216484260092555, - "learning_rate": 0.00015403411198932672, - "loss": 0.5951, - "step": 6025 - }, - { - "epoch": 0.3867864015394484, - "grad_norm": 1.0566678632706732, - "learning_rate": 0.00015393987670953756, - "loss": 0.6986, - "step": 6030 - }, - { - "epoch": 0.3871071199486851, - "grad_norm": 0.9868957913863856, - "learning_rate": 0.00015384557382574595, - "loss": 0.583, - "step": 6035 - }, - { - "epoch": 0.38742783835792177, - "grad_norm": 0.7263178308133398, - "learning_rate": 0.0001537512034561437, - "loss": 0.7377, - "step": 6040 - }, - { - "epoch": 0.3877485567671584, - "grad_norm": 0.8464297973296825, - "learning_rate": 0.00015365676571900725, - "loss": 0.6738, - "step": 6045 - }, - { - "epoch": 0.3880692751763951, - "grad_norm": 0.754402075351536, - "learning_rate": 0.00015356226073269736, - "loss": 0.8025, - "step": 6050 - }, - { - "epoch": 0.3883899935856318, - "grad_norm": 1.0080496408230515, - "learning_rate": 0.0001534676886156592, - "loss": 0.6925, - "step": 6055 - }, - { - "epoch": 0.3887107119948685, - "grad_norm": 0.859538871785963, - "learning_rate": 0.000153373049486422, - "loss": 0.6198, - "step": 6060 - }, - { - "epoch": 0.3890314304041052, - "grad_norm": 0.617907781420839, - "learning_rate": 0.0001532783434635991, - "loss": 0.708, - "step": 6065 - }, - { - "epoch": 0.3893521488133419, - "grad_norm": 0.9321179061358089, - "learning_rate": 0.00015318357066588747, - "loss": 0.8021, - "step": 6070 - }, - { - "epoch": 0.3896728672225786, - "grad_norm": 1.0543925706918078, - "learning_rate": 0.00015308873121206798, - "loss": 0.6394, - "step": 6075 - }, - { - "epoch": 0.38999358563181524, - "grad_norm": 0.7635204958993133, - "learning_rate": 0.00015299382522100484, - "loss": 0.7279, - "step": 6080 - }, - { - "epoch": 0.39031430404105194, - "grad_norm": 0.4808058715738424, - "learning_rate": 0.00015289885281164587, - "loss": 0.6074, - "step": 6085 - }, - { - "epoch": 0.39063502245028864, - "grad_norm": 0.8001578622671749, - "learning_rate": 0.00015280381410302197, - "loss": 0.7391, - "step": 6090 - }, - { - "epoch": 0.39095574085952534, - "grad_norm": 0.6800874640636567, - "learning_rate": 0.00015270870921424721, - "loss": 0.6633, - "step": 6095 - }, - { - "epoch": 0.39127645926876203, - "grad_norm": 1.6563190981003801, - "learning_rate": 0.00015261353826451858, - "loss": 0.5687, - "step": 6100 - }, - { - "epoch": 0.39159717767799873, - "grad_norm": 1.541052532833452, - "learning_rate": 0.00015251830137311587, - "loss": 0.7656, - "step": 6105 - }, - { - "epoch": 0.39191789608723543, - "grad_norm": 0.8841300600956734, - "learning_rate": 0.00015242299865940147, - "loss": 0.5984, - "step": 6110 - }, - { - "epoch": 0.39223861449647207, - "grad_norm": 1.2069800426391173, - "learning_rate": 0.00015232763024282034, - "loss": 0.8064, - "step": 6115 - }, - { - "epoch": 0.39255933290570877, - "grad_norm": 1.12582638671757, - "learning_rate": 0.00015223219624289978, - "loss": 0.7329, - "step": 6120 - }, - { - "epoch": 0.39288005131494547, - "grad_norm": 0.8200206186838468, - "learning_rate": 0.0001521366967792493, - "loss": 0.5894, - "step": 6125 - }, - { - "epoch": 0.39320076972418216, - "grad_norm": 0.8420632536848158, - "learning_rate": 0.0001520411319715603, - "loss": 0.7387, - "step": 6130 - }, - { - "epoch": 0.39352148813341886, - "grad_norm": 0.8067132371420835, - "learning_rate": 0.00015194550193960632, - "loss": 0.682, - "step": 6135 - }, - { - "epoch": 0.39384220654265556, - "grad_norm": 0.7708975305048692, - "learning_rate": 0.00015184980680324248, - "loss": 0.68, - "step": 6140 - }, - { - "epoch": 0.39416292495189226, - "grad_norm": 1.0673984272805985, - "learning_rate": 0.00015175404668240554, - "loss": 0.765, - "step": 6145 - }, - { - "epoch": 0.39448364336112896, - "grad_norm": 1.3041455682451786, - "learning_rate": 0.00015165822169711373, - "loss": 0.6576, - "step": 6150 - }, - { - "epoch": 0.3948043617703656, - "grad_norm": 0.6831544367344609, - "learning_rate": 0.00015156233196746653, - "loss": 0.7366, - "step": 6155 - }, - { - "epoch": 0.3951250801796023, - "grad_norm": 0.9906492347644728, - "learning_rate": 0.00015146637761364457, - "loss": 0.7104, - "step": 6160 - }, - { - "epoch": 0.395445798588839, - "grad_norm": 0.8542271989350849, - "learning_rate": 0.00015137035875590956, - "loss": 0.6678, - "step": 6165 - }, - { - "epoch": 0.3957665169980757, - "grad_norm": 1.542102394105923, - "learning_rate": 0.00015127427551460396, - "loss": 0.665, - "step": 6170 - }, - { - "epoch": 0.3960872354073124, - "grad_norm": 0.8016623705576872, - "learning_rate": 0.00015117812801015095, - "loss": 0.5812, - "step": 6175 - }, - { - "epoch": 0.3964079538165491, - "grad_norm": 1.2073109631978751, - "learning_rate": 0.00015108191636305427, - "loss": 0.7527, - "step": 6180 - }, - { - "epoch": 0.3967286722257858, - "grad_norm": 0.8328169453200382, - "learning_rate": 0.000150985640693898, - "loss": 0.6733, - "step": 6185 - }, - { - "epoch": 0.3970493906350224, - "grad_norm": 0.9951192780366616, - "learning_rate": 0.00015088930112334653, - "loss": 0.733, - "step": 6190 - }, - { - "epoch": 0.3973701090442591, - "grad_norm": 0.7405889864532202, - "learning_rate": 0.0001507928977721443, - "loss": 0.5478, - "step": 6195 - }, - { - "epoch": 0.3976908274534958, - "grad_norm": 1.080626962102723, - "learning_rate": 0.0001506964307611157, - "loss": 0.6115, - "step": 6200 - }, - { - "epoch": 0.3980115458627325, - "grad_norm": 0.7995884570597525, - "learning_rate": 0.0001505999002111649, - "loss": 0.5829, - "step": 6205 - }, - { - "epoch": 0.3983322642719692, - "grad_norm": 0.4992231946350308, - "learning_rate": 0.0001505033062432757, - "loss": 0.5649, - "step": 6210 - }, - { - "epoch": 0.3986529826812059, - "grad_norm": 0.8489355096183382, - "learning_rate": 0.00015040664897851138, - "loss": 0.7291, - "step": 6215 - }, - { - "epoch": 0.3989737010904426, - "grad_norm": 1.136002981763331, - "learning_rate": 0.00015030992853801454, - "loss": 0.7918, - "step": 6220 - }, - { - "epoch": 0.39929441949967925, - "grad_norm": 0.895880595156802, - "learning_rate": 0.00015021314504300704, - "loss": 0.5635, - "step": 6225 - }, - { - "epoch": 0.39961513790891595, - "grad_norm": 0.8226243605298355, - "learning_rate": 0.0001501162986147897, - "loss": 0.815, - "step": 6230 - }, - { - "epoch": 0.39993585631815265, - "grad_norm": 0.9921294907910895, - "learning_rate": 0.00015001938937474218, - "loss": 0.7156, - "step": 6235 - }, - { - "epoch": 0.40025657472738935, - "grad_norm": 0.9510451771447491, - "learning_rate": 0.0001499224174443229, - "loss": 0.681, - "step": 6240 - }, - { - "epoch": 0.40057729313662604, - "grad_norm": 0.9952627757450367, - "learning_rate": 0.0001498253829450689, - "loss": 0.712, - "step": 6245 - }, - { - "epoch": 0.40089801154586274, - "grad_norm": 0.6514927458391138, - "learning_rate": 0.00014972828599859556, - "loss": 0.633, - "step": 6250 - }, - { - "epoch": 0.40121872995509944, - "grad_norm": 0.9621219480196492, - "learning_rate": 0.0001496311267265966, - "loss": 0.6988, - "step": 6255 - }, - { - "epoch": 0.40153944836433614, - "grad_norm": 1.0155290688557055, - "learning_rate": 0.00014953390525084377, - "loss": 0.7093, - "step": 6260 - }, - { - "epoch": 0.4018601667735728, - "grad_norm": 0.6507458129551235, - "learning_rate": 0.00014943662169318686, - "loss": 0.6781, - "step": 6265 - }, - { - "epoch": 0.4021808851828095, - "grad_norm": 0.8206284722853324, - "learning_rate": 0.00014933927617555342, - "loss": 0.6472, - "step": 6270 - }, - { - "epoch": 0.4025016035920462, - "grad_norm": 0.969513442832448, - "learning_rate": 0.00014924186881994867, - "loss": 0.6322, - "step": 6275 - }, - { - "epoch": 0.4028223220012829, - "grad_norm": 1.0110426378326145, - "learning_rate": 0.00014914439974845532, - "loss": 0.6192, - "step": 6280 - }, - { - "epoch": 0.40314304041051957, - "grad_norm": 0.9182180122329154, - "learning_rate": 0.0001490468690832335, - "loss": 0.7624, - "step": 6285 - }, - { - "epoch": 0.40346375881975627, - "grad_norm": 1.0221754081762093, - "learning_rate": 0.00014894927694652046, - "loss": 0.5685, - "step": 6290 - }, - { - "epoch": 0.40378447722899297, - "grad_norm": 0.7951566985169003, - "learning_rate": 0.00014885162346063048, - "loss": 0.6114, - "step": 6295 - }, - { - "epoch": 0.4041051956382296, - "grad_norm": 0.9205666830852229, - "learning_rate": 0.00014875390874795482, - "loss": 0.6126, - "step": 6300 - }, - { - "epoch": 0.4044259140474663, - "grad_norm": 0.8495232187331296, - "learning_rate": 0.00014865613293096132, - "loss": 0.6743, - "step": 6305 - }, - { - "epoch": 0.404746632456703, - "grad_norm": 0.5863050150246784, - "learning_rate": 0.0001485582961321946, - "loss": 0.5965, - "step": 6310 - }, - { - "epoch": 0.4050673508659397, - "grad_norm": 0.732145223215556, - "learning_rate": 0.00014846039847427563, - "loss": 0.6549, - "step": 6315 - }, - { - "epoch": 0.4053880692751764, - "grad_norm": 0.7872248738108987, - "learning_rate": 0.00014836244007990156, - "loss": 0.675, - "step": 6320 - }, - { - "epoch": 0.4057087876844131, - "grad_norm": 0.6983906622235366, - "learning_rate": 0.0001482644210718458, - "loss": 0.6684, - "step": 6325 - }, - { - "epoch": 0.4060295060936498, - "grad_norm": 1.036082877660743, - "learning_rate": 0.0001481663415729576, - "loss": 0.6682, - "step": 6330 - }, - { - "epoch": 0.4063502245028865, - "grad_norm": 0.8176112608665335, - "learning_rate": 0.00014806820170616222, - "loss": 0.8555, - "step": 6335 - }, - { - "epoch": 0.40667094291212313, - "grad_norm": 0.7770154320072936, - "learning_rate": 0.00014797000159446038, - "loss": 0.557, - "step": 6340 - }, - { - "epoch": 0.40699166132135983, - "grad_norm": 1.5604043527138882, - "learning_rate": 0.00014787174136092837, - "loss": 0.5678, - "step": 6345 - }, - { - "epoch": 0.40731237973059653, - "grad_norm": 0.5000651713384456, - "learning_rate": 0.00014777342112871786, - "loss": 0.6323, - "step": 6350 - }, - { - "epoch": 0.4076330981398332, - "grad_norm": 0.7129539414804645, - "learning_rate": 0.0001476750410210557, - "loss": 0.6531, - "step": 6355 - }, - { - "epoch": 0.4079538165490699, - "grad_norm": 0.6838741535402209, - "learning_rate": 0.0001475766011612438, - "loss": 0.6734, - "step": 6360 - }, - { - "epoch": 0.4082745349583066, - "grad_norm": 0.6003288340459018, - "learning_rate": 0.00014747810167265894, - "loss": 0.5793, - "step": 6365 - }, - { - "epoch": 0.4085952533675433, - "grad_norm": 1.5754948140455838, - "learning_rate": 0.00014737954267875263, - "loss": 0.702, - "step": 6370 - }, - { - "epoch": 0.40891597177677996, - "grad_norm": 1.0150345516766142, - "learning_rate": 0.000147280924303051, - "loss": 0.8569, - "step": 6375 - }, - { - "epoch": 0.40923669018601666, - "grad_norm": 1.0034479899495579, - "learning_rate": 0.0001471822466691545, - "loss": 0.8446, - "step": 6380 - }, - { - "epoch": 0.40955740859525336, - "grad_norm": 0.9184425443953635, - "learning_rate": 0.00014708350990073798, - "loss": 0.6602, - "step": 6385 - }, - { - "epoch": 0.40987812700449006, - "grad_norm": 0.6284500041695303, - "learning_rate": 0.0001469847141215503, - "loss": 0.7291, - "step": 6390 - }, - { - "epoch": 0.41019884541372675, - "grad_norm": 0.9039981636058719, - "learning_rate": 0.0001468858594554144, - "loss": 0.8008, - "step": 6395 - }, - { - "epoch": 0.41051956382296345, - "grad_norm": 0.9662431864347534, - "learning_rate": 0.0001467869460262269, - "loss": 0.5989, - "step": 6400 - }, - { - "epoch": 0.41084028223220015, - "grad_norm": 0.6824016883811361, - "learning_rate": 0.00014668797395795812, - "loss": 0.7651, - "step": 6405 - }, - { - "epoch": 0.4111610006414368, - "grad_norm": 0.8325307304433841, - "learning_rate": 0.00014658894337465187, - "loss": 0.762, - "step": 6410 - }, - { - "epoch": 0.4114817190506735, - "grad_norm": 0.6873637896445222, - "learning_rate": 0.00014648985440042533, - "loss": 0.6868, - "step": 6415 - }, - { - "epoch": 0.4118024374599102, - "grad_norm": 0.8851369890257763, - "learning_rate": 0.0001463907071594688, - "loss": 0.719, - "step": 6420 - }, - { - "epoch": 0.4121231558691469, - "grad_norm": 0.8755806997147045, - "learning_rate": 0.00014629150177604565, - "loss": 0.6161, - "step": 6425 - }, - { - "epoch": 0.4124438742783836, - "grad_norm": 0.9956221559599793, - "learning_rate": 0.00014619223837449211, - "loss": 0.6246, - "step": 6430 - }, - { - "epoch": 0.4127645926876203, - "grad_norm": 0.9146462627716199, - "learning_rate": 0.00014609291707921713, - "loss": 0.665, - "step": 6435 - }, - { - "epoch": 0.413085311096857, - "grad_norm": 0.7096303973864491, - "learning_rate": 0.0001459935380147022, - "loss": 0.7379, - "step": 6440 - }, - { - "epoch": 0.4134060295060937, - "grad_norm": 0.8414445373385668, - "learning_rate": 0.00014589410130550124, - "loss": 0.7533, - "step": 6445 - }, - { - "epoch": 0.4137267479153303, - "grad_norm": 1.1009718984925583, - "learning_rate": 0.0001457946070762404, - "loss": 0.673, - "step": 6450 - }, - { - "epoch": 0.414047466324567, - "grad_norm": 0.9982085240192685, - "learning_rate": 0.000145695055451618, - "loss": 0.6951, - "step": 6455 - }, - { - "epoch": 0.4143681847338037, - "grad_norm": 0.7828125692520432, - "learning_rate": 0.00014559544655640412, - "loss": 0.7779, - "step": 6460 - }, - { - "epoch": 0.4146889031430404, - "grad_norm": 1.0323696606312884, - "learning_rate": 0.0001454957805154408, - "loss": 0.666, - "step": 6465 - }, - { - "epoch": 0.4150096215522771, - "grad_norm": 0.6618186643447491, - "learning_rate": 0.00014539605745364156, - "loss": 0.7354, - "step": 6470 - }, - { - "epoch": 0.4153303399615138, - "grad_norm": 1.3747337158411725, - "learning_rate": 0.00014529627749599146, - "loss": 0.7191, - "step": 6475 - }, - { - "epoch": 0.4156510583707505, - "grad_norm": 0.6208342823219867, - "learning_rate": 0.0001451964407675469, - "loss": 0.648, - "step": 6480 - }, - { - "epoch": 0.41597177677998715, - "grad_norm": 1.0319199547152835, - "learning_rate": 0.00014509654739343534, - "loss": 0.7808, - "step": 6485 - }, - { - "epoch": 0.41629249518922384, - "grad_norm": 1.1954322026767323, - "learning_rate": 0.0001449965974988553, - "loss": 0.7695, - "step": 6490 - }, - { - "epoch": 0.41661321359846054, - "grad_norm": 1.2496250479975701, - "learning_rate": 0.00014489659120907615, - "loss": 0.6214, - "step": 6495 - }, - { - "epoch": 0.41693393200769724, - "grad_norm": 0.6983577410015246, - "learning_rate": 0.00014479652864943788, - "loss": 0.6312, - "step": 6500 - }, - { - "epoch": 0.41725465041693394, - "grad_norm": 0.8629680447857923, - "learning_rate": 0.0001446964099453511, - "loss": 0.7508, - "step": 6505 - }, - { - "epoch": 0.41757536882617063, - "grad_norm": 1.0627571850045838, - "learning_rate": 0.00014459623522229662, - "loss": 0.7044, - "step": 6510 - }, - { - "epoch": 0.41789608723540733, - "grad_norm": 0.900748883113857, - "learning_rate": 0.00014449600460582563, - "loss": 0.7454, - "step": 6515 - }, - { - "epoch": 0.41821680564464403, - "grad_norm": 0.9690669274483888, - "learning_rate": 0.00014439571822155934, - "loss": 0.5726, - "step": 6520 - }, - { - "epoch": 0.41853752405388067, - "grad_norm": 1.0487104357417287, - "learning_rate": 0.00014429537619518873, - "loss": 0.799, - "step": 6525 - }, - { - "epoch": 0.41885824246311737, - "grad_norm": 1.2154258394073059, - "learning_rate": 0.0001441949786524747, - "loss": 0.5219, - "step": 6530 - }, - { - "epoch": 0.41917896087235407, - "grad_norm": 0.7794877423395151, - "learning_rate": 0.0001440945257192476, - "loss": 0.5707, - "step": 6535 - }, - { - "epoch": 0.41949967928159076, - "grad_norm": 0.5347206233594262, - "learning_rate": 0.00014399401752140728, - "loss": 0.55, - "step": 6540 - }, - { - "epoch": 0.41982039769082746, - "grad_norm": 0.6862664210890991, - "learning_rate": 0.00014389345418492272, - "loss": 0.7803, - "step": 6545 - }, - { - "epoch": 0.42014111610006416, - "grad_norm": 1.1652827614213763, - "learning_rate": 0.0001437928358358322, - "loss": 0.6907, - "step": 6550 - }, - { - "epoch": 0.42046183450930086, - "grad_norm": 1.0771142450313498, - "learning_rate": 0.00014369216260024282, - "loss": 0.5868, - "step": 6555 - }, - { - "epoch": 0.4207825529185375, - "grad_norm": 0.7384688516596317, - "learning_rate": 0.00014359143460433046, - "loss": 0.5754, - "step": 6560 - }, - { - "epoch": 0.4211032713277742, - "grad_norm": 0.7961839635706309, - "learning_rate": 0.00014349065197433977, - "loss": 0.6247, - "step": 6565 - }, - { - "epoch": 0.4214239897370109, - "grad_norm": 0.9321579239285109, - "learning_rate": 0.0001433898148365837, - "loss": 0.6856, - "step": 6570 - }, - { - "epoch": 0.4217447081462476, - "grad_norm": 0.7081449574427117, - "learning_rate": 0.00014328892331744362, - "loss": 0.5893, - "step": 6575 - }, - { - "epoch": 0.4220654265554843, - "grad_norm": 0.9200227580239932, - "learning_rate": 0.000143187977543369, - "loss": 0.661, - "step": 6580 - }, - { - "epoch": 0.422386144964721, - "grad_norm": 1.1330174896855054, - "learning_rate": 0.00014308697764087738, - "loss": 0.8342, - "step": 6585 - }, - { - "epoch": 0.4227068633739577, - "grad_norm": 0.851200673216541, - "learning_rate": 0.00014298592373655414, - "loss": 0.8357, - "step": 6590 - }, - { - "epoch": 0.42302758178319433, - "grad_norm": 0.6342829663427049, - "learning_rate": 0.00014288481595705217, - "loss": 0.4643, - "step": 6595 - }, - { - "epoch": 0.423348300192431, - "grad_norm": 0.5823246535632486, - "learning_rate": 0.00014278365442909214, - "loss": 0.6472, - "step": 6600 - }, - { - "epoch": 0.4236690186016677, - "grad_norm": 1.0907798326084035, - "learning_rate": 0.0001426824392794619, - "loss": 0.5667, - "step": 6605 - }, - { - "epoch": 0.4239897370109044, - "grad_norm": 0.6171485537285203, - "learning_rate": 0.00014258117063501658, - "loss": 0.7975, - "step": 6610 - }, - { - "epoch": 0.4243104554201411, - "grad_norm": 0.920203490173087, - "learning_rate": 0.00014247984862267833, - "loss": 0.5432, - "step": 6615 - }, - { - "epoch": 0.4246311738293778, - "grad_norm": 0.6838928556102262, - "learning_rate": 0.0001423784733694362, - "loss": 0.5982, - "step": 6620 - }, - { - "epoch": 0.4249518922386145, - "grad_norm": 1.2229051146263923, - "learning_rate": 0.00014227704500234599, - "loss": 0.8164, - "step": 6625 - }, - { - "epoch": 0.4252726106478512, - "grad_norm": 0.7990664572540562, - "learning_rate": 0.00014217556364853006, - "loss": 0.7974, - "step": 6630 - }, - { - "epoch": 0.42559332905708785, - "grad_norm": 1.439913710180236, - "learning_rate": 0.00014207402943517707, - "loss": 0.6574, - "step": 6635 - }, - { - "epoch": 0.42591404746632455, - "grad_norm": 1.5833188763841297, - "learning_rate": 0.0001419724424895421, - "loss": 0.6127, - "step": 6640 - }, - { - "epoch": 0.42623476587556125, - "grad_norm": 1.0972694183324532, - "learning_rate": 0.00014187080293894623, - "loss": 0.6384, - "step": 6645 - }, - { - "epoch": 0.42655548428479795, - "grad_norm": 0.7755437327444886, - "learning_rate": 0.0001417691109107765, - "loss": 0.6467, - "step": 6650 - }, - { - "epoch": 0.42687620269403465, - "grad_norm": 0.7307053147732903, - "learning_rate": 0.00014166736653248568, - "loss": 0.6857, - "step": 6655 - }, - { - "epoch": 0.42719692110327134, - "grad_norm": 1.1129466425839534, - "learning_rate": 0.00014156556993159215, - "loss": 0.6325, - "step": 6660 - }, - { - "epoch": 0.42751763951250804, - "grad_norm": 1.0829046562773215, - "learning_rate": 0.00014146372123567986, - "loss": 0.4627, - "step": 6665 - }, - { - "epoch": 0.4278383579217447, - "grad_norm": 0.7286117691400992, - "learning_rate": 0.00014136182057239788, - "loss": 0.7129, - "step": 6670 - }, - { - "epoch": 0.4281590763309814, - "grad_norm": 0.9030468850448815, - "learning_rate": 0.00014125986806946052, - "loss": 0.6249, - "step": 6675 - }, - { - "epoch": 0.4284797947402181, - "grad_norm": 1.0569038979072376, - "learning_rate": 0.00014115786385464704, - "loss": 0.5753, - "step": 6680 - }, - { - "epoch": 0.4288005131494548, - "grad_norm": 1.9939364349504531, - "learning_rate": 0.0001410558080558015, - "loss": 0.6928, - "step": 6685 - }, - { - "epoch": 0.4291212315586915, - "grad_norm": 0.7638304398434881, - "learning_rate": 0.00014095370080083262, - "loss": 0.7665, - "step": 6690 - }, - { - "epoch": 0.42944194996792817, - "grad_norm": 1.0470546825430735, - "learning_rate": 0.00014085154221771362, - "loss": 0.5786, - "step": 6695 - }, - { - "epoch": 0.42976266837716487, - "grad_norm": 1.122127513476166, - "learning_rate": 0.00014074933243448203, - "loss": 0.5162, - "step": 6700 - }, - { - "epoch": 0.43008338678640157, - "grad_norm": 0.9808616961072774, - "learning_rate": 0.00014064707157923956, - "loss": 0.5722, - "step": 6705 - }, - { - "epoch": 0.4304041051956382, - "grad_norm": 0.8653107924861354, - "learning_rate": 0.00014054475978015192, - "loss": 0.6378, - "step": 6710 - }, - { - "epoch": 0.4307248236048749, - "grad_norm": 0.8962127595984706, - "learning_rate": 0.00014044239716544868, - "loss": 0.6408, - "step": 6715 - }, - { - "epoch": 0.4310455420141116, - "grad_norm": 0.8365357084309853, - "learning_rate": 0.00014033998386342312, - "loss": 0.6256, - "step": 6720 - }, - { - "epoch": 0.4313662604233483, - "grad_norm": 1.0863245013957081, - "learning_rate": 0.000140237520002432, - "loss": 0.7068, - "step": 6725 - }, - { - "epoch": 0.431686978832585, - "grad_norm": 0.662268709969254, - "learning_rate": 0.0001401350057108955, - "loss": 0.7573, - "step": 6730 - }, - { - "epoch": 0.4320076972418217, - "grad_norm": 1.1715222453521494, - "learning_rate": 0.0001400324411172969, - "loss": 0.7574, - "step": 6735 - }, - { - "epoch": 0.4323284156510584, - "grad_norm": 0.9424425544184805, - "learning_rate": 0.0001399298263501827, - "loss": 0.8143, - "step": 6740 - }, - { - "epoch": 0.43264913406029504, - "grad_norm": 0.7955537139368879, - "learning_rate": 0.00013982716153816213, - "loss": 0.5263, - "step": 6745 - }, - { - "epoch": 0.43296985246953174, - "grad_norm": 0.9554382885880205, - "learning_rate": 0.00013972444680990722, - "loss": 0.6976, - "step": 6750 - }, - { - "epoch": 0.43329057087876843, - "grad_norm": 1.5328515613064213, - "learning_rate": 0.00013962168229415253, - "loss": 0.627, - "step": 6755 - }, - { - "epoch": 0.43361128928800513, - "grad_norm": 1.096222900496091, - "learning_rate": 0.00013951886811969501, - "loss": 0.8235, - "step": 6760 - }, - { - "epoch": 0.43393200769724183, - "grad_norm": 1.3093624744883847, - "learning_rate": 0.00013941600441539392, - "loss": 0.5996, - "step": 6765 - }, - { - "epoch": 0.4342527261064785, - "grad_norm": 0.9375701783813489, - "learning_rate": 0.00013931309131017046, - "loss": 0.8571, - "step": 6770 - }, - { - "epoch": 0.4345734445157152, - "grad_norm": 0.8981486814038466, - "learning_rate": 0.0001392101289330079, - "loss": 0.7036, - "step": 6775 - }, - { - "epoch": 0.43489416292495187, - "grad_norm": 1.030290589948871, - "learning_rate": 0.00013910711741295113, - "loss": 0.5523, - "step": 6780 - }, - { - "epoch": 0.43521488133418856, - "grad_norm": 0.7783354813811616, - "learning_rate": 0.00013900405687910676, - "loss": 0.6957, - "step": 6785 - }, - { - "epoch": 0.43553559974342526, - "grad_norm": 0.7762966668183622, - "learning_rate": 0.00013890094746064273, - "loss": 0.7249, - "step": 6790 - }, - { - "epoch": 0.43585631815266196, - "grad_norm": 1.0757163547744426, - "learning_rate": 0.0001387977892867883, - "loss": 0.7033, - "step": 6795 - }, - { - "epoch": 0.43617703656189866, - "grad_norm": 0.9414991837160046, - "learning_rate": 0.00013869458248683377, - "loss": 0.6503, - "step": 6800 - }, - { - "epoch": 0.43649775497113535, - "grad_norm": 1.1367581767585646, - "learning_rate": 0.0001385913271901305, - "loss": 0.6653, - "step": 6805 - }, - { - "epoch": 0.43681847338037205, - "grad_norm": 0.9718072928244804, - "learning_rate": 0.0001384880235260905, - "loss": 0.6126, - "step": 6810 - }, - { - "epoch": 0.43713919178960875, - "grad_norm": 1.051631260475179, - "learning_rate": 0.00013838467162418652, - "loss": 0.7529, - "step": 6815 - }, - { - "epoch": 0.4374599101988454, - "grad_norm": 1.1255123924704187, - "learning_rate": 0.00013828127161395165, - "loss": 0.7, - "step": 6820 - }, - { - "epoch": 0.4377806286080821, - "grad_norm": 0.6159074752294377, - "learning_rate": 0.00013817782362497938, - "loss": 0.7815, - "step": 6825 - }, - { - "epoch": 0.4381013470173188, - "grad_norm": 0.7651323158439101, - "learning_rate": 0.00013807432778692333, - "loss": 0.6508, - "step": 6830 - }, - { - "epoch": 0.4384220654265555, - "grad_norm": 1.49661820735196, - "learning_rate": 0.00013797078422949697, - "loss": 0.6949, - "step": 6835 - }, - { - "epoch": 0.4387427838357922, - "grad_norm": 0.9888853439915466, - "learning_rate": 0.0001378671930824737, - "loss": 0.6223, - "step": 6840 - }, - { - "epoch": 0.4390635022450289, - "grad_norm": 1.248537199848208, - "learning_rate": 0.00013776355447568648, - "loss": 0.8024, - "step": 6845 - }, - { - "epoch": 0.4393842206542656, - "grad_norm": 0.9575631631075234, - "learning_rate": 0.00013765986853902783, - "loss": 0.6739, - "step": 6850 - }, - { - "epoch": 0.4397049390635022, - "grad_norm": 0.9680343975909423, - "learning_rate": 0.00013755613540244958, - "loss": 0.6917, - "step": 6855 - }, - { - "epoch": 0.4400256574727389, - "grad_norm": 1.117269566951374, - "learning_rate": 0.00013745235519596263, - "loss": 0.7042, - "step": 6860 - }, - { - "epoch": 0.4403463758819756, - "grad_norm": 0.8619372703069825, - "learning_rate": 0.00013734852804963703, - "loss": 0.609, - "step": 6865 - }, - { - "epoch": 0.4406670942912123, - "grad_norm": 0.8117525588458958, - "learning_rate": 0.00013724465409360148, - "loss": 0.6981, - "step": 6870 - }, - { - "epoch": 0.440987812700449, - "grad_norm": 1.01398403519154, - "learning_rate": 0.0001371407334580434, - "loss": 0.6151, - "step": 6875 - }, - { - "epoch": 0.4413085311096857, - "grad_norm": 0.834092658374222, - "learning_rate": 0.00013703676627320886, - "loss": 0.7673, - "step": 6880 - }, - { - "epoch": 0.4416292495189224, - "grad_norm": 1.5311945048848135, - "learning_rate": 0.00013693275266940207, - "loss": 0.7119, - "step": 6885 - }, - { - "epoch": 0.44194996792815905, - "grad_norm": 1.527540376439275, - "learning_rate": 0.00013682869277698557, - "loss": 0.6265, - "step": 6890 - }, - { - "epoch": 0.44227068633739575, - "grad_norm": 0.7951368893260018, - "learning_rate": 0.00013672458672637984, - "loss": 0.8016, - "step": 6895 - }, - { - "epoch": 0.44259140474663244, - "grad_norm": 1.2763559389048758, - "learning_rate": 0.0001366204346480632, - "loss": 0.7206, - "step": 6900 - }, - { - "epoch": 0.44291212315586914, - "grad_norm": 0.8023255338282319, - "learning_rate": 0.00013651623667257164, - "loss": 0.7554, - "step": 6905 - }, - { - "epoch": 0.44323284156510584, - "grad_norm": 0.8695350841504818, - "learning_rate": 0.00013641199293049877, - "loss": 0.8358, - "step": 6910 - }, - { - "epoch": 0.44355355997434254, - "grad_norm": 0.9044131348318595, - "learning_rate": 0.0001363077035524955, - "loss": 0.6412, - "step": 6915 - }, - { - "epoch": 0.44387427838357923, - "grad_norm": 0.8127899752297872, - "learning_rate": 0.00013620336866926997, - "loss": 0.6957, - "step": 6920 - }, - { - "epoch": 0.44419499679281593, - "grad_norm": 0.8688512997555105, - "learning_rate": 0.00013609898841158725, - "loss": 0.724, - "step": 6925 - }, - { - "epoch": 0.4445157152020526, - "grad_norm": 0.8760877608220616, - "learning_rate": 0.0001359945629102694, - "loss": 0.5738, - "step": 6930 - }, - { - "epoch": 0.4448364336112893, - "grad_norm": 1.0325674004426306, - "learning_rate": 0.0001358900922961951, - "loss": 0.5873, - "step": 6935 - }, - { - "epoch": 0.44515715202052597, - "grad_norm": 0.8467908302129974, - "learning_rate": 0.00013578557670029966, - "loss": 0.7058, - "step": 6940 - }, - { - "epoch": 0.44547787042976267, - "grad_norm": 0.8131400613232301, - "learning_rate": 0.00013568101625357465, - "loss": 0.7422, - "step": 6945 - }, - { - "epoch": 0.44579858883899937, - "grad_norm": 0.724722516850653, - "learning_rate": 0.000135576411087068, - "loss": 0.6638, - "step": 6950 - }, - { - "epoch": 0.44611930724823606, - "grad_norm": 0.8948898208956525, - "learning_rate": 0.00013547176133188354, - "loss": 0.7129, - "step": 6955 - }, - { - "epoch": 0.44644002565747276, - "grad_norm": 1.0104789290655904, - "learning_rate": 0.00013536706711918107, - "loss": 0.7032, - "step": 6960 - }, - { - "epoch": 0.4467607440667094, - "grad_norm": 0.8414717932992289, - "learning_rate": 0.0001352623285801761, - "loss": 0.6836, - "step": 6965 - }, - { - "epoch": 0.4470814624759461, - "grad_norm": 1.1406826410807314, - "learning_rate": 0.00013515754584613962, - "loss": 0.6053, - "step": 6970 - }, - { - "epoch": 0.4474021808851828, - "grad_norm": 0.8742591243812547, - "learning_rate": 0.00013505271904839817, - "loss": 0.7431, - "step": 6975 - }, - { - "epoch": 0.4477228992944195, - "grad_norm": 0.6939509932441673, - "learning_rate": 0.00013494784831833337, - "loss": 0.6291, - "step": 6980 - }, - { - "epoch": 0.4480436177036562, - "grad_norm": 1.1945030623029917, - "learning_rate": 0.00013484293378738193, - "loss": 0.6403, - "step": 6985 - }, - { - "epoch": 0.4483643361128929, - "grad_norm": 1.2041604733537394, - "learning_rate": 0.0001347379755870355, - "loss": 0.7259, - "step": 6990 - }, - { - "epoch": 0.4486850545221296, - "grad_norm": 1.2915007724773113, - "learning_rate": 0.00013463297384884047, - "loss": 0.659, - "step": 6995 - }, - { - "epoch": 0.4490057729313663, - "grad_norm": 0.9604685032866782, - "learning_rate": 0.00013452792870439774, - "loss": 0.7607, - "step": 7000 - }, - { - "epoch": 0.44932649134060293, - "grad_norm": 0.683575690655945, - "learning_rate": 0.00013442284028536265, - "loss": 0.6597, - "step": 7005 - }, - { - "epoch": 0.4496472097498396, - "grad_norm": 0.8599337861042293, - "learning_rate": 0.0001343177087234447, - "loss": 0.6324, - "step": 7010 - }, - { - "epoch": 0.4499679281590763, - "grad_norm": 1.0590394622444155, - "learning_rate": 0.00013421253415040764, - "loss": 0.7187, - "step": 7015 - }, - { - "epoch": 0.450288646568313, - "grad_norm": 0.7304239044871675, - "learning_rate": 0.00013410731669806893, - "loss": 0.6951, - "step": 7020 - }, - { - "epoch": 0.4506093649775497, - "grad_norm": 0.6027716061436601, - "learning_rate": 0.00013400205649829986, - "loss": 0.6254, - "step": 7025 - }, - { - "epoch": 0.4509300833867864, - "grad_norm": 0.9290585913030099, - "learning_rate": 0.00013389675368302538, - "loss": 0.6395, - "step": 7030 - }, - { - "epoch": 0.4512508017960231, - "grad_norm": 0.6100444770178587, - "learning_rate": 0.00013379140838422368, - "loss": 0.6956, - "step": 7035 - }, - { - "epoch": 0.45157152020525976, - "grad_norm": 1.0560462270870308, - "learning_rate": 0.00013368602073392626, - "loss": 0.7217, - "step": 7040 - }, - { - "epoch": 0.45189223861449646, - "grad_norm": 0.9506970796048375, - "learning_rate": 0.00013358059086421777, - "loss": 0.7538, - "step": 7045 - }, - { - "epoch": 0.45221295702373315, - "grad_norm": 0.8472683366273123, - "learning_rate": 0.0001334751189072357, - "loss": 0.7699, - "step": 7050 - }, - { - "epoch": 0.45253367543296985, - "grad_norm": 0.8123297983190807, - "learning_rate": 0.00013336960499517035, - "loss": 0.7617, - "step": 7055 - }, - { - "epoch": 0.45285439384220655, - "grad_norm": 0.7432610908008688, - "learning_rate": 0.00013326404926026453, - "loss": 0.4966, - "step": 7060 - }, - { - "epoch": 0.45317511225144325, - "grad_norm": 1.9038556869996193, - "learning_rate": 0.00013315845183481352, - "loss": 0.7716, - "step": 7065 - }, - { - "epoch": 0.45349583066067994, - "grad_norm": 1.517420207283064, - "learning_rate": 0.0001330528128511648, - "loss": 0.7335, - "step": 7070 - }, - { - "epoch": 0.4538165490699166, - "grad_norm": 0.8901376925504432, - "learning_rate": 0.00013294713244171798, - "loss": 0.6803, - "step": 7075 - }, - { - "epoch": 0.4541372674791533, - "grad_norm": 0.9458291501306725, - "learning_rate": 0.0001328414107389246, - "loss": 0.8463, - "step": 7080 - }, - { - "epoch": 0.45445798588839, - "grad_norm": 0.771925264607674, - "learning_rate": 0.00013273564787528796, - "loss": 0.6271, - "step": 7085 - }, - { - "epoch": 0.4547787042976267, - "grad_norm": 0.9552006861914584, - "learning_rate": 0.00013262984398336287, - "loss": 0.6903, - "step": 7090 - }, - { - "epoch": 0.4550994227068634, - "grad_norm": 0.7912142730312611, - "learning_rate": 0.00013252399919575565, - "loss": 0.7355, - "step": 7095 - }, - { - "epoch": 0.4554201411161001, - "grad_norm": 0.8790500769675236, - "learning_rate": 0.0001324181136451238, - "loss": 0.6732, - "step": 7100 - }, - { - "epoch": 0.45574085952533677, - "grad_norm": 1.2386079454717946, - "learning_rate": 0.00013231218746417595, - "loss": 0.7522, - "step": 7105 - }, - { - "epoch": 0.45606157793457347, - "grad_norm": 0.7962051132713993, - "learning_rate": 0.0001322062207856717, - "loss": 0.8145, - "step": 7110 - }, - { - "epoch": 0.4563822963438101, - "grad_norm": 1.0329953407444796, - "learning_rate": 0.00013210021374242134, - "loss": 0.7769, - "step": 7115 - }, - { - "epoch": 0.4567030147530468, - "grad_norm": 0.9259650281367799, - "learning_rate": 0.00013199416646728573, - "loss": 0.6457, - "step": 7120 - }, - { - "epoch": 0.4570237331622835, - "grad_norm": 0.9088503892075743, - "learning_rate": 0.0001318880790931762, - "loss": 0.6294, - "step": 7125 - }, - { - "epoch": 0.4573444515715202, - "grad_norm": 0.8985892524046365, - "learning_rate": 0.00013178195175305438, - "loss": 0.6828, - "step": 7130 - }, - { - "epoch": 0.4576651699807569, - "grad_norm": 0.912515537663532, - "learning_rate": 0.00013167578457993188, - "loss": 0.7064, - "step": 7135 - }, - { - "epoch": 0.4579858883899936, - "grad_norm": 0.9729614181574077, - "learning_rate": 0.0001315695777068703, - "loss": 0.7272, - "step": 7140 - }, - { - "epoch": 0.4583066067992303, - "grad_norm": 0.6424734919666812, - "learning_rate": 0.00013146333126698103, - "loss": 0.6299, - "step": 7145 - }, - { - "epoch": 0.45862732520846694, - "grad_norm": 0.9359545383993509, - "learning_rate": 0.00013135704539342494, - "loss": 0.6424, - "step": 7150 - }, - { - "epoch": 0.45894804361770364, - "grad_norm": 0.7928212174336042, - "learning_rate": 0.00013125072021941248, - "loss": 0.6982, - "step": 7155 - }, - { - "epoch": 0.45926876202694034, - "grad_norm": 0.5352504172374731, - "learning_rate": 0.00013114435587820316, - "loss": 0.5291, - "step": 7160 - }, - { - "epoch": 0.45958948043617703, - "grad_norm": 0.7128732592198029, - "learning_rate": 0.00013103795250310577, - "loss": 0.7029, - "step": 7165 - }, - { - "epoch": 0.45991019884541373, - "grad_norm": 1.0850764381783637, - "learning_rate": 0.00013093151022747793, - "loss": 0.7707, - "step": 7170 - }, - { - "epoch": 0.46023091725465043, - "grad_norm": 1.0237223555264552, - "learning_rate": 0.000130825029184726, - "loss": 0.6769, - "step": 7175 - }, - { - "epoch": 0.4605516356638871, - "grad_norm": 1.1136242211182483, - "learning_rate": 0.00013071850950830492, - "loss": 0.5703, - "step": 7180 - }, - { - "epoch": 0.4608723540731238, - "grad_norm": 0.8143443059526504, - "learning_rate": 0.00013061195133171814, - "loss": 0.6334, - "step": 7185 - }, - { - "epoch": 0.46119307248236047, - "grad_norm": 0.9509973045912795, - "learning_rate": 0.00013050535478851728, - "loss": 0.6757, - "step": 7190 - }, - { - "epoch": 0.46151379089159716, - "grad_norm": 0.6191444236173257, - "learning_rate": 0.00013039872001230208, - "loss": 0.6217, - "step": 7195 - }, - { - "epoch": 0.46183450930083386, - "grad_norm": 0.7788953363838352, - "learning_rate": 0.00013029204713672015, - "loss": 0.7384, - "step": 7200 - }, - { - "epoch": 0.46215522771007056, - "grad_norm": 0.8450930304171778, - "learning_rate": 0.00013018533629546695, - "loss": 0.7298, - "step": 7205 - }, - { - "epoch": 0.46247594611930726, - "grad_norm": 1.0385186485500146, - "learning_rate": 0.0001300785876222854, - "loss": 0.6529, - "step": 7210 - }, - { - "epoch": 0.46279666452854396, - "grad_norm": 0.9152190048763487, - "learning_rate": 0.00012997180125096596, - "loss": 0.4276, - "step": 7215 - }, - { - "epoch": 0.46311738293778065, - "grad_norm": 0.9787836443016305, - "learning_rate": 0.00012986497731534618, - "loss": 0.63, - "step": 7220 - }, - { - "epoch": 0.4634381013470173, - "grad_norm": 0.9734043537474775, - "learning_rate": 0.00012975811594931094, - "loss": 0.7634, - "step": 7225 - }, - { - "epoch": 0.463758819756254, - "grad_norm": 0.9713910942202003, - "learning_rate": 0.00012965121728679175, - "loss": 0.757, - "step": 7230 - }, - { - "epoch": 0.4640795381654907, - "grad_norm": 0.9081157831943877, - "learning_rate": 0.00012954428146176703, - "loss": 0.7426, - "step": 7235 - }, - { - "epoch": 0.4644002565747274, - "grad_norm": 0.7116758820381245, - "learning_rate": 0.00012943730860826174, - "loss": 0.8052, - "step": 7240 - }, - { - "epoch": 0.4647209749839641, - "grad_norm": 0.8501864866133851, - "learning_rate": 0.00012933029886034723, - "loss": 0.7407, - "step": 7245 - }, - { - "epoch": 0.4650416933932008, - "grad_norm": 0.9701598818030126, - "learning_rate": 0.00012922325235214114, - "loss": 0.672, - "step": 7250 - }, - { - "epoch": 0.4653624118024375, - "grad_norm": 0.7147413441513334, - "learning_rate": 0.00012911616921780717, - "loss": 0.572, - "step": 7255 - }, - { - "epoch": 0.4656831302116741, - "grad_norm": 1.1031756310087157, - "learning_rate": 0.00012900904959155482, - "loss": 0.502, - "step": 7260 - }, - { - "epoch": 0.4660038486209108, - "grad_norm": 0.9549539883250536, - "learning_rate": 0.0001289018936076395, - "loss": 0.7697, - "step": 7265 - }, - { - "epoch": 0.4663245670301475, - "grad_norm": 0.7061368474604979, - "learning_rate": 0.00012879470140036205, - "loss": 0.77, - "step": 7270 - }, - { - "epoch": 0.4666452854393842, - "grad_norm": 0.8174054654625066, - "learning_rate": 0.00012868747310406875, - "loss": 0.644, - "step": 7275 - }, - { - "epoch": 0.4669660038486209, - "grad_norm": 1.0847763653058102, - "learning_rate": 0.00012858020885315118, - "loss": 0.6265, - "step": 7280 - }, - { - "epoch": 0.4672867222578576, - "grad_norm": 0.7498493863919715, - "learning_rate": 0.00012847290878204584, - "loss": 0.6246, - "step": 7285 - }, - { - "epoch": 0.4676074406670943, - "grad_norm": 0.981941482754815, - "learning_rate": 0.0001283655730252343, - "loss": 0.6622, - "step": 7290 - }, - { - "epoch": 0.467928159076331, - "grad_norm": 0.9518018861299121, - "learning_rate": 0.00012825820171724267, - "loss": 0.6284, - "step": 7295 - }, - { - "epoch": 0.46824887748556765, - "grad_norm": 0.8663834243061985, - "learning_rate": 0.00012815079499264178, - "loss": 0.5667, - "step": 7300 - }, - { - "epoch": 0.46856959589480435, - "grad_norm": 0.7672027770311252, - "learning_rate": 0.00012804335298604672, - "loss": 0.7221, - "step": 7305 - }, - { - "epoch": 0.46889031430404104, - "grad_norm": 0.8035416637587046, - "learning_rate": 0.00012793587583211693, - "loss": 0.5737, - "step": 7310 - }, - { - "epoch": 0.46921103271327774, - "grad_norm": 0.7309561664000054, - "learning_rate": 0.00012782836366555578, - "loss": 0.6313, - "step": 7315 - }, - { - "epoch": 0.46953175112251444, - "grad_norm": 0.6252749910832299, - "learning_rate": 0.00012772081662111053, - "loss": 0.6736, - "step": 7320 - }, - { - "epoch": 0.46985246953175114, - "grad_norm": 1.025835083057594, - "learning_rate": 0.00012761323483357227, - "loss": 0.5665, - "step": 7325 - }, - { - "epoch": 0.47017318794098784, - "grad_norm": 0.6525095712503345, - "learning_rate": 0.00012750561843777552, - "loss": 0.6443, - "step": 7330 - }, - { - "epoch": 0.4704939063502245, - "grad_norm": 0.7418969128305869, - "learning_rate": 0.00012739796756859825, - "loss": 0.8236, - "step": 7335 - }, - { - "epoch": 0.4708146247594612, - "grad_norm": 1.0413884397203683, - "learning_rate": 0.00012729028236096155, - "loss": 0.6624, - "step": 7340 - }, - { - "epoch": 0.4711353431686979, - "grad_norm": 0.9159067009468284, - "learning_rate": 0.0001271825629498296, - "loss": 0.6376, - "step": 7345 - }, - { - "epoch": 0.47145606157793457, - "grad_norm": 0.5992387879000995, - "learning_rate": 0.0001270748094702095, - "loss": 0.5685, - "step": 7350 - }, - { - "epoch": 0.47177677998717127, - "grad_norm": 1.7163402868588182, - "learning_rate": 0.00012696702205715088, - "loss": 0.5311, - "step": 7355 - }, - { - "epoch": 0.47209749839640797, - "grad_norm": 0.7926851445802399, - "learning_rate": 0.00012685920084574618, - "loss": 0.7548, - "step": 7360 - }, - { - "epoch": 0.47241821680564466, - "grad_norm": 0.9751658539863987, - "learning_rate": 0.0001267513459711299, - "loss": 0.6665, - "step": 7365 - }, - { - "epoch": 0.47273893521488136, - "grad_norm": 1.0752483823874541, - "learning_rate": 0.00012664345756847892, - "loss": 0.583, - "step": 7370 - }, - { - "epoch": 0.473059653624118, - "grad_norm": 1.0127918776763205, - "learning_rate": 0.00012653553577301202, - "loss": 0.749, - "step": 7375 - }, - { - "epoch": 0.4733803720333547, - "grad_norm": 0.9059323990908674, - "learning_rate": 0.00012642758071999, - "loss": 0.7049, - "step": 7380 - }, - { - "epoch": 0.4737010904425914, - "grad_norm": 0.8259800182390388, - "learning_rate": 0.00012631959254471515, - "loss": 0.6771, - "step": 7385 - }, - { - "epoch": 0.4740218088518281, - "grad_norm": 1.47432552983105, - "learning_rate": 0.00012621157138253142, - "loss": 0.5965, - "step": 7390 - }, - { - "epoch": 0.4743425272610648, - "grad_norm": 0.9830245238116091, - "learning_rate": 0.00012610351736882402, - "loss": 0.7302, - "step": 7395 - }, - { - "epoch": 0.4746632456703015, - "grad_norm": 0.9860227904680734, - "learning_rate": 0.00012599543063901935, - "loss": 0.6942, - "step": 7400 - }, - { - "epoch": 0.4749839640795382, - "grad_norm": 0.9011424798066042, - "learning_rate": 0.00012588731132858486, - "loss": 0.6456, - "step": 7405 - }, - { - "epoch": 0.47530468248877483, - "grad_norm": 0.9091580384346607, - "learning_rate": 0.00012577915957302872, - "loss": 0.6091, - "step": 7410 - }, - { - "epoch": 0.47562540089801153, - "grad_norm": 0.9741008974793179, - "learning_rate": 0.00012567097550789997, - "loss": 0.6012, - "step": 7415 - }, - { - "epoch": 0.4759461193072482, - "grad_norm": 0.9602884477063278, - "learning_rate": 0.00012556275926878789, - "loss": 0.6792, - "step": 7420 - }, - { - "epoch": 0.4762668377164849, - "grad_norm": 0.6210052131474215, - "learning_rate": 0.00012545451099132225, - "loss": 0.6193, - "step": 7425 - }, - { - "epoch": 0.4765875561257216, - "grad_norm": 0.8832670583789428, - "learning_rate": 0.000125346230811173, - "loss": 0.6106, - "step": 7430 - }, - { - "epoch": 0.4769082745349583, - "grad_norm": 0.851189577398919, - "learning_rate": 0.00012523791886404986, - "loss": 0.8305, - "step": 7435 - }, - { - "epoch": 0.477228992944195, - "grad_norm": 1.2879732211506167, - "learning_rate": 0.00012512957528570265, - "loss": 0.5887, - "step": 7440 - }, - { - "epoch": 0.47754971135343166, - "grad_norm": 0.5699068076911031, - "learning_rate": 0.0001250212002119207, - "loss": 0.5558, - "step": 7445 - }, - { - "epoch": 0.47787042976266836, - "grad_norm": 1.1918583269997756, - "learning_rate": 0.00012491279377853268, - "loss": 0.6408, - "step": 7450 - }, - { - "epoch": 0.47819114817190506, - "grad_norm": 1.4317720523654553, - "learning_rate": 0.0001248043561214068, - "loss": 0.6172, - "step": 7455 - }, - { - "epoch": 0.47851186658114175, - "grad_norm": 1.0666113380037154, - "learning_rate": 0.0001246958873764503, - "loss": 0.7485, - "step": 7460 - }, - { - "epoch": 0.47883258499037845, - "grad_norm": 1.2123844625766853, - "learning_rate": 0.00012458738767960937, - "loss": 0.7277, - "step": 7465 - }, - { - "epoch": 0.47915330339961515, - "grad_norm": 0.6850700187680755, - "learning_rate": 0.00012447885716686892, - "loss": 0.6412, - "step": 7470 - }, - { - "epoch": 0.47947402180885185, - "grad_norm": 0.7818905955159324, - "learning_rate": 0.00012437029597425268, - "loss": 0.6845, - "step": 7475 - }, - { - "epoch": 0.47979474021808854, - "grad_norm": 0.7985800895037933, - "learning_rate": 0.00012426170423782265, - "loss": 0.7376, - "step": 7480 - }, - { - "epoch": 0.4801154586273252, - "grad_norm": 1.4988959271026578, - "learning_rate": 0.0001241530820936792, - "loss": 0.6025, - "step": 7485 - }, - { - "epoch": 0.4804361770365619, - "grad_norm": 0.7532644364170019, - "learning_rate": 0.00012404442967796077, - "loss": 0.7597, - "step": 7490 - }, - { - "epoch": 0.4807568954457986, - "grad_norm": 0.9781127180520404, - "learning_rate": 0.0001239357471268438, - "loss": 0.7113, - "step": 7495 - }, - { - "epoch": 0.4810776138550353, - "grad_norm": 1.2808191157193494, - "learning_rate": 0.00012382703457654247, - "loss": 0.7197, - "step": 7500 - }, - { - "epoch": 0.481398332264272, - "grad_norm": 0.9577008167614253, - "learning_rate": 0.00012371829216330842, - "loss": 0.6633, - "step": 7505 - }, - { - "epoch": 0.4817190506735087, - "grad_norm": 0.9163574634981259, - "learning_rate": 0.000123609520023431, - "loss": 0.6577, - "step": 7510 - }, - { - "epoch": 0.4820397690827454, - "grad_norm": 0.9436379402563304, - "learning_rate": 0.00012350071829323657, - "loss": 0.665, - "step": 7515 - }, - { - "epoch": 0.482360487491982, - "grad_norm": 0.8955893724229462, - "learning_rate": 0.0001233918871090887, - "loss": 0.65, - "step": 7520 - }, - { - "epoch": 0.4826812059012187, - "grad_norm": 1.1039069837177617, - "learning_rate": 0.0001232830266073879, - "loss": 0.6262, - "step": 7525 - }, - { - "epoch": 0.4830019243104554, - "grad_norm": 0.8240710234420133, - "learning_rate": 0.00012317413692457125, - "loss": 0.7796, - "step": 7530 - }, - { - "epoch": 0.4833226427196921, - "grad_norm": 0.5672101461672577, - "learning_rate": 0.0001230652181971126, - "loss": 0.6606, - "step": 7535 - }, - { - "epoch": 0.4836433611289288, - "grad_norm": 0.6312799174708051, - "learning_rate": 0.00012295627056152205, - "loss": 0.6847, - "step": 7540 - }, - { - "epoch": 0.4839640795381655, - "grad_norm": 0.9279904903302523, - "learning_rate": 0.0001228472941543461, - "loss": 0.7298, - "step": 7545 - }, - { - "epoch": 0.4842847979474022, - "grad_norm": 1.0061624072103414, - "learning_rate": 0.00012273828911216715, - "loss": 0.688, - "step": 7550 - }, - { - "epoch": 0.48460551635663884, - "grad_norm": 0.9531338313200752, - "learning_rate": 0.00012262925557160362, - "loss": 0.7381, - "step": 7555 - }, - { - "epoch": 0.48492623476587554, - "grad_norm": 0.9084381778100004, - "learning_rate": 0.0001225201936693095, - "loss": 0.5676, - "step": 7560 - }, - { - "epoch": 0.48524695317511224, - "grad_norm": 1.0203436397332364, - "learning_rate": 0.00012241110354197448, - "loss": 0.571, - "step": 7565 - }, - { - "epoch": 0.48556767158434894, - "grad_norm": 0.9169062207127215, - "learning_rate": 0.00012230198532632347, - "loss": 0.6456, - "step": 7570 - }, - { - "epoch": 0.48588838999358563, - "grad_norm": 0.6002350728637655, - "learning_rate": 0.0001221928391591167, - "loss": 0.6998, - "step": 7575 - }, - { - "epoch": 0.48620910840282233, - "grad_norm": 0.5575094896397851, - "learning_rate": 0.00012208366517714946, - "loss": 0.6751, - "step": 7580 - }, - { - "epoch": 0.48652982681205903, - "grad_norm": 0.7309868460212633, - "learning_rate": 0.00012197446351725174, - "loss": 0.6152, - "step": 7585 - }, - { - "epoch": 0.4868505452212957, - "grad_norm": 0.9692168543018325, - "learning_rate": 0.0001218652343162884, - "loss": 0.6374, - "step": 7590 - }, - { - "epoch": 0.48717126363053237, - "grad_norm": 0.7189150002506619, - "learning_rate": 0.00012175597771115871, - "loss": 0.7784, - "step": 7595 - }, - { - "epoch": 0.48749198203976907, - "grad_norm": 0.8123916784425887, - "learning_rate": 0.0001216466938387963, - "loss": 0.5559, - "step": 7600 - }, - { - "epoch": 0.48781270044900576, - "grad_norm": 0.903323959073406, - "learning_rate": 0.00012153738283616897, - "loss": 0.6245, - "step": 7605 - }, - { - "epoch": 0.48813341885824246, - "grad_norm": 1.1841897784251287, - "learning_rate": 0.00012142804484027862, - "loss": 0.7076, - "step": 7610 - }, - { - "epoch": 0.48845413726747916, - "grad_norm": 0.96970852663879, - "learning_rate": 0.0001213186799881608, - "loss": 0.6394, - "step": 7615 - }, - { - "epoch": 0.48877485567671586, - "grad_norm": 0.9366182177279975, - "learning_rate": 0.00012120928841688486, - "loss": 0.6738, - "step": 7620 - }, - { - "epoch": 0.48909557408595256, - "grad_norm": 0.6547998596688648, - "learning_rate": 0.0001210998702635536, - "loss": 0.5484, - "step": 7625 - }, - { - "epoch": 0.4894162924951892, - "grad_norm": 0.61835825910844, - "learning_rate": 0.00012099042566530318, - "loss": 0.7106, - "step": 7630 - }, - { - "epoch": 0.4897370109044259, - "grad_norm": 0.9889648893113016, - "learning_rate": 0.00012088095475930281, - "loss": 0.6665, - "step": 7635 - }, - { - "epoch": 0.4900577293136626, - "grad_norm": 1.0009313158645148, - "learning_rate": 0.00012077145768275473, - "loss": 0.7342, - "step": 7640 - }, - { - "epoch": 0.4903784477228993, - "grad_norm": 1.207980433506984, - "learning_rate": 0.00012066193457289397, - "loss": 0.797, - "step": 7645 - }, - { - "epoch": 0.490699166132136, - "grad_norm": 0.7854979595695312, - "learning_rate": 0.00012055238556698816, - "loss": 0.6988, - "step": 7650 - }, - { - "epoch": 0.4910198845413727, - "grad_norm": 0.7188797039130606, - "learning_rate": 0.00012044281080233746, - "loss": 0.7325, - "step": 7655 - }, - { - "epoch": 0.4913406029506094, - "grad_norm": 0.9561317362271494, - "learning_rate": 0.00012033321041627425, - "loss": 0.6506, - "step": 7660 - }, - { - "epoch": 0.4916613213598461, - "grad_norm": 0.7528076899928123, - "learning_rate": 0.00012022358454616306, - "loss": 0.5609, - "step": 7665 - }, - { - "epoch": 0.4919820397690827, - "grad_norm": 0.8596601027470778, - "learning_rate": 0.0001201139333294003, - "loss": 0.6597, - "step": 7670 - }, - { - "epoch": 0.4923027581783194, - "grad_norm": 0.6508137207715219, - "learning_rate": 0.00012000425690341422, - "loss": 0.4953, - "step": 7675 - }, - { - "epoch": 0.4926234765875561, - "grad_norm": 0.8505276898684504, - "learning_rate": 0.00011989455540566462, - "loss": 0.6649, - "step": 7680 - }, - { - "epoch": 0.4929441949967928, - "grad_norm": 0.758748378012195, - "learning_rate": 0.00011978482897364273, - "loss": 0.7204, - "step": 7685 - }, - { - "epoch": 0.4932649134060295, - "grad_norm": 0.8242651845310669, - "learning_rate": 0.00011967507774487108, - "loss": 0.6598, - "step": 7690 - }, - { - "epoch": 0.4935856318152662, - "grad_norm": 0.8816627197677691, - "learning_rate": 0.0001195653018569032, - "loss": 0.8369, - "step": 7695 - }, - { - "epoch": 0.4939063502245029, - "grad_norm": 0.781020774879966, - "learning_rate": 0.00011945550144732354, - "loss": 0.7912, - "step": 7700 - }, - { - "epoch": 0.49422706863373955, - "grad_norm": 0.5912028419510443, - "learning_rate": 0.00011934567665374732, - "loss": 0.673, - "step": 7705 - }, - { - "epoch": 0.49454778704297625, - "grad_norm": 0.7852150600454825, - "learning_rate": 0.00011923582761382031, - "loss": 0.6989, - "step": 7710 - }, - { - "epoch": 0.49486850545221295, - "grad_norm": 0.8345934386959575, - "learning_rate": 0.00011912595446521868, - "loss": 0.6319, - "step": 7715 - }, - { - "epoch": 0.49518922386144965, - "grad_norm": 1.2815263854782484, - "learning_rate": 0.0001190160573456488, - "loss": 0.6247, - "step": 7720 - }, - { - "epoch": 0.49550994227068634, - "grad_norm": 1.1234841964502218, - "learning_rate": 0.00011890613639284704, - "loss": 0.653, - "step": 7725 - }, - { - "epoch": 0.49583066067992304, - "grad_norm": 0.9428012694473118, - "learning_rate": 0.00011879619174457976, - "loss": 0.9064, - "step": 7730 - }, - { - "epoch": 0.49615137908915974, - "grad_norm": 0.7822481283735353, - "learning_rate": 0.00011868622353864285, - "loss": 0.5887, - "step": 7735 - }, - { - "epoch": 0.4964720974983964, - "grad_norm": 0.6197300598147442, - "learning_rate": 0.00011857623191286186, - "loss": 0.5871, - "step": 7740 - }, - { - "epoch": 0.4967928159076331, - "grad_norm": 0.6742268900193886, - "learning_rate": 0.00011846621700509171, - "loss": 0.6153, - "step": 7745 - }, - { - "epoch": 0.4971135343168698, - "grad_norm": 1.0097074349573119, - "learning_rate": 0.00011835617895321633, - "loss": 0.726, - "step": 7750 - }, - { - "epoch": 0.4974342527261065, - "grad_norm": 0.7938742619155006, - "learning_rate": 0.00011824611789514881, - "loss": 0.7576, - "step": 7755 - }, - { - "epoch": 0.49775497113534317, - "grad_norm": 0.7594193522785816, - "learning_rate": 0.00011813603396883108, - "loss": 0.631, - "step": 7760 - }, - { - "epoch": 0.49807568954457987, - "grad_norm": 1.1449681048330884, - "learning_rate": 0.0001180259273122336, - "loss": 0.8346, - "step": 7765 - }, - { - "epoch": 0.49839640795381657, - "grad_norm": 0.6106704277152839, - "learning_rate": 0.00011791579806335547, - "loss": 0.7094, - "step": 7770 - }, - { - "epoch": 0.49871712636305326, - "grad_norm": 0.9764152562715487, - "learning_rate": 0.000117805646360224, - "loss": 0.7922, - "step": 7775 - }, - { - "epoch": 0.4990378447722899, - "grad_norm": 1.4581971435959649, - "learning_rate": 0.00011769547234089469, - "loss": 0.7598, - "step": 7780 - }, - { - "epoch": 0.4993585631815266, - "grad_norm": 1.1726593622900077, - "learning_rate": 0.00011758527614345097, - "loss": 0.6934, - "step": 7785 - }, - { - "epoch": 0.4996792815907633, - "grad_norm": 1.382229173196648, - "learning_rate": 0.00011747505790600412, - "loss": 0.6793, - "step": 7790 - }, - { - "epoch": 0.5, - "grad_norm": 0.7583044707535523, - "learning_rate": 0.00011736481776669306, - "loss": 0.7244, - "step": 7795 - }, - { - "epoch": 0.5003207184092366, - "grad_norm": 1.0327502481504163, - "learning_rate": 0.000117254555863684, - "loss": 0.7023, - "step": 7800 - }, - { - "epoch": 0.5006414368184734, - "grad_norm": 0.6928521319692996, - "learning_rate": 0.00011714427233517069, - "loss": 0.5508, - "step": 7805 - }, - { - "epoch": 0.50096215522771, - "grad_norm": 0.6645980452165248, - "learning_rate": 0.0001170339673193737, - "loss": 0.7463, - "step": 7810 - }, - { - "epoch": 0.5012828736369468, - "grad_norm": 0.6668044106727686, - "learning_rate": 0.00011692364095454076, - "loss": 0.6357, - "step": 7815 - }, - { - "epoch": 0.5016035920461834, - "grad_norm": 0.9287710383565055, - "learning_rate": 0.00011681329337894623, - "loss": 0.6308, - "step": 7820 - }, - { - "epoch": 0.5019243104554202, - "grad_norm": 1.3104043465513664, - "learning_rate": 0.0001167029247308911, - "loss": 0.5399, - "step": 7825 - }, - { - "epoch": 0.5022450288646568, - "grad_norm": 1.428373507944948, - "learning_rate": 0.00011659253514870276, - "loss": 0.7011, - "step": 7830 - }, - { - "epoch": 0.5025657472738935, - "grad_norm": 0.833100109623975, - "learning_rate": 0.00011648212477073484, - "loss": 0.7404, - "step": 7835 - }, - { - "epoch": 0.5028864656831302, - "grad_norm": 1.0751700158927022, - "learning_rate": 0.00011637169373536698, - "loss": 0.6389, - "step": 7840 - }, - { - "epoch": 0.5032071840923669, - "grad_norm": 0.9610389244865, - "learning_rate": 0.00011626124218100483, - "loss": 0.732, - "step": 7845 - }, - { - "epoch": 0.5035279025016036, - "grad_norm": 1.4064338381179782, - "learning_rate": 0.00011615077024607965, - "loss": 0.7248, - "step": 7850 - }, - { - "epoch": 0.5038486209108403, - "grad_norm": 1.0089167449788845, - "learning_rate": 0.00011604027806904833, - "loss": 0.6808, - "step": 7855 - }, - { - "epoch": 0.504169339320077, - "grad_norm": 0.8297282225570892, - "learning_rate": 0.00011592976578839303, - "loss": 0.7505, - "step": 7860 - }, - { - "epoch": 0.5044900577293137, - "grad_norm": 0.8562597418732677, - "learning_rate": 0.00011581923354262117, - "loss": 0.7069, - "step": 7865 - }, - { - "epoch": 0.5048107761385503, - "grad_norm": 1.1555443138727173, - "learning_rate": 0.00011570868147026517, - "loss": 0.6213, - "step": 7870 - }, - { - "epoch": 0.505131494547787, - "grad_norm": 1.4259877059174733, - "learning_rate": 0.00011559810970988232, - "loss": 0.6105, - "step": 7875 - }, - { - "epoch": 0.5054522129570237, - "grad_norm": 0.6183735071336424, - "learning_rate": 0.00011548751840005459, - "loss": 0.4662, - "step": 7880 - }, - { - "epoch": 0.5057729313662604, - "grad_norm": 0.9453435423443054, - "learning_rate": 0.00011537690767938843, - "loss": 0.6083, - "step": 7885 - }, - { - "epoch": 0.5060936497754971, - "grad_norm": 0.6729282582317203, - "learning_rate": 0.00011526627768651459, - "loss": 0.7553, - "step": 7890 - }, - { - "epoch": 0.5064143681847338, - "grad_norm": 0.8579324957843062, - "learning_rate": 0.00011515562856008808, - "loss": 0.7014, - "step": 7895 - }, - { - "epoch": 0.5067350865939705, - "grad_norm": 0.9652710068101304, - "learning_rate": 0.00011504496043878776, - "loss": 0.7203, - "step": 7900 - }, - { - "epoch": 0.5070558050032072, - "grad_norm": 1.3328325121052935, - "learning_rate": 0.00011493427346131636, - "loss": 0.7462, - "step": 7905 - }, - { - "epoch": 0.5073765234124439, - "grad_norm": 0.7750774157499563, - "learning_rate": 0.00011482356776640028, - "loss": 0.7554, - "step": 7910 - }, - { - "epoch": 0.5076972418216805, - "grad_norm": 0.7771858604565626, - "learning_rate": 0.00011471284349278928, - "loss": 0.7032, - "step": 7915 - }, - { - "epoch": 0.5080179602309173, - "grad_norm": 0.9990707053591126, - "learning_rate": 0.0001146021007792565, - "loss": 0.5966, - "step": 7920 - }, - { - "epoch": 0.5083386786401539, - "grad_norm": 0.9864579497103747, - "learning_rate": 0.00011449133976459816, - "loss": 0.701, - "step": 7925 - }, - { - "epoch": 0.5086593970493907, - "grad_norm": 0.9752505086126679, - "learning_rate": 0.0001143805605876334, - "loss": 0.6502, - "step": 7930 - }, - { - "epoch": 0.5089801154586273, - "grad_norm": 1.3306389404931571, - "learning_rate": 0.00011426976338720412, - "loss": 0.6592, - "step": 7935 - }, - { - "epoch": 0.5093008338678641, - "grad_norm": 0.6705402480174242, - "learning_rate": 0.00011415894830217486, - "loss": 0.6531, - "step": 7940 - }, - { - "epoch": 0.5096215522771007, - "grad_norm": 0.8130683741487627, - "learning_rate": 0.00011404811547143251, - "loss": 0.7333, - "step": 7945 - }, - { - "epoch": 0.5099422706863374, - "grad_norm": 1.1664159763922086, - "learning_rate": 0.0001139372650338862, - "loss": 0.8146, - "step": 7950 - }, - { - "epoch": 0.5102629890955741, - "grad_norm": 0.5999515830143689, - "learning_rate": 0.00011382639712846721, - "loss": 0.5825, - "step": 7955 - }, - { - "epoch": 0.5105837075048107, - "grad_norm": 1.1054727651684402, - "learning_rate": 0.00011371551189412868, - "loss": 0.7374, - "step": 7960 - }, - { - "epoch": 0.5109044259140475, - "grad_norm": 1.0319949146313503, - "learning_rate": 0.00011360460946984537, - "loss": 0.7562, - "step": 7965 - }, - { - "epoch": 0.5112251443232841, - "grad_norm": 0.6047170156572763, - "learning_rate": 0.00011349368999461374, - "loss": 0.7588, - "step": 7970 - }, - { - "epoch": 0.5115458627325209, - "grad_norm": 0.8725079332758466, - "learning_rate": 0.00011338275360745147, - "loss": 0.7421, - "step": 7975 - }, - { - "epoch": 0.5118665811417575, - "grad_norm": 0.784376771151006, - "learning_rate": 0.00011327180044739755, - "loss": 0.5837, - "step": 7980 - }, - { - "epoch": 0.5121872995509942, - "grad_norm": 0.8977359490481988, - "learning_rate": 0.00011316083065351195, - "loss": 0.7392, - "step": 7985 - }, - { - "epoch": 0.5125080179602309, - "grad_norm": 0.653772242009018, - "learning_rate": 0.00011304984436487551, - "loss": 0.6166, - "step": 7990 - }, - { - "epoch": 0.5128287363694676, - "grad_norm": 1.2310492343797879, - "learning_rate": 0.00011293884172058971, - "loss": 0.5507, - "step": 7995 - }, - { - "epoch": 0.5131494547787043, - "grad_norm": 1.0077531207139014, - "learning_rate": 0.00011282782285977649, - "loss": 0.6358, - "step": 8000 - }, - { - "epoch": 0.513470173187941, - "grad_norm": 1.19554249733326, - "learning_rate": 0.00011271678792157823, - "loss": 0.6614, - "step": 8005 - }, - { - "epoch": 0.5137908915971777, - "grad_norm": 0.8654028252618859, - "learning_rate": 0.00011260573704515734, - "loss": 0.6444, - "step": 8010 - }, - { - "epoch": 0.5141116100064144, - "grad_norm": 0.9637998906695273, - "learning_rate": 0.00011249467036969632, - "loss": 0.6859, - "step": 8015 - }, - { - "epoch": 0.514432328415651, - "grad_norm": 1.2621981138132725, - "learning_rate": 0.00011238358803439739, - "loss": 0.7247, - "step": 8020 - }, - { - "epoch": 0.5147530468248878, - "grad_norm": 0.6255230049474781, - "learning_rate": 0.0001122724901784824, - "loss": 0.7025, - "step": 8025 - }, - { - "epoch": 0.5150737652341244, - "grad_norm": 0.8124027597004405, - "learning_rate": 0.00011216137694119271, - "loss": 0.6465, - "step": 8030 - }, - { - "epoch": 0.5153944836433612, - "grad_norm": 0.7060753692578354, - "learning_rate": 0.00011205024846178886, - "loss": 0.5977, - "step": 8035 - }, - { - "epoch": 0.5157152020525978, - "grad_norm": 0.9066775542047206, - "learning_rate": 0.00011193910487955059, - "loss": 0.6407, - "step": 8040 - }, - { - "epoch": 0.5160359204618346, - "grad_norm": 0.6903326908804434, - "learning_rate": 0.00011182794633377653, - "loss": 0.6925, - "step": 8045 - }, - { - "epoch": 0.5163566388710712, - "grad_norm": 0.9472934152436594, - "learning_rate": 0.00011171677296378411, - "loss": 0.7609, - "step": 8050 - }, - { - "epoch": 0.5166773572803078, - "grad_norm": 1.0828907895794335, - "learning_rate": 0.0001116055849089092, - "loss": 0.7855, - "step": 8055 - }, - { - "epoch": 0.5169980756895446, - "grad_norm": 1.3155495321215651, - "learning_rate": 0.00011149438230850626, - "loss": 0.6561, - "step": 8060 - }, - { - "epoch": 0.5173187940987812, - "grad_norm": 0.7751536928800652, - "learning_rate": 0.00011138316530194782, - "loss": 0.6302, - "step": 8065 - }, - { - "epoch": 0.517639512508018, - "grad_norm": 1.278374102598091, - "learning_rate": 0.00011127193402862457, - "loss": 0.6741, - "step": 8070 - }, - { - "epoch": 0.5179602309172546, - "grad_norm": 0.7961067269873462, - "learning_rate": 0.00011116068862794506, - "loss": 0.7248, - "step": 8075 - }, - { - "epoch": 0.5182809493264914, - "grad_norm": 0.9325619210714818, - "learning_rate": 0.0001110494292393355, - "loss": 0.6036, - "step": 8080 - }, - { - "epoch": 0.518601667735728, - "grad_norm": 0.9427970552237784, - "learning_rate": 0.00011093815600223966, - "loss": 0.6906, - "step": 8085 - }, - { - "epoch": 0.5189223861449648, - "grad_norm": 0.9820235565256558, - "learning_rate": 0.00011082686905611872, - "loss": 0.6996, - "step": 8090 - }, - { - "epoch": 0.5192431045542014, - "grad_norm": 0.7847448260775505, - "learning_rate": 0.00011071556854045098, - "loss": 0.67, - "step": 8095 - }, - { - "epoch": 0.5195638229634381, - "grad_norm": 0.7114519312016215, - "learning_rate": 0.00011060425459473169, - "loss": 0.6844, - "step": 8100 - }, - { - "epoch": 0.5198845413726748, - "grad_norm": 0.6238373643554763, - "learning_rate": 0.00011049292735847312, - "loss": 0.5971, - "step": 8105 - }, - { - "epoch": 0.5202052597819115, - "grad_norm": 0.9399929160198239, - "learning_rate": 0.00011038158697120395, - "loss": 0.6189, - "step": 8110 - }, - { - "epoch": 0.5205259781911482, - "grad_norm": 1.1129758526237858, - "learning_rate": 0.00011027023357246955, - "loss": 0.7023, - "step": 8115 - }, - { - "epoch": 0.5208466966003849, - "grad_norm": 1.049212324811729, - "learning_rate": 0.00011015886730183152, - "loss": 0.7014, - "step": 8120 - }, - { - "epoch": 0.5211674150096216, - "grad_norm": 0.8599253114644705, - "learning_rate": 0.00011004748829886755, - "loss": 0.6835, - "step": 8125 - }, - { - "epoch": 0.5214881334188582, - "grad_norm": 0.6066610732008468, - "learning_rate": 0.0001099360967031714, - "loss": 0.5214, - "step": 8130 - }, - { - "epoch": 0.5218088518280949, - "grad_norm": 0.8343848602348406, - "learning_rate": 0.00010982469265435249, - "loss": 0.6169, - "step": 8135 - }, - { - "epoch": 0.5221295702373316, - "grad_norm": 0.4237175002588996, - "learning_rate": 0.00010971327629203587, - "loss": 0.5628, - "step": 8140 - }, - { - "epoch": 0.5224502886465683, - "grad_norm": 0.7612853893387608, - "learning_rate": 0.00010960184775586209, - "loss": 0.6496, - "step": 8145 - }, - { - "epoch": 0.522771007055805, - "grad_norm": 0.7090497030288603, - "learning_rate": 0.00010949040718548693, - "loss": 0.6699, - "step": 8150 - }, - { - "epoch": 0.5230917254650417, - "grad_norm": 0.8137233187040953, - "learning_rate": 0.00010937895472058126, - "loss": 0.7825, - "step": 8155 - }, - { - "epoch": 0.5234124438742784, - "grad_norm": 1.106458178679526, - "learning_rate": 0.0001092674905008308, - "loss": 0.5917, - "step": 8160 - }, - { - "epoch": 0.5237331622835151, - "grad_norm": 1.1023421333903827, - "learning_rate": 0.00010915601466593604, - "loss": 0.652, - "step": 8165 - }, - { - "epoch": 0.5240538806927517, - "grad_norm": 1.2339053368878727, - "learning_rate": 0.00010904452735561204, - "loss": 0.7531, - "step": 8170 - }, - { - "epoch": 0.5243745991019885, - "grad_norm": 0.8536672713520308, - "learning_rate": 0.00010893302870958824, - "loss": 0.6808, - "step": 8175 - }, - { - "epoch": 0.5246953175112251, - "grad_norm": 0.9072452347961674, - "learning_rate": 0.00010882151886760827, - "loss": 0.7883, - "step": 8180 - }, - { - "epoch": 0.5250160359204619, - "grad_norm": 0.705408047927468, - "learning_rate": 0.00010870999796942986, - "loss": 0.7448, - "step": 8185 - }, - { - "epoch": 0.5253367543296985, - "grad_norm": 0.84842819642806, - "learning_rate": 0.00010859846615482448, - "loss": 0.7873, - "step": 8190 - }, - { - "epoch": 0.5256574727389353, - "grad_norm": 0.9668127437981949, - "learning_rate": 0.00010848692356357735, - "loss": 0.6553, - "step": 8195 - }, - { - "epoch": 0.5259781911481719, - "grad_norm": 1.3910270737631052, - "learning_rate": 0.00010837537033548718, - "loss": 0.551, - "step": 8200 - }, - { - "epoch": 0.5262989095574085, - "grad_norm": 0.8934045053705592, - "learning_rate": 0.00010826380661036601, - "loss": 0.755, - "step": 8205 - }, - { - "epoch": 0.5266196279666453, - "grad_norm": 0.7580165266865208, - "learning_rate": 0.0001081522325280391, - "loss": 0.6785, - "step": 8210 - }, - { - "epoch": 0.5269403463758819, - "grad_norm": 0.895270436973056, - "learning_rate": 0.00010804064822834461, - "loss": 0.6188, - "step": 8215 - }, - { - "epoch": 0.5272610647851187, - "grad_norm": 0.8349917473129711, - "learning_rate": 0.0001079290538511335, - "loss": 0.5295, - "step": 8220 - }, - { - "epoch": 0.5275817831943553, - "grad_norm": 1.0937712586985149, - "learning_rate": 0.00010781744953626944, - "loss": 0.718, - "step": 8225 - }, - { - "epoch": 0.5279025016035921, - "grad_norm": 0.9776711832493594, - "learning_rate": 0.00010770583542362848, - "loss": 0.7394, - "step": 8230 - }, - { - "epoch": 0.5282232200128287, - "grad_norm": 0.9916244110681041, - "learning_rate": 0.00010759421165309898, - "loss": 0.6302, - "step": 8235 - }, - { - "epoch": 0.5285439384220654, - "grad_norm": 0.7709724576720045, - "learning_rate": 0.00010748257836458142, - "loss": 0.4377, - "step": 8240 - }, - { - "epoch": 0.5288646568313021, - "grad_norm": 0.9553016321868766, - "learning_rate": 0.00010737093569798815, - "loss": 0.5929, - "step": 8245 - }, - { - "epoch": 0.5291853752405388, - "grad_norm": 0.5921375135170813, - "learning_rate": 0.00010725928379324335, - "loss": 0.6308, - "step": 8250 - }, - { - "epoch": 0.5295060936497755, - "grad_norm": 0.9409908884682822, - "learning_rate": 0.00010714762279028275, - "loss": 0.6488, - "step": 8255 - }, - { - "epoch": 0.5298268120590122, - "grad_norm": 0.9164401991956044, - "learning_rate": 0.00010703595282905343, - "loss": 0.7185, - "step": 8260 - }, - { - "epoch": 0.5301475304682489, - "grad_norm": 0.7915811080548818, - "learning_rate": 0.00010692427404951379, - "loss": 0.7002, - "step": 8265 - }, - { - "epoch": 0.5304682488774856, - "grad_norm": 1.1633281858494344, - "learning_rate": 0.00010681258659163322, - "loss": 0.7142, - "step": 8270 - }, - { - "epoch": 0.5307889672867223, - "grad_norm": 1.1360488426032926, - "learning_rate": 0.00010670089059539201, - "loss": 0.6164, - "step": 8275 - }, - { - "epoch": 0.531109685695959, - "grad_norm": 0.9950081272171089, - "learning_rate": 0.0001065891862007811, - "loss": 0.5403, - "step": 8280 - }, - { - "epoch": 0.5314304041051956, - "grad_norm": 1.0499402732473173, - "learning_rate": 0.00010647747354780206, - "loss": 0.6409, - "step": 8285 - }, - { - "epoch": 0.5317511225144324, - "grad_norm": 0.9441134224109928, - "learning_rate": 0.00010636575277646672, - "loss": 0.5947, - "step": 8290 - }, - { - "epoch": 0.532071840923669, - "grad_norm": 1.3058395760608197, - "learning_rate": 0.00010625402402679712, - "loss": 0.6901, - "step": 8295 - }, - { - "epoch": 0.5323925593329057, - "grad_norm": 0.8650565306977751, - "learning_rate": 0.0001061422874388253, - "loss": 0.6536, - "step": 8300 - }, - { - "epoch": 0.5327132777421424, - "grad_norm": 1.1023501837328433, - "learning_rate": 0.0001060305431525931, - "loss": 0.7735, - "step": 8305 - }, - { - "epoch": 0.5330339961513791, - "grad_norm": 0.7402707462941108, - "learning_rate": 0.00010591879130815206, - "loss": 0.7746, - "step": 8310 - }, - { - "epoch": 0.5333547145606158, - "grad_norm": 1.0334014975634367, - "learning_rate": 0.0001058070320455631, - "loss": 0.6197, - "step": 8315 - }, - { - "epoch": 0.5336754329698524, - "grad_norm": 0.8973174424463937, - "learning_rate": 0.00010569526550489656, - "loss": 0.6662, - "step": 8320 - }, - { - "epoch": 0.5339961513790892, - "grad_norm": 1.1260137879030736, - "learning_rate": 0.00010558349182623182, - "loss": 0.7384, - "step": 8325 - }, - { - "epoch": 0.5343168697883258, - "grad_norm": 1.0775603650728314, - "learning_rate": 0.00010547171114965721, - "loss": 0.53, - "step": 8330 - }, - { - "epoch": 0.5346375881975626, - "grad_norm": 0.8657241626493881, - "learning_rate": 0.00010535992361526986, - "loss": 0.6597, - "step": 8335 - }, - { - "epoch": 0.5349583066067992, - "grad_norm": 0.7754986474145258, - "learning_rate": 0.00010524812936317545, - "loss": 0.7155, - "step": 8340 - }, - { - "epoch": 0.535279025016036, - "grad_norm": 0.7235913108295569, - "learning_rate": 0.00010513632853348817, - "loss": 0.63, - "step": 8345 - }, - { - "epoch": 0.5355997434252726, - "grad_norm": 1.0376021153773205, - "learning_rate": 0.00010502452126633033, - "loss": 0.7389, - "step": 8350 - }, - { - "epoch": 0.5359204618345093, - "grad_norm": 1.0736867388991156, - "learning_rate": 0.00010491270770183241, - "loss": 0.7524, - "step": 8355 - }, - { - "epoch": 0.536241180243746, - "grad_norm": 1.2875466262160882, - "learning_rate": 0.00010480088798013274, - "loss": 0.7637, - "step": 8360 - }, - { - "epoch": 0.5365618986529826, - "grad_norm": 1.0698179015991502, - "learning_rate": 0.00010468906224137736, - "loss": 0.7777, - "step": 8365 - }, - { - "epoch": 0.5368826170622194, - "grad_norm": 0.715308845951178, - "learning_rate": 0.00010457723062571984, - "loss": 0.581, - "step": 8370 - }, - { - "epoch": 0.537203335471456, - "grad_norm": 1.9992463200156003, - "learning_rate": 0.00010446539327332121, - "loss": 0.6813, - "step": 8375 - }, - { - "epoch": 0.5375240538806928, - "grad_norm": 0.9082670120549011, - "learning_rate": 0.00010435355032434958, - "loss": 0.8172, - "step": 8380 - }, - { - "epoch": 0.5378447722899294, - "grad_norm": 0.5039137526581597, - "learning_rate": 0.00010424170191898006, - "loss": 0.6443, - "step": 8385 - }, - { - "epoch": 0.5381654906991661, - "grad_norm": 0.8357611125226391, - "learning_rate": 0.00010412984819739473, - "loss": 0.6672, - "step": 8390 - }, - { - "epoch": 0.5384862091084028, - "grad_norm": 0.9107912987485977, - "learning_rate": 0.00010401798929978224, - "loss": 0.6107, - "step": 8395 - }, - { - "epoch": 0.5388069275176395, - "grad_norm": 0.8281442376194428, - "learning_rate": 0.0001039061253663377, - "loss": 0.6075, - "step": 8400 - }, - { - "epoch": 0.5391276459268762, - "grad_norm": 0.7249862380029812, - "learning_rate": 0.00010379425653726263, - "loss": 0.7265, - "step": 8405 - }, - { - "epoch": 0.5394483643361129, - "grad_norm": 0.9092092180370709, - "learning_rate": 0.00010368238295276455, - "loss": 0.6893, - "step": 8410 - }, - { - "epoch": 0.5397690827453496, - "grad_norm": 0.6540167568734936, - "learning_rate": 0.0001035705047530571, - "loss": 0.7305, - "step": 8415 - }, - { - "epoch": 0.5400898011545863, - "grad_norm": 0.7981383776198956, - "learning_rate": 0.00010345862207835957, - "loss": 0.6453, - "step": 8420 - }, - { - "epoch": 0.5404105195638229, - "grad_norm": 0.945104000015912, - "learning_rate": 0.00010334673506889696, - "loss": 0.7016, - "step": 8425 - }, - { - "epoch": 0.5407312379730597, - "grad_norm": 1.0547131113611765, - "learning_rate": 0.00010323484386489961, - "loss": 0.7347, - "step": 8430 - }, - { - "epoch": 0.5410519563822963, - "grad_norm": 0.8025281891388182, - "learning_rate": 0.00010312294860660319, - "loss": 0.5264, - "step": 8435 - }, - { - "epoch": 0.5413726747915331, - "grad_norm": 0.9019250163215435, - "learning_rate": 0.0001030110494342484, - "loss": 0.5963, - "step": 8440 - }, - { - "epoch": 0.5416933932007697, - "grad_norm": 0.6368675777184184, - "learning_rate": 0.00010289914648808088, - "loss": 0.5399, - "step": 8445 - }, - { - "epoch": 0.5420141116100065, - "grad_norm": 0.8008826667949324, - "learning_rate": 0.00010278723990835097, - "loss": 0.7476, - "step": 8450 - }, - { - "epoch": 0.5423348300192431, - "grad_norm": 0.7219125921723233, - "learning_rate": 0.0001026753298353136, - "loss": 0.5883, - "step": 8455 - }, - { - "epoch": 0.5426555484284797, - "grad_norm": 0.6992313736984004, - "learning_rate": 0.0001025634164092281, - "loss": 0.5797, - "step": 8460 - }, - { - "epoch": 0.5429762668377165, - "grad_norm": 0.44695714450265767, - "learning_rate": 0.00010245149977035792, - "loss": 0.6473, - "step": 8465 - }, - { - "epoch": 0.5432969852469531, - "grad_norm": 1.248682759415961, - "learning_rate": 0.00010233958005897058, - "loss": 0.5812, - "step": 8470 - }, - { - "epoch": 0.5436177036561899, - "grad_norm": 1.0568826134330056, - "learning_rate": 0.00010222765741533744, - "loss": 0.7862, - "step": 8475 - }, - { - "epoch": 0.5439384220654265, - "grad_norm": 0.8116820280676993, - "learning_rate": 0.00010211573197973356, - "loss": 0.6353, - "step": 8480 - }, - { - "epoch": 0.5442591404746633, - "grad_norm": 0.9997535811765578, - "learning_rate": 0.00010200380389243753, - "loss": 0.7229, - "step": 8485 - }, - { - "epoch": 0.5445798588838999, - "grad_norm": 0.8261136419022004, - "learning_rate": 0.00010189187329373113, - "loss": 0.6919, - "step": 8490 - }, - { - "epoch": 0.5449005772931367, - "grad_norm": 0.7977851457213406, - "learning_rate": 0.00010177994032389946, - "loss": 0.5777, - "step": 8495 - }, - { - "epoch": 0.5452212957023733, - "grad_norm": 1.211421213402399, - "learning_rate": 0.00010166800512323043, - "loss": 0.6434, - "step": 8500 - }, - { - "epoch": 0.54554201411161, - "grad_norm": 2.0722177427022244, - "learning_rate": 0.00010155606783201488, - "loss": 0.5933, - "step": 8505 - }, - { - "epoch": 0.5458627325208467, - "grad_norm": 0.7874345109274467, - "learning_rate": 0.00010144412859054617, - "loss": 0.8209, - "step": 8510 - }, - { - "epoch": 0.5461834509300834, - "grad_norm": 0.5164159774237933, - "learning_rate": 0.00010133218753912023, - "loss": 0.6337, - "step": 8515 - }, - { - "epoch": 0.5465041693393201, - "grad_norm": 0.9997324723951748, - "learning_rate": 0.00010122024481803509, - "loss": 0.7799, - "step": 8520 - }, - { - "epoch": 0.5468248877485568, - "grad_norm": 0.868379009704931, - "learning_rate": 0.000101108300567591, - "loss": 0.6205, - "step": 8525 - }, - { - "epoch": 0.5471456061577935, - "grad_norm": 0.7487726179830052, - "learning_rate": 0.00010099635492809007, - "loss": 0.7024, - "step": 8530 - }, - { - "epoch": 0.5474663245670301, - "grad_norm": 0.784320611343729, - "learning_rate": 0.00010088440803983616, - "loss": 0.765, - "step": 8535 - }, - { - "epoch": 0.5477870429762668, - "grad_norm": 0.7657678123947386, - "learning_rate": 0.00010077246004313472, - "loss": 0.6496, - "step": 8540 - }, - { - "epoch": 0.5481077613855035, - "grad_norm": 0.7225029829590283, - "learning_rate": 0.00010066051107829259, - "loss": 0.6885, - "step": 8545 - }, - { - "epoch": 0.5484284797947402, - "grad_norm": 0.8979772778090884, - "learning_rate": 0.00010054856128561778, - "loss": 0.7111, - "step": 8550 - }, - { - "epoch": 0.5487491982039769, - "grad_norm": 1.322201085524258, - "learning_rate": 0.00010043661080541936, - "loss": 0.6252, - "step": 8555 - }, - { - "epoch": 0.5490699166132136, - "grad_norm": 0.6743113052462498, - "learning_rate": 0.00010032465977800726, - "loss": 0.5282, - "step": 8560 - }, - { - "epoch": 0.5493906350224503, - "grad_norm": 0.8693068518513947, - "learning_rate": 0.00010021270834369211, - "loss": 0.6029, - "step": 8565 - }, - { - "epoch": 0.549711353431687, - "grad_norm": 1.1870868813911406, - "learning_rate": 0.00010010075664278507, - "loss": 0.6264, - "step": 8570 - }, - { - "epoch": 0.5500320718409236, - "grad_norm": 1.0567858782770287, - "learning_rate": 9.998880481559755e-05, - "loss": 0.8018, - "step": 8575 - }, - { - "epoch": 0.5503527902501604, - "grad_norm": 0.8137731229847819, - "learning_rate": 9.987685300244117e-05, - "loss": 0.614, - "step": 8580 - }, - { - "epoch": 0.550673508659397, - "grad_norm": 0.9599816781819811, - "learning_rate": 9.976490134362759e-05, - "loss": 0.687, - "step": 8585 - }, - { - "epoch": 0.5509942270686338, - "grad_norm": 0.6181246421982609, - "learning_rate": 9.965294997946815e-05, - "loss": 0.6866, - "step": 8590 - }, - { - "epoch": 0.5513149454778704, - "grad_norm": 1.1348648251746718, - "learning_rate": 9.954099905027396e-05, - "loss": 0.6416, - "step": 8595 - }, - { - "epoch": 0.5516356638871072, - "grad_norm": 1.6639502602729528, - "learning_rate": 9.94290486963555e-05, - "loss": 0.6715, - "step": 8600 - }, - { - "epoch": 0.5519563822963438, - "grad_norm": 0.7678034571145345, - "learning_rate": 9.931709905802252e-05, - "loss": 0.6886, - "step": 8605 - }, - { - "epoch": 0.5522771007055804, - "grad_norm": 1.4578465770643851, - "learning_rate": 9.92051502755839e-05, - "loss": 0.7689, - "step": 8610 - }, - { - "epoch": 0.5525978191148172, - "grad_norm": 0.7434972557340698, - "learning_rate": 9.909320248934747e-05, - "loss": 0.6374, - "step": 8615 - }, - { - "epoch": 0.5529185375240538, - "grad_norm": 0.8031136082718469, - "learning_rate": 9.898125583961977e-05, - "loss": 0.7055, - "step": 8620 - }, - { - "epoch": 0.5532392559332906, - "grad_norm": 1.000878821455057, - "learning_rate": 9.886931046670598e-05, - "loss": 0.6157, - "step": 8625 - }, - { - "epoch": 0.5535599743425272, - "grad_norm": 0.6524291495733984, - "learning_rate": 9.875736651090956e-05, - "loss": 0.561, - "step": 8630 - }, - { - "epoch": 0.553880692751764, - "grad_norm": 1.3537142167105929, - "learning_rate": 9.864542411253229e-05, - "loss": 0.6718, - "step": 8635 - }, - { - "epoch": 0.5542014111610006, - "grad_norm": 1.2775573591627376, - "learning_rate": 9.853348341187398e-05, - "loss": 0.6645, - "step": 8640 - }, - { - "epoch": 0.5545221295702373, - "grad_norm": 0.982975595575632, - "learning_rate": 9.842154454923236e-05, - "loss": 0.5919, - "step": 8645 - }, - { - "epoch": 0.554842847979474, - "grad_norm": 0.960094691754927, - "learning_rate": 9.830960766490274e-05, - "loss": 0.8113, - "step": 8650 - }, - { - "epoch": 0.5551635663887107, - "grad_norm": 0.7965375300164668, - "learning_rate": 9.819767289917802e-05, - "loss": 0.5782, - "step": 8655 - }, - { - "epoch": 0.5554842847979474, - "grad_norm": 1.1381902966011452, - "learning_rate": 9.808574039234843e-05, - "loss": 0.6242, - "step": 8660 - }, - { - "epoch": 0.5558050032071841, - "grad_norm": 0.8670424286605721, - "learning_rate": 9.79738102847014e-05, - "loss": 0.7355, - "step": 8665 - }, - { - "epoch": 0.5561257216164208, - "grad_norm": 0.8366621626207873, - "learning_rate": 9.786188271652133e-05, - "loss": 0.5744, - "step": 8670 - }, - { - "epoch": 0.5564464400256575, - "grad_norm": 0.8273685386138488, - "learning_rate": 9.774995782808943e-05, - "loss": 0.6414, - "step": 8675 - }, - { - "epoch": 0.5567671584348942, - "grad_norm": 0.9522831235441542, - "learning_rate": 9.763803575968357e-05, - "loss": 0.7632, - "step": 8680 - }, - { - "epoch": 0.5570878768441309, - "grad_norm": 0.75372169303836, - "learning_rate": 9.752611665157807e-05, - "loss": 0.6433, - "step": 8685 - }, - { - "epoch": 0.5574085952533675, - "grad_norm": 1.2109886710417286, - "learning_rate": 9.741420064404353e-05, - "loss": 0.63, - "step": 8690 - }, - { - "epoch": 0.5577293136626043, - "grad_norm": 0.5400874445069787, - "learning_rate": 9.730228787734669e-05, - "loss": 0.6789, - "step": 8695 - }, - { - "epoch": 0.5580500320718409, - "grad_norm": 0.7989657543785353, - "learning_rate": 9.719037849175023e-05, - "loss": 0.7407, - "step": 8700 - }, - { - "epoch": 0.5583707504810776, - "grad_norm": 0.7239899818926174, - "learning_rate": 9.707847262751257e-05, - "loss": 0.6029, - "step": 8705 - }, - { - "epoch": 0.5586914688903143, - "grad_norm": 1.1080694844841645, - "learning_rate": 9.696657042488774e-05, - "loss": 0.6841, - "step": 8710 - }, - { - "epoch": 0.559012187299551, - "grad_norm": 0.8668620206006121, - "learning_rate": 9.685467202412514e-05, - "loss": 0.8091, - "step": 8715 - }, - { - "epoch": 0.5593329057087877, - "grad_norm": 0.8263012333520392, - "learning_rate": 9.674277756546941e-05, - "loss": 0.5612, - "step": 8720 - }, - { - "epoch": 0.5596536241180243, - "grad_norm": 1.2272663628925047, - "learning_rate": 9.663088718916031e-05, - "loss": 0.6214, - "step": 8725 - }, - { - "epoch": 0.5599743425272611, - "grad_norm": 0.9766333412497376, - "learning_rate": 9.651900103543244e-05, - "loss": 0.7342, - "step": 8730 - }, - { - "epoch": 0.5602950609364977, - "grad_norm": 0.830624516454487, - "learning_rate": 9.640711924451514e-05, - "loss": 0.6718, - "step": 8735 - }, - { - "epoch": 0.5606157793457345, - "grad_norm": 0.4675831817637492, - "learning_rate": 9.629524195663219e-05, - "loss": 0.6039, - "step": 8740 - }, - { - "epoch": 0.5609364977549711, - "grad_norm": 0.6634840466913374, - "learning_rate": 9.618336931200182e-05, - "loss": 0.5964, - "step": 8745 - }, - { - "epoch": 0.5612572161642079, - "grad_norm": 0.9976406641974719, - "learning_rate": 9.607150145083642e-05, - "loss": 0.7166, - "step": 8750 - }, - { - "epoch": 0.5615779345734445, - "grad_norm": 0.9545013096296738, - "learning_rate": 9.595963851334237e-05, - "loss": 0.689, - "step": 8755 - }, - { - "epoch": 0.5618986529826812, - "grad_norm": 0.9634333696652287, - "learning_rate": 9.58477806397199e-05, - "loss": 0.8048, - "step": 8760 - }, - { - "epoch": 0.5622193713919179, - "grad_norm": 0.8057551483876174, - "learning_rate": 9.573592797016285e-05, - "loss": 0.672, - "step": 8765 - }, - { - "epoch": 0.5625400898011546, - "grad_norm": 1.0000169919459303, - "learning_rate": 9.562408064485858e-05, - "loss": 0.656, - "step": 8770 - }, - { - "epoch": 0.5628608082103913, - "grad_norm": 1.0059598561012926, - "learning_rate": 9.551223880398778e-05, - "loss": 0.6689, - "step": 8775 - }, - { - "epoch": 0.563181526619628, - "grad_norm": 0.7089352756337184, - "learning_rate": 9.540040258772413e-05, - "loss": 0.6104, - "step": 8780 - }, - { - "epoch": 0.5635022450288647, - "grad_norm": 0.9673260454868421, - "learning_rate": 9.528857213623441e-05, - "loss": 0.625, - "step": 8785 - }, - { - "epoch": 0.5638229634381013, - "grad_norm": 0.8425769011906392, - "learning_rate": 9.517674758967812e-05, - "loss": 0.6385, - "step": 8790 - }, - { - "epoch": 0.564143681847338, - "grad_norm": 0.8483079594314462, - "learning_rate": 9.506492908820737e-05, - "loss": 0.7091, - "step": 8795 - }, - { - "epoch": 0.5644644002565747, - "grad_norm": 1.1949041204777606, - "learning_rate": 9.495311677196663e-05, - "loss": 0.5583, - "step": 8800 - }, - { - "epoch": 0.5647851186658114, - "grad_norm": 1.1203988658358368, - "learning_rate": 9.484131078109272e-05, - "loss": 0.6491, - "step": 8805 - }, - { - "epoch": 0.5651058370750481, - "grad_norm": 0.7171168814679133, - "learning_rate": 9.472951125571447e-05, - "loss": 0.5704, - "step": 8810 - }, - { - "epoch": 0.5654265554842848, - "grad_norm": 0.43705154049643696, - "learning_rate": 9.461771833595263e-05, - "loss": 0.6235, - "step": 8815 - }, - { - "epoch": 0.5657472738935215, - "grad_norm": 0.5972509611997564, - "learning_rate": 9.450593216191962e-05, - "loss": 0.6011, - "step": 8820 - }, - { - "epoch": 0.5660679923027582, - "grad_norm": 0.6585353171844711, - "learning_rate": 9.439415287371949e-05, - "loss": 0.6338, - "step": 8825 - }, - { - "epoch": 0.5663887107119948, - "grad_norm": 1.182861072860639, - "learning_rate": 9.42823806114476e-05, - "loss": 0.6286, - "step": 8830 - }, - { - "epoch": 0.5667094291212316, - "grad_norm": 0.774985192783614, - "learning_rate": 9.417061551519051e-05, - "loss": 0.6362, - "step": 8835 - }, - { - "epoch": 0.5670301475304682, - "grad_norm": 1.6279736397998856, - "learning_rate": 9.405885772502582e-05, - "loss": 0.5434, - "step": 8840 - }, - { - "epoch": 0.567350865939705, - "grad_norm": 0.8603999240784707, - "learning_rate": 9.394710738102198e-05, - "loss": 0.7135, - "step": 8845 - }, - { - "epoch": 0.5676715843489416, - "grad_norm": 0.8326631481896093, - "learning_rate": 9.383536462323807e-05, - "loss": 0.6316, - "step": 8850 - }, - { - "epoch": 0.5679923027581784, - "grad_norm": 1.1396992210320314, - "learning_rate": 9.372362959172364e-05, - "loss": 0.6325, - "step": 8855 - }, - { - "epoch": 0.568313021167415, - "grad_norm": 0.6117345152175109, - "learning_rate": 9.361190242651864e-05, - "loss": 0.6159, - "step": 8860 - }, - { - "epoch": 0.5686337395766518, - "grad_norm": 0.9306563316596532, - "learning_rate": 9.350018326765311e-05, - "loss": 0.6533, - "step": 8865 - }, - { - "epoch": 0.5689544579858884, - "grad_norm": 0.8930767778362739, - "learning_rate": 9.338847225514708e-05, - "loss": 0.6675, - "step": 8870 - }, - { - "epoch": 0.569275176395125, - "grad_norm": 0.4141144493955828, - "learning_rate": 9.327676952901034e-05, - "loss": 0.5957, - "step": 8875 - }, - { - "epoch": 0.5695958948043618, - "grad_norm": 0.8888417335481001, - "learning_rate": 9.31650752292423e-05, - "loss": 0.5665, - "step": 8880 - }, - { - "epoch": 0.5699166132135984, - "grad_norm": 0.7603252238964692, - "learning_rate": 9.305338949583183e-05, - "loss": 0.6428, - "step": 8885 - }, - { - "epoch": 0.5702373316228352, - "grad_norm": 1.271342150118716, - "learning_rate": 9.294171246875705e-05, - "loss": 0.7219, - "step": 8890 - }, - { - "epoch": 0.5705580500320718, - "grad_norm": 0.9447555346689784, - "learning_rate": 9.283004428798519e-05, - "loss": 0.6965, - "step": 8895 - }, - { - "epoch": 0.5708787684413086, - "grad_norm": 0.8678646764049435, - "learning_rate": 9.271838509347233e-05, - "loss": 0.7673, - "step": 8900 - }, - { - "epoch": 0.5711994868505452, - "grad_norm": 0.7416908587434721, - "learning_rate": 9.260673502516333e-05, - "loss": 0.6081, - "step": 8905 - }, - { - "epoch": 0.5715202052597819, - "grad_norm": 0.939422337464896, - "learning_rate": 9.24950942229917e-05, - "loss": 0.6721, - "step": 8910 - }, - { - "epoch": 0.5718409236690186, - "grad_norm": 0.8506289909429936, - "learning_rate": 9.238346282687912e-05, - "loss": 0.7379, - "step": 8915 - }, - { - "epoch": 0.5721616420782553, - "grad_norm": 1.3927657753594376, - "learning_rate": 9.227184097673566e-05, - "loss": 0.7231, - "step": 8920 - }, - { - "epoch": 0.572482360487492, - "grad_norm": 0.6002814159409026, - "learning_rate": 9.21602288124594e-05, - "loss": 0.8172, - "step": 8925 - }, - { - "epoch": 0.5728030788967287, - "grad_norm": 0.7935777728563393, - "learning_rate": 9.204862647393625e-05, - "loss": 0.8086, - "step": 8930 - }, - { - "epoch": 0.5731237973059654, - "grad_norm": 1.0397353291637284, - "learning_rate": 9.193703410103978e-05, - "loss": 0.6631, - "step": 8935 - }, - { - "epoch": 0.573444515715202, - "grad_norm": 0.8367031156015087, - "learning_rate": 9.182545183363112e-05, - "loss": 0.5788, - "step": 8940 - }, - { - "epoch": 0.5737652341244387, - "grad_norm": 1.2325263908639137, - "learning_rate": 9.17138798115587e-05, - "loss": 0.7789, - "step": 8945 - }, - { - "epoch": 0.5740859525336754, - "grad_norm": 0.9464147249819552, - "learning_rate": 9.160231817465815e-05, - "loss": 0.5279, - "step": 8950 - }, - { - "epoch": 0.5744066709429121, - "grad_norm": 0.8158486660018726, - "learning_rate": 9.149076706275207e-05, - "loss": 0.7098, - "step": 8955 - }, - { - "epoch": 0.5747273893521488, - "grad_norm": 0.7825563949372556, - "learning_rate": 9.137922661564981e-05, - "loss": 0.6993, - "step": 8960 - }, - { - "epoch": 0.5750481077613855, - "grad_norm": 0.9955286924734048, - "learning_rate": 9.126769697314741e-05, - "loss": 0.6668, - "step": 8965 - }, - { - "epoch": 0.5753688261706222, - "grad_norm": 0.987888018064567, - "learning_rate": 9.11561782750274e-05, - "loss": 0.7683, - "step": 8970 - }, - { - "epoch": 0.5756895445798589, - "grad_norm": 0.9029264976754006, - "learning_rate": 9.104467066105855e-05, - "loss": 0.5976, - "step": 8975 - }, - { - "epoch": 0.5760102629890955, - "grad_norm": 1.2083151109064707, - "learning_rate": 9.093317427099567e-05, - "loss": 0.7444, - "step": 8980 - }, - { - "epoch": 0.5763309813983323, - "grad_norm": 0.627708721729255, - "learning_rate": 9.082168924457963e-05, - "loss": 0.5052, - "step": 8985 - }, - { - "epoch": 0.5766516998075689, - "grad_norm": 0.818341174384118, - "learning_rate": 9.071021572153699e-05, - "loss": 0.6956, - "step": 8990 - }, - { - "epoch": 0.5769724182168057, - "grad_norm": 0.7174427987431503, - "learning_rate": 9.05987538415799e-05, - "loss": 0.6537, - "step": 8995 - }, - { - "epoch": 0.5772931366260423, - "grad_norm": 1.0123101523225277, - "learning_rate": 9.048730374440593e-05, - "loss": 0.6298, - "step": 9000 - }, - { - "epoch": 0.5776138550352791, - "grad_norm": 1.4927380842347644, - "learning_rate": 9.037586556969785e-05, - "loss": 0.7866, - "step": 9005 - }, - { - "epoch": 0.5779345734445157, - "grad_norm": 1.1107550009988214, - "learning_rate": 9.026443945712355e-05, - "loss": 0.5272, - "step": 9010 - }, - { - "epoch": 0.5782552918537524, - "grad_norm": 1.042711051305287, - "learning_rate": 9.015302554633572e-05, - "loss": 0.6862, - "step": 9015 - }, - { - "epoch": 0.5785760102629891, - "grad_norm": 1.097565575641477, - "learning_rate": 9.004162397697183e-05, - "loss": 0.6653, - "step": 9020 - }, - { - "epoch": 0.5788967286722257, - "grad_norm": 0.7962187563904711, - "learning_rate": 8.993023488865384e-05, - "loss": 0.7807, - "step": 9025 - }, - { - "epoch": 0.5792174470814625, - "grad_norm": 0.8018799159927662, - "learning_rate": 8.981885842098807e-05, - "loss": 0.6755, - "step": 9030 - }, - { - "epoch": 0.5795381654906991, - "grad_norm": 1.0103385936451423, - "learning_rate": 8.970749471356508e-05, - "loss": 0.7498, - "step": 9035 - }, - { - "epoch": 0.5798588838999359, - "grad_norm": 0.8540199269462798, - "learning_rate": 8.959614390595933e-05, - "loss": 0.7041, - "step": 9040 - }, - { - "epoch": 0.5801796023091725, - "grad_norm": 1.1040345444470279, - "learning_rate": 8.948480613772923e-05, - "loss": 0.5949, - "step": 9045 - }, - { - "epoch": 0.5805003207184093, - "grad_norm": 1.0463417093934197, - "learning_rate": 8.93734815484167e-05, - "loss": 0.6716, - "step": 9050 - }, - { - "epoch": 0.5808210391276459, - "grad_norm": 0.9338670777982941, - "learning_rate": 8.92621702775473e-05, - "loss": 0.652, - "step": 9055 - }, - { - "epoch": 0.5811417575368826, - "grad_norm": 0.8605449857576016, - "learning_rate": 8.915087246462981e-05, - "loss": 0.6335, - "step": 9060 - }, - { - "epoch": 0.5814624759461193, - "grad_norm": 0.9482034036580209, - "learning_rate": 8.903958824915616e-05, - "loss": 0.7407, - "step": 9065 - }, - { - "epoch": 0.581783194355356, - "grad_norm": 0.9120660938985135, - "learning_rate": 8.892831777060128e-05, - "loss": 0.714, - "step": 9070 - }, - { - "epoch": 0.5821039127645927, - "grad_norm": 0.7546853050581628, - "learning_rate": 8.881706116842277e-05, - "loss": 0.6643, - "step": 9075 - }, - { - "epoch": 0.5824246311738294, - "grad_norm": 0.7217266514190624, - "learning_rate": 8.870581858206097e-05, - "loss": 0.6232, - "step": 9080 - }, - { - "epoch": 0.5827453495830661, - "grad_norm": 0.8122719551725256, - "learning_rate": 8.859459015093856e-05, - "loss": 0.753, - "step": 9085 - }, - { - "epoch": 0.5830660679923028, - "grad_norm": 0.6978194557670415, - "learning_rate": 8.848337601446056e-05, - "loss": 0.592, - "step": 9090 - }, - { - "epoch": 0.5833867864015394, - "grad_norm": 0.7490982355447477, - "learning_rate": 8.8372176312014e-05, - "loss": 0.6739, - "step": 9095 - }, - { - "epoch": 0.5837075048107762, - "grad_norm": 1.074058776492988, - "learning_rate": 8.826099118296781e-05, - "loss": 0.6831, - "step": 9100 - }, - { - "epoch": 0.5840282232200128, - "grad_norm": 0.7986527171477741, - "learning_rate": 8.814982076667274e-05, - "loss": 0.6572, - "step": 9105 - }, - { - "epoch": 0.5843489416292496, - "grad_norm": 0.9594556597631692, - "learning_rate": 8.803866520246111e-05, - "loss": 0.6968, - "step": 9110 - }, - { - "epoch": 0.5846696600384862, - "grad_norm": 0.8185832555992929, - "learning_rate": 8.792752462964643e-05, - "loss": 0.6396, - "step": 9115 - }, - { - "epoch": 0.584990378447723, - "grad_norm": 0.830230327348044, - "learning_rate": 8.781639918752364e-05, - "loss": 0.6288, - "step": 9120 - }, - { - "epoch": 0.5853110968569596, - "grad_norm": 1.260466190111766, - "learning_rate": 8.770528901536866e-05, - "loss": 0.6248, - "step": 9125 - }, - { - "epoch": 0.5856318152661962, - "grad_norm": 0.7805742440541377, - "learning_rate": 8.75941942524382e-05, - "loss": 0.726, - "step": 9130 - }, - { - "epoch": 0.585952533675433, - "grad_norm": 1.0612454515173708, - "learning_rate": 8.748311503796971e-05, - "loss": 0.6807, - "step": 9135 - }, - { - "epoch": 0.5862732520846696, - "grad_norm": 0.8808610696974422, - "learning_rate": 8.737205151118115e-05, - "loss": 0.7349, - "step": 9140 - }, - { - "epoch": 0.5865939704939064, - "grad_norm": 0.8397400084374878, - "learning_rate": 8.726100381127084e-05, - "loss": 0.677, - "step": 9145 - }, - { - "epoch": 0.586914688903143, - "grad_norm": 1.3081126728734789, - "learning_rate": 8.714997207741725e-05, - "loss": 0.7485, - "step": 9150 - }, - { - "epoch": 0.5872354073123798, - "grad_norm": 0.23647447615753048, - "learning_rate": 8.703895644877877e-05, - "loss": 0.5389, - "step": 9155 - }, - { - "epoch": 0.5875561257216164, - "grad_norm": 1.0035423360368345, - "learning_rate": 8.692795706449371e-05, - "loss": 0.6547, - "step": 9160 - }, - { - "epoch": 0.5878768441308531, - "grad_norm": 0.7176089252240778, - "learning_rate": 8.681697406367997e-05, - "loss": 0.6607, - "step": 9165 - }, - { - "epoch": 0.5881975625400898, - "grad_norm": 0.8342266954014463, - "learning_rate": 8.670600758543492e-05, - "loss": 0.6957, - "step": 9170 - }, - { - "epoch": 0.5885182809493265, - "grad_norm": 0.9577059909314858, - "learning_rate": 8.659505776883523e-05, - "loss": 0.7079, - "step": 9175 - }, - { - "epoch": 0.5888389993585632, - "grad_norm": 0.5591665135253571, - "learning_rate": 8.648412475293667e-05, - "loss": 0.4696, - "step": 9180 - }, - { - "epoch": 0.5891597177677999, - "grad_norm": 0.6612061534246185, - "learning_rate": 8.637320867677395e-05, - "loss": 0.8161, - "step": 9185 - }, - { - "epoch": 0.5894804361770366, - "grad_norm": 0.7364614135023326, - "learning_rate": 8.626230967936056e-05, - "loss": 0.584, - "step": 9190 - }, - { - "epoch": 0.5898011545862732, - "grad_norm": 1.1805347583614008, - "learning_rate": 8.615142789968862e-05, - "loss": 0.6749, - "step": 9195 - }, - { - "epoch": 0.5901218729955099, - "grad_norm": 0.8670374427365669, - "learning_rate": 8.604056347672862e-05, - "loss": 0.6273, - "step": 9200 - }, - { - "epoch": 0.5904425914047466, - "grad_norm": 0.9304848686764007, - "learning_rate": 8.592971654942934e-05, - "loss": 0.7438, - "step": 9205 - }, - { - "epoch": 0.5907633098139833, - "grad_norm": 0.9747134027393929, - "learning_rate": 8.581888725671756e-05, - "loss": 0.6131, - "step": 9210 - }, - { - "epoch": 0.59108402822322, - "grad_norm": 1.0129060114876993, - "learning_rate": 8.570807573749803e-05, - "loss": 0.7444, - "step": 9215 - }, - { - "epoch": 0.5914047466324567, - "grad_norm": 0.860206331729887, - "learning_rate": 8.559728213065322e-05, - "loss": 0.71, - "step": 9220 - }, - { - "epoch": 0.5917254650416934, - "grad_norm": 0.9817359438145173, - "learning_rate": 8.548650657504312e-05, - "loss": 0.6491, - "step": 9225 - }, - { - "epoch": 0.5920461834509301, - "grad_norm": 0.7544658228792815, - "learning_rate": 8.537574920950509e-05, - "loss": 0.6348, - "step": 9230 - }, - { - "epoch": 0.5923669018601668, - "grad_norm": 0.7630242666798073, - "learning_rate": 8.526501017285371e-05, - "loss": 0.6261, - "step": 9235 - }, - { - "epoch": 0.5926876202694035, - "grad_norm": 0.9267179536684838, - "learning_rate": 8.515428960388064e-05, - "loss": 0.8258, - "step": 9240 - }, - { - "epoch": 0.5930083386786401, - "grad_norm": 0.6784696630153367, - "learning_rate": 8.504358764135423e-05, - "loss": 0.707, - "step": 9245 - }, - { - "epoch": 0.5933290570878769, - "grad_norm": 0.6689426887073786, - "learning_rate": 8.49329044240197e-05, - "loss": 0.751, - "step": 9250 - }, - { - "epoch": 0.5936497754971135, - "grad_norm": 1.0074921827758931, - "learning_rate": 8.482224009059867e-05, - "loss": 0.7213, - "step": 9255 - }, - { - "epoch": 0.5939704939063503, - "grad_norm": 0.6037825152713899, - "learning_rate": 8.471159477978915e-05, - "loss": 0.621, - "step": 9260 - }, - { - "epoch": 0.5942912123155869, - "grad_norm": 0.6325399857778463, - "learning_rate": 8.460096863026523e-05, - "loss": 0.6925, - "step": 9265 - }, - { - "epoch": 0.5946119307248237, - "grad_norm": 0.9785164961672185, - "learning_rate": 8.449036178067706e-05, - "loss": 0.7721, - "step": 9270 - }, - { - "epoch": 0.5949326491340603, - "grad_norm": 0.8071126693831758, - "learning_rate": 8.437977436965057e-05, - "loss": 0.5628, - "step": 9275 - }, - { - "epoch": 0.5952533675432969, - "grad_norm": 1.093008483996882, - "learning_rate": 8.426920653578731e-05, - "loss": 0.5135, - "step": 9280 - }, - { - "epoch": 0.5955740859525337, - "grad_norm": 0.7334552943764545, - "learning_rate": 8.415865841766437e-05, - "loss": 0.6418, - "step": 9285 - }, - { - "epoch": 0.5958948043617703, - "grad_norm": 0.9720157753455849, - "learning_rate": 8.404813015383402e-05, - "loss": 0.6855, - "step": 9290 - }, - { - "epoch": 0.5962155227710071, - "grad_norm": 0.7988660585883463, - "learning_rate": 8.39376218828237e-05, - "loss": 0.5753, - "step": 9295 - }, - { - "epoch": 0.5965362411802437, - "grad_norm": 1.1413457984041735, - "learning_rate": 8.382713374313582e-05, - "loss": 0.6003, - "step": 9300 - }, - { - "epoch": 0.5968569595894805, - "grad_norm": 1.1011093623211472, - "learning_rate": 8.371666587324753e-05, - "loss": 0.7294, - "step": 9305 - }, - { - "epoch": 0.5971776779987171, - "grad_norm": 0.9285733358885891, - "learning_rate": 8.360621841161059e-05, - "loss": 0.5484, - "step": 9310 - }, - { - "epoch": 0.5974983964079538, - "grad_norm": 0.6748939404643401, - "learning_rate": 8.349579149665111e-05, - "loss": 0.6096, - "step": 9315 - }, - { - "epoch": 0.5978191148171905, - "grad_norm": 0.9020042133223751, - "learning_rate": 8.338538526676955e-05, - "loss": 0.6025, - "step": 9320 - }, - { - "epoch": 0.5981398332264272, - "grad_norm": 0.9270397135681554, - "learning_rate": 8.32749998603404e-05, - "loss": 0.7169, - "step": 9325 - }, - { - "epoch": 0.5984605516356639, - "grad_norm": 0.9890377973574781, - "learning_rate": 8.316463541571202e-05, - "loss": 0.6308, - "step": 9330 - }, - { - "epoch": 0.5987812700449006, - "grad_norm": 0.9865556224427305, - "learning_rate": 8.305429207120657e-05, - "loss": 0.6582, - "step": 9335 - }, - { - "epoch": 0.5991019884541373, - "grad_norm": 0.7178728991086797, - "learning_rate": 8.294396996511973e-05, - "loss": 0.6433, - "step": 9340 - }, - { - "epoch": 0.599422706863374, - "grad_norm": 0.9285152964545721, - "learning_rate": 8.283366923572054e-05, - "loss": 0.548, - "step": 9345 - }, - { - "epoch": 0.5997434252726106, - "grad_norm": 1.0943546547273215, - "learning_rate": 8.272339002125126e-05, - "loss": 0.5401, - "step": 9350 - }, - { - "epoch": 0.6000641436818474, - "grad_norm": 1.0722476752693422, - "learning_rate": 8.261313245992719e-05, - "loss": 0.7496, - "step": 9355 - }, - { - "epoch": 0.600384862091084, - "grad_norm": 0.7239338874930329, - "learning_rate": 8.250289668993651e-05, - "loss": 0.6294, - "step": 9360 - }, - { - "epoch": 0.6007055805003207, - "grad_norm": 0.8162856731878313, - "learning_rate": 8.239268284944008e-05, - "loss": 0.784, - "step": 9365 - }, - { - "epoch": 0.6010262989095574, - "grad_norm": 0.8529031580797097, - "learning_rate": 8.228249107657125e-05, - "loss": 0.7338, - "step": 9370 - }, - { - "epoch": 0.6013470173187941, - "grad_norm": 0.914197482847494, - "learning_rate": 8.217232150943575e-05, - "loss": 0.6738, - "step": 9375 - }, - { - "epoch": 0.6016677357280308, - "grad_norm": 0.561817894827455, - "learning_rate": 8.20621742861114e-05, - "loss": 0.4924, - "step": 9380 - }, - { - "epoch": 0.6019884541372674, - "grad_norm": 0.8679917658001024, - "learning_rate": 8.19520495446481e-05, - "loss": 0.8074, - "step": 9385 - }, - { - "epoch": 0.6023091725465042, - "grad_norm": 1.0120069230072926, - "learning_rate": 8.184194742306756e-05, - "loss": 0.7112, - "step": 9390 - }, - { - "epoch": 0.6026298909557408, - "grad_norm": 0.7356825859409829, - "learning_rate": 8.173186805936313e-05, - "loss": 0.6514, - "step": 9395 - }, - { - "epoch": 0.6029506093649776, - "grad_norm": 0.7794340302339006, - "learning_rate": 8.162181159149964e-05, - "loss": 0.7748, - "step": 9400 - }, - { - "epoch": 0.6032713277742142, - "grad_norm": 0.9190740265202144, - "learning_rate": 8.151177815741318e-05, - "loss": 0.6399, - "step": 9405 - }, - { - "epoch": 0.603592046183451, - "grad_norm": 1.1526131658530894, - "learning_rate": 8.140176789501102e-05, - "loss": 0.7519, - "step": 9410 - }, - { - "epoch": 0.6039127645926876, - "grad_norm": 0.8970675006265497, - "learning_rate": 8.129178094217141e-05, - "loss": 0.7025, - "step": 9415 - }, - { - "epoch": 0.6042334830019244, - "grad_norm": 1.16563982635486, - "learning_rate": 8.118181743674334e-05, - "loss": 0.6515, - "step": 9420 - }, - { - "epoch": 0.604554201411161, - "grad_norm": 1.009328430894082, - "learning_rate": 8.107187751654642e-05, - "loss": 0.8061, - "step": 9425 - }, - { - "epoch": 0.6048749198203976, - "grad_norm": 0.6431656020123224, - "learning_rate": 8.096196131937068e-05, - "loss": 0.7703, - "step": 9430 - }, - { - "epoch": 0.6051956382296344, - "grad_norm": 0.8022392814347792, - "learning_rate": 8.085206898297648e-05, - "loss": 0.4945, - "step": 9435 - }, - { - "epoch": 0.605516356638871, - "grad_norm": 0.8590402951031166, - "learning_rate": 8.074220064509428e-05, - "loss": 0.577, - "step": 9440 - }, - { - "epoch": 0.6058370750481078, - "grad_norm": 0.6529036302559359, - "learning_rate": 8.06323564434243e-05, - "loss": 0.6972, - "step": 9445 - }, - { - "epoch": 0.6061577934573444, - "grad_norm": 0.9053770255851836, - "learning_rate": 8.052253651563671e-05, - "loss": 0.6241, - "step": 9450 - }, - { - "epoch": 0.6064785118665812, - "grad_norm": 0.6968143227671041, - "learning_rate": 8.04127409993712e-05, - "loss": 0.7196, - "step": 9455 - }, - { - "epoch": 0.6067992302758178, - "grad_norm": 0.7907742358273027, - "learning_rate": 8.030297003223676e-05, - "loss": 0.6535, - "step": 9460 - }, - { - "epoch": 0.6071199486850545, - "grad_norm": 0.9043816519851674, - "learning_rate": 8.019322375181175e-05, - "loss": 0.7183, - "step": 9465 - }, - { - "epoch": 0.6074406670942912, - "grad_norm": 0.8583282541776323, - "learning_rate": 8.008350229564351e-05, - "loss": 0.7373, - "step": 9470 - }, - { - "epoch": 0.6077613855035279, - "grad_norm": 1.1639398571753123, - "learning_rate": 7.997380580124832e-05, - "loss": 0.6619, - "step": 9475 - }, - { - "epoch": 0.6080821039127646, - "grad_norm": 0.7363838290393571, - "learning_rate": 7.986413440611115e-05, - "loss": 0.5238, - "step": 9480 - }, - { - "epoch": 0.6084028223220013, - "grad_norm": 0.7361031316329811, - "learning_rate": 7.975448824768546e-05, - "loss": 0.7093, - "step": 9485 - }, - { - "epoch": 0.608723540731238, - "grad_norm": 0.8655976177215603, - "learning_rate": 7.964486746339315e-05, - "loss": 0.6699, - "step": 9490 - }, - { - "epoch": 0.6090442591404747, - "grad_norm": 0.7757949116609816, - "learning_rate": 7.95352721906243e-05, - "loss": 0.6457, - "step": 9495 - }, - { - "epoch": 0.6093649775497113, - "grad_norm": 1.0532442121286478, - "learning_rate": 7.942570256673704e-05, - "loss": 0.8266, - "step": 9500 - }, - { - "epoch": 0.6096856959589481, - "grad_norm": 0.8097807634079536, - "learning_rate": 7.931615872905727e-05, - "loss": 0.6542, - "step": 9505 - }, - { - "epoch": 0.6100064143681847, - "grad_norm": 1.170352424739306, - "learning_rate": 7.92066408148787e-05, - "loss": 0.6511, - "step": 9510 - }, - { - "epoch": 0.6103271327774215, - "grad_norm": 0.6465117473629731, - "learning_rate": 7.909714896146239e-05, - "loss": 0.6102, - "step": 9515 - }, - { - "epoch": 0.6106478511866581, - "grad_norm": 0.9562444288916828, - "learning_rate": 7.898768330603687e-05, - "loss": 0.7281, - "step": 9520 - }, - { - "epoch": 0.6109685695958949, - "grad_norm": 0.48629635257867143, - "learning_rate": 7.887824398579778e-05, - "loss": 0.5576, - "step": 9525 - }, - { - "epoch": 0.6112892880051315, - "grad_norm": 0.6187174821618042, - "learning_rate": 7.876883113790777e-05, - "loss": 0.4536, - "step": 9530 - }, - { - "epoch": 0.6116100064143681, - "grad_norm": 0.8491363897597337, - "learning_rate": 7.865944489949632e-05, - "loss": 0.5082, - "step": 9535 - }, - { - "epoch": 0.6119307248236049, - "grad_norm": 0.9489825766872471, - "learning_rate": 7.855008540765954e-05, - "loss": 0.8288, - "step": 9540 - }, - { - "epoch": 0.6122514432328415, - "grad_norm": 0.8247180962617905, - "learning_rate": 7.844075279945998e-05, - "loss": 0.7947, - "step": 9545 - }, - { - "epoch": 0.6125721616420783, - "grad_norm": 0.8487499152582451, - "learning_rate": 7.833144721192658e-05, - "loss": 0.4836, - "step": 9550 - }, - { - "epoch": 0.6128928800513149, - "grad_norm": 1.4749421151082263, - "learning_rate": 7.822216878205437e-05, - "loss": 0.6604, - "step": 9555 - }, - { - "epoch": 0.6132135984605517, - "grad_norm": 0.6439839118081867, - "learning_rate": 7.811291764680436e-05, - "loss": 0.5311, - "step": 9560 - }, - { - "epoch": 0.6135343168697883, - "grad_norm": 0.6948565188236483, - "learning_rate": 7.800369394310329e-05, - "loss": 0.7818, - "step": 9565 - }, - { - "epoch": 0.613855035279025, - "grad_norm": 0.5432098551962209, - "learning_rate": 7.789449780784361e-05, - "loss": 0.4817, - "step": 9570 - }, - { - "epoch": 0.6141757536882617, - "grad_norm": 0.8116998264643036, - "learning_rate": 7.778532937788319e-05, - "loss": 0.6809, - "step": 9575 - }, - { - "epoch": 0.6144964720974984, - "grad_norm": 0.927156766210116, - "learning_rate": 7.767618879004509e-05, - "loss": 0.6117, - "step": 9580 - }, - { - "epoch": 0.6148171905067351, - "grad_norm": 0.5580255415813408, - "learning_rate": 7.756707618111758e-05, - "loss": 0.5121, - "step": 9585 - }, - { - "epoch": 0.6151379089159718, - "grad_norm": 0.7697324881673694, - "learning_rate": 7.745799168785387e-05, - "loss": 0.7019, - "step": 9590 - }, - { - "epoch": 0.6154586273252085, - "grad_norm": 1.2533080746391783, - "learning_rate": 7.734893544697182e-05, - "loss": 0.6921, - "step": 9595 - }, - { - "epoch": 0.6157793457344451, - "grad_norm": 0.8591968885866408, - "learning_rate": 7.723990759515399e-05, - "loss": 0.6234, - "step": 9600 - }, - { - "epoch": 0.6161000641436819, - "grad_norm": 0.8144982447654572, - "learning_rate": 7.713090826904732e-05, - "loss": 0.6175, - "step": 9605 - }, - { - "epoch": 0.6164207825529185, - "grad_norm": 0.7852604055969639, - "learning_rate": 7.702193760526301e-05, - "loss": 0.538, - "step": 9610 - }, - { - "epoch": 0.6167415009621552, - "grad_norm": 0.82507022800839, - "learning_rate": 7.691299574037633e-05, - "loss": 0.5858, - "step": 9615 - }, - { - "epoch": 0.6170622193713919, - "grad_norm": 0.8977703001606776, - "learning_rate": 7.68040828109264e-05, - "loss": 0.6686, - "step": 9620 - }, - { - "epoch": 0.6173829377806286, - "grad_norm": 0.7575641120784353, - "learning_rate": 7.669519895341618e-05, - "loss": 0.6733, - "step": 9625 - }, - { - "epoch": 0.6177036561898653, - "grad_norm": 0.7782783108716851, - "learning_rate": 7.658634430431211e-05, - "loss": 0.6113, - "step": 9630 - }, - { - "epoch": 0.618024374599102, - "grad_norm": 0.8737688527317737, - "learning_rate": 7.647751900004408e-05, - "loss": 0.7703, - "step": 9635 - }, - { - "epoch": 0.6183450930083387, - "grad_norm": 0.7163537021531532, - "learning_rate": 7.63687231770052e-05, - "loss": 0.6687, - "step": 9640 - }, - { - "epoch": 0.6186658114175754, - "grad_norm": 0.7383194119362961, - "learning_rate": 7.625995697155153e-05, - "loss": 0.7192, - "step": 9645 - }, - { - "epoch": 0.618986529826812, - "grad_norm": 0.7818780084969111, - "learning_rate": 7.615122052000212e-05, - "loss": 0.4781, - "step": 9650 - }, - { - "epoch": 0.6193072482360488, - "grad_norm": 0.9549919791876611, - "learning_rate": 7.604251395863868e-05, - "loss": 0.5972, - "step": 9655 - }, - { - "epoch": 0.6196279666452854, - "grad_norm": 0.9266947067171263, - "learning_rate": 7.593383742370547e-05, - "loss": 0.7661, - "step": 9660 - }, - { - "epoch": 0.6199486850545222, - "grad_norm": 0.7815262374564014, - "learning_rate": 7.582519105140915e-05, - "loss": 0.844, - "step": 9665 - }, - { - "epoch": 0.6202694034637588, - "grad_norm": 0.9851958882202488, - "learning_rate": 7.571657497791855e-05, - "loss": 0.6573, - "step": 9670 - }, - { - "epoch": 0.6205901218729956, - "grad_norm": 0.863915136317819, - "learning_rate": 7.560798933936446e-05, - "loss": 0.6965, - "step": 9675 - }, - { - "epoch": 0.6209108402822322, - "grad_norm": 0.8169772635721835, - "learning_rate": 7.549943427183963e-05, - "loss": 0.6739, - "step": 9680 - }, - { - "epoch": 0.6212315586914688, - "grad_norm": 0.9621597430987586, - "learning_rate": 7.539090991139843e-05, - "loss": 0.7107, - "step": 9685 - }, - { - "epoch": 0.6215522771007056, - "grad_norm": 1.1682951488621962, - "learning_rate": 7.52824163940568e-05, - "loss": 0.7016, - "step": 9690 - }, - { - "epoch": 0.6218729955099422, - "grad_norm": 0.5988705115634277, - "learning_rate": 7.517395385579198e-05, - "loss": 0.5883, - "step": 9695 - }, - { - "epoch": 0.622193713919179, - "grad_norm": 0.6405875029114282, - "learning_rate": 7.506552243254235e-05, - "loss": 0.5632, - "step": 9700 - }, - { - "epoch": 0.6225144323284156, - "grad_norm": 0.9039124102611747, - "learning_rate": 7.49571222602074e-05, - "loss": 0.5569, - "step": 9705 - }, - { - "epoch": 0.6228351507376524, - "grad_norm": 1.1918655890149419, - "learning_rate": 7.484875347464731e-05, - "loss": 0.755, - "step": 9710 - }, - { - "epoch": 0.623155869146889, - "grad_norm": 2.014073968409583, - "learning_rate": 7.474041621168304e-05, - "loss": 0.6472, - "step": 9715 - }, - { - "epoch": 0.6234765875561257, - "grad_norm": 0.8921505648356219, - "learning_rate": 7.4632110607096e-05, - "loss": 0.8289, - "step": 9720 - }, - { - "epoch": 0.6237973059653624, - "grad_norm": 1.1073242240733232, - "learning_rate": 7.452383679662794e-05, - "loss": 0.6634, - "step": 9725 - }, - { - "epoch": 0.6241180243745991, - "grad_norm": 1.1492204881968546, - "learning_rate": 7.441559491598072e-05, - "loss": 0.6672, - "step": 9730 - }, - { - "epoch": 0.6244387427838358, - "grad_norm": 1.2072073594662214, - "learning_rate": 7.43073851008162e-05, - "loss": 0.6821, - "step": 9735 - }, - { - "epoch": 0.6247594611930725, - "grad_norm": 0.7796944953436583, - "learning_rate": 7.41992074867561e-05, - "loss": 0.5997, - "step": 9740 - }, - { - "epoch": 0.6250801796023092, - "grad_norm": 0.8744950902348806, - "learning_rate": 7.40910622093817e-05, - "loss": 0.8027, - "step": 9745 - }, - { - "epoch": 0.6254008980115459, - "grad_norm": 0.5663128313006088, - "learning_rate": 7.398294940423382e-05, - "loss": 0.6558, - "step": 9750 - }, - { - "epoch": 0.6257216164207825, - "grad_norm": 1.03786462429062, - "learning_rate": 7.387486920681251e-05, - "loss": 0.7204, - "step": 9755 - }, - { - "epoch": 0.6260423348300193, - "grad_norm": 1.0086514423501614, - "learning_rate": 7.376682175257703e-05, - "loss": 0.5726, - "step": 9760 - }, - { - "epoch": 0.6263630532392559, - "grad_norm": 0.7340138238860899, - "learning_rate": 7.365880717694558e-05, - "loss": 0.6003, - "step": 9765 - }, - { - "epoch": 0.6266837716484926, - "grad_norm": 1.0154279037896083, - "learning_rate": 7.355082561529511e-05, - "loss": 0.6518, - "step": 9770 - }, - { - "epoch": 0.6270044900577293, - "grad_norm": 1.1008265637631556, - "learning_rate": 7.344287720296128e-05, - "loss": 0.6493, - "step": 9775 - }, - { - "epoch": 0.627325208466966, - "grad_norm": 0.8136002565232989, - "learning_rate": 7.333496207523805e-05, - "loss": 0.7117, - "step": 9780 - }, - { - "epoch": 0.6276459268762027, - "grad_norm": 0.5762089560179455, - "learning_rate": 7.322708036737784e-05, - "loss": 0.4664, - "step": 9785 - }, - { - "epoch": 0.6279666452854393, - "grad_norm": 0.8389502685505456, - "learning_rate": 7.311923221459108e-05, - "loss": 0.6836, - "step": 9790 - }, - { - "epoch": 0.6282873636946761, - "grad_norm": 0.7980523725918469, - "learning_rate": 7.301141775204614e-05, - "loss": 0.6824, - "step": 9795 - }, - { - "epoch": 0.6286080821039127, - "grad_norm": 1.1727596107618312, - "learning_rate": 7.290363711486923e-05, - "loss": 0.6435, - "step": 9800 - }, - { - "epoch": 0.6289288005131495, - "grad_norm": 0.4755883693546517, - "learning_rate": 7.279589043814413e-05, - "loss": 0.7567, - "step": 9805 - }, - { - "epoch": 0.6292495189223861, - "grad_norm": 0.59249663501007, - "learning_rate": 7.268817785691204e-05, - "loss": 0.6907, - "step": 9810 - }, - { - "epoch": 0.6295702373316229, - "grad_norm": 0.848542013217018, - "learning_rate": 7.258049950617146e-05, - "loss": 0.6471, - "step": 9815 - }, - { - "epoch": 0.6298909557408595, - "grad_norm": 1.047981392744028, - "learning_rate": 7.247285552087797e-05, - "loss": 0.5712, - "step": 9820 - }, - { - "epoch": 0.6302116741500963, - "grad_norm": 0.8916612499406957, - "learning_rate": 7.236524603594406e-05, - "loss": 0.6496, - "step": 9825 - }, - { - "epoch": 0.6305323925593329, - "grad_norm": 0.810154490032121, - "learning_rate": 7.225767118623906e-05, - "loss": 0.5871, - "step": 9830 - }, - { - "epoch": 0.6308531109685696, - "grad_norm": 0.8722001341085496, - "learning_rate": 7.215013110658875e-05, - "loss": 0.643, - "step": 9835 - }, - { - "epoch": 0.6311738293778063, - "grad_norm": 0.6036268039451337, - "learning_rate": 7.204262593177551e-05, - "loss": 0.6787, - "step": 9840 - }, - { - "epoch": 0.631494547787043, - "grad_norm": 1.1616717351436967, - "learning_rate": 7.193515579653777e-05, - "loss": 0.5542, - "step": 9845 - }, - { - "epoch": 0.6318152661962797, - "grad_norm": 0.8131100593226482, - "learning_rate": 7.182772083557022e-05, - "loss": 0.7859, - "step": 9850 - }, - { - "epoch": 0.6321359846055163, - "grad_norm": 0.876808117538372, - "learning_rate": 7.172032118352338e-05, - "loss": 0.6484, - "step": 9855 - }, - { - "epoch": 0.6324567030147531, - "grad_norm": 0.8713054808471165, - "learning_rate": 7.161295697500353e-05, - "loss": 0.6265, - "step": 9860 - }, - { - "epoch": 0.6327774214239897, - "grad_norm": 1.023366348564304, - "learning_rate": 7.150562834457257e-05, - "loss": 0.5939, - "step": 9865 - }, - { - "epoch": 0.6330981398332264, - "grad_norm": 0.7588376669281691, - "learning_rate": 7.13983354267477e-05, - "loss": 0.7873, - "step": 9870 - }, - { - "epoch": 0.6334188582424631, - "grad_norm": 1.028561424510279, - "learning_rate": 7.129107835600149e-05, - "loss": 0.6212, - "step": 9875 - }, - { - "epoch": 0.6337395766516998, - "grad_norm": 0.5002948721851668, - "learning_rate": 7.118385726676148e-05, - "loss": 0.6269, - "step": 9880 - }, - { - "epoch": 0.6340602950609365, - "grad_norm": 0.6840341058593294, - "learning_rate": 7.10766722934102e-05, - "loss": 0.6232, - "step": 9885 - }, - { - "epoch": 0.6343810134701732, - "grad_norm": 1.1628940715108431, - "learning_rate": 7.096952357028486e-05, - "loss": 0.7978, - "step": 9890 - }, - { - "epoch": 0.6347017318794099, - "grad_norm": 0.8853939814346806, - "learning_rate": 7.086241123167722e-05, - "loss": 0.6057, - "step": 9895 - }, - { - "epoch": 0.6350224502886466, - "grad_norm": 0.7451557600335174, - "learning_rate": 7.07553354118335e-05, - "loss": 0.7038, - "step": 9900 - }, - { - "epoch": 0.6353431686978832, - "grad_norm": 1.40409713973294, - "learning_rate": 7.064829624495415e-05, - "loss": 0.6721, - "step": 9905 - }, - { - "epoch": 0.63566388710712, - "grad_norm": 0.8791535681920543, - "learning_rate": 7.054129386519356e-05, - "loss": 0.7629, - "step": 9910 - }, - { - "epoch": 0.6359846055163566, - "grad_norm": 0.6562938490531729, - "learning_rate": 7.043432840666015e-05, - "loss": 0.6885, - "step": 9915 - }, - { - "epoch": 0.6363053239255934, - "grad_norm": 0.8475306109482822, - "learning_rate": 7.032740000341604e-05, - "loss": 0.6528, - "step": 9920 - }, - { - "epoch": 0.63662604233483, - "grad_norm": 1.0340930274606936, - "learning_rate": 7.022050878947683e-05, - "loss": 0.5579, - "step": 9925 - }, - { - "epoch": 0.6369467607440668, - "grad_norm": 0.892410748846026, - "learning_rate": 7.011365489881164e-05, - "loss": 0.622, - "step": 9930 - }, - { - "epoch": 0.6372674791533034, - "grad_norm": 1.026899828920046, - "learning_rate": 7.000683846534268e-05, - "loss": 0.7173, - "step": 9935 - }, - { - "epoch": 0.63758819756254, - "grad_norm": 0.7906424850106287, - "learning_rate": 6.99000596229453e-05, - "loss": 0.6518, - "step": 9940 - }, - { - "epoch": 0.6379089159717768, - "grad_norm": 0.885516437560555, - "learning_rate": 6.979331850544772e-05, - "loss": 0.7629, - "step": 9945 - }, - { - "epoch": 0.6382296343810134, - "grad_norm": 1.2585108576804727, - "learning_rate": 6.968661524663085e-05, - "loss": 0.5346, - "step": 9950 - }, - { - "epoch": 0.6385503527902502, - "grad_norm": 0.6378216033005294, - "learning_rate": 6.957994998022817e-05, - "loss": 0.5599, - "step": 9955 - }, - { - "epoch": 0.6388710711994868, - "grad_norm": 1.0857649237283717, - "learning_rate": 6.947332283992553e-05, - "loss": 0.5546, - "step": 9960 - }, - { - "epoch": 0.6391917896087236, - "grad_norm": 0.7485103608812504, - "learning_rate": 6.936673395936103e-05, - "loss": 0.7607, - "step": 9965 - }, - { - "epoch": 0.6395125080179602, - "grad_norm": 0.6831137045570516, - "learning_rate": 6.926018347212482e-05, - "loss": 0.7246, - "step": 9970 - }, - { - "epoch": 0.6398332264271969, - "grad_norm": 0.8371300993555119, - "learning_rate": 6.915367151175887e-05, - "loss": 0.7647, - "step": 9975 - }, - { - "epoch": 0.6401539448364336, - "grad_norm": 0.6790794293309601, - "learning_rate": 6.904719821175691e-05, - "loss": 0.709, - "step": 9980 - }, - { - "epoch": 0.6404746632456703, - "grad_norm": 1.2809292980337206, - "learning_rate": 6.894076370556419e-05, - "loss": 0.7072, - "step": 9985 - }, - { - "epoch": 0.640795381654907, - "grad_norm": 0.6309070049475263, - "learning_rate": 6.883436812657736e-05, - "loss": 0.7517, - "step": 9990 - }, - { - "epoch": 0.6411161000641437, - "grad_norm": 0.7057857328226916, - "learning_rate": 6.872801160814429e-05, - "loss": 0.5892, - "step": 9995 - }, - { - "epoch": 0.6414368184733804, - "grad_norm": 0.6684609047663461, - "learning_rate": 6.862169428356391e-05, - "loss": 0.7041, - "step": 10000 - }, - { - "epoch": 0.641757536882617, - "grad_norm": 0.9825781560923286, - "learning_rate": 6.851541628608593e-05, - "loss": 0.5732, - "step": 10005 - }, - { - "epoch": 0.6420782552918538, - "grad_norm": 0.6656401212815036, - "learning_rate": 6.840917774891089e-05, - "loss": 0.6996, - "step": 10010 - }, - { - "epoch": 0.6423989737010904, - "grad_norm": 1.0284673996842317, - "learning_rate": 6.830297880518982e-05, - "loss": 0.6385, - "step": 10015 - }, - { - "epoch": 0.6427196921103271, - "grad_norm": 1.3813453443085013, - "learning_rate": 6.819681958802411e-05, - "loss": 0.8024, - "step": 10020 - }, - { - "epoch": 0.6430404105195638, - "grad_norm": 1.0439998261378045, - "learning_rate": 6.809070023046542e-05, - "loss": 0.7246, - "step": 10025 - }, - { - "epoch": 0.6433611289288005, - "grad_norm": 1.3726132291968678, - "learning_rate": 6.798462086551536e-05, - "loss": 0.7607, - "step": 10030 - }, - { - "epoch": 0.6436818473380372, - "grad_norm": 0.696112632783953, - "learning_rate": 6.78785816261255e-05, - "loss": 0.6657, - "step": 10035 - }, - { - "epoch": 0.6440025657472739, - "grad_norm": 0.9271308758677715, - "learning_rate": 6.777258264519712e-05, - "loss": 0.7089, - "step": 10040 - }, - { - "epoch": 0.6443232841565106, - "grad_norm": 0.971107223858267, - "learning_rate": 6.766662405558095e-05, - "loss": 0.7127, - "step": 10045 - }, - { - "epoch": 0.6446440025657473, - "grad_norm": 1.1077553805147324, - "learning_rate": 6.756070599007717e-05, - "loss": 0.6674, - "step": 10050 - }, - { - "epoch": 0.6449647209749839, - "grad_norm": 1.1241145720577337, - "learning_rate": 6.745482858143519e-05, - "loss": 0.6908, - "step": 10055 - }, - { - "epoch": 0.6452854393842207, - "grad_norm": 1.0311402231942566, - "learning_rate": 6.734899196235342e-05, - "loss": 0.5903, - "step": 10060 - }, - { - "epoch": 0.6456061577934573, - "grad_norm": 1.1164020984789884, - "learning_rate": 6.724319626547916e-05, - "loss": 0.7299, - "step": 10065 - }, - { - "epoch": 0.6459268762026941, - "grad_norm": 0.862577581408513, - "learning_rate": 6.71374416234084e-05, - "loss": 0.6447, - "step": 10070 - }, - { - "epoch": 0.6462475946119307, - "grad_norm": 0.6813994701366789, - "learning_rate": 6.703172816868575e-05, - "loss": 0.6327, - "step": 10075 - }, - { - "epoch": 0.6465683130211675, - "grad_norm": 0.8916563918460675, - "learning_rate": 6.69260560338041e-05, - "loss": 0.5921, - "step": 10080 - }, - { - "epoch": 0.6468890314304041, - "grad_norm": 0.9332137514439207, - "learning_rate": 6.682042535120463e-05, - "loss": 0.6558, - "step": 10085 - }, - { - "epoch": 0.6472097498396407, - "grad_norm": 0.83477107809383, - "learning_rate": 6.67148362532765e-05, - "loss": 0.6404, - "step": 10090 - }, - { - "epoch": 0.6475304682488775, - "grad_norm": 1.2218962185380584, - "learning_rate": 6.66092888723568e-05, - "loss": 0.6856, - "step": 10095 - }, - { - "epoch": 0.6478511866581141, - "grad_norm": 0.5613953193652488, - "learning_rate": 6.650378334073036e-05, - "loss": 0.5747, - "step": 10100 - }, - { - "epoch": 0.6481719050673509, - "grad_norm": 1.161315529719475, - "learning_rate": 6.639831979062952e-05, - "loss": 0.7714, - "step": 10105 - }, - { - "epoch": 0.6484926234765875, - "grad_norm": 1.2013466455307917, - "learning_rate": 6.629289835423393e-05, - "loss": 0.7067, - "step": 10110 - }, - { - "epoch": 0.6488133418858243, - "grad_norm": 0.8985970817080027, - "learning_rate": 6.618751916367061e-05, - "loss": 0.8022, - "step": 10115 - }, - { - "epoch": 0.6491340602950609, - "grad_norm": 1.2136972519623022, - "learning_rate": 6.608218235101352e-05, - "loss": 0.6141, - "step": 10120 - }, - { - "epoch": 0.6494547787042976, - "grad_norm": 0.9718583450791072, - "learning_rate": 6.597688804828353e-05, - "loss": 0.5938, - "step": 10125 - }, - { - "epoch": 0.6497754971135343, - "grad_norm": 0.9547734637829278, - "learning_rate": 6.587163638744827e-05, - "loss": 0.6992, - "step": 10130 - }, - { - "epoch": 0.650096215522771, - "grad_norm": 0.9151909021410464, - "learning_rate": 6.57664275004219e-05, - "loss": 0.7343, - "step": 10135 - }, - { - "epoch": 0.6504169339320077, - "grad_norm": 1.5971760196514397, - "learning_rate": 6.566126151906498e-05, - "loss": 0.7017, - "step": 10140 - }, - { - "epoch": 0.6507376523412444, - "grad_norm": 0.8126791037548418, - "learning_rate": 6.555613857518425e-05, - "loss": 0.6567, - "step": 10145 - }, - { - "epoch": 0.6510583707504811, - "grad_norm": 0.7571219128173635, - "learning_rate": 6.545105880053258e-05, - "loss": 0.6871, - "step": 10150 - }, - { - "epoch": 0.6513790891597178, - "grad_norm": 0.688497347517119, - "learning_rate": 6.534602232680869e-05, - "loss": 0.7347, - "step": 10155 - }, - { - "epoch": 0.6516998075689544, - "grad_norm": 0.8955793200079804, - "learning_rate": 6.524102928565706e-05, - "loss": 0.5972, - "step": 10160 - }, - { - "epoch": 0.6520205259781912, - "grad_norm": 0.9443767111598063, - "learning_rate": 6.513607980866768e-05, - "loss": 0.723, - "step": 10165 - }, - { - "epoch": 0.6523412443874278, - "grad_norm": 0.8214020012837946, - "learning_rate": 6.5031174027376e-05, - "loss": 0.7531, - "step": 10170 - }, - { - "epoch": 0.6526619627966646, - "grad_norm": 0.9405554364877039, - "learning_rate": 6.492631207326271e-05, - "loss": 0.6579, - "step": 10175 - }, - { - "epoch": 0.6529826812059012, - "grad_norm": 0.8528480386187783, - "learning_rate": 6.482149407775348e-05, - "loss": 0.6639, - "step": 10180 - }, - { - "epoch": 0.653303399615138, - "grad_norm": 1.0215536554217552, - "learning_rate": 6.471672017221897e-05, - "loss": 0.6788, - "step": 10185 - }, - { - "epoch": 0.6536241180243746, - "grad_norm": 1.0458906526223661, - "learning_rate": 6.461199048797457e-05, - "loss": 0.7466, - "step": 10190 - }, - { - "epoch": 0.6539448364336113, - "grad_norm": 0.7250104664732925, - "learning_rate": 6.450730515628025e-05, - "loss": 0.4862, - "step": 10195 - }, - { - "epoch": 0.654265554842848, - "grad_norm": 1.1562228223771571, - "learning_rate": 6.440266430834035e-05, - "loss": 0.7554, - "step": 10200 - }, - { - "epoch": 0.6545862732520846, - "grad_norm": 0.7656674676905709, - "learning_rate": 6.429806807530348e-05, - "loss": 0.6668, - "step": 10205 - }, - { - "epoch": 0.6549069916613214, - "grad_norm": 1.1136322722942007, - "learning_rate": 6.419351658826236e-05, - "loss": 0.7241, - "step": 10210 - }, - { - "epoch": 0.655227710070558, - "grad_norm": 1.0761146316049985, - "learning_rate": 6.40890099782536e-05, - "loss": 0.6501, - "step": 10215 - }, - { - "epoch": 0.6555484284797948, - "grad_norm": 0.9079430022905365, - "learning_rate": 6.398454837625761e-05, - "loss": 0.8384, - "step": 10220 - }, - { - "epoch": 0.6558691468890314, - "grad_norm": 0.8488475441393789, - "learning_rate": 6.388013191319829e-05, - "loss": 0.697, - "step": 10225 - }, - { - "epoch": 0.6561898652982682, - "grad_norm": 1.8731573144161795, - "learning_rate": 6.377576071994306e-05, - "loss": 0.5274, - "step": 10230 - }, - { - "epoch": 0.6565105837075048, - "grad_norm": 0.9597668865369915, - "learning_rate": 6.367143492730257e-05, - "loss": 0.5793, - "step": 10235 - }, - { - "epoch": 0.6568313021167415, - "grad_norm": 0.9184805187055093, - "learning_rate": 6.356715466603058e-05, - "loss": 0.7204, - "step": 10240 - }, - { - "epoch": 0.6571520205259782, - "grad_norm": 1.010481078501907, - "learning_rate": 6.346292006682375e-05, - "loss": 0.6568, - "step": 10245 - }, - { - "epoch": 0.6574727389352149, - "grad_norm": 1.2893595780329616, - "learning_rate": 6.335873126032155e-05, - "loss": 0.7476, - "step": 10250 - }, - { - "epoch": 0.6577934573444516, - "grad_norm": 0.7919851978335327, - "learning_rate": 6.325458837710603e-05, - "loss": 0.6681, - "step": 10255 - }, - { - "epoch": 0.6581141757536882, - "grad_norm": 0.7133876917502856, - "learning_rate": 6.31504915477017e-05, - "loss": 0.7879, - "step": 10260 - }, - { - "epoch": 0.658434894162925, - "grad_norm": 0.8067826322951818, - "learning_rate": 6.304644090257536e-05, - "loss": 0.64, - "step": 10265 - }, - { - "epoch": 0.6587556125721616, - "grad_norm": 0.7174409241967863, - "learning_rate": 6.294243657213587e-05, - "loss": 0.5671, - "step": 10270 - }, - { - "epoch": 0.6590763309813983, - "grad_norm": 0.7812465401233117, - "learning_rate": 6.283847868673417e-05, - "loss": 0.628, - "step": 10275 - }, - { - "epoch": 0.659397049390635, - "grad_norm": 0.565828308616574, - "learning_rate": 6.273456737666281e-05, - "loss": 0.621, - "step": 10280 - }, - { - "epoch": 0.6597177677998717, - "grad_norm": 1.0913219783317336, - "learning_rate": 6.26307027721561e-05, - "loss": 0.6341, - "step": 10285 - }, - { - "epoch": 0.6600384862091084, - "grad_norm": 0.812647700581263, - "learning_rate": 6.252688500338979e-05, - "loss": 0.6266, - "step": 10290 - }, - { - "epoch": 0.6603592046183451, - "grad_norm": 1.3344320513324446, - "learning_rate": 6.242311420048087e-05, - "loss": 0.697, - "step": 10295 - }, - { - "epoch": 0.6606799230275818, - "grad_norm": 0.8037339071262586, - "learning_rate": 6.231939049348756e-05, - "loss": 0.662, - "step": 10300 - }, - { - "epoch": 0.6610006414368185, - "grad_norm": 0.8348124914063436, - "learning_rate": 6.221571401240898e-05, - "loss": 0.5953, - "step": 10305 - }, - { - "epoch": 0.6613213598460551, - "grad_norm": 0.8007698372402566, - "learning_rate": 6.211208488718508e-05, - "loss": 0.7067, - "step": 10310 - }, - { - "epoch": 0.6616420782552919, - "grad_norm": 1.0240691382811138, - "learning_rate": 6.200850324769645e-05, - "loss": 0.6563, - "step": 10315 - }, - { - "epoch": 0.6619627966645285, - "grad_norm": 0.6245391951301155, - "learning_rate": 6.190496922376419e-05, - "loss": 0.566, - "step": 10320 - }, - { - "epoch": 0.6622835150737653, - "grad_norm": 0.9667633410108524, - "learning_rate": 6.180148294514969e-05, - "loss": 0.6114, - "step": 10325 - }, - { - "epoch": 0.6626042334830019, - "grad_norm": 0.7507271356005688, - "learning_rate": 6.169804454155457e-05, - "loss": 0.5604, - "step": 10330 - }, - { - "epoch": 0.6629249518922387, - "grad_norm": 1.3185339543060972, - "learning_rate": 6.159465414262034e-05, - "loss": 0.6832, - "step": 10335 - }, - { - "epoch": 0.6632456703014753, - "grad_norm": 1.1847306027291458, - "learning_rate": 6.14913118779284e-05, - "loss": 0.8276, - "step": 10340 - }, - { - "epoch": 0.6635663887107119, - "grad_norm": 0.645482702109424, - "learning_rate": 6.138801787699988e-05, - "loss": 0.7251, - "step": 10345 - }, - { - "epoch": 0.6638871071199487, - "grad_norm": 0.9170687001642995, - "learning_rate": 6.128477226929532e-05, - "loss": 0.5489, - "step": 10350 - }, - { - "epoch": 0.6642078255291853, - "grad_norm": 1.000806725934412, - "learning_rate": 6.118157518421468e-05, - "loss": 0.7246, - "step": 10355 - }, - { - "epoch": 0.6645285439384221, - "grad_norm": 0.8379511672470946, - "learning_rate": 6.107842675109703e-05, - "loss": 0.7874, - "step": 10360 - }, - { - "epoch": 0.6648492623476587, - "grad_norm": 0.7371509556636497, - "learning_rate": 6.097532709922054e-05, - "loss": 0.6244, - "step": 10365 - }, - { - "epoch": 0.6651699807568955, - "grad_norm": 0.9539665664045133, - "learning_rate": 6.087227635780225e-05, - "loss": 0.6107, - "step": 10370 - }, - { - "epoch": 0.6654906991661321, - "grad_norm": 0.7979555148132079, - "learning_rate": 6.0769274655997775e-05, - "loss": 0.5344, - "step": 10375 - }, - { - "epoch": 0.6658114175753689, - "grad_norm": 0.909657054573839, - "learning_rate": 6.0666322122901396e-05, - "loss": 0.6275, - "step": 10380 - }, - { - "epoch": 0.6661321359846055, - "grad_norm": 1.0313940290067696, - "learning_rate": 6.056341888754573e-05, - "loss": 0.6082, - "step": 10385 - }, - { - "epoch": 0.6664528543938422, - "grad_norm": 0.7489838245596225, - "learning_rate": 6.0460565078901633e-05, - "loss": 0.5819, - "step": 10390 - }, - { - "epoch": 0.6667735728030789, - "grad_norm": 1.1118413959198947, - "learning_rate": 6.035776082587794e-05, - "loss": 0.5196, - "step": 10395 - }, - { - "epoch": 0.6670942912123156, - "grad_norm": 0.8125706280287548, - "learning_rate": 6.025500625732142e-05, - "loss": 0.5352, - "step": 10400 - }, - { - "epoch": 0.6674150096215523, - "grad_norm": 0.9492211031254315, - "learning_rate": 6.015230150201661e-05, - "loss": 0.5139, - "step": 10405 - }, - { - "epoch": 0.667735728030789, - "grad_norm": 0.7268694268672965, - "learning_rate": 6.0049646688685567e-05, - "loss": 0.6442, - "step": 10410 - }, - { - "epoch": 0.6680564464400257, - "grad_norm": 0.7538411268384596, - "learning_rate": 5.994704194598775e-05, - "loss": 0.7771, - "step": 10415 - }, - { - "epoch": 0.6683771648492624, - "grad_norm": 0.732055273874663, - "learning_rate": 5.9844487402519886e-05, - "loss": 0.4246, - "step": 10420 - }, - { - "epoch": 0.668697883258499, - "grad_norm": 0.9282996799361855, - "learning_rate": 5.97419831868158e-05, - "loss": 0.6212, - "step": 10425 - }, - { - "epoch": 0.6690186016677357, - "grad_norm": 0.8160584484135337, - "learning_rate": 5.96395294273462e-05, - "loss": 0.5947, - "step": 10430 - }, - { - "epoch": 0.6693393200769724, - "grad_norm": 0.563899508227464, - "learning_rate": 5.9537126252518595e-05, - "loss": 0.6085, - "step": 10435 - }, - { - "epoch": 0.6696600384862091, - "grad_norm": 0.7096696600311123, - "learning_rate": 5.9434773790677076e-05, - "loss": 0.6623, - "step": 10440 - }, - { - "epoch": 0.6699807568954458, - "grad_norm": 1.0083725702632502, - "learning_rate": 5.933247217010216e-05, - "loss": 0.7533, - "step": 10445 - }, - { - "epoch": 0.6703014753046825, - "grad_norm": 0.8583730314996155, - "learning_rate": 5.9230221519010634e-05, - "loss": 0.6899, - "step": 10450 - }, - { - "epoch": 0.6706221937139192, - "grad_norm": 0.9948242533172998, - "learning_rate": 5.912802196555547e-05, - "loss": 0.6441, - "step": 10455 - }, - { - "epoch": 0.6709429121231558, - "grad_norm": 0.8416659287585814, - "learning_rate": 5.902587363782553e-05, - "loss": 0.52, - "step": 10460 - }, - { - "epoch": 0.6712636305323926, - "grad_norm": 0.7875617753719326, - "learning_rate": 5.892377666384552e-05, - "loss": 0.8289, - "step": 10465 - }, - { - "epoch": 0.6715843489416292, - "grad_norm": 1.3665322708300398, - "learning_rate": 5.882173117157579e-05, - "loss": 0.6931, - "step": 10470 - }, - { - "epoch": 0.671905067350866, - "grad_norm": 1.484703583509698, - "learning_rate": 5.871973728891207e-05, - "loss": 0.6282, - "step": 10475 - }, - { - "epoch": 0.6722257857601026, - "grad_norm": 0.6277171001704246, - "learning_rate": 5.861779514368552e-05, - "loss": 0.5476, - "step": 10480 - }, - { - "epoch": 0.6725465041693394, - "grad_norm": 0.893359208561377, - "learning_rate": 5.851590486366241e-05, - "loss": 0.5851, - "step": 10485 - }, - { - "epoch": 0.672867222578576, - "grad_norm": 0.7320275300041723, - "learning_rate": 5.841406657654402e-05, - "loss": 0.7706, - "step": 10490 - }, - { - "epoch": 0.6731879409878126, - "grad_norm": 0.8287094016340315, - "learning_rate": 5.831228040996643e-05, - "loss": 0.6782, - "step": 10495 - }, - { - "epoch": 0.6735086593970494, - "grad_norm": 0.668748966976369, - "learning_rate": 5.8210546491500416e-05, - "loss": 0.4843, - "step": 10500 - }, - { - "epoch": 0.673829377806286, - "grad_norm": 0.7774193196749479, - "learning_rate": 5.8108864948651385e-05, - "loss": 0.6915, - "step": 10505 - }, - { - "epoch": 0.6741500962155228, - "grad_norm": 0.7361276836480435, - "learning_rate": 5.8007235908858815e-05, - "loss": 0.6037, - "step": 10510 - }, - { - "epoch": 0.6744708146247594, - "grad_norm": 0.9273797610571103, - "learning_rate": 5.790565949949669e-05, - "loss": 0.6447, - "step": 10515 - }, - { - "epoch": 0.6747915330339962, - "grad_norm": 0.7357377379625472, - "learning_rate": 5.780413584787285e-05, - "loss": 0.6123, - "step": 10520 - }, - { - "epoch": 0.6751122514432328, - "grad_norm": 0.7349196129011529, - "learning_rate": 5.770266508122903e-05, - "loss": 0.6148, - "step": 10525 - }, - { - "epoch": 0.6754329698524695, - "grad_norm": 0.7228184809432814, - "learning_rate": 5.760124732674079e-05, - "loss": 0.7375, - "step": 10530 - }, - { - "epoch": 0.6757536882617062, - "grad_norm": 0.7245846277368149, - "learning_rate": 5.749988271151714e-05, - "loss": 0.8622, - "step": 10535 - }, - { - "epoch": 0.6760744066709429, - "grad_norm": 0.7864676224072312, - "learning_rate": 5.739857136260046e-05, - "loss": 0.712, - "step": 10540 - }, - { - "epoch": 0.6763951250801796, - "grad_norm": 1.645141716455399, - "learning_rate": 5.7297313406966534e-05, - "loss": 0.6939, - "step": 10545 - }, - { - "epoch": 0.6767158434894163, - "grad_norm": 0.5062488079743617, - "learning_rate": 5.719610897152405e-05, - "loss": 0.5611, - "step": 10550 - }, - { - "epoch": 0.677036561898653, - "grad_norm": 0.7048718325836721, - "learning_rate": 5.709495818311477e-05, - "loss": 0.7464, - "step": 10555 - }, - { - "epoch": 0.6773572803078897, - "grad_norm": 1.1659307946452016, - "learning_rate": 5.699386116851309e-05, - "loss": 0.7177, - "step": 10560 - }, - { - "epoch": 0.6776779987171264, - "grad_norm": 0.9170897775066968, - "learning_rate": 5.6892818054426035e-05, - "loss": 0.669, - "step": 10565 - }, - { - "epoch": 0.6779987171263631, - "grad_norm": 1.0508889718757837, - "learning_rate": 5.679182896749322e-05, - "loss": 0.6744, - "step": 10570 - }, - { - "epoch": 0.6783194355355997, - "grad_norm": 0.8259858656059345, - "learning_rate": 5.669089403428627e-05, - "loss": 0.6801, - "step": 10575 - }, - { - "epoch": 0.6786401539448365, - "grad_norm": 0.6629893516596802, - "learning_rate": 5.659001338130923e-05, - "loss": 0.6013, - "step": 10580 - }, - { - "epoch": 0.6789608723540731, - "grad_norm": 0.968488221191984, - "learning_rate": 5.648918713499787e-05, - "loss": 0.7905, - "step": 10585 - }, - { - "epoch": 0.6792815907633099, - "grad_norm": 0.7585559410962367, - "learning_rate": 5.6388415421719996e-05, - "loss": 0.5525, - "step": 10590 - }, - { - "epoch": 0.6796023091725465, - "grad_norm": 1.2745141606185377, - "learning_rate": 5.6287698367774897e-05, - "loss": 0.7167, - "step": 10595 - }, - { - "epoch": 0.6799230275817832, - "grad_norm": 0.6728914302123802, - "learning_rate": 5.6187036099393375e-05, - "loss": 0.6937, - "step": 10600 - }, - { - "epoch": 0.6802437459910199, - "grad_norm": 0.600819149081247, - "learning_rate": 5.608642874273771e-05, - "loss": 0.6316, - "step": 10605 - }, - { - "epoch": 0.6805644644002565, - "grad_norm": 0.6959088365991615, - "learning_rate": 5.598587642390114e-05, - "loss": 0.7457, - "step": 10610 - }, - { - "epoch": 0.6808851828094933, - "grad_norm": 0.7266824723699652, - "learning_rate": 5.5885379268908134e-05, - "loss": 0.6045, - "step": 10615 - }, - { - "epoch": 0.6812059012187299, - "grad_norm": 0.6681555688621381, - "learning_rate": 5.578493740371389e-05, - "loss": 0.6286, - "step": 10620 - }, - { - "epoch": 0.6815266196279667, - "grad_norm": 0.7610528413953269, - "learning_rate": 5.568455095420431e-05, - "loss": 0.5733, - "step": 10625 - }, - { - "epoch": 0.6818473380372033, - "grad_norm": 1.3214312132482846, - "learning_rate": 5.558422004619597e-05, - "loss": 0.6319, - "step": 10630 - }, - { - "epoch": 0.6821680564464401, - "grad_norm": 0.6966982078568826, - "learning_rate": 5.548394480543564e-05, - "loss": 0.4698, - "step": 10635 - }, - { - "epoch": 0.6824887748556767, - "grad_norm": 0.6367878363111128, - "learning_rate": 5.538372535760057e-05, - "loss": 0.662, - "step": 10640 - }, - { - "epoch": 0.6828094932649134, - "grad_norm": 0.5466987109462808, - "learning_rate": 5.528356182829777e-05, - "loss": 0.5193, - "step": 10645 - }, - { - "epoch": 0.6831302116741501, - "grad_norm": 0.8091665259225381, - "learning_rate": 5.518345434306444e-05, - "loss": 0.5853, - "step": 10650 - }, - { - "epoch": 0.6834509300833868, - "grad_norm": 0.5989345577351957, - "learning_rate": 5.508340302736743e-05, - "loss": 0.5997, - "step": 10655 - }, - { - "epoch": 0.6837716484926235, - "grad_norm": 0.8246700551716405, - "learning_rate": 5.498340800660313e-05, - "loss": 0.715, - "step": 10660 - }, - { - "epoch": 0.6840923669018601, - "grad_norm": 0.7999016646795889, - "learning_rate": 5.488346940609753e-05, - "loss": 0.7212, - "step": 10665 - }, - { - "epoch": 0.6844130853110969, - "grad_norm": 0.5763703153217136, - "learning_rate": 5.4783587351105734e-05, - "loss": 0.6361, - "step": 10670 - }, - { - "epoch": 0.6847338037203335, - "grad_norm": 1.3911645606934129, - "learning_rate": 5.4683761966812154e-05, - "loss": 0.7494, - "step": 10675 - }, - { - "epoch": 0.6850545221295702, - "grad_norm": 1.1526450545139104, - "learning_rate": 5.458399337833002e-05, - "loss": 0.5274, - "step": 10680 - }, - { - "epoch": 0.6853752405388069, - "grad_norm": 1.0168267129176949, - "learning_rate": 5.448428171070141e-05, - "loss": 0.8071, - "step": 10685 - }, - { - "epoch": 0.6856959589480436, - "grad_norm": 0.7598086971815275, - "learning_rate": 5.438462708889718e-05, - "loss": 0.676, - "step": 10690 - }, - { - "epoch": 0.6860166773572803, - "grad_norm": 1.056491176869749, - "learning_rate": 5.428502963781654e-05, - "loss": 0.591, - "step": 10695 - }, - { - "epoch": 0.686337395766517, - "grad_norm": 0.8433612740283131, - "learning_rate": 5.418548948228709e-05, - "loss": 0.6323, - "step": 10700 - }, - { - "epoch": 0.6866581141757537, - "grad_norm": 1.1399615640431888, - "learning_rate": 5.408600674706474e-05, - "loss": 0.6943, - "step": 10705 - }, - { - "epoch": 0.6869788325849904, - "grad_norm": 1.1427576567421822, - "learning_rate": 5.39865815568332e-05, - "loss": 0.6542, - "step": 10710 - }, - { - "epoch": 0.687299550994227, - "grad_norm": 0.8398449025370285, - "learning_rate": 5.3887214036204295e-05, - "loss": 0.6775, - "step": 10715 - }, - { - "epoch": 0.6876202694034638, - "grad_norm": 0.6183753226440165, - "learning_rate": 5.3787904309717365e-05, - "loss": 0.5856, - "step": 10720 - }, - { - "epoch": 0.6879409878127004, - "grad_norm": 0.7303097761926962, - "learning_rate": 5.368865250183952e-05, - "loss": 0.5393, - "step": 10725 - }, - { - "epoch": 0.6882617062219372, - "grad_norm": 1.042159531292707, - "learning_rate": 5.358945873696514e-05, - "loss": 0.598, - "step": 10730 - }, - { - "epoch": 0.6885824246311738, - "grad_norm": 0.8726534481321939, - "learning_rate": 5.3490323139415844e-05, - "loss": 0.6874, - "step": 10735 - }, - { - "epoch": 0.6889031430404106, - "grad_norm": 0.8279765934645724, - "learning_rate": 5.339124583344046e-05, - "loss": 0.7282, - "step": 10740 - }, - { - "epoch": 0.6892238614496472, - "grad_norm": 1.1033370234326692, - "learning_rate": 5.3292226943214666e-05, - "loss": 0.6647, - "step": 10745 - }, - { - "epoch": 0.689544579858884, - "grad_norm": 0.6731635406372563, - "learning_rate": 5.3193266592840994e-05, - "loss": 0.642, - "step": 10750 - }, - { - "epoch": 0.6898652982681206, - "grad_norm": 0.682406135632238, - "learning_rate": 5.309436490634855e-05, - "loss": 0.6876, - "step": 10755 - }, - { - "epoch": 0.6901860166773572, - "grad_norm": 0.6884304464201593, - "learning_rate": 5.299552200769289e-05, - "loss": 0.6405, - "step": 10760 - }, - { - "epoch": 0.690506735086594, - "grad_norm": 0.9303606786373573, - "learning_rate": 5.289673802075601e-05, - "loss": 0.5867, - "step": 10765 - }, - { - "epoch": 0.6908274534958306, - "grad_norm": 0.8966481917540933, - "learning_rate": 5.279801306934598e-05, - "loss": 0.7328, - "step": 10770 - }, - { - "epoch": 0.6911481719050674, - "grad_norm": 0.8301326693368314, - "learning_rate": 5.269934727719685e-05, - "loss": 0.673, - "step": 10775 - }, - { - "epoch": 0.691468890314304, - "grad_norm": 0.9231136482226949, - "learning_rate": 5.260074076796859e-05, - "loss": 0.8013, - "step": 10780 - }, - { - "epoch": 0.6917896087235408, - "grad_norm": 0.6344332487623263, - "learning_rate": 5.250219366524687e-05, - "loss": 0.6477, - "step": 10785 - }, - { - "epoch": 0.6921103271327774, - "grad_norm": 0.6184925377516596, - "learning_rate": 5.240370609254288e-05, - "loss": 0.5484, - "step": 10790 - }, - { - "epoch": 0.6924310455420141, - "grad_norm": 0.7946249563385892, - "learning_rate": 5.230527817329316e-05, - "loss": 0.7455, - "step": 10795 - }, - { - "epoch": 0.6927517639512508, - "grad_norm": 0.5532448902772473, - "learning_rate": 5.22069100308596e-05, - "loss": 0.5486, - "step": 10800 - }, - { - "epoch": 0.6930724823604875, - "grad_norm": 0.6171304782365078, - "learning_rate": 5.210860178852903e-05, - "loss": 0.681, - "step": 10805 - }, - { - "epoch": 0.6933932007697242, - "grad_norm": 1.2635876971136728, - "learning_rate": 5.201035356951334e-05, - "loss": 0.6736, - "step": 10810 - }, - { - "epoch": 0.6937139191789609, - "grad_norm": 0.5205480150437042, - "learning_rate": 5.191216549694909e-05, - "loss": 0.5153, - "step": 10815 - }, - { - "epoch": 0.6940346375881976, - "grad_norm": 0.9442523324184217, - "learning_rate": 5.1814037693897464e-05, - "loss": 0.6185, - "step": 10820 - }, - { - "epoch": 0.6943553559974343, - "grad_norm": 1.1934267268940544, - "learning_rate": 5.1715970283344205e-05, - "loss": 0.6677, - "step": 10825 - }, - { - "epoch": 0.6946760744066709, - "grad_norm": 0.7652562771619698, - "learning_rate": 5.161796338819924e-05, - "loss": 0.7638, - "step": 10830 - }, - { - "epoch": 0.6949967928159076, - "grad_norm": 0.8994137424891815, - "learning_rate": 5.152001713129677e-05, - "loss": 0.5898, - "step": 10835 - }, - { - "epoch": 0.6953175112251443, - "grad_norm": 1.1569578317709166, - "learning_rate": 5.142213163539491e-05, - "loss": 0.5728, - "step": 10840 - }, - { - "epoch": 0.695638229634381, - "grad_norm": 0.9567492023568471, - "learning_rate": 5.132430702317562e-05, - "loss": 0.6646, - "step": 10845 - }, - { - "epoch": 0.6959589480436177, - "grad_norm": 0.9942541719053858, - "learning_rate": 5.122654341724462e-05, - "loss": 0.7398, - "step": 10850 - }, - { - "epoch": 0.6962796664528544, - "grad_norm": 0.69345380130255, - "learning_rate": 5.1128840940131064e-05, - "loss": 0.5888, - "step": 10855 - }, - { - "epoch": 0.6966003848620911, - "grad_norm": 0.8276215026435204, - "learning_rate": 5.103119971428765e-05, - "loss": 0.6781, - "step": 10860 - }, - { - "epoch": 0.6969211032713277, - "grad_norm": 0.7245991079345528, - "learning_rate": 5.093361986209015e-05, - "loss": 0.7442, - "step": 10865 - }, - { - "epoch": 0.6972418216805645, - "grad_norm": 0.7885551527874833, - "learning_rate": 5.0836101505837494e-05, - "loss": 0.6788, - "step": 10870 - }, - { - "epoch": 0.6975625400898011, - "grad_norm": 0.857297702149309, - "learning_rate": 5.073864476775157e-05, - "loss": 0.6013, - "step": 10875 - }, - { - "epoch": 0.6978832584990379, - "grad_norm": 0.6348649341355659, - "learning_rate": 5.064124976997693e-05, - "loss": 0.6045, - "step": 10880 - }, - { - "epoch": 0.6982039769082745, - "grad_norm": 0.6585605551969316, - "learning_rate": 5.054391663458087e-05, - "loss": 0.6171, - "step": 10885 - }, - { - "epoch": 0.6985246953175113, - "grad_norm": 0.986468962885202, - "learning_rate": 5.044664548355307e-05, - "loss": 0.7186, - "step": 10890 - }, - { - "epoch": 0.6988454137267479, - "grad_norm": 0.9785918246000489, - "learning_rate": 5.0349436438805494e-05, - "loss": 0.7877, - "step": 10895 - }, - { - "epoch": 0.6991661321359846, - "grad_norm": 1.5065392603292607, - "learning_rate": 5.025228962217241e-05, - "loss": 0.6156, - "step": 10900 - }, - { - "epoch": 0.6994868505452213, - "grad_norm": 0.9224408618353005, - "learning_rate": 5.015520515540996e-05, - "loss": 0.5855, - "step": 10905 - }, - { - "epoch": 0.699807568954458, - "grad_norm": 0.8828715863784493, - "learning_rate": 5.005818316019618e-05, - "loss": 0.6038, - "step": 10910 - }, - { - "epoch": 0.7001282873636947, - "grad_norm": 0.9568291721616811, - "learning_rate": 4.996122375813079e-05, - "loss": 0.6317, - "step": 10915 - }, - { - "epoch": 0.7004490057729313, - "grad_norm": 1.4247569725340374, - "learning_rate": 4.986432707073515e-05, - "loss": 0.7097, - "step": 10920 - }, - { - "epoch": 0.7007697241821681, - "grad_norm": 0.5257863778727976, - "learning_rate": 4.976749321945191e-05, - "loss": 0.5316, - "step": 10925 - }, - { - "epoch": 0.7010904425914047, - "grad_norm": 0.7116948483921095, - "learning_rate": 4.9670722325644993e-05, - "loss": 0.6438, - "step": 10930 - }, - { - "epoch": 0.7014111610006415, - "grad_norm": 0.8934801180351521, - "learning_rate": 4.957401451059948e-05, - "loss": 0.6628, - "step": 10935 - }, - { - "epoch": 0.7017318794098781, - "grad_norm": 0.5554525116078812, - "learning_rate": 4.9477369895521284e-05, - "loss": 0.6803, - "step": 10940 - }, - { - "epoch": 0.7020525978191148, - "grad_norm": 1.115600134036066, - "learning_rate": 4.938078860153725e-05, - "loss": 0.582, - "step": 10945 - }, - { - "epoch": 0.7023733162283515, - "grad_norm": 1.04204980372642, - "learning_rate": 4.928427074969475e-05, - "loss": 0.6396, - "step": 10950 - }, - { - "epoch": 0.7026940346375882, - "grad_norm": 0.6952203258967746, - "learning_rate": 4.918781646096161e-05, - "loss": 0.609, - "step": 10955 - }, - { - "epoch": 0.7030147530468249, - "grad_norm": 0.8455941974814938, - "learning_rate": 4.909142585622616e-05, - "loss": 0.7442, - "step": 10960 - }, - { - "epoch": 0.7033354714560616, - "grad_norm": 0.9358056805840572, - "learning_rate": 4.899509905629671e-05, - "loss": 0.6163, - "step": 10965 - }, - { - "epoch": 0.7036561898652983, - "grad_norm": 0.8368567909279319, - "learning_rate": 4.889883618190184e-05, - "loss": 0.6729, - "step": 10970 - }, - { - "epoch": 0.703976908274535, - "grad_norm": 0.9626200217934863, - "learning_rate": 4.8802637353689694e-05, - "loss": 0.6208, - "step": 10975 - }, - { - "epoch": 0.7042976266837716, - "grad_norm": 1.423525816978348, - "learning_rate": 4.870650269222845e-05, - "loss": 0.6301, - "step": 10980 - }, - { - "epoch": 0.7046183450930084, - "grad_norm": 0.8943539539791406, - "learning_rate": 4.8610432318005705e-05, - "loss": 0.8259, - "step": 10985 - }, - { - "epoch": 0.704939063502245, - "grad_norm": 1.0047328070171035, - "learning_rate": 4.851442635142846e-05, - "loss": 0.6759, - "step": 10990 - }, - { - "epoch": 0.7052597819114818, - "grad_norm": 0.864965532206175, - "learning_rate": 4.841848491282315e-05, - "loss": 0.6722, - "step": 10995 - }, - { - "epoch": 0.7055805003207184, - "grad_norm": 0.7890255740216144, - "learning_rate": 4.832260812243513e-05, - "loss": 0.6922, - "step": 11000 - }, - { - "epoch": 0.7059012187299551, - "grad_norm": 1.2389180866062235, - "learning_rate": 4.822679610042894e-05, - "loss": 0.6051, - "step": 11005 - }, - { - "epoch": 0.7062219371391918, - "grad_norm": 0.6998283128694094, - "learning_rate": 4.813104896688777e-05, - "loss": 0.6615, - "step": 11010 - }, - { - "epoch": 0.7065426555484284, - "grad_norm": 0.8090143409111475, - "learning_rate": 4.803536684181354e-05, - "loss": 0.7387, - "step": 11015 - }, - { - "epoch": 0.7068633739576652, - "grad_norm": 1.0370968663682347, - "learning_rate": 4.793974984512677e-05, - "loss": 0.7072, - "step": 11020 - }, - { - "epoch": 0.7071840923669018, - "grad_norm": 0.7853945975713512, - "learning_rate": 4.7844198096666246e-05, - "loss": 0.686, - "step": 11025 - }, - { - "epoch": 0.7075048107761386, - "grad_norm": 0.702386626377002, - "learning_rate": 4.774871171618901e-05, - "loss": 0.7127, - "step": 11030 - }, - { - "epoch": 0.7078255291853752, - "grad_norm": 1.0108215460660506, - "learning_rate": 4.765329082337027e-05, - "loss": 0.6434, - "step": 11035 - }, - { - "epoch": 0.708146247594612, - "grad_norm": 0.9899048924342988, - "learning_rate": 4.755793553780292e-05, - "loss": 0.7323, - "step": 11040 - }, - { - "epoch": 0.7084669660038486, - "grad_norm": 0.9147032893585562, - "learning_rate": 4.746264597899792e-05, - "loss": 0.6739, - "step": 11045 - }, - { - "epoch": 0.7087876844130853, - "grad_norm": 1.0330004401132, - "learning_rate": 4.736742226638363e-05, - "loss": 0.8609, - "step": 11050 - }, - { - "epoch": 0.709108402822322, - "grad_norm": 0.6548738796277453, - "learning_rate": 4.727226451930604e-05, - "loss": 0.6734, - "step": 11055 - }, - { - "epoch": 0.7094291212315587, - "grad_norm": 0.81714120996019, - "learning_rate": 4.717717285702835e-05, - "loss": 0.7523, - "step": 11060 - }, - { - "epoch": 0.7097498396407954, - "grad_norm": 0.885017113426685, - "learning_rate": 4.708214739873096e-05, - "loss": 0.5943, - "step": 11065 - }, - { - "epoch": 0.710070558050032, - "grad_norm": 0.8620179894720568, - "learning_rate": 4.698718826351135e-05, - "loss": 0.593, - "step": 11070 - }, - { - "epoch": 0.7103912764592688, - "grad_norm": 0.7663377237340008, - "learning_rate": 4.689229557038379e-05, - "loss": 0.7649, - "step": 11075 - }, - { - "epoch": 0.7107119948685054, - "grad_norm": 0.779291905786263, - "learning_rate": 4.679746943827939e-05, - "loss": 0.6231, - "step": 11080 - }, - { - "epoch": 0.7110327132777421, - "grad_norm": 0.8488045821194506, - "learning_rate": 4.6702709986045745e-05, - "loss": 0.5658, - "step": 11085 - }, - { - "epoch": 0.7113534316869788, - "grad_norm": 0.7591544492497508, - "learning_rate": 4.660801733244685e-05, - "loss": 0.5434, - "step": 11090 - }, - { - "epoch": 0.7116741500962155, - "grad_norm": 0.9324567178402989, - "learning_rate": 4.651339159616312e-05, - "loss": 0.7694, - "step": 11095 - }, - { - "epoch": 0.7119948685054522, - "grad_norm": 0.614241285241644, - "learning_rate": 4.641883289579095e-05, - "loss": 0.573, - "step": 11100 - }, - { - "epoch": 0.7123155869146889, - "grad_norm": 0.7297521213628075, - "learning_rate": 4.632434134984288e-05, - "loss": 0.7862, - "step": 11105 - }, - { - "epoch": 0.7126363053239256, - "grad_norm": 0.8547500506968054, - "learning_rate": 4.6229917076747056e-05, - "loss": 0.6224, - "step": 11110 - }, - { - "epoch": 0.7129570237331623, - "grad_norm": 1.1207952262364815, - "learning_rate": 4.613556019484754e-05, - "loss": 0.7452, - "step": 11115 - }, - { - "epoch": 0.7132777421423989, - "grad_norm": 0.5122245150734959, - "learning_rate": 4.604127082240379e-05, - "loss": 0.6216, - "step": 11120 - }, - { - "epoch": 0.7135984605516357, - "grad_norm": 0.6841888313664231, - "learning_rate": 4.5947049077590664e-05, - "loss": 0.6031, - "step": 11125 - }, - { - "epoch": 0.7139191789608723, - "grad_norm": 0.8085851937507493, - "learning_rate": 4.585289507849838e-05, - "loss": 0.5983, - "step": 11130 - }, - { - "epoch": 0.7142398973701091, - "grad_norm": 0.8748340585570812, - "learning_rate": 4.575880894313207e-05, - "loss": 0.6462, - "step": 11135 - }, - { - "epoch": 0.7145606157793457, - "grad_norm": 0.5741182108460992, - "learning_rate": 4.566479078941198e-05, - "loss": 0.6313, - "step": 11140 - }, - { - "epoch": 0.7148813341885825, - "grad_norm": 1.3368271859986067, - "learning_rate": 4.557084073517305e-05, - "loss": 0.5434, - "step": 11145 - }, - { - "epoch": 0.7152020525978191, - "grad_norm": 0.7497857375686727, - "learning_rate": 4.547695889816485e-05, - "loss": 0.557, - "step": 11150 - }, - { - "epoch": 0.7155227710070559, - "grad_norm": 0.8178864612038674, - "learning_rate": 4.538314539605155e-05, - "loss": 0.6979, - "step": 11155 - }, - { - "epoch": 0.7158434894162925, - "grad_norm": 0.8969560105198988, - "learning_rate": 4.528940034641158e-05, - "loss": 0.765, - "step": 11160 - }, - { - "epoch": 0.7161642078255291, - "grad_norm": 1.2265503200288288, - "learning_rate": 4.519572386673768e-05, - "loss": 0.5296, - "step": 11165 - }, - { - "epoch": 0.7164849262347659, - "grad_norm": 0.611571817659739, - "learning_rate": 4.510211607443654e-05, - "loss": 0.6223, - "step": 11170 - }, - { - "epoch": 0.7168056446440025, - "grad_norm": 0.8641143822600184, - "learning_rate": 4.500857708682883e-05, - "loss": 0.7204, - "step": 11175 - }, - { - "epoch": 0.7171263630532393, - "grad_norm": 0.9563759174291445, - "learning_rate": 4.491510702114894e-05, - "loss": 0.6728, - "step": 11180 - }, - { - "epoch": 0.7174470814624759, - "grad_norm": 0.5814502110654781, - "learning_rate": 4.482170599454489e-05, - "loss": 0.6652, - "step": 11185 - }, - { - "epoch": 0.7177677998717127, - "grad_norm": 1.0858563785495055, - "learning_rate": 4.472837412407825e-05, - "loss": 0.5543, - "step": 11190 - }, - { - "epoch": 0.7180885182809493, - "grad_norm": 0.6644009179012256, - "learning_rate": 4.4635111526723826e-05, - "loss": 0.8072, - "step": 11195 - }, - { - "epoch": 0.718409236690186, - "grad_norm": 0.9031430293191645, - "learning_rate": 4.454191831936958e-05, - "loss": 0.7006, - "step": 11200 - }, - { - "epoch": 0.7187299550994227, - "grad_norm": 0.6707442290616978, - "learning_rate": 4.4448794618816634e-05, - "loss": 0.6081, - "step": 11205 - }, - { - "epoch": 0.7190506735086594, - "grad_norm": 0.4567339031728235, - "learning_rate": 4.4355740541778837e-05, - "loss": 0.5996, - "step": 11210 - }, - { - "epoch": 0.7193713919178961, - "grad_norm": 0.8456434286308311, - "learning_rate": 4.426275620488293e-05, - "loss": 0.5902, - "step": 11215 - }, - { - "epoch": 0.7196921103271328, - "grad_norm": 0.7375984313670896, - "learning_rate": 4.416984172466814e-05, - "loss": 0.5592, - "step": 11220 - }, - { - "epoch": 0.7200128287363695, - "grad_norm": 1.001285278455043, - "learning_rate": 4.407699721758614e-05, - "loss": 0.4883, - "step": 11225 - }, - { - "epoch": 0.7203335471456062, - "grad_norm": 1.2917508534051378, - "learning_rate": 4.398422280000101e-05, - "loss": 0.6768, - "step": 11230 - }, - { - "epoch": 0.7206542655548428, - "grad_norm": 0.9685204099266428, - "learning_rate": 4.3891518588188875e-05, - "loss": 0.5883, - "step": 11235 - }, - { - "epoch": 0.7209749839640796, - "grad_norm": 0.5295383592814902, - "learning_rate": 4.379888469833791e-05, - "loss": 0.6229, - "step": 11240 - }, - { - "epoch": 0.7212957023733162, - "grad_norm": 0.9573436890552846, - "learning_rate": 4.370632124654811e-05, - "loss": 0.7156, - "step": 11245 - }, - { - "epoch": 0.721616420782553, - "grad_norm": 0.741578858748363, - "learning_rate": 4.361382834883131e-05, - "loss": 0.6556, - "step": 11250 - }, - { - "epoch": 0.7219371391917896, - "grad_norm": 0.916633580201409, - "learning_rate": 4.3521406121110807e-05, - "loss": 0.676, - "step": 11255 - }, - { - "epoch": 0.7222578576010263, - "grad_norm": 0.3992983111166088, - "learning_rate": 4.342905467922133e-05, - "loss": 0.4788, - "step": 11260 - }, - { - "epoch": 0.722578576010263, - "grad_norm": 1.4519640203571154, - "learning_rate": 4.333677413890896e-05, - "loss": 0.7693, - "step": 11265 - }, - { - "epoch": 0.7228992944194996, - "grad_norm": 1.014341854127021, - "learning_rate": 4.324456461583084e-05, - "loss": 0.7161, - "step": 11270 - }, - { - "epoch": 0.7232200128287364, - "grad_norm": 0.5798440252008737, - "learning_rate": 4.315242622555518e-05, - "loss": 0.5319, - "step": 11275 - }, - { - "epoch": 0.723540731237973, - "grad_norm": 1.3961411697107977, - "learning_rate": 4.306035908356097e-05, - "loss": 0.7755, - "step": 11280 - }, - { - "epoch": 0.7238614496472098, - "grad_norm": 0.7989332199967835, - "learning_rate": 4.296836330523791e-05, - "loss": 0.6761, - "step": 11285 - }, - { - "epoch": 0.7241821680564464, - "grad_norm": 0.5432452037456782, - "learning_rate": 4.287643900588634e-05, - "loss": 0.5398, - "step": 11290 - }, - { - "epoch": 0.7245028864656832, - "grad_norm": 1.1422963762576541, - "learning_rate": 4.278458630071687e-05, - "loss": 0.5321, - "step": 11295 - }, - { - "epoch": 0.7248236048749198, - "grad_norm": 0.6668170639427147, - "learning_rate": 4.2692805304850545e-05, - "loss": 0.5796, - "step": 11300 - }, - { - "epoch": 0.7251443232841565, - "grad_norm": 0.8515640505208902, - "learning_rate": 4.260109613331842e-05, - "loss": 0.6569, - "step": 11305 - }, - { - "epoch": 0.7254650416933932, - "grad_norm": 0.7014693919060985, - "learning_rate": 4.250945890106156e-05, - "loss": 0.6856, - "step": 11310 - }, - { - "epoch": 0.7257857601026299, - "grad_norm": 1.067030988068662, - "learning_rate": 4.241789372293087e-05, - "loss": 0.7749, - "step": 11315 - }, - { - "epoch": 0.7261064785118666, - "grad_norm": 0.7479024679363765, - "learning_rate": 4.232640071368691e-05, - "loss": 0.5478, - "step": 11320 - }, - { - "epoch": 0.7264271969211032, - "grad_norm": 1.0084686752935972, - "learning_rate": 4.22349799879999e-05, - "loss": 0.7788, - "step": 11325 - }, - { - "epoch": 0.72674791533034, - "grad_norm": 0.6585878195188157, - "learning_rate": 4.214363166044932e-05, - "loss": 0.6133, - "step": 11330 - }, - { - "epoch": 0.7270686337395766, - "grad_norm": 0.6784141958893567, - "learning_rate": 4.205235584552407e-05, - "loss": 0.6019, - "step": 11335 - }, - { - "epoch": 0.7273893521488134, - "grad_norm": 0.993300088957976, - "learning_rate": 4.1961152657622024e-05, - "loss": 0.7166, - "step": 11340 - }, - { - "epoch": 0.72771007055805, - "grad_norm": 0.8874942343310022, - "learning_rate": 4.1870022211050074e-05, - "loss": 0.6981, - "step": 11345 - }, - { - "epoch": 0.7280307889672867, - "grad_norm": 1.4921657931640064, - "learning_rate": 4.177896462002402e-05, - "loss": 0.5832, - "step": 11350 - }, - { - "epoch": 0.7283515073765234, - "grad_norm": 0.7853192040977804, - "learning_rate": 4.168797999866827e-05, - "loss": 0.7185, - "step": 11355 - }, - { - "epoch": 0.7286722257857601, - "grad_norm": 0.7775032508697538, - "learning_rate": 4.159706846101574e-05, - "loss": 0.5868, - "step": 11360 - }, - { - "epoch": 0.7289929441949968, - "grad_norm": 0.8328166231193795, - "learning_rate": 4.1506230121007894e-05, - "loss": 0.6707, - "step": 11365 - }, - { - "epoch": 0.7293136626042335, - "grad_norm": 1.1556231103657886, - "learning_rate": 4.141546509249433e-05, - "loss": 0.602, - "step": 11370 - }, - { - "epoch": 0.7296343810134702, - "grad_norm": 0.6535692635433068, - "learning_rate": 4.1324773489232794e-05, - "loss": 0.7015, - "step": 11375 - }, - { - "epoch": 0.7299550994227069, - "grad_norm": 1.0308989718059964, - "learning_rate": 4.1234155424889e-05, - "loss": 0.6524, - "step": 11380 - }, - { - "epoch": 0.7302758178319435, - "grad_norm": 0.9042723107486375, - "learning_rate": 4.1143611013036556e-05, - "loss": 0.6932, - "step": 11385 - }, - { - "epoch": 0.7305965362411803, - "grad_norm": 1.045581159518661, - "learning_rate": 4.105314036715668e-05, - "loss": 0.598, - "step": 11390 - }, - { - "epoch": 0.7309172546504169, - "grad_norm": 0.720438985489428, - "learning_rate": 4.096274360063814e-05, - "loss": 0.6927, - "step": 11395 - }, - { - "epoch": 0.7312379730596537, - "grad_norm": 0.7837057060205996, - "learning_rate": 4.087242082677721e-05, - "loss": 0.6271, - "step": 11400 - }, - { - "epoch": 0.7315586914688903, - "grad_norm": 0.9277273501073059, - "learning_rate": 4.0782172158777296e-05, - "loss": 0.7232, - "step": 11405 - }, - { - "epoch": 0.731879409878127, - "grad_norm": 0.7663141809384151, - "learning_rate": 4.069199770974904e-05, - "loss": 0.5593, - "step": 11410 - }, - { - "epoch": 0.7322001282873637, - "grad_norm": 0.7732548069785231, - "learning_rate": 4.0601897592709984e-05, - "loss": 0.6973, - "step": 11415 - }, - { - "epoch": 0.7325208466966003, - "grad_norm": 1.0148083244026747, - "learning_rate": 4.0511871920584486e-05, - "loss": 0.8616, - "step": 11420 - }, - { - "epoch": 0.7328415651058371, - "grad_norm": 0.7789337008538708, - "learning_rate": 4.042192080620374e-05, - "loss": 0.7399, - "step": 11425 - }, - { - "epoch": 0.7331622835150737, - "grad_norm": 0.7411707815027391, - "learning_rate": 4.033204436230532e-05, - "loss": 0.7219, - "step": 11430 - }, - { - "epoch": 0.7334830019243105, - "grad_norm": 0.9973447184162525, - "learning_rate": 4.0242242701533396e-05, - "loss": 0.6579, - "step": 11435 - }, - { - "epoch": 0.7338037203335471, - "grad_norm": 0.5830094144343125, - "learning_rate": 4.015251593643818e-05, - "loss": 0.7666, - "step": 11440 - }, - { - "epoch": 0.7341244387427839, - "grad_norm": 0.9049494653802453, - "learning_rate": 4.006286417947627e-05, - "loss": 0.7362, - "step": 11445 - }, - { - "epoch": 0.7344451571520205, - "grad_norm": 1.1555455068409544, - "learning_rate": 3.9973287543010064e-05, - "loss": 0.7706, - "step": 11450 - }, - { - "epoch": 0.7347658755612572, - "grad_norm": 0.8236939327253207, - "learning_rate": 3.9883786139307864e-05, - "loss": 0.4883, - "step": 11455 - }, - { - "epoch": 0.7350865939704939, - "grad_norm": 0.7242616375495603, - "learning_rate": 3.979436008054377e-05, - "loss": 0.6765, - "step": 11460 - }, - { - "epoch": 0.7354073123797306, - "grad_norm": 0.8282782204794581, - "learning_rate": 3.97050094787973e-05, - "loss": 0.6393, - "step": 11465 - }, - { - "epoch": 0.7357280307889673, - "grad_norm": 0.5484580528486228, - "learning_rate": 3.9615734446053534e-05, - "loss": 0.6273, - "step": 11470 - }, - { - "epoch": 0.736048749198204, - "grad_norm": 0.8342001080027434, - "learning_rate": 3.952653509420277e-05, - "loss": 0.6517, - "step": 11475 - }, - { - "epoch": 0.7363694676074407, - "grad_norm": 0.8544406097793438, - "learning_rate": 3.9437411535040416e-05, - "loss": 0.5679, - "step": 11480 - }, - { - "epoch": 0.7366901860166774, - "grad_norm": 0.8001118287868482, - "learning_rate": 3.9348363880267006e-05, - "loss": 0.7448, - "step": 11485 - }, - { - "epoch": 0.737010904425914, - "grad_norm": 1.0049068620138881, - "learning_rate": 3.92593922414878e-05, - "loss": 0.5381, - "step": 11490 - }, - { - "epoch": 0.7373316228351507, - "grad_norm": 1.0836198813580136, - "learning_rate": 3.9170496730212944e-05, - "loss": 0.6346, - "step": 11495 - }, - { - "epoch": 0.7376523412443874, - "grad_norm": 0.4690219622238173, - "learning_rate": 3.9081677457857045e-05, - "loss": 0.5469, - "step": 11500 - }, - { - "epoch": 0.7379730596536241, - "grad_norm": 0.7653256546259366, - "learning_rate": 3.899293453573919e-05, - "loss": 0.6005, - "step": 11505 - }, - { - "epoch": 0.7382937780628608, - "grad_norm": 0.8939110106983141, - "learning_rate": 3.890426807508278e-05, - "loss": 0.6783, - "step": 11510 - }, - { - "epoch": 0.7386144964720975, - "grad_norm": 0.775603525768831, - "learning_rate": 3.881567818701538e-05, - "loss": 0.6916, - "step": 11515 - }, - { - "epoch": 0.7389352148813342, - "grad_norm": 1.3430493149234304, - "learning_rate": 3.872716498256863e-05, - "loss": 0.5578, - "step": 11520 - }, - { - "epoch": 0.7392559332905709, - "grad_norm": 0.715829315420304, - "learning_rate": 3.863872857267802e-05, - "loss": 0.7686, - "step": 11525 - }, - { - "epoch": 0.7395766516998076, - "grad_norm": 0.6732314863048653, - "learning_rate": 3.8550369068182735e-05, - "loss": 0.4974, - "step": 11530 - }, - { - "epoch": 0.7398973701090442, - "grad_norm": 0.5624440967305854, - "learning_rate": 3.846208657982572e-05, - "loss": 0.5765, - "step": 11535 - }, - { - "epoch": 0.740218088518281, - "grad_norm": 0.9351668361698933, - "learning_rate": 3.837388121825323e-05, - "loss": 0.6699, - "step": 11540 - }, - { - "epoch": 0.7405388069275176, - "grad_norm": 1.0442410475484458, - "learning_rate": 3.828575309401501e-05, - "loss": 0.5723, - "step": 11545 - }, - { - "epoch": 0.7408595253367544, - "grad_norm": 0.897573742077218, - "learning_rate": 3.819770231756389e-05, - "loss": 0.7723, - "step": 11550 - }, - { - "epoch": 0.741180243745991, - "grad_norm": 0.6333361868228848, - "learning_rate": 3.810972899925575e-05, - "loss": 0.5929, - "step": 11555 - }, - { - "epoch": 0.7415009621552278, - "grad_norm": 1.2414234428777005, - "learning_rate": 3.802183324934952e-05, - "loss": 0.6754, - "step": 11560 - }, - { - "epoch": 0.7418216805644644, - "grad_norm": 0.8678280206604037, - "learning_rate": 3.793401517800672e-05, - "loss": 0.434, - "step": 11565 - }, - { - "epoch": 0.742142398973701, - "grad_norm": 0.8589814705072975, - "learning_rate": 3.784627489529177e-05, - "loss": 0.7005, - "step": 11570 - }, - { - "epoch": 0.7424631173829378, - "grad_norm": 1.096069158153898, - "learning_rate": 3.775861251117128e-05, - "loss": 0.6066, - "step": 11575 - }, - { - "epoch": 0.7427838357921744, - "grad_norm": 0.8956575121848285, - "learning_rate": 3.76710281355145e-05, - "loss": 0.5453, - "step": 11580 - }, - { - "epoch": 0.7431045542014112, - "grad_norm": 0.9901238623869012, - "learning_rate": 3.7583521878092766e-05, - "loss": 0.6829, - "step": 11585 - }, - { - "epoch": 0.7434252726106478, - "grad_norm": 1.1556330315855146, - "learning_rate": 3.749609384857952e-05, - "loss": 0.6617, - "step": 11590 - }, - { - "epoch": 0.7437459910198846, - "grad_norm": 0.8946200380979793, - "learning_rate": 3.7408744156550235e-05, - "loss": 0.6454, - "step": 11595 - }, - { - "epoch": 0.7440667094291212, - "grad_norm": 0.6811470722359575, - "learning_rate": 3.73214729114821e-05, - "loss": 0.558, - "step": 11600 - }, - { - "epoch": 0.7443874278383579, - "grad_norm": 1.2129672803037883, - "learning_rate": 3.72342802227541e-05, - "loss": 0.6829, - "step": 11605 - }, - { - "epoch": 0.7447081462475946, - "grad_norm": 0.7287815359687029, - "learning_rate": 3.7147166199646665e-05, - "loss": 0.7291, - "step": 11610 - }, - { - "epoch": 0.7450288646568313, - "grad_norm": 0.7381906467511818, - "learning_rate": 3.706013095134162e-05, - "loss": 0.673, - "step": 11615 - }, - { - "epoch": 0.745349583066068, - "grad_norm": 1.2592430310132843, - "learning_rate": 3.697317458692219e-05, - "loss": 0.6236, - "step": 11620 - }, - { - "epoch": 0.7456703014753047, - "grad_norm": 0.6359130442368803, - "learning_rate": 3.688629721537256e-05, - "loss": 0.6774, - "step": 11625 - }, - { - "epoch": 0.7459910198845414, - "grad_norm": 0.9163313019367859, - "learning_rate": 3.679949894557808e-05, - "loss": 0.6353, - "step": 11630 - }, - { - "epoch": 0.7463117382937781, - "grad_norm": 0.66124758919148, - "learning_rate": 3.671277988632484e-05, - "loss": 0.6667, - "step": 11635 - }, - { - "epoch": 0.7466324567030147, - "grad_norm": 1.093053112833277, - "learning_rate": 3.6626140146299715e-05, - "loss": 0.6706, - "step": 11640 - }, - { - "epoch": 0.7469531751122515, - "grad_norm": 0.585918591610346, - "learning_rate": 3.653957983409012e-05, - "loss": 0.596, - "step": 11645 - }, - { - "epoch": 0.7472738935214881, - "grad_norm": 0.8785492282676739, - "learning_rate": 3.6453099058183936e-05, - "loss": 0.8345, - "step": 11650 - }, - { - "epoch": 0.7475946119307249, - "grad_norm": 1.0886821917358311, - "learning_rate": 3.6366697926969415e-05, - "loss": 0.7223, - "step": 11655 - }, - { - "epoch": 0.7479153303399615, - "grad_norm": 0.8352362172770396, - "learning_rate": 3.628037654873489e-05, - "loss": 0.7974, - "step": 11660 - }, - { - "epoch": 0.7482360487491982, - "grad_norm": 0.6846055972157917, - "learning_rate": 3.619413503166888e-05, - "loss": 0.7061, - "step": 11665 - }, - { - "epoch": 0.7485567671584349, - "grad_norm": 1.1651393765637517, - "learning_rate": 3.610797348385965e-05, - "loss": 0.6326, - "step": 11670 - }, - { - "epoch": 0.7488774855676715, - "grad_norm": 0.8887525600265255, - "learning_rate": 3.60218920132953e-05, - "loss": 0.6543, - "step": 11675 - }, - { - "epoch": 0.7491982039769083, - "grad_norm": 0.47701205334570973, - "learning_rate": 3.5935890727863653e-05, - "loss": 0.5758, - "step": 11680 - }, - { - "epoch": 0.7495189223861449, - "grad_norm": 1.0003500503360518, - "learning_rate": 3.5849969735351917e-05, - "loss": 0.7507, - "step": 11685 - }, - { - "epoch": 0.7498396407953817, - "grad_norm": 0.9203454434610632, - "learning_rate": 3.57641291434467e-05, - "loss": 0.7704, - "step": 11690 - }, - { - "epoch": 0.7501603592046183, - "grad_norm": 1.035485843783069, - "learning_rate": 3.5678369059733884e-05, - "loss": 0.7227, - "step": 11695 - }, - { - "epoch": 0.7504810776138551, - "grad_norm": 0.8574293258900955, - "learning_rate": 3.559268959169842e-05, - "loss": 0.5932, - "step": 11700 - }, - { - "epoch": 0.7508017960230917, - "grad_norm": 1.0713424994868566, - "learning_rate": 3.55070908467242e-05, - "loss": 0.7351, - "step": 11705 - }, - { - "epoch": 0.7511225144323285, - "grad_norm": 0.7637351663255856, - "learning_rate": 3.542157293209394e-05, - "loss": 0.5982, - "step": 11710 - }, - { - "epoch": 0.7514432328415651, - "grad_norm": 0.7283758639132564, - "learning_rate": 3.533613595498914e-05, - "loss": 0.6919, - "step": 11715 - }, - { - "epoch": 0.7517639512508018, - "grad_norm": 0.9199615101682994, - "learning_rate": 3.525078002248974e-05, - "loss": 0.834, - "step": 11720 - }, - { - "epoch": 0.7520846696600385, - "grad_norm": 0.685052311744196, - "learning_rate": 3.516550524157415e-05, - "loss": 0.7766, - "step": 11725 - }, - { - "epoch": 0.7524053880692751, - "grad_norm": 0.9557933778705214, - "learning_rate": 3.508031171911913e-05, - "loss": 0.7334, - "step": 11730 - }, - { - "epoch": 0.7527261064785119, - "grad_norm": 0.8217799938196116, - "learning_rate": 3.4995199561899496e-05, - "loss": 0.6719, - "step": 11735 - }, - { - "epoch": 0.7530468248877485, - "grad_norm": 0.8490165290571312, - "learning_rate": 3.491016887658819e-05, - "loss": 0.6352, - "step": 11740 - }, - { - "epoch": 0.7533675432969853, - "grad_norm": 1.0096737759482532, - "learning_rate": 3.4825219769755955e-05, - "loss": 0.6278, - "step": 11745 - }, - { - "epoch": 0.7536882617062219, - "grad_norm": 0.8116824311381272, - "learning_rate": 3.4740352347871294e-05, - "loss": 0.5794, - "step": 11750 - }, - { - "epoch": 0.7540089801154586, - "grad_norm": 1.0567664205528664, - "learning_rate": 3.4655566717300433e-05, - "loss": 0.5817, - "step": 11755 - }, - { - "epoch": 0.7543296985246953, - "grad_norm": 0.8458879335378663, - "learning_rate": 3.457086298430696e-05, - "loss": 0.5779, - "step": 11760 - }, - { - "epoch": 0.754650416933932, - "grad_norm": 0.8982863213171639, - "learning_rate": 3.448624125505194e-05, - "loss": 0.6697, - "step": 11765 - }, - { - "epoch": 0.7549711353431687, - "grad_norm": 0.8975989314029491, - "learning_rate": 3.440170163559355e-05, - "loss": 0.7032, - "step": 11770 - }, - { - "epoch": 0.7552918537524054, - "grad_norm": 0.8729443546989577, - "learning_rate": 3.4317244231887125e-05, - "loss": 0.8033, - "step": 11775 - }, - { - "epoch": 0.7556125721616421, - "grad_norm": 1.0239920545191055, - "learning_rate": 3.423286914978493e-05, - "loss": 0.672, - "step": 11780 - }, - { - "epoch": 0.7559332905708788, - "grad_norm": 0.7010189828092076, - "learning_rate": 3.414857649503602e-05, - "loss": 0.6409, - "step": 11785 - }, - { - "epoch": 0.7562540089801154, - "grad_norm": 0.8719062018189001, - "learning_rate": 3.4064366373286274e-05, - "loss": 0.7164, - "step": 11790 - }, - { - "epoch": 0.7565747273893522, - "grad_norm": 0.7198915627914316, - "learning_rate": 3.398023889007794e-05, - "loss": 0.6249, - "step": 11795 - }, - { - "epoch": 0.7568954457985888, - "grad_norm": 0.8718719431875859, - "learning_rate": 3.389619415084989e-05, - "loss": 0.6064, - "step": 11800 - }, - { - "epoch": 0.7572161642078256, - "grad_norm": 0.8120042747717762, - "learning_rate": 3.381223226093715e-05, - "loss": 0.5433, - "step": 11805 - }, - { - "epoch": 0.7575368826170622, - "grad_norm": 0.9647874073108456, - "learning_rate": 3.3728353325570915e-05, - "loss": 0.7064, - "step": 11810 - }, - { - "epoch": 0.757857601026299, - "grad_norm": 1.2538875949194586, - "learning_rate": 3.364455744987853e-05, - "loss": 0.5527, - "step": 11815 - }, - { - "epoch": 0.7581783194355356, - "grad_norm": 1.178257170426357, - "learning_rate": 3.35608447388831e-05, - "loss": 0.6565, - "step": 11820 - }, - { - "epoch": 0.7584990378447722, - "grad_norm": 0.8864713208910722, - "learning_rate": 3.3477215297503605e-05, - "loss": 0.5459, - "step": 11825 - }, - { - "epoch": 0.758819756254009, - "grad_norm": 0.81482691903865, - "learning_rate": 3.339366923055458e-05, - "loss": 0.6798, - "step": 11830 - }, - { - "epoch": 0.7591404746632456, - "grad_norm": 0.7808704507490104, - "learning_rate": 3.3310206642746125e-05, - "loss": 0.6767, - "step": 11835 - }, - { - "epoch": 0.7594611930724824, - "grad_norm": 0.6063874143510388, - "learning_rate": 3.3226827638683665e-05, - "loss": 0.7335, - "step": 11840 - }, - { - "epoch": 0.759781911481719, - "grad_norm": 0.9081154038511268, - "learning_rate": 3.3143532322867865e-05, - "loss": 0.7284, - "step": 11845 - }, - { - "epoch": 0.7601026298909558, - "grad_norm": 0.845045773951182, - "learning_rate": 3.306032079969459e-05, - "loss": 0.7782, - "step": 11850 - }, - { - "epoch": 0.7604233483001924, - "grad_norm": 0.8991436429034236, - "learning_rate": 3.29771931734546e-05, - "loss": 0.7148, - "step": 11855 - }, - { - "epoch": 0.7607440667094291, - "grad_norm": 0.9742693305593477, - "learning_rate": 3.2894149548333495e-05, - "loss": 0.6244, - "step": 11860 - }, - { - "epoch": 0.7610647851186658, - "grad_norm": 0.6773700996601912, - "learning_rate": 3.281119002841169e-05, - "loss": 0.5872, - "step": 11865 - }, - { - "epoch": 0.7613855035279025, - "grad_norm": 0.8384804126775537, - "learning_rate": 3.2728314717664055e-05, - "loss": 0.7845, - "step": 11870 - }, - { - "epoch": 0.7617062219371392, - "grad_norm": 1.1357544575552236, - "learning_rate": 3.264552371996008e-05, - "loss": 0.6953, - "step": 11875 - }, - { - "epoch": 0.7620269403463759, - "grad_norm": 0.8516566580601438, - "learning_rate": 3.256281713906343e-05, - "loss": 0.7256, - "step": 11880 - }, - { - "epoch": 0.7623476587556126, - "grad_norm": 1.2370541167396898, - "learning_rate": 3.248019507863203e-05, - "loss": 0.7604, - "step": 11885 - }, - { - "epoch": 0.7626683771648493, - "grad_norm": 0.9542563866917992, - "learning_rate": 3.2397657642217926e-05, - "loss": 0.5988, - "step": 11890 - }, - { - "epoch": 0.762989095574086, - "grad_norm": 1.0432964488893417, - "learning_rate": 3.2315204933266996e-05, - "loss": 0.6991, - "step": 11895 - }, - { - "epoch": 0.7633098139833226, - "grad_norm": 1.0011228778914865, - "learning_rate": 3.223283705511908e-05, - "loss": 0.7298, - "step": 11900 - }, - { - "epoch": 0.7636305323925593, - "grad_norm": 1.5274397488438434, - "learning_rate": 3.215055411100748e-05, - "loss": 0.6428, - "step": 11905 - }, - { - "epoch": 0.763951250801796, - "grad_norm": 0.876587920734237, - "learning_rate": 3.2068356204059255e-05, - "loss": 0.7244, - "step": 11910 - }, - { - "epoch": 0.7642719692110327, - "grad_norm": 0.6121339451327354, - "learning_rate": 3.198624343729479e-05, - "loss": 0.7324, - "step": 11915 - }, - { - "epoch": 0.7645926876202694, - "grad_norm": 0.8464048080490233, - "learning_rate": 3.190421591362772e-05, - "loss": 0.7464, - "step": 11920 - }, - { - "epoch": 0.7649134060295061, - "grad_norm": 0.9880557475834854, - "learning_rate": 3.1822273735864984e-05, - "loss": 0.71, - "step": 11925 - }, - { - "epoch": 0.7652341244387428, - "grad_norm": 1.0295342644337049, - "learning_rate": 3.174041700670638e-05, - "loss": 0.4895, - "step": 11930 - }, - { - "epoch": 0.7655548428479795, - "grad_norm": 0.7076312841936536, - "learning_rate": 3.165864582874477e-05, - "loss": 0.691, - "step": 11935 - }, - { - "epoch": 0.7658755612572161, - "grad_norm": 1.0135591193887252, - "learning_rate": 3.1576960304465705e-05, - "loss": 0.6266, - "step": 11940 - }, - { - "epoch": 0.7661962796664529, - "grad_norm": 1.0323761526191306, - "learning_rate": 3.149536053624735e-05, - "loss": 0.7654, - "step": 11945 - }, - { - "epoch": 0.7665169980756895, - "grad_norm": 1.55635605359068, - "learning_rate": 3.1413846626360536e-05, - "loss": 0.7714, - "step": 11950 - }, - { - "epoch": 0.7668377164849263, - "grad_norm": 0.9497662276751877, - "learning_rate": 3.133241867696829e-05, - "loss": 0.6683, - "step": 11955 - }, - { - "epoch": 0.7671584348941629, - "grad_norm": 0.8979757336357795, - "learning_rate": 3.1251076790126086e-05, - "loss": 0.7516, - "step": 11960 - }, - { - "epoch": 0.7674791533033997, - "grad_norm": 0.764820887022675, - "learning_rate": 3.1169821067781425e-05, - "loss": 0.5679, - "step": 11965 - }, - { - "epoch": 0.7677998717126363, - "grad_norm": 0.5942733392588654, - "learning_rate": 3.1088651611773834e-05, - "loss": 0.5194, - "step": 11970 - }, - { - "epoch": 0.768120590121873, - "grad_norm": 0.9490603016131256, - "learning_rate": 3.100756852383473e-05, - "loss": 0.5963, - "step": 11975 - }, - { - "epoch": 0.7684413085311097, - "grad_norm": 0.7616783689998372, - "learning_rate": 3.092657190558727e-05, - "loss": 0.6785, - "step": 11980 - }, - { - "epoch": 0.7687620269403463, - "grad_norm": 0.830417639785896, - "learning_rate": 3.084566185854628e-05, - "loss": 0.5892, - "step": 11985 - }, - { - "epoch": 0.7690827453495831, - "grad_norm": 1.0515557973724121, - "learning_rate": 3.076483848411803e-05, - "loss": 0.6846, - "step": 11990 - }, - { - "epoch": 0.7694034637588197, - "grad_norm": 0.9480637021643955, - "learning_rate": 3.068410188360022e-05, - "loss": 0.741, - "step": 11995 - }, - { - "epoch": 0.7697241821680565, - "grad_norm": 0.9435811108298884, - "learning_rate": 3.0603452158181744e-05, - "loss": 0.7019, - "step": 12000 - }, - { - "epoch": 0.7700449005772931, - "grad_norm": 0.7019989507064325, - "learning_rate": 3.052288940894259e-05, - "loss": 0.5835, - "step": 12005 - }, - { - "epoch": 0.7703656189865298, - "grad_norm": 0.6770008543875123, - "learning_rate": 3.0442413736853846e-05, - "loss": 0.6826, - "step": 12010 - }, - { - "epoch": 0.7706863373957665, - "grad_norm": 0.7178710129095005, - "learning_rate": 3.036202524277735e-05, - "loss": 0.7033, - "step": 12015 - }, - { - "epoch": 0.7710070558050032, - "grad_norm": 0.7298827842977621, - "learning_rate": 3.0281724027465708e-05, - "loss": 0.6847, - "step": 12020 - }, - { - "epoch": 0.7713277742142399, - "grad_norm": 1.2518124809303286, - "learning_rate": 3.020151019156221e-05, - "loss": 0.5659, - "step": 12025 - }, - { - "epoch": 0.7716484926234766, - "grad_norm": 0.7542697248961158, - "learning_rate": 3.0121383835600513e-05, - "loss": 0.7575, - "step": 12030 - }, - { - "epoch": 0.7719692110327133, - "grad_norm": 0.779461786694263, - "learning_rate": 3.0041345060004776e-05, - "loss": 0.7238, - "step": 12035 - }, - { - "epoch": 0.77228992944195, - "grad_norm": 1.0655675292269764, - "learning_rate": 2.9961393965089203e-05, - "loss": 0.7475, - "step": 12040 - }, - { - "epoch": 0.7726106478511866, - "grad_norm": 1.1044101389504177, - "learning_rate": 2.98815306510583e-05, - "loss": 0.6353, - "step": 12045 - }, - { - "epoch": 0.7729313662604234, - "grad_norm": 0.8533414942650657, - "learning_rate": 2.9801755218006433e-05, - "loss": 0.5867, - "step": 12050 - }, - { - "epoch": 0.77325208466966, - "grad_norm": 1.0958682723686255, - "learning_rate": 2.9722067765917838e-05, - "loss": 0.5739, - "step": 12055 - }, - { - "epoch": 0.7735728030788968, - "grad_norm": 0.7152332630816656, - "learning_rate": 2.9642468394666557e-05, - "loss": 0.6729, - "step": 12060 - }, - { - "epoch": 0.7738935214881334, - "grad_norm": 0.9986989562442445, - "learning_rate": 2.956295720401612e-05, - "loss": 0.6726, - "step": 12065 - }, - { - "epoch": 0.7742142398973701, - "grad_norm": 0.9811723796412208, - "learning_rate": 2.9483534293619685e-05, - "loss": 0.5619, - "step": 12070 - }, - { - "epoch": 0.7745349583066068, - "grad_norm": 0.9118000616924434, - "learning_rate": 2.9404199763019645e-05, - "loss": 0.6516, - "step": 12075 - }, - { - "epoch": 0.7748556767158435, - "grad_norm": 0.8942392291019036, - "learning_rate": 2.932495371164764e-05, - "loss": 0.7949, - "step": 12080 - }, - { - "epoch": 0.7751763951250802, - "grad_norm": 0.9745393445698103, - "learning_rate": 2.9245796238824496e-05, - "loss": 0.6836, - "step": 12085 - }, - { - "epoch": 0.7754971135343168, - "grad_norm": 0.624918898789372, - "learning_rate": 2.916672744375991e-05, - "loss": 0.5384, - "step": 12090 - }, - { - "epoch": 0.7758178319435536, - "grad_norm": 0.7577038101937041, - "learning_rate": 2.908774742555257e-05, - "loss": 0.7673, - "step": 12095 - }, - { - "epoch": 0.7761385503527902, - "grad_norm": 1.0261935822819983, - "learning_rate": 2.9008856283189778e-05, - "loss": 0.5503, - "step": 12100 - }, - { - "epoch": 0.776459268762027, - "grad_norm": 0.8962534874969645, - "learning_rate": 2.8930054115547488e-05, - "loss": 0.6463, - "step": 12105 - }, - { - "epoch": 0.7767799871712636, - "grad_norm": 0.70250181904508, - "learning_rate": 2.8851341021390155e-05, - "loss": 0.5889, - "step": 12110 - }, - { - "epoch": 0.7771007055805004, - "grad_norm": 0.6163717028953168, - "learning_rate": 2.877271709937056e-05, - "loss": 0.6057, - "step": 12115 - }, - { - "epoch": 0.777421423989737, - "grad_norm": 1.139236879333557, - "learning_rate": 2.8694182448029795e-05, - "loss": 0.6143, - "step": 12120 - }, - { - "epoch": 0.7777421423989737, - "grad_norm": 0.8597109154676085, - "learning_rate": 2.8615737165796974e-05, - "loss": 0.6156, - "step": 12125 - }, - { - "epoch": 0.7780628608082104, - "grad_norm": 1.0377068227971646, - "learning_rate": 2.8537381350989288e-05, - "loss": 0.7131, - "step": 12130 - }, - { - "epoch": 0.778383579217447, - "grad_norm": 0.9278713523838525, - "learning_rate": 2.8459115101811752e-05, - "loss": 0.5643, - "step": 12135 - }, - { - "epoch": 0.7787042976266838, - "grad_norm": 0.9111079193714665, - "learning_rate": 2.838093851635708e-05, - "loss": 0.7114, - "step": 12140 - }, - { - "epoch": 0.7790250160359204, - "grad_norm": 0.636013231630343, - "learning_rate": 2.8302851692605748e-05, - "loss": 0.5425, - "step": 12145 - }, - { - "epoch": 0.7793457344451572, - "grad_norm": 0.9437606048473691, - "learning_rate": 2.8224854728425555e-05, - "loss": 0.7358, - "step": 12150 - }, - { - "epoch": 0.7796664528543938, - "grad_norm": 0.9877250051200861, - "learning_rate": 2.814694772157184e-05, - "loss": 0.7881, - "step": 12155 - }, - { - "epoch": 0.7799871712636305, - "grad_norm": 0.6355892070558739, - "learning_rate": 2.806913076968709e-05, - "loss": 0.5765, - "step": 12160 - }, - { - "epoch": 0.7803078896728672, - "grad_norm": 0.8553618089212107, - "learning_rate": 2.7991403970300923e-05, - "loss": 0.6339, - "step": 12165 - }, - { - "epoch": 0.7806286080821039, - "grad_norm": 0.7956244875523378, - "learning_rate": 2.7913767420830105e-05, - "loss": 0.6316, - "step": 12170 - }, - { - "epoch": 0.7809493264913406, - "grad_norm": 0.74745099568378, - "learning_rate": 2.7836221218578052e-05, - "loss": 0.5178, - "step": 12175 - }, - { - "epoch": 0.7812700449005773, - "grad_norm": 2.797197105902477, - "learning_rate": 2.775876546073518e-05, - "loss": 0.7453, - "step": 12180 - }, - { - "epoch": 0.781590763309814, - "grad_norm": 0.8203117179056878, - "learning_rate": 2.768140024437842e-05, - "loss": 0.7123, - "step": 12185 - }, - { - "epoch": 0.7819114817190507, - "grad_norm": 0.8491800107534502, - "learning_rate": 2.7604125666471202e-05, - "loss": 0.6031, - "step": 12190 - }, - { - "epoch": 0.7822322001282873, - "grad_norm": 0.7920825834762689, - "learning_rate": 2.7526941823863494e-05, - "loss": 0.6918, - "step": 12195 - }, - { - "epoch": 0.7825529185375241, - "grad_norm": 0.8070095630772426, - "learning_rate": 2.744984881329139e-05, - "loss": 0.5921, - "step": 12200 - }, - { - "epoch": 0.7828736369467607, - "grad_norm": 0.6455255637368961, - "learning_rate": 2.7372846731377265e-05, - "loss": 0.6382, - "step": 12205 - }, - { - "epoch": 0.7831943553559975, - "grad_norm": 0.92556283214074, - "learning_rate": 2.7295935674629457e-05, - "loss": 0.5116, - "step": 12210 - }, - { - "epoch": 0.7835150737652341, - "grad_norm": 1.1170799846804207, - "learning_rate": 2.7219115739442215e-05, - "loss": 0.6566, - "step": 12215 - }, - { - "epoch": 0.7838357921744709, - "grad_norm": 0.5890009042735036, - "learning_rate": 2.7142387022095638e-05, - "loss": 0.6128, - "step": 12220 - }, - { - "epoch": 0.7841565105837075, - "grad_norm": 0.6327668177080631, - "learning_rate": 2.7065749618755455e-05, - "loss": 0.6366, - "step": 12225 - }, - { - "epoch": 0.7844772289929441, - "grad_norm": 0.8664538277798131, - "learning_rate": 2.698920362547299e-05, - "loss": 0.6013, - "step": 12230 - }, - { - "epoch": 0.7847979474021809, - "grad_norm": 0.7003044665428215, - "learning_rate": 2.6912749138184956e-05, - "loss": 0.7929, - "step": 12235 - }, - { - "epoch": 0.7851186658114175, - "grad_norm": 0.7853265661064053, - "learning_rate": 2.6836386252713396e-05, - "loss": 0.7137, - "step": 12240 - }, - { - "epoch": 0.7854393842206543, - "grad_norm": 0.909806347924112, - "learning_rate": 2.6760115064765568e-05, - "loss": 0.6994, - "step": 12245 - }, - { - "epoch": 0.7857601026298909, - "grad_norm": 0.8351806612159146, - "learning_rate": 2.6683935669933736e-05, - "loss": 0.6935, - "step": 12250 - }, - { - "epoch": 0.7860808210391277, - "grad_norm": 0.7611491943408887, - "learning_rate": 2.6607848163695227e-05, - "loss": 0.7319, - "step": 12255 - }, - { - "epoch": 0.7864015394483643, - "grad_norm": 1.122080599336026, - "learning_rate": 2.6531852641412082e-05, - "loss": 0.6022, - "step": 12260 - }, - { - "epoch": 0.7867222578576011, - "grad_norm": 1.1817121943287525, - "learning_rate": 2.645594919833119e-05, - "loss": 0.7494, - "step": 12265 - }, - { - "epoch": 0.7870429762668377, - "grad_norm": 0.7929071478719117, - "learning_rate": 2.6380137929583914e-05, - "loss": 0.7783, - "step": 12270 - }, - { - "epoch": 0.7873636946760744, - "grad_norm": 0.820309764452619, - "learning_rate": 2.6304418930186115e-05, - "loss": 0.6332, - "step": 12275 - }, - { - "epoch": 0.7876844130853111, - "grad_norm": 0.707291602928582, - "learning_rate": 2.6228792295038106e-05, - "loss": 0.537, - "step": 12280 - }, - { - "epoch": 0.7880051314945478, - "grad_norm": 0.8141400312776754, - "learning_rate": 2.6153258118924308e-05, - "loss": 0.6322, - "step": 12285 - }, - { - "epoch": 0.7883258499037845, - "grad_norm": 0.7187432563518902, - "learning_rate": 2.6077816496513363e-05, - "loss": 0.5032, - "step": 12290 - }, - { - "epoch": 0.7886465683130212, - "grad_norm": 0.921998673200194, - "learning_rate": 2.6002467522357867e-05, - "loss": 0.6134, - "step": 12295 - }, - { - "epoch": 0.7889672867222579, - "grad_norm": 1.4739251939697386, - "learning_rate": 2.592721129089427e-05, - "loss": 0.6579, - "step": 12300 - }, - { - "epoch": 0.7892880051314946, - "grad_norm": 0.7698494785751436, - "learning_rate": 2.5852047896442853e-05, - "loss": 0.6832, - "step": 12305 - }, - { - "epoch": 0.7896087235407312, - "grad_norm": 0.9676144058038108, - "learning_rate": 2.577697743320746e-05, - "loss": 0.6789, - "step": 12310 - }, - { - "epoch": 0.789929441949968, - "grad_norm": 0.7989952533967423, - "learning_rate": 2.570199999527557e-05, - "loss": 0.683, - "step": 12315 - }, - { - "epoch": 0.7902501603592046, - "grad_norm": 0.7540668642091226, - "learning_rate": 2.5627115676617953e-05, - "loss": 0.6137, - "step": 12320 - }, - { - "epoch": 0.7905708787684413, - "grad_norm": 1.2363573852579546, - "learning_rate": 2.555232457108879e-05, - "loss": 0.6497, - "step": 12325 - }, - { - "epoch": 0.790891597177678, - "grad_norm": 0.5683854501183521, - "learning_rate": 2.5477626772425356e-05, - "loss": 0.6996, - "step": 12330 - }, - { - "epoch": 0.7912123155869147, - "grad_norm": 0.5533412352742278, - "learning_rate": 2.5403022374247953e-05, - "loss": 0.7001, - "step": 12335 - }, - { - "epoch": 0.7915330339961514, - "grad_norm": 0.675236986686075, - "learning_rate": 2.5328511470059935e-05, - "loss": 0.5805, - "step": 12340 - }, - { - "epoch": 0.791853752405388, - "grad_norm": 0.7285390988297157, - "learning_rate": 2.5254094153247355e-05, - "loss": 0.6149, - "step": 12345 - }, - { - "epoch": 0.7921744708146248, - "grad_norm": 0.80400571870766, - "learning_rate": 2.5179770517079093e-05, - "loss": 0.6948, - "step": 12350 - }, - { - "epoch": 0.7924951892238614, - "grad_norm": 0.9377676574780994, - "learning_rate": 2.510554065470653e-05, - "loss": 0.7308, - "step": 12355 - }, - { - "epoch": 0.7928159076330982, - "grad_norm": 0.6446906934234106, - "learning_rate": 2.5031404659163492e-05, - "loss": 0.7255, - "step": 12360 - }, - { - "epoch": 0.7931366260423348, - "grad_norm": 0.8158537224973699, - "learning_rate": 2.495736262336632e-05, - "loss": 0.7016, - "step": 12365 - }, - { - "epoch": 0.7934573444515716, - "grad_norm": 0.9172314841106095, - "learning_rate": 2.4883414640113357e-05, - "loss": 0.6117, - "step": 12370 - }, - { - "epoch": 0.7937780628608082, - "grad_norm": 0.7437504326268314, - "learning_rate": 2.4809560802085274e-05, - "loss": 0.6409, - "step": 12375 - }, - { - "epoch": 0.7940987812700449, - "grad_norm": 0.6879611505056618, - "learning_rate": 2.4735801201844645e-05, - "loss": 0.6397, - "step": 12380 - }, - { - "epoch": 0.7944194996792816, - "grad_norm": 0.9926575009144855, - "learning_rate": 2.466213593183593e-05, - "loss": 0.6966, - "step": 12385 - }, - { - "epoch": 0.7947402180885182, - "grad_norm": 0.8127945292903275, - "learning_rate": 2.458856508438544e-05, - "loss": 0.7704, - "step": 12390 - }, - { - "epoch": 0.795060936497755, - "grad_norm": 0.8871371492144181, - "learning_rate": 2.451508875170104e-05, - "loss": 0.5606, - "step": 12395 - }, - { - "epoch": 0.7953816549069916, - "grad_norm": 0.8206919204372869, - "learning_rate": 2.444170702587226e-05, - "loss": 0.6932, - "step": 12400 - }, - { - "epoch": 0.7957023733162284, - "grad_norm": 0.6603633676196071, - "learning_rate": 2.436841999886994e-05, - "loss": 0.6109, - "step": 12405 - }, - { - "epoch": 0.796023091725465, - "grad_norm": 0.9151323413512733, - "learning_rate": 2.4295227762546267e-05, - "loss": 0.6631, - "step": 12410 - }, - { - "epoch": 0.7963438101347017, - "grad_norm": 0.9827343805814039, - "learning_rate": 2.422213040863468e-05, - "loss": 0.6563, - "step": 12415 - }, - { - "epoch": 0.7966645285439384, - "grad_norm": 0.9469619065977057, - "learning_rate": 2.414912802874961e-05, - "loss": 0.7412, - "step": 12420 - }, - { - "epoch": 0.7969852469531751, - "grad_norm": 1.3131843532103706, - "learning_rate": 2.4076220714386568e-05, - "loss": 0.6886, - "step": 12425 - }, - { - "epoch": 0.7973059653624118, - "grad_norm": 1.2148517258592102, - "learning_rate": 2.40034085569218e-05, - "loss": 0.6898, - "step": 12430 - }, - { - "epoch": 0.7976266837716485, - "grad_norm": 0.8095565024509138, - "learning_rate": 2.393069164761237e-05, - "loss": 0.6122, - "step": 12435 - }, - { - "epoch": 0.7979474021808852, - "grad_norm": 0.9467420200870824, - "learning_rate": 2.3858070077595908e-05, - "loss": 0.7174, - "step": 12440 - }, - { - "epoch": 0.7982681205901219, - "grad_norm": 0.6202794025655268, - "learning_rate": 2.3785543937890586e-05, - "loss": 0.66, - "step": 12445 - }, - { - "epoch": 0.7985888389993585, - "grad_norm": 1.0791006971385633, - "learning_rate": 2.3713113319394997e-05, - "loss": 0.5363, - "step": 12450 - }, - { - "epoch": 0.7989095574085953, - "grad_norm": 1.026500892588481, - "learning_rate": 2.3640778312887945e-05, - "loss": 0.7948, - "step": 12455 - }, - { - "epoch": 0.7992302758178319, - "grad_norm": 0.7967893717258743, - "learning_rate": 2.35685390090285e-05, - "loss": 0.6343, - "step": 12460 - }, - { - "epoch": 0.7995509942270687, - "grad_norm": 1.1948126480397625, - "learning_rate": 2.3496395498355694e-05, - "loss": 0.7174, - "step": 12465 - }, - { - "epoch": 0.7998717126363053, - "grad_norm": 0.8650772892603197, - "learning_rate": 2.34243478712885e-05, - "loss": 0.7018, - "step": 12470 - }, - { - "epoch": 0.800192431045542, - "grad_norm": 0.49196395624702055, - "learning_rate": 2.3352396218125827e-05, - "loss": 0.5881, - "step": 12475 - }, - { - "epoch": 0.8005131494547787, - "grad_norm": 0.7575733059076403, - "learning_rate": 2.3280540629046143e-05, - "loss": 0.7292, - "step": 12480 - }, - { - "epoch": 0.8008338678640154, - "grad_norm": 0.8513796572354395, - "learning_rate": 2.3208781194107664e-05, - "loss": 0.6286, - "step": 12485 - }, - { - "epoch": 0.8011545862732521, - "grad_norm": 0.734121779464679, - "learning_rate": 2.3137118003248004e-05, - "loss": 0.6818, - "step": 12490 - }, - { - "epoch": 0.8014753046824887, - "grad_norm": 0.5881243074608535, - "learning_rate": 2.306555114628415e-05, - "loss": 0.6553, - "step": 12495 - }, - { - "epoch": 0.8017960230917255, - "grad_norm": 0.6452008879569514, - "learning_rate": 2.2994080712912435e-05, - "loss": 0.705, - "step": 12500 - }, - { - "epoch": 0.8021167415009621, - "grad_norm": 1.409626103322556, - "learning_rate": 2.2922706792708194e-05, - "loss": 0.5859, - "step": 12505 - }, - { - "epoch": 0.8024374599101989, - "grad_norm": 0.7556485492806266, - "learning_rate": 2.2851429475125963e-05, - "loss": 0.6137, - "step": 12510 - }, - { - "epoch": 0.8027581783194355, - "grad_norm": 0.9809427245901448, - "learning_rate": 2.2780248849499088e-05, - "loss": 0.7344, - "step": 12515 - }, - { - "epoch": 0.8030788967286723, - "grad_norm": 0.38473648876347516, - "learning_rate": 2.2709165005039802e-05, - "loss": 0.4635, - "step": 12520 - }, - { - "epoch": 0.8033996151379089, - "grad_norm": 0.7409973296233345, - "learning_rate": 2.263817803083901e-05, - "loss": 0.6076, - "step": 12525 - }, - { - "epoch": 0.8037203335471456, - "grad_norm": 0.7165871670251992, - "learning_rate": 2.256728801586616e-05, - "loss": 0.6541, - "step": 12530 - }, - { - "epoch": 0.8040410519563823, - "grad_norm": 0.8518968659931285, - "learning_rate": 2.249649504896929e-05, - "loss": 0.7555, - "step": 12535 - }, - { - "epoch": 0.804361770365619, - "grad_norm": 0.9159683373230153, - "learning_rate": 2.242579921887471e-05, - "loss": 0.6843, - "step": 12540 - }, - { - "epoch": 0.8046824887748557, - "grad_norm": 0.6228826380501181, - "learning_rate": 2.2355200614186987e-05, - "loss": 0.5394, - "step": 12545 - }, - { - "epoch": 0.8050032071840924, - "grad_norm": 0.8002539057082869, - "learning_rate": 2.2284699323388923e-05, - "loss": 0.7345, - "step": 12550 - }, - { - "epoch": 0.8053239255933291, - "grad_norm": 0.9766455426961175, - "learning_rate": 2.2214295434841248e-05, - "loss": 0.7367, - "step": 12555 - }, - { - "epoch": 0.8056446440025657, - "grad_norm": 0.7046361659107024, - "learning_rate": 2.2143989036782707e-05, - "loss": 0.5187, - "step": 12560 - }, - { - "epoch": 0.8059653624118024, - "grad_norm": 0.8108273818757799, - "learning_rate": 2.2073780217329786e-05, - "loss": 0.6532, - "step": 12565 - }, - { - "epoch": 0.8062860808210391, - "grad_norm": 0.818379710541348, - "learning_rate": 2.2003669064476706e-05, - "loss": 0.6059, - "step": 12570 - }, - { - "epoch": 0.8066067992302758, - "grad_norm": 0.984654681269158, - "learning_rate": 2.1933655666095275e-05, - "loss": 0.6525, - "step": 12575 - }, - { - "epoch": 0.8069275176395125, - "grad_norm": 0.9567899833609597, - "learning_rate": 2.186374010993476e-05, - "loss": 0.7311, - "step": 12580 - }, - { - "epoch": 0.8072482360487492, - "grad_norm": 0.7463705769882709, - "learning_rate": 2.1793922483621876e-05, - "loss": 0.6196, - "step": 12585 - }, - { - "epoch": 0.8075689544579859, - "grad_norm": 0.9733520585461265, - "learning_rate": 2.1724202874660492e-05, - "loss": 0.7193, - "step": 12590 - }, - { - "epoch": 0.8078896728672226, - "grad_norm": 0.7681175464199929, - "learning_rate": 2.165458137043175e-05, - "loss": 0.6522, - "step": 12595 - }, - { - "epoch": 0.8082103912764592, - "grad_norm": 0.6886221085607587, - "learning_rate": 2.158505805819374e-05, - "loss": 0.6666, - "step": 12600 - }, - { - "epoch": 0.808531109685696, - "grad_norm": 0.603328263564938, - "learning_rate": 2.1515633025081484e-05, - "loss": 0.667, - "step": 12605 - }, - { - "epoch": 0.8088518280949326, - "grad_norm": 0.8470975793567042, - "learning_rate": 2.1446306358106927e-05, - "loss": 0.6453, - "step": 12610 - }, - { - "epoch": 0.8091725465041694, - "grad_norm": 1.0220077328521942, - "learning_rate": 2.1377078144158603e-05, - "loss": 0.6582, - "step": 12615 - }, - { - "epoch": 0.809493264913406, - "grad_norm": 0.7129620704949545, - "learning_rate": 2.1307948470001782e-05, - "loss": 0.5496, - "step": 12620 - }, - { - "epoch": 0.8098139833226428, - "grad_norm": 0.6343852911809139, - "learning_rate": 2.1238917422278116e-05, - "loss": 0.5455, - "step": 12625 - }, - { - "epoch": 0.8101347017318794, - "grad_norm": 0.36707540294038493, - "learning_rate": 2.1169985087505694e-05, - "loss": 0.6399, - "step": 12630 - }, - { - "epoch": 0.810455420141116, - "grad_norm": 0.813228299713834, - "learning_rate": 2.1101151552078944e-05, - "loss": 0.6842, - "step": 12635 - }, - { - "epoch": 0.8107761385503528, - "grad_norm": 0.6267132658473076, - "learning_rate": 2.1032416902268314e-05, - "loss": 0.5479, - "step": 12640 - }, - { - "epoch": 0.8110968569595894, - "grad_norm": 1.275645304461915, - "learning_rate": 2.0963781224220503e-05, - "loss": 0.6785, - "step": 12645 - }, - { - "epoch": 0.8114175753688262, - "grad_norm": 0.8576850457893269, - "learning_rate": 2.0895244603957998e-05, - "loss": 0.7868, - "step": 12650 - }, - { - "epoch": 0.8117382937780628, - "grad_norm": 0.5639578214670323, - "learning_rate": 2.082680712737929e-05, - "loss": 0.5559, - "step": 12655 - }, - { - "epoch": 0.8120590121872996, - "grad_norm": 1.1440696942831554, - "learning_rate": 2.0758468880258486e-05, - "loss": 0.7089, - "step": 12660 - }, - { - "epoch": 0.8123797305965362, - "grad_norm": 0.8070604839659317, - "learning_rate": 2.0690229948245365e-05, - "loss": 0.6695, - "step": 12665 - }, - { - "epoch": 0.812700449005773, - "grad_norm": 0.6244747169984161, - "learning_rate": 2.0622090416865293e-05, - "loss": 0.5854, - "step": 12670 - }, - { - "epoch": 0.8130211674150096, - "grad_norm": 0.506375535891638, - "learning_rate": 2.055405037151894e-05, - "loss": 0.6383, - "step": 12675 - }, - { - "epoch": 0.8133418858242463, - "grad_norm": 1.183001348716755, - "learning_rate": 2.0486109897482407e-05, - "loss": 0.6203, - "step": 12680 - }, - { - "epoch": 0.813662604233483, - "grad_norm": 0.6143509135493088, - "learning_rate": 2.0418269079906936e-05, - "loss": 0.5593, - "step": 12685 - }, - { - "epoch": 0.8139833226427197, - "grad_norm": 0.6234718472183463, - "learning_rate": 2.0350528003818825e-05, - "loss": 0.6459, - "step": 12690 - }, - { - "epoch": 0.8143040410519564, - "grad_norm": 1.8693845624658407, - "learning_rate": 2.0282886754119478e-05, - "loss": 0.7211, - "step": 12695 - }, - { - "epoch": 0.8146247594611931, - "grad_norm": 0.8258541488205007, - "learning_rate": 2.0215345415585107e-05, - "loss": 0.5976, - "step": 12700 - }, - { - "epoch": 0.8149454778704298, - "grad_norm": 0.914739265249098, - "learning_rate": 2.0147904072866695e-05, - "loss": 0.6308, - "step": 12705 - }, - { - "epoch": 0.8152661962796665, - "grad_norm": 0.7090505847389847, - "learning_rate": 2.0080562810489935e-05, - "loss": 0.727, - "step": 12710 - }, - { - "epoch": 0.8155869146889031, - "grad_norm": 0.9339182937300688, - "learning_rate": 2.001332171285505e-05, - "loss": 0.6809, - "step": 12715 - }, - { - "epoch": 0.8159076330981399, - "grad_norm": 0.925613865395883, - "learning_rate": 1.9946180864236797e-05, - "loss": 0.7004, - "step": 12720 - }, - { - "epoch": 0.8162283515073765, - "grad_norm": 0.874166373614285, - "learning_rate": 1.9879140348784177e-05, - "loss": 0.6623, - "step": 12725 - }, - { - "epoch": 0.8165490699166132, - "grad_norm": 0.8313132986404351, - "learning_rate": 1.981220025052056e-05, - "loss": 0.6177, - "step": 12730 - }, - { - "epoch": 0.8168697883258499, - "grad_norm": 0.6383078710564455, - "learning_rate": 1.9745360653343393e-05, - "loss": 0.6089, - "step": 12735 - }, - { - "epoch": 0.8171905067350866, - "grad_norm": 0.5929159065490891, - "learning_rate": 1.9678621641024132e-05, - "loss": 0.5833, - "step": 12740 - }, - { - "epoch": 0.8175112251443233, - "grad_norm": 0.6839908339425101, - "learning_rate": 1.961198329720827e-05, - "loss": 0.6513, - "step": 12745 - }, - { - "epoch": 0.8178319435535599, - "grad_norm": 0.43381578975254104, - "learning_rate": 1.9545445705415012e-05, - "loss": 0.655, - "step": 12750 - }, - { - "epoch": 0.8181526619627967, - "grad_norm": 0.666728316560307, - "learning_rate": 1.947900894903739e-05, - "loss": 0.5284, - "step": 12755 - }, - { - "epoch": 0.8184733803720333, - "grad_norm": 1.0911535549941562, - "learning_rate": 1.9412673111342018e-05, - "loss": 0.6534, - "step": 12760 - }, - { - "epoch": 0.8187940987812701, - "grad_norm": 0.8721963911370444, - "learning_rate": 1.934643827546899e-05, - "loss": 0.7718, - "step": 12765 - }, - { - "epoch": 0.8191148171905067, - "grad_norm": 0.9043104390757369, - "learning_rate": 1.928030452443187e-05, - "loss": 0.7249, - "step": 12770 - }, - { - "epoch": 0.8194355355997435, - "grad_norm": 0.6520308339900129, - "learning_rate": 1.9214271941117458e-05, - "loss": 0.569, - "step": 12775 - }, - { - "epoch": 0.8197562540089801, - "grad_norm": 1.0081351400932888, - "learning_rate": 1.9148340608285863e-05, - "loss": 0.6623, - "step": 12780 - }, - { - "epoch": 0.8200769724182168, - "grad_norm": 0.6541686083293314, - "learning_rate": 1.908251060857019e-05, - "loss": 0.6006, - "step": 12785 - }, - { - "epoch": 0.8203976908274535, - "grad_norm": 0.6996268349045872, - "learning_rate": 1.901678202447663e-05, - "loss": 0.6209, - "step": 12790 - }, - { - "epoch": 0.8207184092366901, - "grad_norm": 0.6137399071233165, - "learning_rate": 1.8951154938384207e-05, - "loss": 0.7341, - "step": 12795 - }, - { - "epoch": 0.8210391276459269, - "grad_norm": 0.6979894249139232, - "learning_rate": 1.8885629432544717e-05, - "loss": 0.6331, - "step": 12800 - }, - { - "epoch": 0.8213598460551635, - "grad_norm": 1.4876520614972237, - "learning_rate": 1.882020558908274e-05, - "loss": 0.5262, - "step": 12805 - }, - { - "epoch": 0.8216805644644003, - "grad_norm": 1.1310428300822517, - "learning_rate": 1.8754883489995335e-05, - "loss": 0.6548, - "step": 12810 - }, - { - "epoch": 0.8220012828736369, - "grad_norm": 0.819858534428383, - "learning_rate": 1.868966321715212e-05, - "loss": 0.6514, - "step": 12815 - }, - { - "epoch": 0.8223220012828736, - "grad_norm": 0.9699270159513138, - "learning_rate": 1.8624544852295046e-05, - "loss": 0.6668, - "step": 12820 - }, - { - "epoch": 0.8226427196921103, - "grad_norm": 1.1171340784169779, - "learning_rate": 1.8559528477038325e-05, - "loss": 0.7466, - "step": 12825 - }, - { - "epoch": 0.822963438101347, - "grad_norm": 0.9010920277558152, - "learning_rate": 1.849461417286843e-05, - "loss": 0.5722, - "step": 12830 - }, - { - "epoch": 0.8232841565105837, - "grad_norm": 0.8446632185572971, - "learning_rate": 1.8429802021143816e-05, - "loss": 0.7673, - "step": 12835 - }, - { - "epoch": 0.8236048749198204, - "grad_norm": 0.8445623736137308, - "learning_rate": 1.8365092103094938e-05, - "loss": 0.6343, - "step": 12840 - }, - { - "epoch": 0.8239255933290571, - "grad_norm": 1.3224256501204117, - "learning_rate": 1.83004844998241e-05, - "loss": 0.6446, - "step": 12845 - }, - { - "epoch": 0.8242463117382938, - "grad_norm": 1.2509505443818558, - "learning_rate": 1.8235979292305448e-05, - "loss": 0.5908, - "step": 12850 - }, - { - "epoch": 0.8245670301475305, - "grad_norm": 1.045236864985607, - "learning_rate": 1.8171576561384718e-05, - "loss": 0.6833, - "step": 12855 - }, - { - "epoch": 0.8248877485567672, - "grad_norm": 0.8131230488754208, - "learning_rate": 1.8107276387779194e-05, - "loss": 0.6713, - "step": 12860 - }, - { - "epoch": 0.8252084669660038, - "grad_norm": 0.9987203815522278, - "learning_rate": 1.8043078852077723e-05, - "loss": 0.6382, - "step": 12865 - }, - { - "epoch": 0.8255291853752406, - "grad_norm": 0.8378880198765352, - "learning_rate": 1.797898403474041e-05, - "loss": 0.651, - "step": 12870 - }, - { - "epoch": 0.8258499037844772, - "grad_norm": 0.69860101125052, - "learning_rate": 1.7914992016098652e-05, - "loss": 0.6678, - "step": 12875 - }, - { - "epoch": 0.826170622193714, - "grad_norm": 0.7906981356515638, - "learning_rate": 1.7851102876355064e-05, - "loss": 0.7724, - "step": 12880 - }, - { - "epoch": 0.8264913406029506, - "grad_norm": 0.7220660188316776, - "learning_rate": 1.778731669558322e-05, - "loss": 0.7528, - "step": 12885 - }, - { - "epoch": 0.8268120590121874, - "grad_norm": 0.8602114436332251, - "learning_rate": 1.772363355372776e-05, - "loss": 0.7355, - "step": 12890 - }, - { - "epoch": 0.827132777421424, - "grad_norm": 0.7936909578079667, - "learning_rate": 1.7660053530604103e-05, - "loss": 0.5939, - "step": 12895 - }, - { - "epoch": 0.8274534958306606, - "grad_norm": 0.7386556230325233, - "learning_rate": 1.759657670589844e-05, - "loss": 0.7065, - "step": 12900 - }, - { - "epoch": 0.8277742142398974, - "grad_norm": 0.7508393958424202, - "learning_rate": 1.7533203159167653e-05, - "loss": 0.7995, - "step": 12905 - }, - { - "epoch": 0.828094932649134, - "grad_norm": 1.484996895062748, - "learning_rate": 1.7469932969839133e-05, - "loss": 0.5822, - "step": 12910 - }, - { - "epoch": 0.8284156510583708, - "grad_norm": 0.7889368806667416, - "learning_rate": 1.7406766217210813e-05, - "loss": 0.6915, - "step": 12915 - }, - { - "epoch": 0.8287363694676074, - "grad_norm": 1.043078354293378, - "learning_rate": 1.7343702980450882e-05, - "loss": 0.6678, - "step": 12920 - }, - { - "epoch": 0.8290570878768442, - "grad_norm": 0.5235441869984315, - "learning_rate": 1.7280743338597903e-05, - "loss": 0.6732, - "step": 12925 - }, - { - "epoch": 0.8293778062860808, - "grad_norm": 0.9827303368182867, - "learning_rate": 1.7217887370560527e-05, - "loss": 0.5817, - "step": 12930 - }, - { - "epoch": 0.8296985246953175, - "grad_norm": 0.8919025135393817, - "learning_rate": 1.715513515511743e-05, - "loss": 0.5394, - "step": 12935 - }, - { - "epoch": 0.8300192431045542, - "grad_norm": 0.8422357074138689, - "learning_rate": 1.7092486770917382e-05, - "loss": 0.7755, - "step": 12940 - }, - { - "epoch": 0.8303399615137909, - "grad_norm": 0.9473245373995116, - "learning_rate": 1.7029942296478885e-05, - "loss": 0.6846, - "step": 12945 - }, - { - "epoch": 0.8306606799230276, - "grad_norm": 0.6373840068433619, - "learning_rate": 1.6967501810190323e-05, - "loss": 0.6543, - "step": 12950 - }, - { - "epoch": 0.8309813983322643, - "grad_norm": 0.7843610971634594, - "learning_rate": 1.6905165390309665e-05, - "loss": 0.6431, - "step": 12955 - }, - { - "epoch": 0.831302116741501, - "grad_norm": 1.1652096610055944, - "learning_rate": 1.6842933114964466e-05, - "loss": 0.8221, - "step": 12960 - }, - { - "epoch": 0.8316228351507376, - "grad_norm": 0.8194937278113069, - "learning_rate": 1.6780805062151816e-05, - "loss": 0.5232, - "step": 12965 - }, - { - "epoch": 0.8319435535599743, - "grad_norm": 1.188666287581691, - "learning_rate": 1.6718781309738073e-05, - "loss": 0.6604, - "step": 12970 - }, - { - "epoch": 0.832264271969211, - "grad_norm": 0.8641382912001553, - "learning_rate": 1.665686193545898e-05, - "loss": 0.5844, - "step": 12975 - }, - { - "epoch": 0.8325849903784477, - "grad_norm": 0.7062740744596516, - "learning_rate": 1.6595047016919373e-05, - "loss": 0.6843, - "step": 12980 - }, - { - "epoch": 0.8329057087876844, - "grad_norm": 1.7666107387397485, - "learning_rate": 1.6533336631593276e-05, - "loss": 0.5533, - "step": 12985 - }, - { - "epoch": 0.8332264271969211, - "grad_norm": 0.6713809127329562, - "learning_rate": 1.6471730856823587e-05, - "loss": 0.5803, - "step": 12990 - }, - { - "epoch": 0.8335471456061578, - "grad_norm": 0.789870715650865, - "learning_rate": 1.6410229769822137e-05, - "loss": 0.5722, - "step": 12995 - }, - { - "epoch": 0.8338678640153945, - "grad_norm": 0.694543681011162, - "learning_rate": 1.6348833447669596e-05, - "loss": 0.7518, - "step": 13000 - }, - { - "epoch": 0.8341885824246311, - "grad_norm": 0.9060155570486944, - "learning_rate": 1.6287541967315246e-05, - "loss": 0.6968, - "step": 13005 - }, - { - "epoch": 0.8345093008338679, - "grad_norm": 0.7521276185282114, - "learning_rate": 1.6226355405577052e-05, - "loss": 0.7398, - "step": 13010 - }, - { - "epoch": 0.8348300192431045, - "grad_norm": 0.6239824879078599, - "learning_rate": 1.6165273839141425e-05, - "loss": 0.5993, - "step": 13015 - }, - { - "epoch": 0.8351507376523413, - "grad_norm": 0.8788280197433859, - "learning_rate": 1.610429734456317e-05, - "loss": 0.5281, - "step": 13020 - }, - { - "epoch": 0.8354714560615779, - "grad_norm": 0.5708218830810341, - "learning_rate": 1.604342599826548e-05, - "loss": 0.6636, - "step": 13025 - }, - { - "epoch": 0.8357921744708147, - "grad_norm": 0.9995506015609548, - "learning_rate": 1.5982659876539706e-05, - "loss": 0.6224, - "step": 13030 - }, - { - "epoch": 0.8361128928800513, - "grad_norm": 0.6985670528256153, - "learning_rate": 1.5921999055545322e-05, - "loss": 0.7875, - "step": 13035 - }, - { - "epoch": 0.8364336112892881, - "grad_norm": 1.1017729058211603, - "learning_rate": 1.5861443611309836e-05, - "loss": 0.5689, - "step": 13040 - }, - { - "epoch": 0.8367543296985247, - "grad_norm": 0.6102105059220153, - "learning_rate": 1.5800993619728645e-05, - "loss": 0.6071, - "step": 13045 - }, - { - "epoch": 0.8370750481077613, - "grad_norm": 1.0918121069567406, - "learning_rate": 1.574064915656508e-05, - "loss": 0.6389, - "step": 13050 - }, - { - "epoch": 0.8373957665169981, - "grad_norm": 0.8119509757109902, - "learning_rate": 1.5680410297450097e-05, - "loss": 0.6904, - "step": 13055 - }, - { - "epoch": 0.8377164849262347, - "grad_norm": 1.0654010067070523, - "learning_rate": 1.56202771178824e-05, - "loss": 0.6806, - "step": 13060 - }, - { - "epoch": 0.8380372033354715, - "grad_norm": 1.003140917229182, - "learning_rate": 1.5560249693228167e-05, - "loss": 0.7506, - "step": 13065 - }, - { - "epoch": 0.8383579217447081, - "grad_norm": 0.8104009198927022, - "learning_rate": 1.5500328098721017e-05, - "loss": 0.6771, - "step": 13070 - }, - { - "epoch": 0.8386786401539449, - "grad_norm": 0.6505916854083006, - "learning_rate": 1.5440512409462027e-05, - "loss": 0.4606, - "step": 13075 - }, - { - "epoch": 0.8389993585631815, - "grad_norm": 0.8172274238106711, - "learning_rate": 1.5380802700419437e-05, - "loss": 0.6273, - "step": 13080 - }, - { - "epoch": 0.8393200769724182, - "grad_norm": 0.8412486560565198, - "learning_rate": 1.5321199046428748e-05, - "loss": 0.6232, - "step": 13085 - }, - { - "epoch": 0.8396407953816549, - "grad_norm": 1.2677355193498017, - "learning_rate": 1.526170152219246e-05, - "loss": 0.6965, - "step": 13090 - }, - { - "epoch": 0.8399615137908916, - "grad_norm": 1.1729148810404941, - "learning_rate": 1.520231020228008e-05, - "loss": 0.6742, - "step": 13095 - }, - { - "epoch": 0.8402822322001283, - "grad_norm": 0.9492910072998716, - "learning_rate": 1.51430251611281e-05, - "loss": 0.6427, - "step": 13100 - }, - { - "epoch": 0.840602950609365, - "grad_norm": 0.9485664054113067, - "learning_rate": 1.508384647303962e-05, - "loss": 0.7599, - "step": 13105 - }, - { - "epoch": 0.8409236690186017, - "grad_norm": 0.7710450909617227, - "learning_rate": 1.5024774212184644e-05, - "loss": 0.7211, - "step": 13110 - }, - { - "epoch": 0.8412443874278384, - "grad_norm": 1.4732302257890362, - "learning_rate": 1.496580845259965e-05, - "loss": 0.5757, - "step": 13115 - }, - { - "epoch": 0.841565105837075, - "grad_norm": 0.815748738677427, - "learning_rate": 1.4906949268187731e-05, - "loss": 0.7202, - "step": 13120 - }, - { - "epoch": 0.8418858242463118, - "grad_norm": 0.7569265134733956, - "learning_rate": 1.4848196732718333e-05, - "loss": 0.5067, - "step": 13125 - }, - { - "epoch": 0.8422065426555484, - "grad_norm": 0.7019350874014, - "learning_rate": 1.4789550919827255e-05, - "loss": 0.6555, - "step": 13130 - }, - { - "epoch": 0.8425272610647851, - "grad_norm": 1.070502908495116, - "learning_rate": 1.4731011903016589e-05, - "loss": 0.5612, - "step": 13135 - }, - { - "epoch": 0.8428479794740218, - "grad_norm": 0.8746378057344433, - "learning_rate": 1.4672579755654492e-05, - "loss": 0.6644, - "step": 13140 - }, - { - "epoch": 0.8431686978832585, - "grad_norm": 0.9069204901759049, - "learning_rate": 1.4614254550975282e-05, - "loss": 0.6041, - "step": 13145 - }, - { - "epoch": 0.8434894162924952, - "grad_norm": 0.6286045045253976, - "learning_rate": 1.455603636207915e-05, - "loss": 0.573, - "step": 13150 - }, - { - "epoch": 0.8438101347017318, - "grad_norm": 0.8046184953958996, - "learning_rate": 1.4497925261932188e-05, - "loss": 0.7031, - "step": 13155 - }, - { - "epoch": 0.8441308531109686, - "grad_norm": 0.9289022471342262, - "learning_rate": 1.4439921323366323e-05, - "loss": 0.6532, - "step": 13160 - }, - { - "epoch": 0.8444515715202052, - "grad_norm": 0.8155940315800527, - "learning_rate": 1.4382024619079105e-05, - "loss": 0.6537, - "step": 13165 - }, - { - "epoch": 0.844772289929442, - "grad_norm": 1.064740365786613, - "learning_rate": 1.432423522163372e-05, - "loss": 0.598, - "step": 13170 - }, - { - "epoch": 0.8450930083386786, - "grad_norm": 0.7962110283298796, - "learning_rate": 1.4266553203458831e-05, - "loss": 0.7714, - "step": 13175 - }, - { - "epoch": 0.8454137267479154, - "grad_norm": 0.9891349725088471, - "learning_rate": 1.4208978636848591e-05, - "loss": 0.666, - "step": 13180 - }, - { - "epoch": 0.845734445157152, - "grad_norm": 0.4894444101288945, - "learning_rate": 1.4151511593962418e-05, - "loss": 0.5697, - "step": 13185 - }, - { - "epoch": 0.8460551635663887, - "grad_norm": 0.9733970578229911, - "learning_rate": 1.4094152146824969e-05, - "loss": 0.639, - "step": 13190 - }, - { - "epoch": 0.8463758819756254, - "grad_norm": 0.7185939555951706, - "learning_rate": 1.40369003673261e-05, - "loss": 0.6608, - "step": 13195 - }, - { - "epoch": 0.846696600384862, - "grad_norm": 0.9770382341303654, - "learning_rate": 1.3979756327220683e-05, - "loss": 0.5714, - "step": 13200 - }, - { - "epoch": 0.8470173187940988, - "grad_norm": 0.9521286519290345, - "learning_rate": 1.3922720098128527e-05, - "loss": 0.7672, - "step": 13205 - }, - { - "epoch": 0.8473380372033354, - "grad_norm": 1.0026426650083589, - "learning_rate": 1.3865791751534418e-05, - "loss": 0.589, - "step": 13210 - }, - { - "epoch": 0.8476587556125722, - "grad_norm": 0.7945648077908503, - "learning_rate": 1.3808971358787837e-05, - "loss": 0.5791, - "step": 13215 - }, - { - "epoch": 0.8479794740218088, - "grad_norm": 0.6890626619071494, - "learning_rate": 1.3752258991103018e-05, - "loss": 0.7313, - "step": 13220 - }, - { - "epoch": 0.8483001924310456, - "grad_norm": 0.8523591274592248, - "learning_rate": 1.369565471955878e-05, - "loss": 0.7, - "step": 13225 - }, - { - "epoch": 0.8486209108402822, - "grad_norm": 0.6661468510777631, - "learning_rate": 1.3639158615098457e-05, - "loss": 0.681, - "step": 13230 - }, - { - "epoch": 0.8489416292495189, - "grad_norm": 0.498183926121059, - "learning_rate": 1.3582770748529839e-05, - "loss": 0.6238, - "step": 13235 - }, - { - "epoch": 0.8492623476587556, - "grad_norm": 0.9855613055277577, - "learning_rate": 1.3526491190525025e-05, - "loss": 0.7218, - "step": 13240 - }, - { - "epoch": 0.8495830660679923, - "grad_norm": 0.42718056670086024, - "learning_rate": 1.3470320011620418e-05, - "loss": 0.5768, - "step": 13245 - }, - { - "epoch": 0.849903784477229, - "grad_norm": 1.0362257024186183, - "learning_rate": 1.3414257282216535e-05, - "loss": 0.6332, - "step": 13250 - }, - { - "epoch": 0.8502245028864657, - "grad_norm": 0.8990446366365678, - "learning_rate": 1.3358303072578027e-05, - "loss": 0.6709, - "step": 13255 - }, - { - "epoch": 0.8505452212957024, - "grad_norm": 0.7211479323078617, - "learning_rate": 1.3302457452833484e-05, - "loss": 0.5878, - "step": 13260 - }, - { - "epoch": 0.8508659397049391, - "grad_norm": 1.1924552884788637, - "learning_rate": 1.3246720492975396e-05, - "loss": 0.7302, - "step": 13265 - }, - { - "epoch": 0.8511866581141757, - "grad_norm": 0.6589451039855936, - "learning_rate": 1.3191092262860127e-05, - "loss": 0.6891, - "step": 13270 - }, - { - "epoch": 0.8515073765234125, - "grad_norm": 0.6379938202383435, - "learning_rate": 1.3135572832207699e-05, - "loss": 0.5751, - "step": 13275 - }, - { - "epoch": 0.8518280949326491, - "grad_norm": 0.6107227064835382, - "learning_rate": 1.3080162270601826e-05, - "loss": 0.6705, - "step": 13280 - }, - { - "epoch": 0.8521488133418859, - "grad_norm": 0.7796857101023206, - "learning_rate": 1.3024860647489756e-05, - "loss": 0.595, - "step": 13285 - }, - { - "epoch": 0.8524695317511225, - "grad_norm": 1.067556813441523, - "learning_rate": 1.2969668032182147e-05, - "loss": 0.6906, - "step": 13290 - }, - { - "epoch": 0.8527902501603593, - "grad_norm": 0.7705240841097785, - "learning_rate": 1.2914584493853144e-05, - "loss": 0.6176, - "step": 13295 - }, - { - "epoch": 0.8531109685695959, - "grad_norm": 0.9227266859657003, - "learning_rate": 1.285961010154011e-05, - "loss": 0.6479, - "step": 13300 - }, - { - "epoch": 0.8534316869788325, - "grad_norm": 1.0189541311376396, - "learning_rate": 1.2804744924143608e-05, - "loss": 0.728, - "step": 13305 - }, - { - "epoch": 0.8537524053880693, - "grad_norm": 0.8401997316168908, - "learning_rate": 1.2749989030427344e-05, - "loss": 0.7617, - "step": 13310 - }, - { - "epoch": 0.8540731237973059, - "grad_norm": 0.9093797714776795, - "learning_rate": 1.269534248901807e-05, - "loss": 0.5851, - "step": 13315 - }, - { - "epoch": 0.8543938422065427, - "grad_norm": 1.1037034088263697, - "learning_rate": 1.2640805368405462e-05, - "loss": 0.6118, - "step": 13320 - }, - { - "epoch": 0.8547145606157793, - "grad_norm": 0.7177427685245759, - "learning_rate": 1.2586377736942034e-05, - "loss": 0.7042, - "step": 13325 - }, - { - "epoch": 0.8550352790250161, - "grad_norm": 0.9633359403241921, - "learning_rate": 1.2532059662843144e-05, - "loss": 0.7182, - "step": 13330 - }, - { - "epoch": 0.8553559974342527, - "grad_norm": 0.8564133887667676, - "learning_rate": 1.2477851214186754e-05, - "loss": 0.5807, - "step": 13335 - }, - { - "epoch": 0.8556767158434894, - "grad_norm": 1.0067512789243385, - "learning_rate": 1.2423752458913518e-05, - "loss": 0.6689, - "step": 13340 - }, - { - "epoch": 0.8559974342527261, - "grad_norm": 0.6740456644820353, - "learning_rate": 1.2369763464826533e-05, - "loss": 0.5505, - "step": 13345 - }, - { - "epoch": 0.8563181526619628, - "grad_norm": 0.7485205146558563, - "learning_rate": 1.2315884299591362e-05, - "loss": 0.7485, - "step": 13350 - }, - { - "epoch": 0.8566388710711995, - "grad_norm": 0.9943455478406926, - "learning_rate": 1.2262115030735944e-05, - "loss": 0.7464, - "step": 13355 - }, - { - "epoch": 0.8569595894804362, - "grad_norm": 0.7832997459113116, - "learning_rate": 1.2208455725650436e-05, - "loss": 0.6956, - "step": 13360 - }, - { - "epoch": 0.8572803078896729, - "grad_norm": 0.9569726126068407, - "learning_rate": 1.2154906451587189e-05, - "loss": 0.7132, - "step": 13365 - }, - { - "epoch": 0.8576010262989096, - "grad_norm": 0.7447552856015294, - "learning_rate": 1.2101467275660661e-05, - "loss": 0.4959, - "step": 13370 - }, - { - "epoch": 0.8579217447081462, - "grad_norm": 0.8033856598382162, - "learning_rate": 1.2048138264847297e-05, - "loss": 0.8208, - "step": 13375 - }, - { - "epoch": 0.858242463117383, - "grad_norm": 1.2548309542667209, - "learning_rate": 1.1994919485985522e-05, - "loss": 0.5933, - "step": 13380 - }, - { - "epoch": 0.8585631815266196, - "grad_norm": 0.8849084463562876, - "learning_rate": 1.1941811005775538e-05, - "loss": 0.7345, - "step": 13385 - }, - { - "epoch": 0.8588838999358563, - "grad_norm": 1.0662756941569218, - "learning_rate": 1.1888812890779377e-05, - "loss": 0.672, - "step": 13390 - }, - { - "epoch": 0.859204618345093, - "grad_norm": 1.4484403343446357, - "learning_rate": 1.1835925207420694e-05, - "loss": 0.606, - "step": 13395 - }, - { - "epoch": 0.8595253367543297, - "grad_norm": 0.8332555994611591, - "learning_rate": 1.1783148021984725e-05, - "loss": 0.692, - "step": 13400 - }, - { - "epoch": 0.8598460551635664, - "grad_norm": 0.7857634142558743, - "learning_rate": 1.1730481400618299e-05, - "loss": 0.8791, - "step": 13405 - }, - { - "epoch": 0.8601667735728031, - "grad_norm": 1.0726454797623632, - "learning_rate": 1.167792540932957e-05, - "loss": 0.6978, - "step": 13410 - }, - { - "epoch": 0.8604874919820398, - "grad_norm": 0.662627507867472, - "learning_rate": 1.162548011398814e-05, - "loss": 0.6655, - "step": 13415 - }, - { - "epoch": 0.8608082103912764, - "grad_norm": 0.7427411411925819, - "learning_rate": 1.1573145580324785e-05, - "loss": 0.7019, - "step": 13420 - }, - { - "epoch": 0.8611289288005132, - "grad_norm": 0.8465518983483786, - "learning_rate": 1.1520921873931489e-05, - "loss": 0.7452, - "step": 13425 - }, - { - "epoch": 0.8614496472097498, - "grad_norm": 0.5455286801662246, - "learning_rate": 1.1468809060261399e-05, - "loss": 0.652, - "step": 13430 - }, - { - "epoch": 0.8617703656189866, - "grad_norm": 0.8972113556345591, - "learning_rate": 1.1416807204628533e-05, - "loss": 0.5988, - "step": 13435 - }, - { - "epoch": 0.8620910840282232, - "grad_norm": 0.6854697322056322, - "learning_rate": 1.1364916372208e-05, - "loss": 0.696, - "step": 13440 - }, - { - "epoch": 0.86241180243746, - "grad_norm": 0.678150343614853, - "learning_rate": 1.1313136628035647e-05, - "loss": 0.5252, - "step": 13445 - }, - { - "epoch": 0.8627325208466966, - "grad_norm": 0.6285060401132421, - "learning_rate": 1.1261468037008172e-05, - "loss": 0.4725, - "step": 13450 - }, - { - "epoch": 0.8630532392559332, - "grad_norm": 0.6510845504498061, - "learning_rate": 1.1209910663882916e-05, - "loss": 0.5565, - "step": 13455 - }, - { - "epoch": 0.86337395766517, - "grad_norm": 1.2698183413935256, - "learning_rate": 1.1158464573277816e-05, - "loss": 0.7544, - "step": 13460 - }, - { - "epoch": 0.8636946760744066, - "grad_norm": 1.048484623181104, - "learning_rate": 1.1107129829671393e-05, - "loss": 0.6762, - "step": 13465 - }, - { - "epoch": 0.8640153944836434, - "grad_norm": 0.8197138470113798, - "learning_rate": 1.1055906497402534e-05, - "loss": 0.7671, - "step": 13470 - }, - { - "epoch": 0.86433611289288, - "grad_norm": 0.8060735013585868, - "learning_rate": 1.1004794640670602e-05, - "loss": 0.7412, - "step": 13475 - }, - { - "epoch": 0.8646568313021168, - "grad_norm": 0.5202202198681646, - "learning_rate": 1.0953794323535138e-05, - "loss": 0.617, - "step": 13480 - }, - { - "epoch": 0.8649775497113534, - "grad_norm": 0.9060221838859691, - "learning_rate": 1.0902905609915925e-05, - "loss": 0.6724, - "step": 13485 - }, - { - "epoch": 0.8652982681205901, - "grad_norm": 0.9948896143875089, - "learning_rate": 1.0852128563592911e-05, - "loss": 0.6916, - "step": 13490 - }, - { - "epoch": 0.8656189865298268, - "grad_norm": 0.6185205159442889, - "learning_rate": 1.0801463248206012e-05, - "loss": 0.6155, - "step": 13495 - }, - { - "epoch": 0.8659397049390635, - "grad_norm": 0.8621415617622489, - "learning_rate": 1.0750909727255231e-05, - "loss": 0.5641, - "step": 13500 - }, - { - "epoch": 0.8662604233483002, - "grad_norm": 1.0099987644568347, - "learning_rate": 1.0700468064100278e-05, - "loss": 0.5874, - "step": 13505 - }, - { - "epoch": 0.8665811417575369, - "grad_norm": 0.7860625683994522, - "learning_rate": 1.0650138321960834e-05, - "loss": 0.6447, - "step": 13510 - }, - { - "epoch": 0.8669018601667736, - "grad_norm": 1.0075130412273372, - "learning_rate": 1.0599920563916233e-05, - "loss": 0.6428, - "step": 13515 - }, - { - "epoch": 0.8672225785760103, - "grad_norm": 0.8433746537048423, - "learning_rate": 1.0549814852905427e-05, - "loss": 0.6156, - "step": 13520 - }, - { - "epoch": 0.8675432969852469, - "grad_norm": 0.6911458595910109, - "learning_rate": 1.0499821251727038e-05, - "loss": 0.7697, - "step": 13525 - }, - { - "epoch": 0.8678640153944837, - "grad_norm": 0.7261479775249019, - "learning_rate": 1.044993982303909e-05, - "loss": 0.7353, - "step": 13530 - }, - { - "epoch": 0.8681847338037203, - "grad_norm": 0.5256687873474478, - "learning_rate": 1.040017062935902e-05, - "loss": 0.5737, - "step": 13535 - }, - { - "epoch": 0.868505452212957, - "grad_norm": 1.0493206252194889, - "learning_rate": 1.035051373306366e-05, - "loss": 0.6215, - "step": 13540 - }, - { - "epoch": 0.8688261706221937, - "grad_norm": 0.665208544741004, - "learning_rate": 1.0300969196389033e-05, - "loss": 0.6073, - "step": 13545 - }, - { - "epoch": 0.8691468890314304, - "grad_norm": 0.6978534685649864, - "learning_rate": 1.0251537081430406e-05, - "loss": 0.5837, - "step": 13550 - }, - { - "epoch": 0.8694676074406671, - "grad_norm": 0.7579892159049441, - "learning_rate": 1.0202217450142082e-05, - "loss": 0.5604, - "step": 13555 - }, - { - "epoch": 0.8697883258499037, - "grad_norm": 0.6514517952782195, - "learning_rate": 1.015301036433739e-05, - "loss": 0.6971, - "step": 13560 - }, - { - "epoch": 0.8701090442591405, - "grad_norm": 0.4398371785948417, - "learning_rate": 1.0103915885688686e-05, - "loss": 0.5459, - "step": 13565 - }, - { - "epoch": 0.8704297626683771, - "grad_norm": 0.6924160948174624, - "learning_rate": 1.0054934075727062e-05, - "loss": 0.5386, - "step": 13570 - }, - { - "epoch": 0.8707504810776139, - "grad_norm": 0.9269090072648052, - "learning_rate": 1.0006064995842513e-05, - "loss": 0.7547, - "step": 13575 - }, - { - "epoch": 0.8710711994868505, - "grad_norm": 0.788185049843599, - "learning_rate": 9.957308707283675e-06, - "loss": 0.6128, - "step": 13580 - }, - { - "epoch": 0.8713919178960873, - "grad_norm": 0.7308595928706564, - "learning_rate": 9.90866527115788e-06, - "loss": 0.6036, - "step": 13585 - }, - { - "epoch": 0.8717126363053239, - "grad_norm": 0.7092354057653707, - "learning_rate": 9.860134748430972e-06, - "loss": 0.7038, - "step": 13590 - }, - { - "epoch": 0.8720333547145607, - "grad_norm": 1.0470346737728682, - "learning_rate": 9.811717199927273e-06, - "loss": 0.73, - "step": 13595 - }, - { - "epoch": 0.8723540731237973, - "grad_norm": 1.2863495939351028, - "learning_rate": 9.763412686329575e-06, - "loss": 0.7084, - "step": 13600 - }, - { - "epoch": 0.872674791533034, - "grad_norm": 0.768903275631644, - "learning_rate": 9.71522126817892e-06, - "loss": 0.7444, - "step": 13605 - }, - { - "epoch": 0.8729955099422707, - "grad_norm": 0.8561382895899066, - "learning_rate": 9.667143005874679e-06, - "loss": 0.6743, - "step": 13610 - }, - { - "epoch": 0.8733162283515074, - "grad_norm": 0.6255033102428371, - "learning_rate": 9.619177959674353e-06, - "loss": 0.6357, - "step": 13615 - }, - { - "epoch": 0.8736369467607441, - "grad_norm": 0.8367879131361138, - "learning_rate": 9.57132618969354e-06, - "loss": 0.7229, - "step": 13620 - }, - { - "epoch": 0.8739576651699807, - "grad_norm": 0.6292130728042913, - "learning_rate": 9.523587755905938e-06, - "loss": 0.6561, - "step": 13625 - }, - { - "epoch": 0.8742783835792175, - "grad_norm": 0.9860204738083063, - "learning_rate": 9.475962718143106e-06, - "loss": 0.6323, - "step": 13630 - }, - { - "epoch": 0.8745991019884541, - "grad_norm": 0.841887275726057, - "learning_rate": 9.428451136094541e-06, - "loss": 0.6762, - "step": 13635 - }, - { - "epoch": 0.8749198203976908, - "grad_norm": 0.7100122528682058, - "learning_rate": 9.381053069307499e-06, - "loss": 0.5494, - "step": 13640 - }, - { - "epoch": 0.8752405388069275, - "grad_norm": 1.1202599763010757, - "learning_rate": 9.33376857718703e-06, - "loss": 0.5936, - "step": 13645 - }, - { - "epoch": 0.8755612572161642, - "grad_norm": 1.0773135254923245, - "learning_rate": 9.286597718995783e-06, - "loss": 0.5523, - "step": 13650 - }, - { - "epoch": 0.8758819756254009, - "grad_norm": 0.7262011668633317, - "learning_rate": 9.239540553853987e-06, - "loss": 0.7559, - "step": 13655 - }, - { - "epoch": 0.8762026940346376, - "grad_norm": 1.1845562776611291, - "learning_rate": 9.192597140739445e-06, - "loss": 0.6214, - "step": 13660 - }, - { - "epoch": 0.8765234124438743, - "grad_norm": 1.0832215867500623, - "learning_rate": 9.145767538487282e-06, - "loss": 0.6363, - "step": 13665 - }, - { - "epoch": 0.876844130853111, - "grad_norm": 0.8384508766840872, - "learning_rate": 9.099051805790081e-06, - "loss": 0.7162, - "step": 13670 - }, - { - "epoch": 0.8771648492623476, - "grad_norm": 0.7886740113805487, - "learning_rate": 9.052450001197666e-06, - "loss": 0.5292, - "step": 13675 - }, - { - "epoch": 0.8774855676715844, - "grad_norm": 0.724073412445175, - "learning_rate": 9.005962183117055e-06, - "loss": 0.7159, - "step": 13680 - }, - { - "epoch": 0.877806286080821, - "grad_norm": 0.5059344342927663, - "learning_rate": 8.959588409812458e-06, - "loss": 0.6316, - "step": 13685 - }, - { - "epoch": 0.8781270044900578, - "grad_norm": 1.2097294273874917, - "learning_rate": 8.913328739405092e-06, - "loss": 0.7006, - "step": 13690 - }, - { - "epoch": 0.8784477228992944, - "grad_norm": 0.615032496760421, - "learning_rate": 8.867183229873211e-06, - "loss": 0.738, - "step": 13695 - }, - { - "epoch": 0.8787684413085312, - "grad_norm": 0.6135358966273193, - "learning_rate": 8.821151939051953e-06, - "loss": 0.6287, - "step": 13700 - }, - { - "epoch": 0.8790891597177678, - "grad_norm": 0.8910522096004475, - "learning_rate": 8.775234924633301e-06, - "loss": 0.7301, - "step": 13705 - }, - { - "epoch": 0.8794098781270044, - "grad_norm": 0.7815093977889225, - "learning_rate": 8.72943224416609e-06, - "loss": 0.6499, - "step": 13710 - }, - { - "epoch": 0.8797305965362412, - "grad_norm": 0.5607257491266542, - "learning_rate": 8.683743955055746e-06, - "loss": 0.6083, - "step": 13715 - }, - { - "epoch": 0.8800513149454778, - "grad_norm": 0.884214002379739, - "learning_rate": 8.638170114564414e-06, - "loss": 0.611, - "step": 13720 - }, - { - "epoch": 0.8803720333547146, - "grad_norm": 0.7528314170250561, - "learning_rate": 8.592710779810765e-06, - "loss": 0.6921, - "step": 13725 - }, - { - "epoch": 0.8806927517639512, - "grad_norm": 0.9161588988308113, - "learning_rate": 8.547366007769919e-06, - "loss": 0.652, - "step": 13730 - }, - { - "epoch": 0.881013470173188, - "grad_norm": 1.4044011636843894, - "learning_rate": 8.502135855273497e-06, - "loss": 0.6532, - "step": 13735 - }, - { - "epoch": 0.8813341885824246, - "grad_norm": 0.5311315649019397, - "learning_rate": 8.457020379009373e-06, - "loss": 0.5949, - "step": 13740 - }, - { - "epoch": 0.8816549069916613, - "grad_norm": 0.6747473256173435, - "learning_rate": 8.412019635521784e-06, - "loss": 0.5982, - "step": 13745 - }, - { - "epoch": 0.881975625400898, - "grad_norm": 0.6539295071967237, - "learning_rate": 8.367133681211103e-06, - "loss": 0.4702, - "step": 13750 - }, - { - "epoch": 0.8822963438101347, - "grad_norm": 0.403206890252452, - "learning_rate": 8.322362572333841e-06, - "loss": 0.5464, - "step": 13755 - }, - { - "epoch": 0.8826170622193714, - "grad_norm": 0.7780767642995721, - "learning_rate": 8.277706365002625e-06, - "loss": 0.6976, - "step": 13760 - }, - { - "epoch": 0.8829377806286081, - "grad_norm": 0.6272304201483566, - "learning_rate": 8.233165115186003e-06, - "loss": 0.6613, - "step": 13765 - }, - { - "epoch": 0.8832584990378448, - "grad_norm": 0.8343537172020628, - "learning_rate": 8.188738878708502e-06, - "loss": 0.7469, - "step": 13770 - }, - { - "epoch": 0.8835792174470815, - "grad_norm": 0.9345794017556924, - "learning_rate": 8.144427711250447e-06, - "loss": 0.7586, - "step": 13775 - }, - { - "epoch": 0.8838999358563181, - "grad_norm": 1.162828611729811, - "learning_rate": 8.100231668348002e-06, - "loss": 0.5382, - "step": 13780 - }, - { - "epoch": 0.8842206542655549, - "grad_norm": 1.1205395105885234, - "learning_rate": 8.056150805392993e-06, - "loss": 0.6138, - "step": 13785 - }, - { - "epoch": 0.8845413726747915, - "grad_norm": 0.5630057786543724, - "learning_rate": 8.012185177632914e-06, - "loss": 0.4977, - "step": 13790 - }, - { - "epoch": 0.8848620910840282, - "grad_norm": 0.8477848139037634, - "learning_rate": 7.968334840170843e-06, - "loss": 0.7394, - "step": 13795 - }, - { - "epoch": 0.8851828094932649, - "grad_norm": 0.7207695540829029, - "learning_rate": 7.92459984796532e-06, - "loss": 0.7108, - "step": 13800 - }, - { - "epoch": 0.8855035279025016, - "grad_norm": 0.9355747131091594, - "learning_rate": 7.880980255830372e-06, - "loss": 0.6971, - "step": 13805 - }, - { - "epoch": 0.8858242463117383, - "grad_norm": 0.8391670611046308, - "learning_rate": 7.83747611843536e-06, - "loss": 0.6618, - "step": 13810 - }, - { - "epoch": 0.886144964720975, - "grad_norm": 0.6940296556964382, - "learning_rate": 7.794087490304935e-06, - "loss": 0.7303, - "step": 13815 - }, - { - "epoch": 0.8864656831302117, - "grad_norm": 0.5463085826484815, - "learning_rate": 7.75081442581902e-06, - "loss": 0.7128, - "step": 13820 - }, - { - "epoch": 0.8867864015394483, - "grad_norm": 0.9553016730601827, - "learning_rate": 7.707656979212653e-06, - "loss": 0.5325, - "step": 13825 - }, - { - "epoch": 0.8871071199486851, - "grad_norm": 0.9151217967040441, - "learning_rate": 7.66461520457602e-06, - "loss": 0.7276, - "step": 13830 - }, - { - "epoch": 0.8874278383579217, - "grad_norm": 0.7869236135130984, - "learning_rate": 7.6216891558542395e-06, - "loss": 0.744, - "step": 13835 - }, - { - "epoch": 0.8877485567671585, - "grad_norm": 0.748585172606016, - "learning_rate": 7.578878886847507e-06, - "loss": 0.5891, - "step": 13840 - }, - { - "epoch": 0.8880692751763951, - "grad_norm": 0.7205402378107477, - "learning_rate": 7.536184451210815e-06, - "loss": 0.6715, - "step": 13845 - }, - { - "epoch": 0.8883899935856319, - "grad_norm": 0.6198613140638497, - "learning_rate": 7.493605902454004e-06, - "loss": 0.7581, - "step": 13850 - }, - { - "epoch": 0.8887107119948685, - "grad_norm": 0.84149727085621, - "learning_rate": 7.451143293941709e-06, - "loss": 0.746, - "step": 13855 - }, - { - "epoch": 0.8890314304041051, - "grad_norm": 0.8368846152026573, - "learning_rate": 7.408796678893226e-06, - "loss": 0.6687, - "step": 13860 - }, - { - "epoch": 0.8893521488133419, - "grad_norm": 0.7827898269521945, - "learning_rate": 7.366566110382445e-06, - "loss": 0.5832, - "step": 13865 - }, - { - "epoch": 0.8896728672225785, - "grad_norm": 0.7300699318830831, - "learning_rate": 7.324451641337882e-06, - "loss": 0.6294, - "step": 13870 - }, - { - "epoch": 0.8899935856318153, - "grad_norm": 0.9238241719407477, - "learning_rate": 7.28245332454246e-06, - "loss": 0.7083, - "step": 13875 - }, - { - "epoch": 0.8903143040410519, - "grad_norm": 0.7709293686153301, - "learning_rate": 7.240571212633618e-06, - "loss": 0.5686, - "step": 13880 - }, - { - "epoch": 0.8906350224502887, - "grad_norm": 1.1869349060713659, - "learning_rate": 7.198805358103067e-06, - "loss": 0.728, - "step": 13885 - }, - { - "epoch": 0.8909557408595253, - "grad_norm": 1.0851258551108929, - "learning_rate": 7.157155813296834e-06, - "loss": 0.7379, - "step": 13890 - }, - { - "epoch": 0.891276459268762, - "grad_norm": 0.7394708926504447, - "learning_rate": 7.115622630415253e-06, - "loss": 0.7321, - "step": 13895 - }, - { - "epoch": 0.8915971776779987, - "grad_norm": 0.816039779235774, - "learning_rate": 7.0742058615126726e-06, - "loss": 0.601, - "step": 13900 - }, - { - "epoch": 0.8919178960872354, - "grad_norm": 0.41244323070119415, - "learning_rate": 7.03290555849766e-06, - "loss": 0.5809, - "step": 13905 - }, - { - "epoch": 0.8922386144964721, - "grad_norm": 0.8918418533925353, - "learning_rate": 6.991721773132742e-06, - "loss": 0.7142, - "step": 13910 - }, - { - "epoch": 0.8925593329057088, - "grad_norm": 0.8732825568065812, - "learning_rate": 6.950654557034475e-06, - "loss": 0.6635, - "step": 13915 - }, - { - "epoch": 0.8928800513149455, - "grad_norm": 0.7358325355065991, - "learning_rate": 6.909703961673253e-06, - "loss": 0.6412, - "step": 13920 - }, - { - "epoch": 0.8932007697241822, - "grad_norm": 0.7111667197818642, - "learning_rate": 6.868870038373332e-06, - "loss": 0.6767, - "step": 13925 - }, - { - "epoch": 0.8935214881334188, - "grad_norm": 1.0721200112803682, - "learning_rate": 6.828152838312773e-06, - "loss": 0.5066, - "step": 13930 - }, - { - "epoch": 0.8938422065426556, - "grad_norm": 0.8310238983860934, - "learning_rate": 6.787552412523279e-06, - "loss": 0.6764, - "step": 13935 - }, - { - "epoch": 0.8941629249518922, - "grad_norm": 0.6872676077028719, - "learning_rate": 6.747068811890256e-06, - "loss": 0.6671, - "step": 13940 - }, - { - "epoch": 0.894483643361129, - "grad_norm": 0.9702633803545438, - "learning_rate": 6.706702087152661e-06, - "loss": 0.4624, - "step": 13945 - }, - { - "epoch": 0.8948043617703656, - "grad_norm": 1.375052365512822, - "learning_rate": 6.666452288902958e-06, - "loss": 0.7522, - "step": 13950 - }, - { - "epoch": 0.8951250801796024, - "grad_norm": 0.908667367564301, - "learning_rate": 6.626319467587106e-06, - "loss": 0.6602, - "step": 13955 - }, - { - "epoch": 0.895445798588839, - "grad_norm": 1.0327055092345554, - "learning_rate": 6.586303673504412e-06, - "loss": 0.6192, - "step": 13960 - }, - { - "epoch": 0.8957665169980756, - "grad_norm": 0.7913439515419154, - "learning_rate": 6.5464049568075615e-06, - "loss": 0.6883, - "step": 13965 - }, - { - "epoch": 0.8960872354073124, - "grad_norm": 0.9249759944838365, - "learning_rate": 6.506623367502418e-06, - "loss": 0.7207, - "step": 13970 - }, - { - "epoch": 0.896407953816549, - "grad_norm": 0.6185623923439777, - "learning_rate": 6.4669589554481325e-06, - "loss": 0.7935, - "step": 13975 - }, - { - "epoch": 0.8967286722257858, - "grad_norm": 0.9047502038967159, - "learning_rate": 6.4274117703569615e-06, - "loss": 0.523, - "step": 13980 - }, - { - "epoch": 0.8970493906350224, - "grad_norm": 0.5862791588591175, - "learning_rate": 6.387981861794212e-06, - "loss": 0.5767, - "step": 13985 - }, - { - "epoch": 0.8973701090442592, - "grad_norm": 0.9286416832372187, - "learning_rate": 6.348669279178277e-06, - "loss": 0.5952, - "step": 13990 - }, - { - "epoch": 0.8976908274534958, - "grad_norm": 0.9632286005822661, - "learning_rate": 6.309474071780408e-06, - "loss": 0.7512, - "step": 13995 - }, - { - "epoch": 0.8980115458627326, - "grad_norm": 0.6713818773459586, - "learning_rate": 6.2703962887248444e-06, - "loss": 0.8033, - "step": 14000 - }, - { - "epoch": 0.8983322642719692, - "grad_norm": 0.43098921146350616, - "learning_rate": 6.2314359789885756e-06, - "loss": 0.5506, - "step": 14005 - }, - { - "epoch": 0.8986529826812059, - "grad_norm": 1.029998963102262, - "learning_rate": 6.192593191401396e-06, - "loss": 0.6528, - "step": 14010 - }, - { - "epoch": 0.8989737010904426, - "grad_norm": 0.9428065435910548, - "learning_rate": 6.153867974645833e-06, - "loss": 0.6822, - "step": 14015 - }, - { - "epoch": 0.8992944194996793, - "grad_norm": 0.6275896637114994, - "learning_rate": 6.115260377257004e-06, - "loss": 0.556, - "step": 14020 - }, - { - "epoch": 0.899615137908916, - "grad_norm": 0.6094084941175278, - "learning_rate": 6.076770447622615e-06, - "loss": 0.5094, - "step": 14025 - }, - { - "epoch": 0.8999358563181526, - "grad_norm": 0.7526848860794296, - "learning_rate": 6.038398233982989e-06, - "loss": 0.678, - "step": 14030 - }, - { - "epoch": 0.9002565747273894, - "grad_norm": 0.5704944797751071, - "learning_rate": 6.000143784430756e-06, - "loss": 0.6822, - "step": 14035 - }, - { - "epoch": 0.900577293136626, - "grad_norm": 0.7525424440388754, - "learning_rate": 5.962007146911109e-06, - "loss": 0.7008, - "step": 14040 - }, - { - "epoch": 0.9008980115458627, - "grad_norm": 0.961888964093016, - "learning_rate": 5.923988369221456e-06, - "loss": 0.6805, - "step": 14045 - }, - { - "epoch": 0.9012187299550994, - "grad_norm": 0.8861288123930613, - "learning_rate": 5.886087499011594e-06, - "loss": 0.758, - "step": 14050 - }, - { - "epoch": 0.9015394483643361, - "grad_norm": 0.8032927310909407, - "learning_rate": 5.8483045837834705e-06, - "loss": 0.6607, - "step": 14055 - }, - { - "epoch": 0.9018601667735728, - "grad_norm": 0.8087075039644414, - "learning_rate": 5.810639670891216e-06, - "loss": 0.7027, - "step": 14060 - }, - { - "epoch": 0.9021808851828095, - "grad_norm": 0.8539578913251452, - "learning_rate": 5.773092807541092e-06, - "loss": 0.5801, - "step": 14065 - }, - { - "epoch": 0.9025016035920462, - "grad_norm": 0.7756452243315396, - "learning_rate": 5.735664040791367e-06, - "loss": 0.7103, - "step": 14070 - }, - { - "epoch": 0.9028223220012829, - "grad_norm": 1.222999060061691, - "learning_rate": 5.698353417552327e-06, - "loss": 0.6017, - "step": 14075 - }, - { - "epoch": 0.9031430404105195, - "grad_norm": 0.7983696291416744, - "learning_rate": 5.661160984586178e-06, - "loss": 0.6049, - "step": 14080 - }, - { - "epoch": 0.9034637588197563, - "grad_norm": 0.6490907871037943, - "learning_rate": 5.624086788506977e-06, - "loss": 0.5526, - "step": 14085 - }, - { - "epoch": 0.9037844772289929, - "grad_norm": 0.8508537947980717, - "learning_rate": 5.587130875780633e-06, - "loss": 0.7109, - "step": 14090 - }, - { - "epoch": 0.9041051956382297, - "grad_norm": 1.2192033565455072, - "learning_rate": 5.550293292724762e-06, - "loss": 0.7051, - "step": 14095 - }, - { - "epoch": 0.9044259140474663, - "grad_norm": 0.6587693265105345, - "learning_rate": 5.51357408550871e-06, - "loss": 0.6174, - "step": 14100 - }, - { - "epoch": 0.9047466324567031, - "grad_norm": 0.49748231304384327, - "learning_rate": 5.47697330015341e-06, - "loss": 0.5172, - "step": 14105 - }, - { - "epoch": 0.9050673508659397, - "grad_norm": 0.7884068967557873, - "learning_rate": 5.440490982531465e-06, - "loss": 0.6816, - "step": 14110 - }, - { - "epoch": 0.9053880692751763, - "grad_norm": 1.1069363022735697, - "learning_rate": 5.404127178366902e-06, - "loss": 0.6431, - "step": 14115 - }, - { - "epoch": 0.9057087876844131, - "grad_norm": 0.6826869882965035, - "learning_rate": 5.367881933235275e-06, - "loss": 0.5101, - "step": 14120 - }, - { - "epoch": 0.9060295060936497, - "grad_norm": 0.7273219199634979, - "learning_rate": 5.331755292563523e-06, - "loss": 0.637, - "step": 14125 - }, - { - "epoch": 0.9063502245028865, - "grad_norm": 0.8472794210673035, - "learning_rate": 5.295747301629917e-06, - "loss": 0.6022, - "step": 14130 - }, - { - "epoch": 0.9066709429121231, - "grad_norm": 0.8444956562341863, - "learning_rate": 5.259858005564089e-06, - "loss": 0.5334, - "step": 14135 - }, - { - "epoch": 0.9069916613213599, - "grad_norm": 0.6732860335353007, - "learning_rate": 5.224087449346826e-06, - "loss": 0.7202, - "step": 14140 - }, - { - "epoch": 0.9073123797305965, - "grad_norm": 0.9666322387828169, - "learning_rate": 5.188435677810133e-06, - "loss": 0.7559, - "step": 14145 - }, - { - "epoch": 0.9076330981398332, - "grad_norm": 0.9869737805273263, - "learning_rate": 5.152902735637166e-06, - "loss": 0.678, - "step": 14150 - }, - { - "epoch": 0.9079538165490699, - "grad_norm": 0.5732371579819191, - "learning_rate": 5.1174886673620805e-06, - "loss": 0.5993, - "step": 14155 - }, - { - "epoch": 0.9082745349583066, - "grad_norm": 0.8654988560178682, - "learning_rate": 5.082193517370127e-06, - "loss": 0.6813, - "step": 14160 - }, - { - "epoch": 0.9085952533675433, - "grad_norm": 1.0265921567687237, - "learning_rate": 5.047017329897463e-06, - "loss": 0.6737, - "step": 14165 - }, - { - "epoch": 0.90891597177678, - "grad_norm": 0.8248152748170539, - "learning_rate": 5.011960149031137e-06, - "loss": 0.5857, - "step": 14170 - }, - { - "epoch": 0.9092366901860167, - "grad_norm": 0.7956729093404309, - "learning_rate": 4.977022018709088e-06, - "loss": 0.6643, - "step": 14175 - }, - { - "epoch": 0.9095574085952534, - "grad_norm": 1.7578923486790687, - "learning_rate": 4.94220298271999e-06, - "loss": 0.7325, - "step": 14180 - }, - { - "epoch": 0.9098781270044901, - "grad_norm": 0.8908535862934428, - "learning_rate": 4.907503084703335e-06, - "loss": 0.7003, - "step": 14185 - }, - { - "epoch": 0.9101988454137268, - "grad_norm": 0.5989152273082363, - "learning_rate": 4.872922368149213e-06, - "loss": 0.6494, - "step": 14190 - }, - { - "epoch": 0.9105195638229634, - "grad_norm": 1.1947032610011639, - "learning_rate": 4.838460876398365e-06, - "loss": 0.712, - "step": 14195 - }, - { - "epoch": 0.9108402822322001, - "grad_norm": 0.8008113658697428, - "learning_rate": 4.804118652642164e-06, - "loss": 0.6607, - "step": 14200 - }, - { - "epoch": 0.9111610006414368, - "grad_norm": 0.9092451384048743, - "learning_rate": 4.769895739922403e-06, - "loss": 0.532, - "step": 14205 - }, - { - "epoch": 0.9114817190506735, - "grad_norm": 0.9642837868126427, - "learning_rate": 4.7357921811314374e-06, - "loss": 0.5875, - "step": 14210 - }, - { - "epoch": 0.9118024374599102, - "grad_norm": 1.2120942953279068, - "learning_rate": 4.701808019011966e-06, - "loss": 0.644, - "step": 14215 - }, - { - "epoch": 0.9121231558691469, - "grad_norm": 0.7731779356318255, - "learning_rate": 4.66794329615704e-06, - "loss": 0.7528, - "step": 14220 - }, - { - "epoch": 0.9124438742783836, - "grad_norm": 0.8452499221199778, - "learning_rate": 4.634198055010097e-06, - "loss": 0.7321, - "step": 14225 - }, - { - "epoch": 0.9127645926876202, - "grad_norm": 0.7660682093886364, - "learning_rate": 4.600572337864739e-06, - "loss": 0.58, - "step": 14230 - }, - { - "epoch": 0.913085311096857, - "grad_norm": 0.919577008788518, - "learning_rate": 4.567066186864799e-06, - "loss": 0.5792, - "step": 14235 - }, - { - "epoch": 0.9134060295060936, - "grad_norm": 0.7240560589023852, - "learning_rate": 4.53367964400423e-06, - "loss": 0.6382, - "step": 14240 - }, - { - "epoch": 0.9137267479153304, - "grad_norm": 0.9404018211860803, - "learning_rate": 4.500412751127148e-06, - "loss": 0.6983, - "step": 14245 - }, - { - "epoch": 0.914047466324567, - "grad_norm": 0.9226737613175637, - "learning_rate": 4.467265549927646e-06, - "loss": 0.7371, - "step": 14250 - }, - { - "epoch": 0.9143681847338038, - "grad_norm": 0.8674349211052579, - "learning_rate": 4.434238081949793e-06, - "loss": 0.715, - "step": 14255 - }, - { - "epoch": 0.9146889031430404, - "grad_norm": 1.0086095744064745, - "learning_rate": 4.401330388587655e-06, - "loss": 0.6359, - "step": 14260 - }, - { - "epoch": 0.915009621552277, - "grad_norm": 0.7399699212191572, - "learning_rate": 4.368542511085127e-06, - "loss": 0.6856, - "step": 14265 - }, - { - "epoch": 0.9153303399615138, - "grad_norm": 0.7837381511015072, - "learning_rate": 4.3358744905359845e-06, - "loss": 0.5355, - "step": 14270 - }, - { - "epoch": 0.9156510583707504, - "grad_norm": 0.7456554819958952, - "learning_rate": 4.303326367883742e-06, - "loss": 0.6506, - "step": 14275 - }, - { - "epoch": 0.9159717767799872, - "grad_norm": 0.7504015595604561, - "learning_rate": 4.2708981839216344e-06, - "loss": 0.7347, - "step": 14280 - }, - { - "epoch": 0.9162924951892238, - "grad_norm": 0.7872333950088334, - "learning_rate": 4.238589979292651e-06, - "loss": 0.7448, - "step": 14285 - }, - { - "epoch": 0.9166132135984606, - "grad_norm": 0.848658406503067, - "learning_rate": 4.206401794489301e-06, - "loss": 0.755, - "step": 14290 - }, - { - "epoch": 0.9169339320076972, - "grad_norm": 0.7157699993484576, - "learning_rate": 4.1743336698537805e-06, - "loss": 0.6877, - "step": 14295 - }, - { - "epoch": 0.9172546504169339, - "grad_norm": 0.920746793540226, - "learning_rate": 4.142385645577707e-06, - "loss": 0.6888, - "step": 14300 - }, - { - "epoch": 0.9175753688261706, - "grad_norm": 0.6845975702530432, - "learning_rate": 4.110557761702249e-06, - "loss": 0.754, - "step": 14305 - }, - { - "epoch": 0.9178960872354073, - "grad_norm": 1.1511196348448594, - "learning_rate": 4.078850058117978e-06, - "loss": 0.616, - "step": 14310 - }, - { - "epoch": 0.918216805644644, - "grad_norm": 0.6109287776036132, - "learning_rate": 4.0472625745648144e-06, - "loss": 0.5921, - "step": 14315 - }, - { - "epoch": 0.9185375240538807, - "grad_norm": 0.5799180489438701, - "learning_rate": 4.015795350632068e-06, - "loss": 0.6258, - "step": 14320 - }, - { - "epoch": 0.9188582424631174, - "grad_norm": 1.0588410053870487, - "learning_rate": 3.984448425758236e-06, - "loss": 0.6294, - "step": 14325 - }, - { - "epoch": 0.9191789608723541, - "grad_norm": 0.9656078510689677, - "learning_rate": 3.953221839231125e-06, - "loss": 0.7232, - "step": 14330 - }, - { - "epoch": 0.9194996792815907, - "grad_norm": 0.7627108781290338, - "learning_rate": 3.922115630187684e-06, - "loss": 0.7192, - "step": 14335 - }, - { - "epoch": 0.9198203976908275, - "grad_norm": 0.9118690797348065, - "learning_rate": 3.8911298376139604e-06, - "loss": 0.7131, - "step": 14340 - }, - { - "epoch": 0.9201411161000641, - "grad_norm": 0.6032629064325823, - "learning_rate": 3.860264500345145e-06, - "loss": 0.701, - "step": 14345 - }, - { - "epoch": 0.9204618345093009, - "grad_norm": 0.7887702725778526, - "learning_rate": 3.829519657065417e-06, - "loss": 0.4822, - "step": 14350 - }, - { - "epoch": 0.9207825529185375, - "grad_norm": 0.7138715411195988, - "learning_rate": 3.798895346307929e-06, - "loss": 0.6301, - "step": 14355 - }, - { - "epoch": 0.9211032713277743, - "grad_norm": 0.9024603895099268, - "learning_rate": 3.768391606454824e-06, - "loss": 0.7522, - "step": 14360 - }, - { - "epoch": 0.9214239897370109, - "grad_norm": 1.0280776294268867, - "learning_rate": 3.7380084757370427e-06, - "loss": 0.5146, - "step": 14365 - }, - { - "epoch": 0.9217447081462476, - "grad_norm": 1.2746584097883105, - "learning_rate": 3.707745992234446e-06, - "loss": 0.6437, - "step": 14370 - }, - { - "epoch": 0.9220654265554843, - "grad_norm": 0.7420480886663697, - "learning_rate": 3.677604193875639e-06, - "loss": 0.7434, - "step": 14375 - }, - { - "epoch": 0.9223861449647209, - "grad_norm": 0.7760260552269074, - "learning_rate": 3.647583118438003e-06, - "loss": 0.7314, - "step": 14380 - }, - { - "epoch": 0.9227068633739577, - "grad_norm": 0.5526340026602907, - "learning_rate": 3.617682803547573e-06, - "loss": 0.6684, - "step": 14385 - }, - { - "epoch": 0.9230275817831943, - "grad_norm": 0.8601770168248275, - "learning_rate": 3.587903286679051e-06, - "loss": 0.7048, - "step": 14390 - }, - { - "epoch": 0.9233483001924311, - "grad_norm": 0.768831329847095, - "learning_rate": 3.5582446051557694e-06, - "loss": 0.7109, - "step": 14395 - }, - { - "epoch": 0.9236690186016677, - "grad_norm": 0.7061972963645736, - "learning_rate": 3.5287067961495613e-06, - "loss": 0.7226, - "step": 14400 - }, - { - "epoch": 0.9239897370109045, - "grad_norm": 0.9718492483949128, - "learning_rate": 3.4992898966808128e-06, - "loss": 0.6096, - "step": 14405 - }, - { - "epoch": 0.9243104554201411, - "grad_norm": 0.6613307717148478, - "learning_rate": 3.4699939436183548e-06, - "loss": 0.6359, - "step": 14410 - }, - { - "epoch": 0.9246311738293778, - "grad_norm": 0.48853477777273874, - "learning_rate": 3.440818973679416e-06, - "loss": 0.5916, - "step": 14415 - }, - { - "epoch": 0.9249518922386145, - "grad_norm": 2.8872548788201846, - "learning_rate": 3.411765023429625e-06, - "loss": 0.6681, - "step": 14420 - }, - { - "epoch": 0.9252726106478512, - "grad_norm": 0.8605678505533776, - "learning_rate": 3.382832129282909e-06, - "loss": 0.7061, - "step": 14425 - }, - { - "epoch": 0.9255933290570879, - "grad_norm": 0.8152777611420922, - "learning_rate": 3.354020327501506e-06, - "loss": 0.7016, - "step": 14430 - }, - { - "epoch": 0.9259140474663246, - "grad_norm": 0.5720911855352934, - "learning_rate": 3.32532965419583e-06, - "loss": 0.6065, - "step": 14435 - }, - { - "epoch": 0.9262347658755613, - "grad_norm": 0.5729769215244488, - "learning_rate": 3.29676014532454e-06, - "loss": 0.6385, - "step": 14440 - }, - { - "epoch": 0.926555484284798, - "grad_norm": 0.7971168307254297, - "learning_rate": 3.2683118366944153e-06, - "loss": 0.7482, - "step": 14445 - }, - { - "epoch": 0.9268762026940346, - "grad_norm": 0.8082127626355636, - "learning_rate": 3.2399847639603132e-06, - "loss": 0.5749, - "step": 14450 - }, - { - "epoch": 0.9271969211032713, - "grad_norm": 0.986366425048449, - "learning_rate": 3.211778962625178e-06, - "loss": 0.814, - "step": 14455 - }, - { - "epoch": 0.927517639512508, - "grad_norm": 0.7974470102591675, - "learning_rate": 3.1836944680399215e-06, - "loss": 0.6845, - "step": 14460 - }, - { - "epoch": 0.9278383579217447, - "grad_norm": 0.9030012061093406, - "learning_rate": 3.155731315403465e-06, - "loss": 0.7462, - "step": 14465 - }, - { - "epoch": 0.9281590763309814, - "grad_norm": 0.8114451125831404, - "learning_rate": 3.1278895397626295e-06, - "loss": 0.7289, - "step": 14470 - }, - { - "epoch": 0.9284797947402181, - "grad_norm": 0.7580184369514217, - "learning_rate": 3.10016917601208e-06, - "loss": 0.8204, - "step": 14475 - }, - { - "epoch": 0.9288005131494548, - "grad_norm": 0.9028047332034969, - "learning_rate": 3.0725702588943693e-06, - "loss": 0.6502, - "step": 14480 - }, - { - "epoch": 0.9291212315586914, - "grad_norm": 0.5328705285389578, - "learning_rate": 3.0450928229997956e-06, - "loss": 0.6282, - "step": 14485 - }, - { - "epoch": 0.9294419499679282, - "grad_norm": 0.676301284723922, - "learning_rate": 3.0177369027664324e-06, - "loss": 0.6152, - "step": 14490 - }, - { - "epoch": 0.9297626683771648, - "grad_norm": 0.6911219963447808, - "learning_rate": 2.990502532480033e-06, - "loss": 0.7075, - "step": 14495 - }, - { - "epoch": 0.9300833867864016, - "grad_norm": 0.8158597361321028, - "learning_rate": 2.9633897462740035e-06, - "loss": 0.5278, - "step": 14500 - }, - { - "epoch": 0.9304041051956382, - "grad_norm": 0.8885816510360459, - "learning_rate": 2.936398578129407e-06, - "loss": 0.7842, - "step": 14505 - }, - { - "epoch": 0.930724823604875, - "grad_norm": 0.9090481734964072, - "learning_rate": 2.909529061874816e-06, - "loss": 0.6346, - "step": 14510 - }, - { - "epoch": 0.9310455420141116, - "grad_norm": 0.6271937382541385, - "learning_rate": 2.8827812311864044e-06, - "loss": 0.4965, - "step": 14515 - }, - { - "epoch": 0.9313662604233482, - "grad_norm": 0.8626519977341744, - "learning_rate": 2.856155119587789e-06, - "loss": 0.6916, - "step": 14520 - }, - { - "epoch": 0.931686978832585, - "grad_norm": 1.2378284751762905, - "learning_rate": 2.829650760450031e-06, - "loss": 0.6573, - "step": 14525 - }, - { - "epoch": 0.9320076972418216, - "grad_norm": 1.2677367998396853, - "learning_rate": 2.8032681869916366e-06, - "loss": 0.5755, - "step": 14530 - }, - { - "epoch": 0.9323284156510584, - "grad_norm": 0.5109336107393835, - "learning_rate": 2.7770074322784334e-06, - "loss": 0.5688, - "step": 14535 - }, - { - "epoch": 0.932649134060295, - "grad_norm": 0.7042004857736548, - "learning_rate": 2.7508685292235937e-06, - "loss": 0.7213, - "step": 14540 - }, - { - "epoch": 0.9329698524695318, - "grad_norm": 0.7309101698002372, - "learning_rate": 2.7248515105875673e-06, - "loss": 0.6667, - "step": 14545 - }, - { - "epoch": 0.9332905708787684, - "grad_norm": 0.6908743464424493, - "learning_rate": 2.6989564089780263e-06, - "loss": 0.6156, - "step": 14550 - }, - { - "epoch": 0.9336112892880052, - "grad_norm": 0.9549405672325, - "learning_rate": 2.673183256849876e-06, - "loss": 0.5705, - "step": 14555 - }, - { - "epoch": 0.9339320076972418, - "grad_norm": 0.8108069141144446, - "learning_rate": 2.6475320865051444e-06, - "loss": 0.6301, - "step": 14560 - }, - { - "epoch": 0.9342527261064785, - "grad_norm": 0.7542934406058188, - "learning_rate": 2.6220029300930037e-06, - "loss": 0.6081, - "step": 14565 - }, - { - "epoch": 0.9345734445157152, - "grad_norm": 0.8121008842739622, - "learning_rate": 2.5965958196096706e-06, - "loss": 0.7333, - "step": 14570 - }, - { - "epoch": 0.9348941629249519, - "grad_norm": 0.7044098978011041, - "learning_rate": 2.571310786898451e-06, - "loss": 0.6786, - "step": 14575 - }, - { - "epoch": 0.9352148813341886, - "grad_norm": 0.669296953567193, - "learning_rate": 2.5461478636496062e-06, - "loss": 0.6451, - "step": 14580 - }, - { - "epoch": 0.9355355997434253, - "grad_norm": 1.0134964970782947, - "learning_rate": 2.5211070814003536e-06, - "loss": 0.7071, - "step": 14585 - }, - { - "epoch": 0.935856318152662, - "grad_norm": 0.8079966960225432, - "learning_rate": 2.496188471534866e-06, - "loss": 0.6494, - "step": 14590 - }, - { - "epoch": 0.9361770365618987, - "grad_norm": 0.7980284916096867, - "learning_rate": 2.4713920652841394e-06, - "loss": 0.6966, - "step": 14595 - }, - { - "epoch": 0.9364977549711353, - "grad_norm": 1.4182606806536633, - "learning_rate": 2.4467178937260692e-06, - "loss": 0.5106, - "step": 14600 - }, - { - "epoch": 0.936818473380372, - "grad_norm": 1.1450293247030983, - "learning_rate": 2.4221659877853074e-06, - "loss": 0.6734, - "step": 14605 - }, - { - "epoch": 0.9371391917896087, - "grad_norm": 0.7955638461295016, - "learning_rate": 2.397736378233284e-06, - "loss": 0.725, - "step": 14610 - }, - { - "epoch": 0.9374599101988454, - "grad_norm": 0.7397520509486079, - "learning_rate": 2.3734290956881734e-06, - "loss": 0.6244, - "step": 14615 - }, - { - "epoch": 0.9377806286080821, - "grad_norm": 0.9732579754101209, - "learning_rate": 2.349244170614773e-06, - "loss": 0.6057, - "step": 14620 - }, - { - "epoch": 0.9381013470173188, - "grad_norm": 1.406456086581141, - "learning_rate": 2.3251816333246025e-06, - "loss": 0.7182, - "step": 14625 - }, - { - "epoch": 0.9384220654265555, - "grad_norm": 0.8952424347381697, - "learning_rate": 2.301241513975749e-06, - "loss": 0.7598, - "step": 14630 - }, - { - "epoch": 0.9387427838357921, - "grad_norm": 1.0218439096331748, - "learning_rate": 2.2774238425728677e-06, - "loss": 0.7246, - "step": 14635 - }, - { - "epoch": 0.9390635022450289, - "grad_norm": 0.7685781373474748, - "learning_rate": 2.2537286489671573e-06, - "loss": 0.5579, - "step": 14640 - }, - { - "epoch": 0.9393842206542655, - "grad_norm": 0.7182539188714678, - "learning_rate": 2.2301559628563062e-06, - "loss": 0.4816, - "step": 14645 - }, - { - "epoch": 0.9397049390635023, - "grad_norm": 0.7271338524133633, - "learning_rate": 2.206705813784471e-06, - "loss": 0.7117, - "step": 14650 - }, - { - "epoch": 0.9400256574727389, - "grad_norm": 0.9142892488291297, - "learning_rate": 2.18337823114223e-06, - "loss": 0.5035, - "step": 14655 - }, - { - "epoch": 0.9403463758819757, - "grad_norm": 1.1230106908678623, - "learning_rate": 2.160173244166541e-06, - "loss": 0.5692, - "step": 14660 - }, - { - "epoch": 0.9406670942912123, - "grad_norm": 0.40796226780607736, - "learning_rate": 2.1370908819407174e-06, - "loss": 0.5771, - "step": 14665 - }, - { - "epoch": 0.940987812700449, - "grad_norm": 0.9481608724103522, - "learning_rate": 2.1141311733943626e-06, - "loss": 0.5029, - "step": 14670 - }, - { - "epoch": 0.9413085311096857, - "grad_norm": 1.0000026556770782, - "learning_rate": 2.09129414730338e-06, - "loss": 0.6156, - "step": 14675 - }, - { - "epoch": 0.9416292495189224, - "grad_norm": 0.521971426032197, - "learning_rate": 2.0685798322899073e-06, - "loss": 0.6233, - "step": 14680 - }, - { - "epoch": 0.9419499679281591, - "grad_norm": 0.555113548672577, - "learning_rate": 2.045988256822273e-06, - "loss": 0.6226, - "step": 14685 - }, - { - "epoch": 0.9422706863373957, - "grad_norm": 1.0940970203612415, - "learning_rate": 2.0235194492149832e-06, - "loss": 0.6603, - "step": 14690 - }, - { - "epoch": 0.9425914047466325, - "grad_norm": 1.0787803604629624, - "learning_rate": 2.0011734376286896e-06, - "loss": 0.6915, - "step": 14695 - }, - { - "epoch": 0.9429121231558691, - "grad_norm": 0.603441598329727, - "learning_rate": 1.978950250070111e-06, - "loss": 0.7826, - "step": 14700 - }, - { - "epoch": 0.9432328415651058, - "grad_norm": 1.1933790532010597, - "learning_rate": 1.9568499143920336e-06, - "loss": 0.6277, - "step": 14705 - }, - { - "epoch": 0.9435535599743425, - "grad_norm": 0.5764914897220961, - "learning_rate": 1.9348724582933133e-06, - "loss": 0.6875, - "step": 14710 - }, - { - "epoch": 0.9438742783835792, - "grad_norm": 0.9696889870454197, - "learning_rate": 1.9130179093187484e-06, - "loss": 0.8159, - "step": 14715 - }, - { - "epoch": 0.9441949967928159, - "grad_norm": 1.174884517440042, - "learning_rate": 1.891286294859107e-06, - "loss": 0.7811, - "step": 14720 - }, - { - "epoch": 0.9445157152020526, - "grad_norm": 0.7432254800663841, - "learning_rate": 1.869677642151102e-06, - "loss": 0.8169, - "step": 14725 - }, - { - "epoch": 0.9448364336112893, - "grad_norm": 1.3451481683596176, - "learning_rate": 1.8481919782773138e-06, - "loss": 0.6386, - "step": 14730 - }, - { - "epoch": 0.945157152020526, - "grad_norm": 0.8999549303642768, - "learning_rate": 1.82682933016618e-06, - "loss": 0.6578, - "step": 14735 - }, - { - "epoch": 0.9454778704297627, - "grad_norm": 0.7535938047620351, - "learning_rate": 1.8055897245919718e-06, - "loss": 0.6345, - "step": 14740 - }, - { - "epoch": 0.9457985888389994, - "grad_norm": 0.9031933438522918, - "learning_rate": 1.78447318817474e-06, - "loss": 0.6979, - "step": 14745 - }, - { - "epoch": 0.946119307248236, - "grad_norm": 0.5909234139284275, - "learning_rate": 1.7634797473802922e-06, - "loss": 0.5283, - "step": 14750 - }, - { - "epoch": 0.9464400256574728, - "grad_norm": 0.7478929356403822, - "learning_rate": 1.7426094285201478e-06, - "loss": 0.7548, - "step": 14755 - }, - { - "epoch": 0.9467607440667094, - "grad_norm": 0.7939890902510196, - "learning_rate": 1.7218622577515496e-06, - "loss": 0.7005, - "step": 14760 - }, - { - "epoch": 0.9470814624759462, - "grad_norm": 0.6058878555015041, - "learning_rate": 1.7012382610773315e-06, - "loss": 0.6766, - "step": 14765 - }, - { - "epoch": 0.9474021808851828, - "grad_norm": 0.848486027790844, - "learning_rate": 1.6807374643460272e-06, - "loss": 0.7677, - "step": 14770 - }, - { - "epoch": 0.9477228992944196, - "grad_norm": 0.7595303087988711, - "learning_rate": 1.6603598932517061e-06, - "loss": 0.7407, - "step": 14775 - }, - { - "epoch": 0.9480436177036562, - "grad_norm": 0.7579789167134414, - "learning_rate": 1.6401055733340164e-06, - "loss": 0.669, - "step": 14780 - }, - { - "epoch": 0.9483643361128928, - "grad_norm": 1.2648466067379471, - "learning_rate": 1.61997452997813e-06, - "loss": 0.6469, - "step": 14785 - }, - { - "epoch": 0.9486850545221296, - "grad_norm": 0.797026657881511, - "learning_rate": 1.5999667884147196e-06, - "loss": 0.588, - "step": 14790 - }, - { - "epoch": 0.9490057729313662, - "grad_norm": 0.915174796254417, - "learning_rate": 1.5800823737199156e-06, - "loss": 0.7036, - "step": 14795 - }, - { - "epoch": 0.949326491340603, - "grad_norm": 0.7014564001359544, - "learning_rate": 1.5603213108152715e-06, - "loss": 0.604, - "step": 14800 - }, - { - "epoch": 0.9496472097498396, - "grad_norm": 1.0673933698941918, - "learning_rate": 1.5406836244677646e-06, - "loss": 0.6767, - "step": 14805 - }, - { - "epoch": 0.9499679281590764, - "grad_norm": 0.5974581758846627, - "learning_rate": 1.5211693392897185e-06, - "loss": 0.6277, - "step": 14810 - }, - { - "epoch": 0.950288646568313, - "grad_norm": 0.76752354413579, - "learning_rate": 1.5017784797388024e-06, - "loss": 0.6575, - "step": 14815 - }, - { - "epoch": 0.9506093649775497, - "grad_norm": 0.6302709486833972, - "learning_rate": 1.482511070118009e-06, - "loss": 0.5797, - "step": 14820 - }, - { - "epoch": 0.9509300833867864, - "grad_norm": 0.6408626471147529, - "learning_rate": 1.4633671345755884e-06, - "loss": 0.6938, - "step": 14825 - }, - { - "epoch": 0.9512508017960231, - "grad_norm": 1.147885938640683, - "learning_rate": 1.4443466971050367e-06, - "loss": 0.6631, - "step": 14830 - }, - { - "epoch": 0.9515715202052598, - "grad_norm": 1.2090975514637632, - "learning_rate": 1.4254497815450852e-06, - "loss": 0.5987, - "step": 14835 - }, - { - "epoch": 0.9518922386144965, - "grad_norm": 1.4462854589201612, - "learning_rate": 1.4066764115796328e-06, - "loss": 0.5496, - "step": 14840 - }, - { - "epoch": 0.9522129570237332, - "grad_norm": 2.2267736323891603, - "learning_rate": 1.3880266107377581e-06, - "loss": 0.6236, - "step": 14845 - }, - { - "epoch": 0.9525336754329699, - "grad_norm": 0.9767897268690148, - "learning_rate": 1.369500402393653e-06, - "loss": 0.6737, - "step": 14850 - }, - { - "epoch": 0.9528543938422065, - "grad_norm": 0.6597022287518994, - "learning_rate": 1.3510978097665994e-06, - "loss": 0.6009, - "step": 14855 - }, - { - "epoch": 0.9531751122514432, - "grad_norm": 0.8352297747099178, - "learning_rate": 1.332818855920981e-06, - "loss": 0.6206, - "step": 14860 - }, - { - "epoch": 0.9534958306606799, - "grad_norm": 0.3398468741414835, - "learning_rate": 1.314663563766172e-06, - "loss": 0.745, - "step": 14865 - }, - { - "epoch": 0.9538165490699166, - "grad_norm": 0.6650997138673455, - "learning_rate": 1.2966319560566264e-06, - "loss": 0.5189, - "step": 14870 - }, - { - "epoch": 0.9541372674791533, - "grad_norm": 0.8495035997423334, - "learning_rate": 1.2787240553917223e-06, - "loss": 0.5352, - "step": 14875 - }, - { - "epoch": 0.95445798588839, - "grad_norm": 0.6804679950864659, - "learning_rate": 1.2609398842158171e-06, - "loss": 0.5298, - "step": 14880 - }, - { - "epoch": 0.9547787042976267, - "grad_norm": 0.9011394842975389, - "learning_rate": 1.2432794648181922e-06, - "loss": 0.6416, - "step": 14885 - }, - { - "epoch": 0.9550994227068633, - "grad_norm": 0.8017624405517991, - "learning_rate": 1.225742819333031e-06, - "loss": 0.7683, - "step": 14890 - }, - { - "epoch": 0.9554201411161001, - "grad_norm": 1.0189493989237226, - "learning_rate": 1.2083299697393968e-06, - "loss": 0.6712, - "step": 14895 - }, - { - "epoch": 0.9557408595253367, - "grad_norm": 0.8632861800860692, - "learning_rate": 1.1910409378611653e-06, - "loss": 0.6677, - "step": 14900 - }, - { - "epoch": 0.9560615779345735, - "grad_norm": 0.8271377018484679, - "learning_rate": 1.17387574536707e-06, - "loss": 0.8435, - "step": 14905 - }, - { - "epoch": 0.9563822963438101, - "grad_norm": 1.090763241775662, - "learning_rate": 1.1568344137706133e-06, - "loss": 0.751, - "step": 14910 - }, - { - "epoch": 0.9567030147530469, - "grad_norm": 0.8533558406500173, - "learning_rate": 1.1399169644300323e-06, - "loss": 0.7627, - "step": 14915 - }, - { - "epoch": 0.9570237331622835, - "grad_norm": 0.7969691903367916, - "learning_rate": 1.1231234185483663e-06, - "loss": 0.6599, - "step": 14920 - }, - { - "epoch": 0.9573444515715203, - "grad_norm": 0.6892919393359965, - "learning_rate": 1.1064537971733124e-06, - "loss": 0.6862, - "step": 14925 - }, - { - "epoch": 0.9576651699807569, - "grad_norm": 0.8464857234158932, - "learning_rate": 1.0899081211972584e-06, - "loss": 0.8058, - "step": 14930 - }, - { - "epoch": 0.9579858883899935, - "grad_norm": 0.5019234017303561, - "learning_rate": 1.0734864113572606e-06, - "loss": 0.684, - "step": 14935 - }, - { - "epoch": 0.9583066067992303, - "grad_norm": 0.7995354303661617, - "learning_rate": 1.057188688234989e-06, - "loss": 0.577, - "step": 14940 - }, - { - "epoch": 0.9586273252084669, - "grad_norm": 1.053084388323032, - "learning_rate": 1.0410149722567376e-06, - "loss": 0.6179, - "step": 14945 - }, - { - "epoch": 0.9589480436177037, - "grad_norm": 0.9473025528524849, - "learning_rate": 1.0249652836933688e-06, - "loss": 0.6448, - "step": 14950 - }, - { - "epoch": 0.9592687620269403, - "grad_norm": 0.8867828551638389, - "learning_rate": 1.0090396426603143e-06, - "loss": 0.7081, - "step": 14955 - }, - { - "epoch": 0.9595894804361771, - "grad_norm": 0.579392165704179, - "learning_rate": 9.93238069117508e-07, - "loss": 0.6266, - "step": 14960 - }, - { - "epoch": 0.9599101988454137, - "grad_norm": 1.3419589121931794, - "learning_rate": 9.775605828693969e-07, - "loss": 0.6619, - "step": 14965 - }, - { - "epoch": 0.9602309172546504, - "grad_norm": 0.9125359836127329, - "learning_rate": 9.620072035649075e-07, - "loss": 0.6073, - "step": 14970 - }, - { - "epoch": 0.9605516356638871, - "grad_norm": 1.0860000796878035, - "learning_rate": 9.465779506974359e-07, - "loss": 0.5401, - "step": 14975 - }, - { - "epoch": 0.9608723540731238, - "grad_norm": 1.171824681775004, - "learning_rate": 9.312728436047913e-07, - "loss": 0.5753, - "step": 14980 - }, - { - "epoch": 0.9611930724823605, - "grad_norm": 0.5643018528812354, - "learning_rate": 9.160919014691848e-07, - "loss": 0.5638, - "step": 14985 - }, - { - "epoch": 0.9615137908915972, - "grad_norm": 0.9034235555165777, - "learning_rate": 9.010351433172304e-07, - "loss": 0.6334, - "step": 14990 - }, - { - "epoch": 0.9618345093008339, - "grad_norm": 1.1839905897068703, - "learning_rate": 8.86102588019877e-07, - "loss": 0.7153, - "step": 14995 - }, - { - "epoch": 0.9621552277100706, - "grad_norm": 0.8180578726272846, - "learning_rate": 8.712942542923986e-07, - "loss": 0.5817, - "step": 15000 - }, - { - "epoch": 0.9624759461193072, - "grad_norm": 1.0696335688074747, - "learning_rate": 8.566101606944266e-07, - "loss": 0.6736, - "step": 15005 - }, - { - "epoch": 0.962796664528544, - "grad_norm": 0.7303824338994761, - "learning_rate": 8.420503256298396e-07, - "loss": 0.6429, - "step": 15010 - }, - { - "epoch": 0.9631173829377806, - "grad_norm": 1.0294755318998579, - "learning_rate": 8.276147673467849e-07, - "loss": 0.7188, - "step": 15015 - }, - { - "epoch": 0.9634381013470174, - "grad_norm": 0.9556262852737702, - "learning_rate": 8.133035039376679e-07, - "loss": 0.5951, - "step": 15020 - }, - { - "epoch": 0.963758819756254, - "grad_norm": 0.9324693251087647, - "learning_rate": 7.991165533390854e-07, - "loss": 0.7127, - "step": 15025 - }, - { - "epoch": 0.9640795381654907, - "grad_norm": 0.9591152159542692, - "learning_rate": 7.850539333318585e-07, - "loss": 0.6322, - "step": 15030 - }, - { - "epoch": 0.9644002565747274, - "grad_norm": 0.6946002197246557, - "learning_rate": 7.711156615409665e-07, - "loss": 0.5755, - "step": 15035 - }, - { - "epoch": 0.964720974983964, - "grad_norm": 1.3334758098994104, - "learning_rate": 7.573017554355355e-07, - "loss": 0.6318, - "step": 15040 - }, - { - "epoch": 0.9650416933932008, - "grad_norm": 0.8978971885207064, - "learning_rate": 7.436122323288497e-07, - "loss": 0.6035, - "step": 15045 - }, - { - "epoch": 0.9653624118024374, - "grad_norm": 0.8103686748723528, - "learning_rate": 7.300471093782624e-07, - "loss": 0.6194, - "step": 15050 - }, - { - "epoch": 0.9656831302116742, - "grad_norm": 0.753034703476334, - "learning_rate": 7.166064035852405e-07, - "loss": 0.6241, - "step": 15055 - }, - { - "epoch": 0.9660038486209108, - "grad_norm": 0.8194295630630289, - "learning_rate": 7.032901317953089e-07, - "loss": 0.804, - "step": 15060 - }, - { - "epoch": 0.9663245670301476, - "grad_norm": 0.6380479125093319, - "learning_rate": 6.900983106980396e-07, - "loss": 0.4591, - "step": 15065 - }, - { - "epoch": 0.9666452854393842, - "grad_norm": 0.6010950679928249, - "learning_rate": 6.770309568270183e-07, - "loss": 0.5964, - "step": 15070 - }, - { - "epoch": 0.9669660038486209, - "grad_norm": 0.6142851169104145, - "learning_rate": 6.640880865598331e-07, - "loss": 0.515, - "step": 15075 - }, - { - "epoch": 0.9672867222578576, - "grad_norm": 0.5969279751540932, - "learning_rate": 6.512697161180859e-07, - "loss": 0.5795, - "step": 15080 - }, - { - "epoch": 0.9676074406670943, - "grad_norm": 1.1554904145083251, - "learning_rate": 6.38575861567281e-07, - "loss": 0.7483, - "step": 15085 - }, - { - "epoch": 0.967928159076331, - "grad_norm": 0.7865746542213344, - "learning_rate": 6.260065388169256e-07, - "loss": 0.5557, - "step": 15090 - }, - { - "epoch": 0.9682488774855676, - "grad_norm": 1.1050848806521416, - "learning_rate": 6.135617636204072e-07, - "loss": 0.5939, - "step": 15095 - }, - { - "epoch": 0.9685695958948044, - "grad_norm": 0.7070536160439901, - "learning_rate": 6.01241551575027e-07, - "loss": 0.6985, - "step": 15100 - }, - { - "epoch": 0.968890314304041, - "grad_norm": 1.105194184766872, - "learning_rate": 5.890459181219776e-07, - "loss": 0.7083, - "step": 15105 - }, - { - "epoch": 0.9692110327132777, - "grad_norm": 1.2744464352233527, - "learning_rate": 5.769748785463103e-07, - "loss": 0.6397, - "step": 15110 - }, - { - "epoch": 0.9695317511225144, - "grad_norm": 0.9272062316818276, - "learning_rate": 5.650284479769008e-07, - "loss": 0.7676, - "step": 15115 - }, - { - "epoch": 0.9698524695317511, - "grad_norm": 0.7995773908927787, - "learning_rate": 5.532066413864834e-07, - "loss": 0.6971, - "step": 15120 - }, - { - "epoch": 0.9701731879409878, - "grad_norm": 0.38586358236871543, - "learning_rate": 5.415094735915838e-07, - "loss": 0.6707, - "step": 15125 - }, - { - "epoch": 0.9704939063502245, - "grad_norm": 0.9134739108193013, - "learning_rate": 5.299369592524972e-07, - "loss": 0.7099, - "step": 15130 - }, - { - "epoch": 0.9708146247594612, - "grad_norm": 1.1214413150852183, - "learning_rate": 5.184891128733216e-07, - "loss": 0.5773, - "step": 15135 - }, - { - "epoch": 0.9711353431686979, - "grad_norm": 0.9080341063196368, - "learning_rate": 5.071659488018688e-07, - "loss": 0.5541, - "step": 15140 - }, - { - "epoch": 0.9714560615779346, - "grad_norm": 0.6396326113379124, - "learning_rate": 4.959674812297089e-07, - "loss": 0.7547, - "step": 15145 - }, - { - "epoch": 0.9717767799871713, - "grad_norm": 0.6247330527268826, - "learning_rate": 4.848937241921369e-07, - "loss": 0.7347, - "step": 15150 - }, - { - "epoch": 0.9720974983964079, - "grad_norm": 0.7413180396760661, - "learning_rate": 4.7394469156810674e-07, - "loss": 0.6324, - "step": 15155 - }, - { - "epoch": 0.9724182168056447, - "grad_norm": 0.8191285127812412, - "learning_rate": 4.6312039708028553e-07, - "loss": 0.6501, - "step": 15160 - }, - { - "epoch": 0.9727389352148813, - "grad_norm": 1.5646180696875727, - "learning_rate": 4.5242085429499923e-07, - "loss": 0.7018, - "step": 15165 - }, - { - "epoch": 0.9730596536241181, - "grad_norm": 1.05700452006374, - "learning_rate": 4.4184607662220987e-07, - "loss": 0.702, - "step": 15170 - }, - { - "epoch": 0.9733803720333547, - "grad_norm": 0.6341783140741876, - "learning_rate": 4.313960773155046e-07, - "loss": 0.636, - "step": 15175 - }, - { - "epoch": 0.9737010904425915, - "grad_norm": 0.7888859139283535, - "learning_rate": 4.2107086947209553e-07, - "loss": 0.6313, - "step": 15180 - }, - { - "epoch": 0.9740218088518281, - "grad_norm": 0.9191085670941561, - "learning_rate": 4.1087046603279777e-07, - "loss": 0.6221, - "step": 15185 - }, - { - "epoch": 0.9743425272610647, - "grad_norm": 0.747755641512419, - "learning_rate": 4.007948797819738e-07, - "loss": 0.7214, - "step": 15190 - }, - { - "epoch": 0.9746632456703015, - "grad_norm": 0.977703835187041, - "learning_rate": 3.90844123347589e-07, - "loss": 0.6226, - "step": 15195 - }, - { - "epoch": 0.9749839640795381, - "grad_norm": 1.0760333069724886, - "learning_rate": 3.8101820920114494e-07, - "loss": 0.5479, - "step": 15200 - }, - { - "epoch": 0.9753046824887749, - "grad_norm": 0.6944511489853861, - "learning_rate": 3.713171496576573e-07, - "loss": 0.5499, - "step": 15205 - }, - { - "epoch": 0.9756254008980115, - "grad_norm": 0.8427188819091052, - "learning_rate": 3.617409568756669e-07, - "loss": 0.7567, - "step": 15210 - }, - { - "epoch": 0.9759461193072483, - "grad_norm": 0.8552901758457413, - "learning_rate": 3.5228964285722864e-07, - "loss": 0.5683, - "step": 15215 - }, - { - "epoch": 0.9762668377164849, - "grad_norm": 1.3132456382472737, - "learning_rate": 3.429632194478782e-07, - "loss": 0.6284, - "step": 15220 - }, - { - "epoch": 0.9765875561257216, - "grad_norm": 0.7318279273617357, - "learning_rate": 3.337616983366321e-07, - "loss": 0.5582, - "step": 15225 - }, - { - "epoch": 0.9769082745349583, - "grad_norm": 0.6573550653291185, - "learning_rate": 3.246850910559318e-07, - "loss": 0.5491, - "step": 15230 - }, - { - "epoch": 0.977228992944195, - "grad_norm": 0.8242113768294678, - "learning_rate": 3.157334089816888e-07, - "loss": 0.7255, - "step": 15235 - }, - { - "epoch": 0.9775497113534317, - "grad_norm": 0.9030228435778539, - "learning_rate": 3.0690666333325067e-07, - "loss": 0.5873, - "step": 15240 - }, - { - "epoch": 0.9778704297626684, - "grad_norm": 0.565513303166446, - "learning_rate": 2.9820486517335713e-07, - "loss": 0.598, - "step": 15245 - }, - { - "epoch": 0.9781911481719051, - "grad_norm": 0.6147817142778307, - "learning_rate": 2.896280254081618e-07, - "loss": 0.7145, - "step": 15250 - }, - { - "epoch": 0.9785118665811418, - "grad_norm": 0.8743323298527471, - "learning_rate": 2.811761547871994e-07, - "loss": 0.6756, - "step": 15255 - }, - { - "epoch": 0.9788325849903784, - "grad_norm": 1.1307500659483494, - "learning_rate": 2.728492639033742e-07, - "loss": 0.6188, - "step": 15260 - }, - { - "epoch": 0.9791533033996151, - "grad_norm": 0.7125463266714677, - "learning_rate": 2.6464736319297136e-07, - "loss": 0.6278, - "step": 15265 - }, - { - "epoch": 0.9794740218088518, - "grad_norm": 0.5910469031411075, - "learning_rate": 2.5657046293560137e-07, - "loss": 0.6905, - "step": 15270 - }, - { - "epoch": 0.9797947402180885, - "grad_norm": 0.7878661937473239, - "learning_rate": 2.4861857325421123e-07, - "loss": 0.7325, - "step": 15275 - }, - { - "epoch": 0.9801154586273252, - "grad_norm": 0.8286733473521487, - "learning_rate": 2.4079170411507315e-07, - "loss": 0.7773, - "step": 15280 - }, - { - "epoch": 0.9804361770365619, - "grad_norm": 0.9685767265903029, - "learning_rate": 2.3308986532778464e-07, - "loss": 0.646, - "step": 15285 - }, - { - "epoch": 0.9807568954457986, - "grad_norm": 0.9361901486165769, - "learning_rate": 2.255130665452243e-07, - "loss": 0.6598, - "step": 15290 - }, - { - "epoch": 0.9810776138550352, - "grad_norm": 0.9598712869867538, - "learning_rate": 2.180613172635404e-07, - "loss": 0.5625, - "step": 15295 - }, - { - "epoch": 0.981398332264272, - "grad_norm": 0.53570267588639, - "learning_rate": 2.1073462682217325e-07, - "loss": 0.5784, - "step": 15300 - }, - { - "epoch": 0.9817190506735086, - "grad_norm": 0.7566088957917948, - "learning_rate": 2.0353300440382194e-07, - "loss": 0.6119, - "step": 15305 - }, - { - "epoch": 0.9820397690827454, - "grad_norm": 1.146329754716512, - "learning_rate": 1.9645645903444422e-07, - "loss": 0.7188, - "step": 15310 - }, - { - "epoch": 0.982360487491982, - "grad_norm": 0.8370973588336825, - "learning_rate": 1.895049995832232e-07, - "loss": 0.7563, - "step": 15315 - }, - { - "epoch": 0.9826812059012188, - "grad_norm": 0.9434580889772379, - "learning_rate": 1.8267863476255643e-07, - "loss": 0.7839, - "step": 15320 - }, - { - "epoch": 0.9830019243104554, - "grad_norm": 0.8804750628505544, - "learning_rate": 1.7597737312810004e-07, - "loss": 0.4332, - "step": 15325 - }, - { - "epoch": 0.9833226427196922, - "grad_norm": 0.7320489722005881, - "learning_rate": 1.694012230786579e-07, - "loss": 0.7652, - "step": 15330 - }, - { - "epoch": 0.9836433611289288, - "grad_norm": 0.7366362970085942, - "learning_rate": 1.6295019285628154e-07, - "loss": 0.7341, - "step": 15335 - }, - { - "epoch": 0.9839640795381654, - "grad_norm": 1.0140709106862729, - "learning_rate": 1.5662429054618122e-07, - "loss": 0.4945, - "step": 15340 - }, - { - "epoch": 0.9842847979474022, - "grad_norm": 1.9484887809729772, - "learning_rate": 1.504235240767371e-07, - "loss": 0.6308, - "step": 15345 - }, - { - "epoch": 0.9846055163566388, - "grad_norm": 0.9619117197899885, - "learning_rate": 1.4434790121951036e-07, - "loss": 0.6099, - "step": 15350 - }, - { - "epoch": 0.9849262347658756, - "grad_norm": 0.9949706333975902, - "learning_rate": 1.3839742958920987e-07, - "loss": 0.5725, - "step": 15355 - }, - { - "epoch": 0.9852469531751122, - "grad_norm": 0.9242186083511401, - "learning_rate": 1.3257211664368106e-07, - "loss": 0.6308, - "step": 15360 - }, - { - "epoch": 0.985567671584349, - "grad_norm": 1.0782239190960032, - "learning_rate": 1.2687196968392822e-07, - "loss": 0.6935, - "step": 15365 - }, - { - "epoch": 0.9858883899935856, - "grad_norm": 0.8111644243864005, - "learning_rate": 1.2129699585404774e-07, - "loss": 0.7241, - "step": 15370 - }, - { - "epoch": 0.9862091084028223, - "grad_norm": 0.7276347564310323, - "learning_rate": 1.1584720214129485e-07, - "loss": 0.6842, - "step": 15375 - }, - { - "epoch": 0.986529826812059, - "grad_norm": 1.036558622735431, - "learning_rate": 1.1052259537599474e-07, - "loss": 0.7109, - "step": 15380 - }, - { - "epoch": 0.9868505452212957, - "grad_norm": 0.8442723448288622, - "learning_rate": 1.053231822315981e-07, - "loss": 0.5197, - "step": 15385 - }, - { - "epoch": 0.9871712636305324, - "grad_norm": 0.7755592771907561, - "learning_rate": 1.0024896922464777e-07, - "loss": 0.5958, - "step": 15390 - }, - { - "epoch": 0.9874919820397691, - "grad_norm": 1.0235862204819772, - "learning_rate": 9.529996271475661e-08, - "loss": 0.7323, - "step": 15395 - }, - { - "epoch": 0.9878127004490058, - "grad_norm": 0.6802556392432448, - "learning_rate": 9.047616890461852e-08, - "loss": 0.6661, - "step": 15400 - }, - { - "epoch": 0.9881334188582425, - "grad_norm": 0.7642842609623561, - "learning_rate": 8.57775938399974e-08, - "loss": 0.6418, - "step": 15405 - }, - { - "epoch": 0.9884541372674791, - "grad_norm": 0.7629833080692018, - "learning_rate": 8.1204243409716e-08, - "loss": 0.71, - "step": 15410 - }, - { - "epoch": 0.9887748556767159, - "grad_norm": 0.8028551912844719, - "learning_rate": 7.675612334566706e-08, - "loss": 0.6261, - "step": 15415 - }, - { - "epoch": 0.9890955740859525, - "grad_norm": 0.8568280018874693, - "learning_rate": 7.24332392227578e-08, - "loss": 0.7818, - "step": 15420 - }, - { - "epoch": 0.9894162924951893, - "grad_norm": 0.9435010043749265, - "learning_rate": 6.823559645896538e-08, - "loss": 0.7135, - "step": 15425 - }, - { - "epoch": 0.9897370109044259, - "grad_norm": 0.8536947904193946, - "learning_rate": 6.416320031527035e-08, - "loss": 0.6909, - "step": 15430 - }, - { - "epoch": 0.9900577293136626, - "grad_norm": 0.6375751055715156, - "learning_rate": 6.02160558957121e-08, - "loss": 0.7567, - "step": 15435 - }, - { - "epoch": 0.9903784477228993, - "grad_norm": 0.722851635446421, - "learning_rate": 5.639416814731124e-08, - "loss": 0.595, - "step": 15440 - }, - { - "epoch": 0.9906991661321359, - "grad_norm": 0.6530835942019998, - "learning_rate": 5.269754186013609e-08, - "loss": 0.6185, - "step": 15445 - }, - { - "epoch": 0.9910198845413727, - "grad_norm": 1.0508657841447764, - "learning_rate": 4.912618166723615e-08, - "loss": 0.5615, - "step": 15450 - }, - { - "epoch": 0.9913406029506093, - "grad_norm": 0.89657789016663, - "learning_rate": 4.5680092044686486e-08, - "loss": 0.686, - "step": 15455 - }, - { - "epoch": 0.9916613213598461, - "grad_norm": 1.0049970608249212, - "learning_rate": 4.235927731153222e-08, - "loss": 0.5976, - "step": 15460 - }, - { - "epoch": 0.9919820397690827, - "grad_norm": 0.5955235189985802, - "learning_rate": 3.916374162983294e-08, - "loss": 0.4921, - "step": 15465 - }, - { - "epoch": 0.9923027581783195, - "grad_norm": 1.0006472782878193, - "learning_rate": 3.6093489004618286e-08, - "loss": 0.6268, - "step": 15470 - }, - { - "epoch": 0.9926234765875561, - "grad_norm": 0.7931648933621266, - "learning_rate": 3.314852328389906e-08, - "loss": 0.6005, - "step": 15475 - }, - { - "epoch": 0.9929441949967928, - "grad_norm": 0.9041277771423232, - "learning_rate": 3.032884815866721e-08, - "loss": 0.5324, - "step": 15480 - }, - { - "epoch": 0.9932649134060295, - "grad_norm": 0.9494072939119311, - "learning_rate": 2.7634467162873657e-08, - "loss": 0.7065, - "step": 15485 - }, - { - "epoch": 0.9935856318152662, - "grad_norm": 0.6280167222373476, - "learning_rate": 2.506538367345046e-08, - "loss": 0.6061, - "step": 15490 - }, - { - "epoch": 0.9939063502245029, - "grad_norm": 0.9951079606352037, - "learning_rate": 2.2621600910288644e-08, - "loss": 0.6444, - "step": 15495 - }, - { - "epoch": 0.9942270686337396, - "grad_norm": 0.5695988637172767, - "learning_rate": 2.0303121936227077e-08, - "loss": 0.5318, - "step": 15500 - }, - { - "epoch": 0.9945477870429763, - "grad_norm": 0.7104107796380682, - "learning_rate": 1.8109949657074687e-08, - "loss": 0.584, - "step": 15505 - }, - { - "epoch": 0.994868505452213, - "grad_norm": 0.9519239668806431, - "learning_rate": 1.6042086821566048e-08, - "loss": 0.6069, - "step": 15510 - }, - { - "epoch": 0.9951892238614497, - "grad_norm": 1.0531482909821168, - "learning_rate": 1.409953602140579e-08, - "loss": 0.6419, - "step": 15515 - }, - { - "epoch": 0.9955099422706863, - "grad_norm": 0.8960669638227693, - "learning_rate": 1.2282299691235289e-08, - "loss": 0.6139, - "step": 15520 - }, - { - "epoch": 0.995830660679923, - "grad_norm": 1.4364607207448494, - "learning_rate": 1.059038010863267e-08, - "loss": 0.557, - "step": 15525 - }, - { - "epoch": 0.9961513790891597, - "grad_norm": 0.7870715712258225, - "learning_rate": 9.02377939412391e-09, - "loss": 0.6829, - "step": 15530 - }, - { - "epoch": 0.9964720974983964, - "grad_norm": 0.6681758560958523, - "learning_rate": 7.582499511160635e-09, - "loss": 0.6894, - "step": 15535 - }, - { - "epoch": 0.9967928159076331, - "grad_norm": 0.7692932903463889, - "learning_rate": 6.266542266120112e-09, - "loss": 0.6775, - "step": 15540 - }, - { - "epoch": 0.9971135343168698, - "grad_norm": 1.2971219190629335, - "learning_rate": 5.0759093083385665e-09, - "loss": 0.6272, - "step": 15545 - }, - { - "epoch": 0.9974342527261065, - "grad_norm": 0.6500496471556959, - "learning_rate": 4.010602130033458e-09, - "loss": 0.6068, - "step": 15550 - }, - { - "epoch": 0.9977549711353432, - "grad_norm": 0.686167526298323, - "learning_rate": 3.0706220664034057e-09, - "loss": 0.6119, - "step": 15555 - }, - { - "epoch": 0.9980756895445798, - "grad_norm": 0.7818918449822959, - "learning_rate": 2.255970295539367e-09, - "loss": 0.6275, - "step": 15560 - }, - { - "epoch": 0.9983964079538166, - "grad_norm": 0.7349503006612832, - "learning_rate": 1.5666478384579464e-09, - "loss": 0.7661, - "step": 15565 - }, - { - "epoch": 0.9987171263630532, - "grad_norm": 0.7447321528022689, - "learning_rate": 1.0026555591013952e-09, - "loss": 0.7204, - "step": 15570 - }, - { - "epoch": 0.99903784477229, - "grad_norm": 0.5813111087659052, - "learning_rate": 5.639941643376112e-10, - "loss": 0.6803, - "step": 15575 - }, - { - "epoch": 0.9993585631815266, - "grad_norm": 0.7896016109360797, - "learning_rate": 2.5066420393793365e-10, - "loss": 0.7841, - "step": 15580 - }, - { - "epoch": 0.9996792815907634, - "grad_norm": 0.8103436457382939, - "learning_rate": 6.266607062155316e-11, - "loss": 0.5852, - "step": 15585 - }, { "epoch": 1.0, - "grad_norm": 0.9796619082932287, - "learning_rate": 0.0, - "loss": 0.7238, - "step": 15590 + "eval_loss": NaN, + "eval_runtime": 946.4484, + "eval_samples_per_second": 1.22, + "eval_steps_per_second": 0.305, + "step": 406 }, { "epoch": 1.0, - "step": 15590, - "total_flos": 1.6764562374852608e+16, - "train_loss": 0.0, - "train_runtime": 0.0156, - "train_samples_per_second": 6662118.554, - "train_steps_per_second": 104099.609 + "step": 406, + "total_flos": 2.1031592360023163e+18, + "train_loss": 0.7368329773689138, + "train_runtime": 67624.003, + "train_samples_per_second": 1.537, + "train_steps_per_second": 0.006 } ], "logging_steps": 5, - "max_steps": 1624, + "max_steps": 406, "num_input_tokens_seen": 0, "num_train_epochs": 1, - "save_steps": 10, + "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { @@ -21863,13 +612,13 @@ "should_evaluate": false, "should_log": false, "should_save": true, - "should_training_stop": true + "should_training_stop": false }, "attributes": {} } }, - "total_flos": 1.6764562374852608e+16, - "train_batch_size": 2, + "total_flos": 2.1031592360023163e+18, + "train_batch_size": 8, "trial_name": null, "trial_params": null }