{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 7416, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 6.961409543494689, "learning_rate": 4.4843049327354265e-08, "loss": 0.8591, "step": 1 }, { "epoch": 0.0, "grad_norm": 7.068084644073217, "learning_rate": 8.968609865470853e-08, "loss": 1.099, "step": 2 }, { "epoch": 0.0, "grad_norm": 8.941786739776187, "learning_rate": 1.345291479820628e-07, "loss": 1.0761, "step": 3 }, { "epoch": 0.0, "grad_norm": 9.844383322574918, "learning_rate": 1.7937219730941706e-07, "loss": 1.1539, "step": 4 }, { "epoch": 0.0, "grad_norm": 6.09350044210741, "learning_rate": 2.242152466367713e-07, "loss": 0.9987, "step": 5 }, { "epoch": 0.0, "grad_norm": 6.592161425352053, "learning_rate": 2.690582959641256e-07, "loss": 1.039, "step": 6 }, { "epoch": 0.0, "grad_norm": 8.459160420196962, "learning_rate": 3.1390134529147985e-07, "loss": 1.2088, "step": 7 }, { "epoch": 0.0, "grad_norm": 7.468300267755148, "learning_rate": 3.587443946188341e-07, "loss": 0.9747, "step": 8 }, { "epoch": 0.0, "grad_norm": 10.725265287675171, "learning_rate": 4.0358744394618834e-07, "loss": 1.1631, "step": 9 }, { "epoch": 0.0, "grad_norm": 6.614116795270108, "learning_rate": 4.484304932735426e-07, "loss": 0.8901, "step": 10 }, { "epoch": 0.0, "grad_norm": 8.888840674966046, "learning_rate": 4.932735426008969e-07, "loss": 1.094, "step": 11 }, { "epoch": 0.0, "grad_norm": 7.206303377815786, "learning_rate": 5.381165919282512e-07, "loss": 1.0112, "step": 12 }, { "epoch": 0.0, "grad_norm": 4.792146338016799, "learning_rate": 5.829596412556054e-07, "loss": 0.7926, "step": 13 }, { "epoch": 0.0, "grad_norm": 8.8311357062959, "learning_rate": 6.278026905829597e-07, "loss": 0.965, "step": 14 }, { "epoch": 0.0, "grad_norm": 6.939883264359239, "learning_rate": 6.72645739910314e-07, "loss": 1.0251, "step": 15 }, { "epoch": 0.0, "grad_norm": 5.400161933235832, "learning_rate": 7.174887892376682e-07, "loss": 0.7672, "step": 16 }, { "epoch": 0.0, "grad_norm": 7.621402600324801, "learning_rate": 7.623318385650225e-07, "loss": 0.85, "step": 17 }, { "epoch": 0.0, "grad_norm": 5.136979902527208, "learning_rate": 8.071748878923767e-07, "loss": 0.7351, "step": 18 }, { "epoch": 0.0, "grad_norm": 6.527326714055755, "learning_rate": 8.520179372197309e-07, "loss": 0.9788, "step": 19 }, { "epoch": 0.0, "grad_norm": 4.905855395979164, "learning_rate": 8.968609865470852e-07, "loss": 0.7862, "step": 20 }, { "epoch": 0.0, "grad_norm": 5.7159391121966365, "learning_rate": 9.417040358744395e-07, "loss": 0.9564, "step": 21 }, { "epoch": 0.0, "grad_norm": 3.8938294796245176, "learning_rate": 9.865470852017938e-07, "loss": 0.7469, "step": 22 }, { "epoch": 0.0, "grad_norm": 6.158361979606944, "learning_rate": 1.031390134529148e-06, "loss": 0.8207, "step": 23 }, { "epoch": 0.0, "grad_norm": 5.687072887480626, "learning_rate": 1.0762331838565023e-06, "loss": 0.8532, "step": 24 }, { "epoch": 0.0, "grad_norm": 4.889524073390678, "learning_rate": 1.1210762331838566e-06, "loss": 0.8989, "step": 25 }, { "epoch": 0.0, "grad_norm": 4.768646204745344, "learning_rate": 1.1659192825112108e-06, "loss": 0.7812, "step": 26 }, { "epoch": 0.0, "grad_norm": 4.450274844398882, "learning_rate": 1.2107623318385651e-06, "loss": 0.7407, "step": 27 }, { "epoch": 0.0, "grad_norm": 5.35880205830437, "learning_rate": 1.2556053811659194e-06, "loss": 0.8789, "step": 28 }, { "epoch": 0.0, "grad_norm": 3.6434319724699327, "learning_rate": 1.3004484304932734e-06, "loss": 0.7134, "step": 29 }, { "epoch": 0.0, "grad_norm": 5.491302933344519, "learning_rate": 1.345291479820628e-06, "loss": 0.6748, "step": 30 }, { "epoch": 0.0, "grad_norm": 4.486955338343044, "learning_rate": 1.3901345291479822e-06, "loss": 0.6111, "step": 31 }, { "epoch": 0.0, "grad_norm": 3.1220225135847337, "learning_rate": 1.4349775784753365e-06, "loss": 0.57, "step": 32 }, { "epoch": 0.0, "grad_norm": 4.418863625050277, "learning_rate": 1.4798206278026907e-06, "loss": 0.6577, "step": 33 }, { "epoch": 0.0, "grad_norm": 4.254919857449252, "learning_rate": 1.524663677130045e-06, "loss": 0.5638, "step": 34 }, { "epoch": 0.0, "grad_norm": 3.1827366992385073, "learning_rate": 1.5695067264573993e-06, "loss": 0.5709, "step": 35 }, { "epoch": 0.0, "grad_norm": 3.6307313821819416, "learning_rate": 1.6143497757847533e-06, "loss": 0.492, "step": 36 }, { "epoch": 0.0, "grad_norm": 3.095405927759096, "learning_rate": 1.6591928251121078e-06, "loss": 0.591, "step": 37 }, { "epoch": 0.01, "grad_norm": 4.629493475882606, "learning_rate": 1.7040358744394619e-06, "loss": 0.5228, "step": 38 }, { "epoch": 0.01, "grad_norm": 4.739268094489607, "learning_rate": 1.7488789237668164e-06, "loss": 0.6156, "step": 39 }, { "epoch": 0.01, "grad_norm": 3.418246360799165, "learning_rate": 1.7937219730941704e-06, "loss": 0.5223, "step": 40 }, { "epoch": 0.01, "grad_norm": 4.188004255868745, "learning_rate": 1.838565022421525e-06, "loss": 0.4484, "step": 41 }, { "epoch": 0.01, "grad_norm": 4.605795229116246, "learning_rate": 1.883408071748879e-06, "loss": 0.4042, "step": 42 }, { "epoch": 0.01, "grad_norm": 2.7151507534770265, "learning_rate": 1.9282511210762332e-06, "loss": 0.3793, "step": 43 }, { "epoch": 0.01, "grad_norm": 2.746434734814048, "learning_rate": 1.9730941704035875e-06, "loss": 0.4389, "step": 44 }, { "epoch": 0.01, "grad_norm": 3.2295817103355273, "learning_rate": 2.0179372197309418e-06, "loss": 0.5164, "step": 45 }, { "epoch": 0.01, "grad_norm": 2.962318967375179, "learning_rate": 2.062780269058296e-06, "loss": 0.3965, "step": 46 }, { "epoch": 0.01, "grad_norm": 2.7677124619961733, "learning_rate": 2.1076233183856503e-06, "loss": 0.4133, "step": 47 }, { "epoch": 0.01, "grad_norm": 3.4849910302839833, "learning_rate": 2.1524663677130046e-06, "loss": 0.4494, "step": 48 }, { "epoch": 0.01, "grad_norm": 2.9730215264257955, "learning_rate": 2.197309417040359e-06, "loss": 0.4307, "step": 49 }, { "epoch": 0.01, "grad_norm": 2.6068354909105764, "learning_rate": 2.242152466367713e-06, "loss": 0.3631, "step": 50 }, { "epoch": 0.01, "grad_norm": 3.0922184334079565, "learning_rate": 2.2869955156950674e-06, "loss": 0.4451, "step": 51 }, { "epoch": 0.01, "grad_norm": 2.8591269434613555, "learning_rate": 2.3318385650224217e-06, "loss": 0.3658, "step": 52 }, { "epoch": 0.01, "grad_norm": 3.2797960321742563, "learning_rate": 2.376681614349776e-06, "loss": 0.3505, "step": 53 }, { "epoch": 0.01, "grad_norm": 3.596905622131219, "learning_rate": 2.4215246636771302e-06, "loss": 0.5093, "step": 54 }, { "epoch": 0.01, "grad_norm": 2.700171031134967, "learning_rate": 2.4663677130044845e-06, "loss": 0.2772, "step": 55 }, { "epoch": 0.01, "grad_norm": 2.1163525959543166, "learning_rate": 2.5112107623318388e-06, "loss": 0.2988, "step": 56 }, { "epoch": 0.01, "grad_norm": 3.496120876159893, "learning_rate": 2.556053811659193e-06, "loss": 0.3441, "step": 57 }, { "epoch": 0.01, "grad_norm": 2.546834612729583, "learning_rate": 2.600896860986547e-06, "loss": 0.3386, "step": 58 }, { "epoch": 0.01, "grad_norm": 2.2419601237122335, "learning_rate": 2.6457399103139016e-06, "loss": 0.2807, "step": 59 }, { "epoch": 0.01, "grad_norm": 2.3107924512252596, "learning_rate": 2.690582959641256e-06, "loss": 0.313, "step": 60 }, { "epoch": 0.01, "grad_norm": 2.496453228752521, "learning_rate": 2.73542600896861e-06, "loss": 0.3055, "step": 61 }, { "epoch": 0.01, "grad_norm": 3.0463382787433124, "learning_rate": 2.7802690582959644e-06, "loss": 0.3541, "step": 62 }, { "epoch": 0.01, "grad_norm": 2.690907094947088, "learning_rate": 2.8251121076233182e-06, "loss": 0.3353, "step": 63 }, { "epoch": 0.01, "grad_norm": 2.1264772682160493, "learning_rate": 2.869955156950673e-06, "loss": 0.2437, "step": 64 }, { "epoch": 0.01, "grad_norm": 2.8401565711421655, "learning_rate": 2.9147982062780272e-06, "loss": 0.2736, "step": 65 }, { "epoch": 0.01, "grad_norm": 2.221969204817513, "learning_rate": 2.9596412556053815e-06, "loss": 0.2217, "step": 66 }, { "epoch": 0.01, "grad_norm": 2.1095757292501194, "learning_rate": 3.0044843049327353e-06, "loss": 0.2711, "step": 67 }, { "epoch": 0.01, "grad_norm": 3.0277920994978915, "learning_rate": 3.04932735426009e-06, "loss": 0.3734, "step": 68 }, { "epoch": 0.01, "grad_norm": 2.8289773855576463, "learning_rate": 3.0941704035874443e-06, "loss": 0.3792, "step": 69 }, { "epoch": 0.01, "grad_norm": 2.5028507825166764, "learning_rate": 3.1390134529147986e-06, "loss": 0.2905, "step": 70 }, { "epoch": 0.01, "grad_norm": 2.897239618094786, "learning_rate": 3.1838565022421524e-06, "loss": 0.3039, "step": 71 }, { "epoch": 0.01, "grad_norm": 3.4808480664321655, "learning_rate": 3.2286995515695067e-06, "loss": 0.4212, "step": 72 }, { "epoch": 0.01, "grad_norm": 2.5149257788658907, "learning_rate": 3.2735426008968614e-06, "loss": 0.2721, "step": 73 }, { "epoch": 0.01, "grad_norm": 2.4600353525625276, "learning_rate": 3.3183856502242157e-06, "loss": 0.2782, "step": 74 }, { "epoch": 0.01, "grad_norm": 2.111174897963261, "learning_rate": 3.36322869955157e-06, "loss": 0.2229, "step": 75 }, { "epoch": 0.01, "grad_norm": 2.9992985602119315, "learning_rate": 3.4080717488789238e-06, "loss": 0.3687, "step": 76 }, { "epoch": 0.01, "grad_norm": 2.4686620697217605, "learning_rate": 3.4529147982062785e-06, "loss": 0.3111, "step": 77 }, { "epoch": 0.01, "grad_norm": 2.814584129116945, "learning_rate": 3.4977578475336327e-06, "loss": 0.3572, "step": 78 }, { "epoch": 0.01, "grad_norm": 2.7459466690987364, "learning_rate": 3.542600896860987e-06, "loss": 0.3403, "step": 79 }, { "epoch": 0.01, "grad_norm": 2.3057967553595926, "learning_rate": 3.587443946188341e-06, "loss": 0.2565, "step": 80 }, { "epoch": 0.01, "grad_norm": 2.5626531722547643, "learning_rate": 3.632286995515695e-06, "loss": 0.2246, "step": 81 }, { "epoch": 0.01, "grad_norm": 2.360419973333545, "learning_rate": 3.67713004484305e-06, "loss": 0.2656, "step": 82 }, { "epoch": 0.01, "grad_norm": 2.453904543177983, "learning_rate": 3.721973094170404e-06, "loss": 0.2447, "step": 83 }, { "epoch": 0.01, "grad_norm": 1.8835718492060942, "learning_rate": 3.766816143497758e-06, "loss": 0.2396, "step": 84 }, { "epoch": 0.01, "grad_norm": 2.9444137072583336, "learning_rate": 3.8116591928251122e-06, "loss": 0.3215, "step": 85 }, { "epoch": 0.01, "grad_norm": 2.7751327018071743, "learning_rate": 3.8565022421524665e-06, "loss": 0.3939, "step": 86 }, { "epoch": 0.01, "grad_norm": 2.3860572916595557, "learning_rate": 3.901345291479821e-06, "loss": 0.2778, "step": 87 }, { "epoch": 0.01, "grad_norm": 1.6806292080717355, "learning_rate": 3.946188340807175e-06, "loss": 0.2364, "step": 88 }, { "epoch": 0.01, "grad_norm": 3.081092879300505, "learning_rate": 3.991031390134529e-06, "loss": 0.3548, "step": 89 }, { "epoch": 0.01, "grad_norm": 2.543568588808836, "learning_rate": 4.0358744394618836e-06, "loss": 0.2606, "step": 90 }, { "epoch": 0.01, "grad_norm": 2.2190104932864942, "learning_rate": 4.080717488789238e-06, "loss": 0.2614, "step": 91 }, { "epoch": 0.01, "grad_norm": 2.4159374908237456, "learning_rate": 4.125560538116592e-06, "loss": 0.2797, "step": 92 }, { "epoch": 0.01, "grad_norm": 1.6082523444748706, "learning_rate": 4.170403587443946e-06, "loss": 0.2433, "step": 93 }, { "epoch": 0.01, "grad_norm": 2.7818833291542697, "learning_rate": 4.215246636771301e-06, "loss": 0.3054, "step": 94 }, { "epoch": 0.01, "grad_norm": 1.9645064726475108, "learning_rate": 4.260089686098655e-06, "loss": 0.2821, "step": 95 }, { "epoch": 0.01, "grad_norm": 2.663596686478882, "learning_rate": 4.304932735426009e-06, "loss": 0.3153, "step": 96 }, { "epoch": 0.01, "grad_norm": 2.6104707094091344, "learning_rate": 4.3497757847533635e-06, "loss": 0.3359, "step": 97 }, { "epoch": 0.01, "grad_norm": 1.8603439030442848, "learning_rate": 4.394618834080718e-06, "loss": 0.2436, "step": 98 }, { "epoch": 0.01, "grad_norm": 2.6621420800562126, "learning_rate": 4.439461883408072e-06, "loss": 0.3327, "step": 99 }, { "epoch": 0.01, "grad_norm": 2.0925493123087064, "learning_rate": 4.484304932735426e-06, "loss": 0.2192, "step": 100 }, { "epoch": 0.01, "grad_norm": 2.8071009490243366, "learning_rate": 4.5291479820627806e-06, "loss": 0.3077, "step": 101 }, { "epoch": 0.01, "grad_norm": 2.0571185565408006, "learning_rate": 4.573991031390135e-06, "loss": 0.273, "step": 102 }, { "epoch": 0.01, "grad_norm": 1.6497470961537766, "learning_rate": 4.618834080717489e-06, "loss": 0.2297, "step": 103 }, { "epoch": 0.01, "grad_norm": 2.3540212264446443, "learning_rate": 4.663677130044843e-06, "loss": 0.3003, "step": 104 }, { "epoch": 0.01, "grad_norm": 2.1615276173877853, "learning_rate": 4.708520179372198e-06, "loss": 0.3016, "step": 105 }, { "epoch": 0.01, "grad_norm": 2.568472143549945, "learning_rate": 4.753363228699552e-06, "loss": 0.2282, "step": 106 }, { "epoch": 0.01, "grad_norm": 2.437880086425516, "learning_rate": 4.798206278026906e-06, "loss": 0.3083, "step": 107 }, { "epoch": 0.01, "grad_norm": 2.8185885523088223, "learning_rate": 4.8430493273542605e-06, "loss": 0.3101, "step": 108 }, { "epoch": 0.01, "grad_norm": 2.2116078289171206, "learning_rate": 4.887892376681615e-06, "loss": 0.3381, "step": 109 }, { "epoch": 0.01, "grad_norm": 2.953589416411613, "learning_rate": 4.932735426008969e-06, "loss": 0.335, "step": 110 }, { "epoch": 0.01, "grad_norm": 1.9084195525703147, "learning_rate": 4.977578475336323e-06, "loss": 0.2497, "step": 111 }, { "epoch": 0.02, "grad_norm": 3.2077006114218745, "learning_rate": 5.0224215246636775e-06, "loss": 0.3045, "step": 112 }, { "epoch": 0.02, "grad_norm": 2.7196869276496902, "learning_rate": 5.067264573991032e-06, "loss": 0.3098, "step": 113 }, { "epoch": 0.02, "grad_norm": 1.8003794492223448, "learning_rate": 5.112107623318386e-06, "loss": 0.2376, "step": 114 }, { "epoch": 0.02, "grad_norm": 2.541887240072164, "learning_rate": 5.156950672645741e-06, "loss": 0.2303, "step": 115 }, { "epoch": 0.02, "grad_norm": 2.775535699771745, "learning_rate": 5.201793721973094e-06, "loss": 0.2962, "step": 116 }, { "epoch": 0.02, "grad_norm": 2.2369783940136547, "learning_rate": 5.246636771300448e-06, "loss": 0.3025, "step": 117 }, { "epoch": 0.02, "grad_norm": 2.936039697479903, "learning_rate": 5.291479820627803e-06, "loss": 0.2821, "step": 118 }, { "epoch": 0.02, "grad_norm": 2.295683882205222, "learning_rate": 5.3363228699551574e-06, "loss": 0.2634, "step": 119 }, { "epoch": 0.02, "grad_norm": 2.463269896680059, "learning_rate": 5.381165919282512e-06, "loss": 0.2511, "step": 120 }, { "epoch": 0.02, "grad_norm": 1.9155294041839315, "learning_rate": 5.426008968609866e-06, "loss": 0.2486, "step": 121 }, { "epoch": 0.02, "grad_norm": 2.149292071012473, "learning_rate": 5.47085201793722e-06, "loss": 0.29, "step": 122 }, { "epoch": 0.02, "grad_norm": 1.8986799260999123, "learning_rate": 5.5156950672645745e-06, "loss": 0.303, "step": 123 }, { "epoch": 0.02, "grad_norm": 2.1657464085233005, "learning_rate": 5.560538116591929e-06, "loss": 0.1877, "step": 124 }, { "epoch": 0.02, "grad_norm": 2.7711689261146724, "learning_rate": 5.605381165919282e-06, "loss": 0.2675, "step": 125 }, { "epoch": 0.02, "grad_norm": 2.8914018818273926, "learning_rate": 5.6502242152466365e-06, "loss": 0.387, "step": 126 }, { "epoch": 0.02, "grad_norm": 2.481214637729105, "learning_rate": 5.695067264573992e-06, "loss": 0.269, "step": 127 }, { "epoch": 0.02, "grad_norm": 1.7842042412253984, "learning_rate": 5.739910313901346e-06, "loss": 0.2433, "step": 128 }, { "epoch": 0.02, "grad_norm": 2.3026826941412377, "learning_rate": 5.7847533632287e-06, "loss": 0.2672, "step": 129 }, { "epoch": 0.02, "grad_norm": 2.4022390833270566, "learning_rate": 5.8295964125560544e-06, "loss": 0.2869, "step": 130 }, { "epoch": 0.02, "grad_norm": 1.7904132628693517, "learning_rate": 5.874439461883409e-06, "loss": 0.259, "step": 131 }, { "epoch": 0.02, "grad_norm": 1.8044619424831707, "learning_rate": 5.919282511210763e-06, "loss": 0.25, "step": 132 }, { "epoch": 0.02, "grad_norm": 2.4255913568856267, "learning_rate": 5.964125560538116e-06, "loss": 0.265, "step": 133 }, { "epoch": 0.02, "grad_norm": 2.0291063511899736, "learning_rate": 6.008968609865471e-06, "loss": 0.3023, "step": 134 }, { "epoch": 0.02, "grad_norm": 2.3883695202049116, "learning_rate": 6.053811659192825e-06, "loss": 0.3329, "step": 135 }, { "epoch": 0.02, "grad_norm": 2.2425213260821186, "learning_rate": 6.09865470852018e-06, "loss": 0.3011, "step": 136 }, { "epoch": 0.02, "grad_norm": 2.577468197202622, "learning_rate": 6.143497757847534e-06, "loss": 0.2857, "step": 137 }, { "epoch": 0.02, "grad_norm": 1.9505023405746649, "learning_rate": 6.188340807174889e-06, "loss": 0.2512, "step": 138 }, { "epoch": 0.02, "grad_norm": 1.9174410504185457, "learning_rate": 6.233183856502243e-06, "loss": 0.3387, "step": 139 }, { "epoch": 0.02, "grad_norm": 2.659139544780493, "learning_rate": 6.278026905829597e-06, "loss": 0.2404, "step": 140 }, { "epoch": 0.02, "grad_norm": 2.4558934220466284, "learning_rate": 6.322869955156951e-06, "loss": 0.2488, "step": 141 }, { "epoch": 0.02, "grad_norm": 2.467739413080413, "learning_rate": 6.367713004484305e-06, "loss": 0.2952, "step": 142 }, { "epoch": 0.02, "grad_norm": 2.702802622313577, "learning_rate": 6.412556053811659e-06, "loss": 0.334, "step": 143 }, { "epoch": 0.02, "grad_norm": 3.2429223067638295, "learning_rate": 6.457399103139013e-06, "loss": 0.2968, "step": 144 }, { "epoch": 0.02, "grad_norm": 2.615223526042839, "learning_rate": 6.5022421524663685e-06, "loss": 0.3092, "step": 145 }, { "epoch": 0.02, "grad_norm": 2.5360609876376374, "learning_rate": 6.547085201793723e-06, "loss": 0.2025, "step": 146 }, { "epoch": 0.02, "grad_norm": 2.7282131885833096, "learning_rate": 6.591928251121077e-06, "loss": 0.2747, "step": 147 }, { "epoch": 0.02, "grad_norm": 1.6363072940619485, "learning_rate": 6.636771300448431e-06, "loss": 0.1785, "step": 148 }, { "epoch": 0.02, "grad_norm": 2.0409717817954833, "learning_rate": 6.681614349775786e-06, "loss": 0.2736, "step": 149 }, { "epoch": 0.02, "grad_norm": 2.6137213891497093, "learning_rate": 6.72645739910314e-06, "loss": 0.2592, "step": 150 }, { "epoch": 0.02, "grad_norm": 1.640043553290742, "learning_rate": 6.771300448430493e-06, "loss": 0.1957, "step": 151 }, { "epoch": 0.02, "grad_norm": 1.8345339737183508, "learning_rate": 6.8161434977578476e-06, "loss": 0.2164, "step": 152 }, { "epoch": 0.02, "grad_norm": 2.4326610388544014, "learning_rate": 6.860986547085202e-06, "loss": 0.1953, "step": 153 }, { "epoch": 0.02, "grad_norm": 2.4478610105960303, "learning_rate": 6.905829596412557e-06, "loss": 0.3009, "step": 154 }, { "epoch": 0.02, "grad_norm": 2.3105090364615077, "learning_rate": 6.950672645739911e-06, "loss": 0.2315, "step": 155 }, { "epoch": 0.02, "grad_norm": 2.1987814846843543, "learning_rate": 6.9955156950672655e-06, "loss": 0.2538, "step": 156 }, { "epoch": 0.02, "grad_norm": 2.1768223083464213, "learning_rate": 7.04035874439462e-06, "loss": 0.2331, "step": 157 }, { "epoch": 0.02, "grad_norm": 2.3479652613152466, "learning_rate": 7.085201793721974e-06, "loss": 0.2877, "step": 158 }, { "epoch": 0.02, "grad_norm": 1.8276359584731827, "learning_rate": 7.1300448430493275e-06, "loss": 0.1907, "step": 159 }, { "epoch": 0.02, "grad_norm": 2.1714420546352255, "learning_rate": 7.174887892376682e-06, "loss": 0.1761, "step": 160 }, { "epoch": 0.02, "grad_norm": 1.9223107789084368, "learning_rate": 7.219730941704036e-06, "loss": 0.2236, "step": 161 }, { "epoch": 0.02, "grad_norm": 2.520605201217695, "learning_rate": 7.26457399103139e-06, "loss": 0.3357, "step": 162 }, { "epoch": 0.02, "grad_norm": 2.9097719869420424, "learning_rate": 7.309417040358745e-06, "loss": 0.2593, "step": 163 }, { "epoch": 0.02, "grad_norm": 2.665940728919098, "learning_rate": 7.3542600896861e-06, "loss": 0.2504, "step": 164 }, { "epoch": 0.02, "grad_norm": 2.7444838082590635, "learning_rate": 7.399103139013454e-06, "loss": 0.3103, "step": 165 }, { "epoch": 0.02, "grad_norm": 1.8187393339692315, "learning_rate": 7.443946188340808e-06, "loss": 0.2677, "step": 166 }, { "epoch": 0.02, "grad_norm": 3.844815553592905, "learning_rate": 7.4887892376681625e-06, "loss": 0.3531, "step": 167 }, { "epoch": 0.02, "grad_norm": 2.7668500648529997, "learning_rate": 7.533632286995516e-06, "loss": 0.3038, "step": 168 }, { "epoch": 0.02, "grad_norm": 2.1052511655238337, "learning_rate": 7.57847533632287e-06, "loss": 0.224, "step": 169 }, { "epoch": 0.02, "grad_norm": 2.0415114310460765, "learning_rate": 7.6233183856502244e-06, "loss": 0.256, "step": 170 }, { "epoch": 0.02, "grad_norm": 1.191152471820726, "learning_rate": 7.668161434977579e-06, "loss": 0.2239, "step": 171 }, { "epoch": 0.02, "grad_norm": 2.2401681133610962, "learning_rate": 7.713004484304933e-06, "loss": 0.344, "step": 172 }, { "epoch": 0.02, "grad_norm": 2.050543909404076, "learning_rate": 7.757847533632287e-06, "loss": 0.2066, "step": 173 }, { "epoch": 0.02, "grad_norm": 2.5733128556297302, "learning_rate": 7.802690582959642e-06, "loss": 0.3081, "step": 174 }, { "epoch": 0.02, "grad_norm": 2.5121724141014234, "learning_rate": 7.847533632286996e-06, "loss": 0.2907, "step": 175 }, { "epoch": 0.02, "grad_norm": 1.6801017120514927, "learning_rate": 7.89237668161435e-06, "loss": 0.2174, "step": 176 }, { "epoch": 0.02, "grad_norm": 2.499366090232887, "learning_rate": 7.937219730941704e-06, "loss": 0.273, "step": 177 }, { "epoch": 0.02, "grad_norm": 1.9568557909043316, "learning_rate": 7.982062780269059e-06, "loss": 0.2662, "step": 178 }, { "epoch": 0.02, "grad_norm": 2.2088414107310737, "learning_rate": 8.026905829596413e-06, "loss": 0.2331, "step": 179 }, { "epoch": 0.02, "grad_norm": 1.6586286777710597, "learning_rate": 8.071748878923767e-06, "loss": 0.2327, "step": 180 }, { "epoch": 0.02, "grad_norm": 1.8215508582413624, "learning_rate": 8.116591928251121e-06, "loss": 0.2277, "step": 181 }, { "epoch": 0.02, "grad_norm": 2.654059183135072, "learning_rate": 8.161434977578476e-06, "loss": 0.2532, "step": 182 }, { "epoch": 0.02, "grad_norm": 2.289756691551343, "learning_rate": 8.20627802690583e-06, "loss": 0.2594, "step": 183 }, { "epoch": 0.02, "grad_norm": 2.4906872230177646, "learning_rate": 8.251121076233184e-06, "loss": 0.2947, "step": 184 }, { "epoch": 0.02, "grad_norm": 1.9832491736935196, "learning_rate": 8.295964125560539e-06, "loss": 0.2262, "step": 185 }, { "epoch": 0.03, "grad_norm": 2.430939187625257, "learning_rate": 8.340807174887893e-06, "loss": 0.3593, "step": 186 }, { "epoch": 0.03, "grad_norm": 1.5461961843246408, "learning_rate": 8.385650224215247e-06, "loss": 0.2304, "step": 187 }, { "epoch": 0.03, "grad_norm": 2.0420044991618953, "learning_rate": 8.430493273542601e-06, "loss": 0.2617, "step": 188 }, { "epoch": 0.03, "grad_norm": 1.8740054401534418, "learning_rate": 8.475336322869956e-06, "loss": 0.2424, "step": 189 }, { "epoch": 0.03, "grad_norm": 1.822324084291057, "learning_rate": 8.52017937219731e-06, "loss": 0.2629, "step": 190 }, { "epoch": 0.03, "grad_norm": 1.7853180985661834, "learning_rate": 8.565022421524664e-06, "loss": 0.2517, "step": 191 }, { "epoch": 0.03, "grad_norm": 2.3183986305253357, "learning_rate": 8.609865470852018e-06, "loss": 0.2764, "step": 192 }, { "epoch": 0.03, "grad_norm": 2.6789634213745477, "learning_rate": 8.654708520179373e-06, "loss": 0.3098, "step": 193 }, { "epoch": 0.03, "grad_norm": 2.4444426052756074, "learning_rate": 8.699551569506727e-06, "loss": 0.3149, "step": 194 }, { "epoch": 0.03, "grad_norm": 2.292089656469051, "learning_rate": 8.744394618834081e-06, "loss": 0.2926, "step": 195 }, { "epoch": 0.03, "grad_norm": 1.9203836657183857, "learning_rate": 8.789237668161435e-06, "loss": 0.2272, "step": 196 }, { "epoch": 0.03, "grad_norm": 1.8171435529592985, "learning_rate": 8.83408071748879e-06, "loss": 0.2755, "step": 197 }, { "epoch": 0.03, "grad_norm": 1.9031160499986202, "learning_rate": 8.878923766816144e-06, "loss": 0.2606, "step": 198 }, { "epoch": 0.03, "grad_norm": 1.9530622775833888, "learning_rate": 8.923766816143498e-06, "loss": 0.2449, "step": 199 }, { "epoch": 0.03, "grad_norm": 1.8573170046143632, "learning_rate": 8.968609865470853e-06, "loss": 0.1901, "step": 200 }, { "epoch": 0.03, "grad_norm": 2.6874023778401095, "learning_rate": 9.013452914798207e-06, "loss": 0.3546, "step": 201 }, { "epoch": 0.03, "grad_norm": 1.77592713845347, "learning_rate": 9.058295964125561e-06, "loss": 0.1726, "step": 202 }, { "epoch": 0.03, "grad_norm": 1.8105018174732679, "learning_rate": 9.103139013452915e-06, "loss": 0.2174, "step": 203 }, { "epoch": 0.03, "grad_norm": 2.794961898159075, "learning_rate": 9.14798206278027e-06, "loss": 0.2589, "step": 204 }, { "epoch": 0.03, "grad_norm": 1.4385366865865028, "learning_rate": 9.192825112107624e-06, "loss": 0.2057, "step": 205 }, { "epoch": 0.03, "grad_norm": 1.8309089344658787, "learning_rate": 9.237668161434978e-06, "loss": 0.2681, "step": 206 }, { "epoch": 0.03, "grad_norm": 1.6736911383932145, "learning_rate": 9.282511210762332e-06, "loss": 0.2702, "step": 207 }, { "epoch": 0.03, "grad_norm": 1.9314495912494731, "learning_rate": 9.327354260089687e-06, "loss": 0.2222, "step": 208 }, { "epoch": 0.03, "grad_norm": 1.5325444478730132, "learning_rate": 9.372197309417041e-06, "loss": 0.2779, "step": 209 }, { "epoch": 0.03, "grad_norm": 1.5443990700131134, "learning_rate": 9.417040358744395e-06, "loss": 0.2265, "step": 210 }, { "epoch": 0.03, "grad_norm": 1.888794336782419, "learning_rate": 9.46188340807175e-06, "loss": 0.3335, "step": 211 }, { "epoch": 0.03, "grad_norm": 1.954889568901051, "learning_rate": 9.506726457399104e-06, "loss": 0.257, "step": 212 }, { "epoch": 0.03, "grad_norm": 1.8308611161918678, "learning_rate": 9.551569506726458e-06, "loss": 0.2576, "step": 213 }, { "epoch": 0.03, "grad_norm": 2.0759907623622724, "learning_rate": 9.596412556053812e-06, "loss": 0.2163, "step": 214 }, { "epoch": 0.03, "grad_norm": 1.8176402875834878, "learning_rate": 9.641255605381167e-06, "loss": 0.229, "step": 215 }, { "epoch": 0.03, "grad_norm": 2.44157232659259, "learning_rate": 9.686098654708521e-06, "loss": 0.2632, "step": 216 }, { "epoch": 0.03, "grad_norm": 1.7942102162349531, "learning_rate": 9.730941704035875e-06, "loss": 0.2699, "step": 217 }, { "epoch": 0.03, "grad_norm": 2.231235980210026, "learning_rate": 9.77578475336323e-06, "loss": 0.2432, "step": 218 }, { "epoch": 0.03, "grad_norm": 1.6398446274201475, "learning_rate": 9.820627802690584e-06, "loss": 0.2061, "step": 219 }, { "epoch": 0.03, "grad_norm": 1.6349659132551297, "learning_rate": 9.865470852017938e-06, "loss": 0.2447, "step": 220 }, { "epoch": 0.03, "grad_norm": 1.5550928457536628, "learning_rate": 9.910313901345292e-06, "loss": 0.1976, "step": 221 }, { "epoch": 0.03, "grad_norm": 2.181677200264685, "learning_rate": 9.955156950672647e-06, "loss": 0.2927, "step": 222 }, { "epoch": 0.03, "grad_norm": 1.6120243112253345, "learning_rate": 1e-05, "loss": 0.1586, "step": 223 }, { "epoch": 0.03, "grad_norm": 1.9924114535776216, "learning_rate": 9.999999523108452e-06, "loss": 0.2887, "step": 224 }, { "epoch": 0.03, "grad_norm": 2.0894719193365305, "learning_rate": 9.999998092433895e-06, "loss": 0.2744, "step": 225 }, { "epoch": 0.03, "grad_norm": 1.5989556139791503, "learning_rate": 9.999995707976604e-06, "loss": 0.2127, "step": 226 }, { "epoch": 0.03, "grad_norm": 2.1262758013819947, "learning_rate": 9.999992369737033e-06, "loss": 0.2917, "step": 227 }, { "epoch": 0.03, "grad_norm": 1.5905678764346425, "learning_rate": 9.999988077715818e-06, "loss": 0.214, "step": 228 }, { "epoch": 0.03, "grad_norm": 2.15835165506946, "learning_rate": 9.999982831913782e-06, "loss": 0.2837, "step": 229 }, { "epoch": 0.03, "grad_norm": 1.7842417291879207, "learning_rate": 9.99997663233192e-06, "loss": 0.2883, "step": 230 }, { "epoch": 0.03, "grad_norm": 2.0653189680375243, "learning_rate": 9.999969478971417e-06, "loss": 0.2661, "step": 231 }, { "epoch": 0.03, "grad_norm": 1.8459535640565652, "learning_rate": 9.99996137183364e-06, "loss": 0.3145, "step": 232 }, { "epoch": 0.03, "grad_norm": 1.7749214911155626, "learning_rate": 9.999952310920131e-06, "loss": 0.2597, "step": 233 }, { "epoch": 0.03, "grad_norm": 1.5128693360122796, "learning_rate": 9.999942296232621e-06, "loss": 0.183, "step": 234 }, { "epoch": 0.03, "grad_norm": 1.7433859225248813, "learning_rate": 9.99993132777302e-06, "loss": 0.2389, "step": 235 }, { "epoch": 0.03, "grad_norm": 1.585073179242758, "learning_rate": 9.99991940554342e-06, "loss": 0.1926, "step": 236 }, { "epoch": 0.03, "grad_norm": 1.8802756284747646, "learning_rate": 9.999906529546095e-06, "loss": 0.2862, "step": 237 }, { "epoch": 0.03, "grad_norm": 1.5384526797782068, "learning_rate": 9.999892699783503e-06, "loss": 0.2781, "step": 238 }, { "epoch": 0.03, "grad_norm": 1.8642545331989269, "learning_rate": 9.999877916258281e-06, "loss": 0.3026, "step": 239 }, { "epoch": 0.03, "grad_norm": 2.04361780170625, "learning_rate": 9.999862178973247e-06, "loss": 0.3555, "step": 240 }, { "epoch": 0.03, "grad_norm": 1.9043164162627273, "learning_rate": 9.999845487931408e-06, "loss": 0.2813, "step": 241 }, { "epoch": 0.03, "grad_norm": 2.1751266435095817, "learning_rate": 9.999827843135943e-06, "loss": 0.3021, "step": 242 }, { "epoch": 0.03, "grad_norm": 1.3220494514857908, "learning_rate": 9.999809244590218e-06, "loss": 0.1975, "step": 243 }, { "epoch": 0.03, "grad_norm": 1.6315256961242937, "learning_rate": 9.999789692297784e-06, "loss": 0.2055, "step": 244 }, { "epoch": 0.03, "grad_norm": 1.8473766816186759, "learning_rate": 9.99976918626237e-06, "loss": 0.2116, "step": 245 }, { "epoch": 0.03, "grad_norm": 1.5653854329017143, "learning_rate": 9.999747726487886e-06, "loss": 0.2939, "step": 246 }, { "epoch": 0.03, "grad_norm": 2.0260234726995585, "learning_rate": 9.999725312978425e-06, "loss": 0.2578, "step": 247 }, { "epoch": 0.03, "grad_norm": 1.3419959701078072, "learning_rate": 9.999701945738265e-06, "loss": 0.1868, "step": 248 }, { "epoch": 0.03, "grad_norm": 1.4331765765822781, "learning_rate": 9.999677624771863e-06, "loss": 0.168, "step": 249 }, { "epoch": 0.03, "grad_norm": 1.5875474454318506, "learning_rate": 9.999652350083857e-06, "loss": 0.2768, "step": 250 }, { "epoch": 0.03, "grad_norm": 1.8908075270161602, "learning_rate": 9.99962612167907e-06, "loss": 0.3171, "step": 251 }, { "epoch": 0.03, "grad_norm": 2.0341180351908017, "learning_rate": 9.999598939562504e-06, "loss": 0.1806, "step": 252 }, { "epoch": 0.03, "grad_norm": 2.19610614525716, "learning_rate": 9.999570803739345e-06, "loss": 0.2998, "step": 253 }, { "epoch": 0.03, "grad_norm": 1.402794713998738, "learning_rate": 9.99954171421496e-06, "loss": 0.2432, "step": 254 }, { "epoch": 0.03, "grad_norm": 1.443677642576484, "learning_rate": 9.999511670994896e-06, "loss": 0.235, "step": 255 }, { "epoch": 0.03, "grad_norm": 1.7974153153026966, "learning_rate": 9.999480674084888e-06, "loss": 0.372, "step": 256 }, { "epoch": 0.03, "grad_norm": 2.2856386450969266, "learning_rate": 9.999448723490843e-06, "loss": 0.2879, "step": 257 }, { "epoch": 0.03, "grad_norm": 2.2183505841820113, "learning_rate": 9.999415819218861e-06, "loss": 0.2792, "step": 258 }, { "epoch": 0.03, "grad_norm": 1.433519945852409, "learning_rate": 9.999381961275216e-06, "loss": 0.2019, "step": 259 }, { "epoch": 0.04, "grad_norm": 1.57799090847337, "learning_rate": 9.99934714966637e-06, "loss": 0.2473, "step": 260 }, { "epoch": 0.04, "grad_norm": 1.496041380836511, "learning_rate": 9.99931138439896e-06, "loss": 0.2297, "step": 261 }, { "epoch": 0.04, "grad_norm": 0.9745405493926677, "learning_rate": 9.999274665479809e-06, "loss": 0.2005, "step": 262 }, { "epoch": 0.04, "grad_norm": 1.9655756245615328, "learning_rate": 9.999236992915922e-06, "loss": 0.3022, "step": 263 }, { "epoch": 0.04, "grad_norm": 1.9461083237394081, "learning_rate": 9.999198366714485e-06, "loss": 0.3281, "step": 264 }, { "epoch": 0.04, "grad_norm": 0.899600839348888, "learning_rate": 9.999158786882867e-06, "loss": 0.2318, "step": 265 }, { "epoch": 0.04, "grad_norm": 1.2288886890359425, "learning_rate": 9.999118253428617e-06, "loss": 0.1953, "step": 266 }, { "epoch": 0.04, "grad_norm": 1.6640935984124947, "learning_rate": 9.999076766359468e-06, "loss": 0.2347, "step": 267 }, { "epoch": 0.04, "grad_norm": 1.544789589801104, "learning_rate": 9.999034325683333e-06, "loss": 0.2402, "step": 268 }, { "epoch": 0.04, "grad_norm": 1.881110241137654, "learning_rate": 9.998990931408308e-06, "loss": 0.2095, "step": 269 }, { "epoch": 0.04, "grad_norm": 1.8184328507696852, "learning_rate": 9.998946583542672e-06, "loss": 0.313, "step": 270 }, { "epoch": 0.04, "grad_norm": 1.7721575965896967, "learning_rate": 9.998901282094883e-06, "loss": 0.3206, "step": 271 }, { "epoch": 0.04, "grad_norm": 2.139067096516454, "learning_rate": 9.998855027073586e-06, "loss": 0.2649, "step": 272 }, { "epoch": 0.04, "grad_norm": 1.4537166662870942, "learning_rate": 9.998807818487599e-06, "loss": 0.1638, "step": 273 }, { "epoch": 0.04, "grad_norm": 1.495237901826271, "learning_rate": 9.998759656345932e-06, "loss": 0.2586, "step": 274 }, { "epoch": 0.04, "grad_norm": 1.5806342598303946, "learning_rate": 9.998710540657768e-06, "loss": 0.1744, "step": 275 }, { "epoch": 0.04, "grad_norm": 1.4512517605741175, "learning_rate": 9.99866047143248e-06, "loss": 0.1907, "step": 276 }, { "epoch": 0.04, "grad_norm": 1.4472971853731498, "learning_rate": 9.998609448679616e-06, "loss": 0.2577, "step": 277 }, { "epoch": 0.04, "grad_norm": 1.6395978441901917, "learning_rate": 9.998557472408911e-06, "loss": 0.2502, "step": 278 }, { "epoch": 0.04, "grad_norm": 1.70537504309961, "learning_rate": 9.99850454263028e-06, "loss": 0.2755, "step": 279 }, { "epoch": 0.04, "grad_norm": 1.8898276285007134, "learning_rate": 9.99845065935382e-06, "loss": 0.2898, "step": 280 }, { "epoch": 0.04, "grad_norm": 1.9504813519265933, "learning_rate": 9.998395822589806e-06, "loss": 0.3641, "step": 281 }, { "epoch": 0.04, "grad_norm": 1.4579689203817288, "learning_rate": 9.998340032348703e-06, "loss": 0.2369, "step": 282 }, { "epoch": 0.04, "grad_norm": 2.0055649136156424, "learning_rate": 9.998283288641151e-06, "loss": 0.273, "step": 283 }, { "epoch": 0.04, "grad_norm": 1.8462271798284726, "learning_rate": 9.998225591477974e-06, "loss": 0.2876, "step": 284 }, { "epoch": 0.04, "grad_norm": 1.6589213308191293, "learning_rate": 9.99816694087018e-06, "loss": 0.2603, "step": 285 }, { "epoch": 0.04, "grad_norm": 1.5182852409983394, "learning_rate": 9.998107336828954e-06, "loss": 0.2427, "step": 286 }, { "epoch": 0.04, "grad_norm": 1.2635381391566969, "learning_rate": 9.99804677936567e-06, "loss": 0.2676, "step": 287 }, { "epoch": 0.04, "grad_norm": 1.7797379434569023, "learning_rate": 9.997985268491874e-06, "loss": 0.1931, "step": 288 }, { "epoch": 0.04, "grad_norm": 1.707134688319711, "learning_rate": 9.997922804219306e-06, "loss": 0.2405, "step": 289 }, { "epoch": 0.04, "grad_norm": 1.1521884398002293, "learning_rate": 9.997859386559876e-06, "loss": 0.1743, "step": 290 }, { "epoch": 0.04, "grad_norm": 1.3890897992669649, "learning_rate": 9.997795015525686e-06, "loss": 0.2339, "step": 291 }, { "epoch": 0.04, "grad_norm": 1.352852933551124, "learning_rate": 9.997729691129012e-06, "loss": 0.2378, "step": 292 }, { "epoch": 0.04, "grad_norm": 2.071963399059025, "learning_rate": 9.997663413382315e-06, "loss": 0.3095, "step": 293 }, { "epoch": 0.04, "grad_norm": 2.207985718454822, "learning_rate": 9.997596182298241e-06, "loss": 0.3492, "step": 294 }, { "epoch": 0.04, "grad_norm": 1.6634815658840405, "learning_rate": 9.997527997889611e-06, "loss": 0.2711, "step": 295 }, { "epoch": 0.04, "grad_norm": 1.6254177297715333, "learning_rate": 9.997458860169434e-06, "loss": 0.2297, "step": 296 }, { "epoch": 0.04, "grad_norm": 1.5309648035970025, "learning_rate": 9.9973887691509e-06, "loss": 0.2252, "step": 297 }, { "epoch": 0.04, "grad_norm": 1.3698161173226695, "learning_rate": 9.997317724847374e-06, "loss": 0.2925, "step": 298 }, { "epoch": 0.04, "grad_norm": 1.8259868019911907, "learning_rate": 9.997245727272412e-06, "loss": 0.3152, "step": 299 }, { "epoch": 0.04, "grad_norm": 1.9052739844820998, "learning_rate": 9.997172776439747e-06, "loss": 0.3229, "step": 300 }, { "epoch": 0.04, "grad_norm": 2.582574565974949, "learning_rate": 9.997098872363297e-06, "loss": 0.3965, "step": 301 }, { "epoch": 0.04, "grad_norm": 1.55261765202827, "learning_rate": 9.997024015057156e-06, "loss": 0.2669, "step": 302 }, { "epoch": 0.04, "grad_norm": 1.7879625771782097, "learning_rate": 9.996948204535606e-06, "loss": 0.2599, "step": 303 }, { "epoch": 0.04, "grad_norm": 1.7051280891993243, "learning_rate": 9.996871440813108e-06, "loss": 0.2746, "step": 304 }, { "epoch": 0.04, "grad_norm": 1.7556892722099415, "learning_rate": 9.996793723904306e-06, "loss": 0.2813, "step": 305 }, { "epoch": 0.04, "grad_norm": 1.7318041224805814, "learning_rate": 9.996715053824022e-06, "loss": 0.2794, "step": 306 }, { "epoch": 0.04, "grad_norm": 1.8637752792292228, "learning_rate": 9.996635430587267e-06, "loss": 0.2781, "step": 307 }, { "epoch": 0.04, "grad_norm": 1.9445128637961269, "learning_rate": 9.996554854209225e-06, "loss": 0.3044, "step": 308 }, { "epoch": 0.04, "grad_norm": 1.209350183363098, "learning_rate": 9.99647332470527e-06, "loss": 0.2262, "step": 309 }, { "epoch": 0.04, "grad_norm": 2.055116622389113, "learning_rate": 9.996390842090955e-06, "loss": 0.3078, "step": 310 }, { "epoch": 0.04, "grad_norm": 1.6803359154522632, "learning_rate": 9.996307406382009e-06, "loss": 0.2933, "step": 311 }, { "epoch": 0.04, "grad_norm": 1.700049683806981, "learning_rate": 9.996223017594354e-06, "loss": 0.242, "step": 312 }, { "epoch": 0.04, "grad_norm": 1.6184090046934805, "learning_rate": 9.996137675744083e-06, "loss": 0.2567, "step": 313 }, { "epoch": 0.04, "grad_norm": 1.768717035633087, "learning_rate": 9.996051380847479e-06, "loss": 0.2846, "step": 314 }, { "epoch": 0.04, "grad_norm": 1.506581835755855, "learning_rate": 9.995964132921001e-06, "loss": 0.2256, "step": 315 }, { "epoch": 0.04, "grad_norm": 1.5509361607647292, "learning_rate": 9.995875931981294e-06, "loss": 0.2504, "step": 316 }, { "epoch": 0.04, "grad_norm": 2.0046303664066105, "learning_rate": 9.99578677804518e-06, "loss": 0.2905, "step": 317 }, { "epoch": 0.04, "grad_norm": 1.2070283476168322, "learning_rate": 9.995696671129668e-06, "loss": 0.2727, "step": 318 }, { "epoch": 0.04, "grad_norm": 1.8153143604319486, "learning_rate": 9.995605611251948e-06, "loss": 0.3081, "step": 319 }, { "epoch": 0.04, "grad_norm": 1.6066158495210843, "learning_rate": 9.995513598429387e-06, "loss": 0.2139, "step": 320 }, { "epoch": 0.04, "grad_norm": 2.041787649402546, "learning_rate": 9.995420632679538e-06, "loss": 0.2993, "step": 321 }, { "epoch": 0.04, "grad_norm": 1.8437722164867407, "learning_rate": 9.995326714020136e-06, "loss": 0.2722, "step": 322 }, { "epoch": 0.04, "grad_norm": 1.1564868117872524, "learning_rate": 9.995231842469095e-06, "loss": 0.2624, "step": 323 }, { "epoch": 0.04, "grad_norm": 1.9723940992061613, "learning_rate": 9.995136018044513e-06, "loss": 0.271, "step": 324 }, { "epoch": 0.04, "grad_norm": 1.73038802804571, "learning_rate": 9.99503924076467e-06, "loss": 0.3289, "step": 325 }, { "epoch": 0.04, "grad_norm": 1.3328778703589965, "learning_rate": 9.994941510648026e-06, "loss": 0.1773, "step": 326 }, { "epoch": 0.04, "grad_norm": 1.6209515821269351, "learning_rate": 9.994842827713225e-06, "loss": 0.2061, "step": 327 }, { "epoch": 0.04, "grad_norm": 1.7506618273105239, "learning_rate": 9.99474319197909e-06, "loss": 0.2574, "step": 328 }, { "epoch": 0.04, "grad_norm": 1.4575830430626537, "learning_rate": 9.994642603464626e-06, "loss": 0.2454, "step": 329 }, { "epoch": 0.04, "grad_norm": 1.638166981644136, "learning_rate": 9.994541062189025e-06, "loss": 0.2373, "step": 330 }, { "epoch": 0.04, "grad_norm": 1.7840616443987694, "learning_rate": 9.994438568171652e-06, "loss": 0.2253, "step": 331 }, { "epoch": 0.04, "grad_norm": 1.7411236928775584, "learning_rate": 9.99433512143206e-06, "loss": 0.2354, "step": 332 }, { "epoch": 0.04, "grad_norm": 1.362306936140375, "learning_rate": 9.994230721989983e-06, "loss": 0.163, "step": 333 }, { "epoch": 0.05, "grad_norm": 1.1521866105067113, "learning_rate": 9.994125369865337e-06, "loss": 0.142, "step": 334 }, { "epoch": 0.05, "grad_norm": 1.8375526417948076, "learning_rate": 9.994019065078217e-06, "loss": 0.3063, "step": 335 }, { "epoch": 0.05, "grad_norm": 1.9764586607380783, "learning_rate": 9.993911807648901e-06, "loss": 0.3668, "step": 336 }, { "epoch": 0.05, "grad_norm": 1.75831008854968, "learning_rate": 9.993803597597849e-06, "loss": 0.3088, "step": 337 }, { "epoch": 0.05, "grad_norm": 1.639406224603388, "learning_rate": 9.993694434945705e-06, "loss": 0.3125, "step": 338 }, { "epoch": 0.05, "grad_norm": 1.8479663534416089, "learning_rate": 9.993584319713289e-06, "loss": 0.3031, "step": 339 }, { "epoch": 0.05, "grad_norm": 1.5617648713058614, "learning_rate": 9.993473251921608e-06, "loss": 0.2496, "step": 340 }, { "epoch": 0.05, "grad_norm": 1.996336714011638, "learning_rate": 9.99336123159185e-06, "loss": 0.3139, "step": 341 }, { "epoch": 0.05, "grad_norm": 1.6949488742587895, "learning_rate": 9.993248258745383e-06, "loss": 0.2414, "step": 342 }, { "epoch": 0.05, "grad_norm": 1.6476285878172074, "learning_rate": 9.993134333403756e-06, "loss": 0.3006, "step": 343 }, { "epoch": 0.05, "grad_norm": 1.30734707045587, "learning_rate": 9.993019455588701e-06, "loss": 0.2147, "step": 344 }, { "epoch": 0.05, "grad_norm": 1.6513939331967316, "learning_rate": 9.992903625322135e-06, "loss": 0.2724, "step": 345 }, { "epoch": 0.05, "grad_norm": 1.8524042976967978, "learning_rate": 9.99278684262615e-06, "loss": 0.3233, "step": 346 }, { "epoch": 0.05, "grad_norm": 1.7447000415836578, "learning_rate": 9.992669107523025e-06, "loss": 0.2581, "step": 347 }, { "epoch": 0.05, "grad_norm": 1.358548463706941, "learning_rate": 9.992550420035217e-06, "loss": 0.2084, "step": 348 }, { "epoch": 0.05, "grad_norm": 1.521740265393964, "learning_rate": 9.992430780185368e-06, "loss": 0.2611, "step": 349 }, { "epoch": 0.05, "grad_norm": 1.5537110131216487, "learning_rate": 9.992310187996298e-06, "loss": 0.2591, "step": 350 }, { "epoch": 0.05, "grad_norm": 1.7653938139011018, "learning_rate": 9.992188643491013e-06, "loss": 0.3445, "step": 351 }, { "epoch": 0.05, "grad_norm": 1.34380075127815, "learning_rate": 9.992066146692695e-06, "loss": 0.1779, "step": 352 }, { "epoch": 0.05, "grad_norm": 1.1494611403386057, "learning_rate": 9.991942697624717e-06, "loss": 0.1782, "step": 353 }, { "epoch": 0.05, "grad_norm": 1.252133650727571, "learning_rate": 9.991818296310623e-06, "loss": 0.1997, "step": 354 }, { "epoch": 0.05, "grad_norm": 0.8128143028682625, "learning_rate": 9.991692942774144e-06, "loss": 0.2481, "step": 355 }, { "epoch": 0.05, "grad_norm": 1.101730431468222, "learning_rate": 9.991566637039193e-06, "loss": 0.235, "step": 356 }, { "epoch": 0.05, "grad_norm": 1.3603617279791294, "learning_rate": 9.991439379129864e-06, "loss": 0.2196, "step": 357 }, { "epoch": 0.05, "grad_norm": 1.4624778934961298, "learning_rate": 9.991311169070433e-06, "loss": 0.168, "step": 358 }, { "epoch": 0.05, "grad_norm": 1.5326217606696262, "learning_rate": 9.991182006885354e-06, "loss": 0.2587, "step": 359 }, { "epoch": 0.05, "grad_norm": 1.4234728273118735, "learning_rate": 9.991051892599267e-06, "loss": 0.2502, "step": 360 }, { "epoch": 0.05, "grad_norm": 1.3182319467343533, "learning_rate": 9.990920826236993e-06, "loss": 0.2065, "step": 361 }, { "epoch": 0.05, "grad_norm": 1.1516566116161606, "learning_rate": 9.990788807823533e-06, "loss": 0.2377, "step": 362 }, { "epoch": 0.05, "grad_norm": 1.4170919394074935, "learning_rate": 9.99065583738407e-06, "loss": 0.1954, "step": 363 }, { "epoch": 0.05, "grad_norm": 2.035379683136883, "learning_rate": 9.990521914943969e-06, "loss": 0.3012, "step": 364 }, { "epoch": 0.05, "grad_norm": 1.474186274063641, "learning_rate": 9.990387040528778e-06, "loss": 0.2573, "step": 365 }, { "epoch": 0.05, "grad_norm": 1.6081366475768557, "learning_rate": 9.990251214164225e-06, "loss": 0.2499, "step": 366 }, { "epoch": 0.05, "grad_norm": 1.440626134556924, "learning_rate": 9.990114435876217e-06, "loss": 0.2328, "step": 367 }, { "epoch": 0.05, "grad_norm": 1.5317837617027603, "learning_rate": 9.98997670569085e-06, "loss": 0.274, "step": 368 }, { "epoch": 0.05, "grad_norm": 1.6328400047238645, "learning_rate": 9.989838023634392e-06, "loss": 0.2855, "step": 369 }, { "epoch": 0.05, "grad_norm": 1.407754480578645, "learning_rate": 9.9896983897333e-06, "loss": 0.2446, "step": 370 }, { "epoch": 0.05, "grad_norm": 1.469179966886001, "learning_rate": 9.989557804014211e-06, "loss": 0.22, "step": 371 }, { "epoch": 0.05, "grad_norm": 2.0233906923939395, "learning_rate": 9.989416266503941e-06, "loss": 0.3793, "step": 372 }, { "epoch": 0.05, "grad_norm": 1.0152417285517032, "learning_rate": 9.98927377722949e-06, "loss": 0.1754, "step": 373 }, { "epoch": 0.05, "grad_norm": 1.514607854209359, "learning_rate": 9.989130336218039e-06, "loss": 0.2553, "step": 374 }, { "epoch": 0.05, "grad_norm": 1.3766219248903173, "learning_rate": 9.98898594349695e-06, "loss": 0.1337, "step": 375 }, { "epoch": 0.05, "grad_norm": 1.7427332165408644, "learning_rate": 9.988840599093765e-06, "loss": 0.2234, "step": 376 }, { "epoch": 0.05, "grad_norm": 2.464126128979691, "learning_rate": 9.988694303036213e-06, "loss": 0.372, "step": 377 }, { "epoch": 0.05, "grad_norm": 1.8563217736273507, "learning_rate": 9.988547055352198e-06, "loss": 0.266, "step": 378 }, { "epoch": 0.05, "grad_norm": 1.6781914700162943, "learning_rate": 9.98839885606981e-06, "loss": 0.2673, "step": 379 }, { "epoch": 0.05, "grad_norm": 1.155751408018473, "learning_rate": 9.988249705217319e-06, "loss": 0.2218, "step": 380 }, { "epoch": 0.05, "grad_norm": 1.2773435423663768, "learning_rate": 9.988099602823176e-06, "loss": 0.1695, "step": 381 }, { "epoch": 0.05, "grad_norm": 1.9207287047121115, "learning_rate": 9.987948548916013e-06, "loss": 0.3124, "step": 382 }, { "epoch": 0.05, "grad_norm": 1.5929039925829125, "learning_rate": 9.987796543524646e-06, "loss": 0.3003, "step": 383 }, { "epoch": 0.05, "grad_norm": 1.9606189878064246, "learning_rate": 9.987643586678072e-06, "loss": 0.2778, "step": 384 }, { "epoch": 0.05, "grad_norm": 1.7029876998328461, "learning_rate": 9.987489678405466e-06, "loss": 0.2792, "step": 385 }, { "epoch": 0.05, "grad_norm": 1.4537220626857696, "learning_rate": 9.98733481873619e-06, "loss": 0.2473, "step": 386 }, { "epoch": 0.05, "grad_norm": 1.1914177287456573, "learning_rate": 9.987179007699781e-06, "loss": 0.1775, "step": 387 }, { "epoch": 0.05, "grad_norm": 1.7656774263089312, "learning_rate": 9.987022245325962e-06, "loss": 0.2705, "step": 388 }, { "epoch": 0.05, "grad_norm": 1.8449305478584697, "learning_rate": 9.986864531644638e-06, "loss": 0.3358, "step": 389 }, { "epoch": 0.05, "grad_norm": 1.2221327230096954, "learning_rate": 9.986705866685895e-06, "loss": 0.1285, "step": 390 }, { "epoch": 0.05, "grad_norm": 1.2390381429424178, "learning_rate": 9.986546250479996e-06, "loss": 0.2001, "step": 391 }, { "epoch": 0.05, "grad_norm": 1.1219064062050794, "learning_rate": 9.98638568305739e-06, "loss": 0.1694, "step": 392 }, { "epoch": 0.05, "grad_norm": 2.0434242859240044, "learning_rate": 9.986224164448706e-06, "loss": 0.2479, "step": 393 }, { "epoch": 0.05, "grad_norm": 1.4206821661978786, "learning_rate": 9.986061694684757e-06, "loss": 0.2526, "step": 394 }, { "epoch": 0.05, "grad_norm": 1.1448459715427264, "learning_rate": 9.985898273796532e-06, "loss": 0.1625, "step": 395 }, { "epoch": 0.05, "grad_norm": 1.2150347157734798, "learning_rate": 9.985733901815208e-06, "loss": 0.2268, "step": 396 }, { "epoch": 0.05, "grad_norm": 1.1167541044676215, "learning_rate": 9.985568578772138e-06, "loss": 0.1519, "step": 397 }, { "epoch": 0.05, "grad_norm": 1.7994698084375245, "learning_rate": 9.985402304698857e-06, "loss": 0.3021, "step": 398 }, { "epoch": 0.05, "grad_norm": 1.6883930387820392, "learning_rate": 9.985235079627087e-06, "loss": 0.31, "step": 399 }, { "epoch": 0.05, "grad_norm": 1.27019975776995, "learning_rate": 9.985066903588723e-06, "loss": 0.1557, "step": 400 }, { "epoch": 0.05, "grad_norm": 1.5216059402625692, "learning_rate": 9.98489777661585e-06, "loss": 0.2125, "step": 401 }, { "epoch": 0.05, "grad_norm": 1.7880525842769537, "learning_rate": 9.984727698740724e-06, "loss": 0.2476, "step": 402 }, { "epoch": 0.05, "grad_norm": 1.491605120773047, "learning_rate": 9.984556669995794e-06, "loss": 0.2284, "step": 403 }, { "epoch": 0.05, "grad_norm": 1.7641898404649257, "learning_rate": 9.984384690413684e-06, "loss": 0.2288, "step": 404 }, { "epoch": 0.05, "grad_norm": 1.5285823958284155, "learning_rate": 9.984211760027199e-06, "loss": 0.2295, "step": 405 }, { "epoch": 0.05, "grad_norm": 1.6109222577575069, "learning_rate": 9.984037878869325e-06, "loss": 0.219, "step": 406 }, { "epoch": 0.05, "grad_norm": 1.0085774867017783, "learning_rate": 9.983863046973235e-06, "loss": 0.1425, "step": 407 }, { "epoch": 0.06, "grad_norm": 1.554847726109499, "learning_rate": 9.983687264372275e-06, "loss": 0.1908, "step": 408 }, { "epoch": 0.06, "grad_norm": 1.152047171319707, "learning_rate": 9.98351053109998e-06, "loss": 0.2942, "step": 409 }, { "epoch": 0.06, "grad_norm": 1.066256378616708, "learning_rate": 9.98333284719006e-06, "loss": 0.2479, "step": 410 }, { "epoch": 0.06, "grad_norm": 1.022025242828952, "learning_rate": 9.983154212676416e-06, "loss": 0.2129, "step": 411 }, { "epoch": 0.06, "grad_norm": 1.3658526855021984, "learning_rate": 9.982974627593116e-06, "loss": 0.2098, "step": 412 }, { "epoch": 0.06, "grad_norm": 1.3928705764527871, "learning_rate": 9.982794091974421e-06, "loss": 0.2562, "step": 413 }, { "epoch": 0.06, "grad_norm": 1.6005524270682647, "learning_rate": 9.982612605854767e-06, "loss": 0.3131, "step": 414 }, { "epoch": 0.06, "grad_norm": 1.5340081343998748, "learning_rate": 9.982430169268775e-06, "loss": 0.2521, "step": 415 }, { "epoch": 0.06, "grad_norm": 1.331724170308008, "learning_rate": 9.982246782251247e-06, "loss": 0.2752, "step": 416 }, { "epoch": 0.06, "grad_norm": 1.2612078982758885, "learning_rate": 9.982062444837167e-06, "loss": 0.2485, "step": 417 }, { "epoch": 0.06, "grad_norm": 1.2405080280199146, "learning_rate": 9.981877157061693e-06, "loss": 0.1841, "step": 418 }, { "epoch": 0.06, "grad_norm": 1.2642511665521114, "learning_rate": 9.981690918960172e-06, "loss": 0.2496, "step": 419 }, { "epoch": 0.06, "grad_norm": 1.6540447782558483, "learning_rate": 9.981503730568134e-06, "loss": 0.2935, "step": 420 }, { "epoch": 0.06, "grad_norm": 1.6286383466255596, "learning_rate": 9.98131559192128e-06, "loss": 0.2903, "step": 421 }, { "epoch": 0.06, "grad_norm": 1.2922032449360228, "learning_rate": 9.981126503055505e-06, "loss": 0.2639, "step": 422 }, { "epoch": 0.06, "grad_norm": 1.5183591043592868, "learning_rate": 9.980936464006874e-06, "loss": 0.218, "step": 423 }, { "epoch": 0.06, "grad_norm": 1.1031400598277206, "learning_rate": 9.980745474811641e-06, "loss": 0.2002, "step": 424 }, { "epoch": 0.06, "grad_norm": 1.5059057939953393, "learning_rate": 9.980553535506239e-06, "loss": 0.2592, "step": 425 }, { "epoch": 0.06, "grad_norm": 1.177892190164666, "learning_rate": 9.980360646127279e-06, "loss": 0.2481, "step": 426 }, { "epoch": 0.06, "grad_norm": 1.641722270532745, "learning_rate": 9.980166806711558e-06, "loss": 0.2715, "step": 427 }, { "epoch": 0.06, "grad_norm": 1.5102139419222766, "learning_rate": 9.97997201729605e-06, "loss": 0.2832, "step": 428 }, { "epoch": 0.06, "grad_norm": 1.611984875148702, "learning_rate": 9.979776277917915e-06, "loss": 0.1804, "step": 429 }, { "epoch": 0.06, "grad_norm": 1.4047601988190053, "learning_rate": 9.97957958861449e-06, "loss": 0.2372, "step": 430 }, { "epoch": 0.06, "grad_norm": 1.1786204771970885, "learning_rate": 9.979381949423297e-06, "loss": 0.1579, "step": 431 }, { "epoch": 0.06, "grad_norm": 1.4740441251110188, "learning_rate": 9.979183360382034e-06, "loss": 0.2291, "step": 432 }, { "epoch": 0.06, "grad_norm": 1.116895865719068, "learning_rate": 9.978983821528583e-06, "loss": 0.207, "step": 433 }, { "epoch": 0.06, "grad_norm": 1.6510876547081634, "learning_rate": 9.97878333290101e-06, "loss": 0.2465, "step": 434 }, { "epoch": 0.06, "grad_norm": 1.276415310418171, "learning_rate": 9.978581894537558e-06, "loss": 0.2243, "step": 435 }, { "epoch": 0.06, "grad_norm": 1.5398265955213237, "learning_rate": 9.978379506476654e-06, "loss": 0.3386, "step": 436 }, { "epoch": 0.06, "grad_norm": 1.2817260223377647, "learning_rate": 9.978176168756903e-06, "loss": 0.2605, "step": 437 }, { "epoch": 0.06, "grad_norm": 1.7296186117811552, "learning_rate": 9.977971881417095e-06, "loss": 0.2382, "step": 438 }, { "epoch": 0.06, "grad_norm": 1.3699895998128955, "learning_rate": 9.977766644496195e-06, "loss": 0.2833, "step": 439 }, { "epoch": 0.06, "grad_norm": 1.4034811491452013, "learning_rate": 9.977560458033359e-06, "loss": 0.2597, "step": 440 }, { "epoch": 0.06, "grad_norm": 2.117434377455661, "learning_rate": 9.977353322067915e-06, "loss": 0.3616, "step": 441 }, { "epoch": 0.06, "grad_norm": 1.2819590256552735, "learning_rate": 9.977145236639377e-06, "loss": 0.2597, "step": 442 }, { "epoch": 0.06, "grad_norm": 1.4133385458608474, "learning_rate": 9.976936201787437e-06, "loss": 0.2563, "step": 443 }, { "epoch": 0.06, "grad_norm": 1.381463036135683, "learning_rate": 9.976726217551971e-06, "loss": 0.2519, "step": 444 }, { "epoch": 0.06, "grad_norm": 1.3151395500871566, "learning_rate": 9.976515283973035e-06, "loss": 0.2464, "step": 445 }, { "epoch": 0.06, "grad_norm": 1.4536996616551807, "learning_rate": 9.976303401090866e-06, "loss": 0.255, "step": 446 }, { "epoch": 0.06, "grad_norm": 1.388091261090952, "learning_rate": 9.97609056894588e-06, "loss": 0.2371, "step": 447 }, { "epoch": 0.06, "grad_norm": 1.645823837453223, "learning_rate": 9.97587678757868e-06, "loss": 0.214, "step": 448 }, { "epoch": 0.06, "grad_norm": 1.0984934033674487, "learning_rate": 9.975662057030042e-06, "loss": 0.2035, "step": 449 }, { "epoch": 0.06, "grad_norm": 1.193863086904732, "learning_rate": 9.97544637734093e-06, "loss": 0.2145, "step": 450 }, { "epoch": 0.06, "grad_norm": 1.588966043524015, "learning_rate": 9.975229748552485e-06, "loss": 0.3193, "step": 451 }, { "epoch": 0.06, "grad_norm": 1.0055603616412143, "learning_rate": 9.975012170706033e-06, "loss": 0.1494, "step": 452 }, { "epoch": 0.06, "grad_norm": 1.709236419005986, "learning_rate": 9.974793643843076e-06, "loss": 0.3301, "step": 453 }, { "epoch": 0.06, "grad_norm": 1.36226472460251, "learning_rate": 9.9745741680053e-06, "loss": 0.2932, "step": 454 }, { "epoch": 0.06, "grad_norm": 1.4454367350170994, "learning_rate": 9.97435374323457e-06, "loss": 0.2431, "step": 455 }, { "epoch": 0.06, "grad_norm": 1.4603359622000458, "learning_rate": 9.974132369572936e-06, "loss": 0.2377, "step": 456 }, { "epoch": 0.06, "grad_norm": 1.3531408852287579, "learning_rate": 9.973910047062625e-06, "loss": 0.2206, "step": 457 }, { "epoch": 0.06, "grad_norm": 1.3199208151859918, "learning_rate": 9.973686775746045e-06, "loss": 0.2347, "step": 458 }, { "epoch": 0.06, "grad_norm": 1.2521765787653636, "learning_rate": 9.97346255566579e-06, "loss": 0.2047, "step": 459 }, { "epoch": 0.06, "grad_norm": 0.9106461542220722, "learning_rate": 9.973237386864629e-06, "loss": 0.2291, "step": 460 }, { "epoch": 0.06, "grad_norm": 1.2809074564827772, "learning_rate": 9.973011269385518e-06, "loss": 0.2565, "step": 461 }, { "epoch": 0.06, "grad_norm": 1.3204804400413626, "learning_rate": 9.972784203271583e-06, "loss": 0.211, "step": 462 }, { "epoch": 0.06, "grad_norm": 1.0926110049507076, "learning_rate": 9.972556188566147e-06, "loss": 0.2181, "step": 463 }, { "epoch": 0.06, "grad_norm": 1.2669929613658588, "learning_rate": 9.972327225312698e-06, "loss": 0.2489, "step": 464 }, { "epoch": 0.06, "grad_norm": 0.8559552917620891, "learning_rate": 9.972097313554918e-06, "loss": 0.2104, "step": 465 }, { "epoch": 0.06, "grad_norm": 0.9329313515659348, "learning_rate": 9.971866453336662e-06, "loss": 0.1433, "step": 466 }, { "epoch": 0.06, "grad_norm": 1.5368873904697071, "learning_rate": 9.971634644701966e-06, "loss": 0.2737, "step": 467 }, { "epoch": 0.06, "grad_norm": 1.3699973020832892, "learning_rate": 9.971401887695052e-06, "loss": 0.2097, "step": 468 }, { "epoch": 0.06, "grad_norm": 1.7073666526604245, "learning_rate": 9.971168182360318e-06, "loss": 0.2758, "step": 469 }, { "epoch": 0.06, "grad_norm": 1.3860170386719612, "learning_rate": 9.970933528742347e-06, "loss": 0.2205, "step": 470 }, { "epoch": 0.06, "grad_norm": 1.573131827218781, "learning_rate": 9.970697926885899e-06, "loss": 0.2727, "step": 471 }, { "epoch": 0.06, "grad_norm": 1.5555894224059688, "learning_rate": 9.970461376835916e-06, "loss": 0.1817, "step": 472 }, { "epoch": 0.06, "grad_norm": 1.4680983537503904, "learning_rate": 9.970223878637523e-06, "loss": 0.2096, "step": 473 }, { "epoch": 0.06, "grad_norm": 1.507254938052605, "learning_rate": 9.969985432336023e-06, "loss": 0.229, "step": 474 }, { "epoch": 0.06, "grad_norm": 1.5866275403294732, "learning_rate": 9.969746037976905e-06, "loss": 0.2247, "step": 475 }, { "epoch": 0.06, "grad_norm": 1.4529661280095152, "learning_rate": 9.969505695605828e-06, "loss": 0.2137, "step": 476 }, { "epoch": 0.06, "grad_norm": 1.0636582165570403, "learning_rate": 9.969264405268646e-06, "loss": 0.2012, "step": 477 }, { "epoch": 0.06, "grad_norm": 1.3735163108264759, "learning_rate": 9.969022167011381e-06, "loss": 0.2305, "step": 478 }, { "epoch": 0.06, "grad_norm": 1.4661228700839977, "learning_rate": 9.968778980880247e-06, "loss": 0.2493, "step": 479 }, { "epoch": 0.06, "grad_norm": 1.7203417845597087, "learning_rate": 9.96853484692163e-06, "loss": 0.2644, "step": 480 }, { "epoch": 0.06, "grad_norm": 1.3676651456259614, "learning_rate": 9.9682897651821e-06, "loss": 0.2339, "step": 481 }, { "epoch": 0.06, "grad_norm": 1.6933553921730415, "learning_rate": 9.968043735708411e-06, "loss": 0.2501, "step": 482 }, { "epoch": 0.07, "grad_norm": 1.6970584606866173, "learning_rate": 9.96779675854749e-06, "loss": 0.2606, "step": 483 }, { "epoch": 0.07, "grad_norm": 1.1816048186471906, "learning_rate": 9.967548833746451e-06, "loss": 0.2167, "step": 484 }, { "epoch": 0.07, "grad_norm": 1.7136313108990782, "learning_rate": 9.967299961352592e-06, "loss": 0.2697, "step": 485 }, { "epoch": 0.07, "grad_norm": 1.6199191796460857, "learning_rate": 9.967050141413382e-06, "loss": 0.2772, "step": 486 }, { "epoch": 0.07, "grad_norm": 1.6219118871290195, "learning_rate": 9.966799373976478e-06, "loss": 0.3001, "step": 487 }, { "epoch": 0.07, "grad_norm": 1.1634006590478403, "learning_rate": 9.966547659089711e-06, "loss": 0.1875, "step": 488 }, { "epoch": 0.07, "grad_norm": 1.299979436068115, "learning_rate": 9.966294996801105e-06, "loss": 0.2022, "step": 489 }, { "epoch": 0.07, "grad_norm": 1.4213481875229708, "learning_rate": 9.966041387158851e-06, "loss": 0.3289, "step": 490 }, { "epoch": 0.07, "grad_norm": 1.0544732025970767, "learning_rate": 9.96578683021133e-06, "loss": 0.1975, "step": 491 }, { "epoch": 0.07, "grad_norm": 1.148722424342399, "learning_rate": 9.965531326007099e-06, "loss": 0.2003, "step": 492 }, { "epoch": 0.07, "grad_norm": 0.9773312609636016, "learning_rate": 9.965274874594896e-06, "loss": 0.2025, "step": 493 }, { "epoch": 0.07, "grad_norm": 1.028542094479082, "learning_rate": 9.965017476023645e-06, "loss": 0.2589, "step": 494 }, { "epoch": 0.07, "grad_norm": 1.8126563775567521, "learning_rate": 9.96475913034244e-06, "loss": 0.299, "step": 495 }, { "epoch": 0.07, "grad_norm": 1.7055442025194985, "learning_rate": 9.96449983760057e-06, "loss": 0.2542, "step": 496 }, { "epoch": 0.07, "grad_norm": 1.4314125162513158, "learning_rate": 9.96423959784749e-06, "loss": 0.2274, "step": 497 }, { "epoch": 0.07, "grad_norm": 1.7205350111831672, "learning_rate": 9.963978411132845e-06, "loss": 0.2848, "step": 498 }, { "epoch": 0.07, "grad_norm": 1.637242686472228, "learning_rate": 9.963716277506457e-06, "loss": 0.2671, "step": 499 }, { "epoch": 0.07, "grad_norm": 1.490526491486953, "learning_rate": 9.963453197018332e-06, "loss": 0.1934, "step": 500 }, { "epoch": 0.07, "grad_norm": 1.032335260866634, "learning_rate": 9.963189169718655e-06, "loss": 0.201, "step": 501 }, { "epoch": 0.07, "grad_norm": 1.8524871404809244, "learning_rate": 9.962924195657785e-06, "loss": 0.2873, "step": 502 }, { "epoch": 0.07, "grad_norm": 1.3167279083703323, "learning_rate": 9.962658274886275e-06, "loss": 0.1732, "step": 503 }, { "epoch": 0.07, "grad_norm": 1.4881878216880058, "learning_rate": 9.962391407454849e-06, "loss": 0.2577, "step": 504 }, { "epoch": 0.07, "grad_norm": 1.4956928345698226, "learning_rate": 9.962123593414409e-06, "loss": 0.2003, "step": 505 }, { "epoch": 0.07, "grad_norm": 1.4687985924520885, "learning_rate": 9.961854832816048e-06, "loss": 0.2482, "step": 506 }, { "epoch": 0.07, "grad_norm": 1.3652822416433232, "learning_rate": 9.961585125711031e-06, "loss": 0.2663, "step": 507 }, { "epoch": 0.07, "grad_norm": 1.2719514587663154, "learning_rate": 9.961314472150807e-06, "loss": 0.2284, "step": 508 }, { "epoch": 0.07, "grad_norm": 1.4808677787452937, "learning_rate": 9.961042872187005e-06, "loss": 0.2432, "step": 509 }, { "epoch": 0.07, "grad_norm": 1.398236444640137, "learning_rate": 9.960770325871435e-06, "loss": 0.2082, "step": 510 }, { "epoch": 0.07, "grad_norm": 1.1146556069681315, "learning_rate": 9.960496833256086e-06, "loss": 0.1852, "step": 511 }, { "epoch": 0.07, "grad_norm": 1.1135849827760493, "learning_rate": 9.960222394393131e-06, "loss": 0.2325, "step": 512 }, { "epoch": 0.07, "grad_norm": 1.085126437637428, "learning_rate": 9.959947009334918e-06, "loss": 0.2364, "step": 513 }, { "epoch": 0.07, "grad_norm": 1.7384445522749252, "learning_rate": 9.95967067813398e-06, "loss": 0.2873, "step": 514 }, { "epoch": 0.07, "grad_norm": 1.1498139816481436, "learning_rate": 9.959393400843027e-06, "loss": 0.2057, "step": 515 }, { "epoch": 0.07, "grad_norm": 1.2179986739689934, "learning_rate": 9.959115177514955e-06, "loss": 0.2135, "step": 516 }, { "epoch": 0.07, "grad_norm": 1.6523932505417487, "learning_rate": 9.958836008202835e-06, "loss": 0.2427, "step": 517 }, { "epoch": 0.07, "grad_norm": 1.2435937148022085, "learning_rate": 9.958555892959919e-06, "loss": 0.1392, "step": 518 }, { "epoch": 0.07, "grad_norm": 1.3894258046683345, "learning_rate": 9.958274831839642e-06, "loss": 0.262, "step": 519 }, { "epoch": 0.07, "grad_norm": 1.7427854051888891, "learning_rate": 9.957992824895621e-06, "loss": 0.2672, "step": 520 }, { "epoch": 0.07, "grad_norm": 1.37627548031114, "learning_rate": 9.957709872181648e-06, "loss": 0.2754, "step": 521 }, { "epoch": 0.07, "grad_norm": 1.9932317688280896, "learning_rate": 9.957425973751698e-06, "loss": 0.3028, "step": 522 }, { "epoch": 0.07, "grad_norm": 1.1840227778603012, "learning_rate": 9.957141129659926e-06, "loss": 0.2292, "step": 523 }, { "epoch": 0.07, "grad_norm": 1.0970031046528939, "learning_rate": 9.95685533996067e-06, "loss": 0.1987, "step": 524 }, { "epoch": 0.07, "grad_norm": 1.3240672988036166, "learning_rate": 9.956568604708444e-06, "loss": 0.234, "step": 525 }, { "epoch": 0.07, "grad_norm": 1.2284800942808367, "learning_rate": 9.956280923957945e-06, "loss": 0.171, "step": 526 }, { "epoch": 0.07, "grad_norm": 1.6356880551847488, "learning_rate": 9.955992297764054e-06, "loss": 0.2785, "step": 527 }, { "epoch": 0.07, "grad_norm": 1.3335816849469193, "learning_rate": 9.955702726181823e-06, "loss": 0.1748, "step": 528 }, { "epoch": 0.07, "grad_norm": 1.3896312850944919, "learning_rate": 9.955412209266492e-06, "loss": 0.3433, "step": 529 }, { "epoch": 0.07, "grad_norm": 1.6778711046511512, "learning_rate": 9.955120747073478e-06, "loss": 0.236, "step": 530 }, { "epoch": 0.07, "grad_norm": 1.47102785880093, "learning_rate": 9.954828339658382e-06, "loss": 0.257, "step": 531 }, { "epoch": 0.07, "grad_norm": 1.789727597617072, "learning_rate": 9.95453498707698e-06, "loss": 0.2661, "step": 532 }, { "epoch": 0.07, "grad_norm": 1.1662090199199446, "learning_rate": 9.95424068938523e-06, "loss": 0.2263, "step": 533 }, { "epoch": 0.07, "grad_norm": 1.7545615515629476, "learning_rate": 9.953945446639275e-06, "loss": 0.3044, "step": 534 }, { "epoch": 0.07, "grad_norm": 1.018484957496278, "learning_rate": 9.953649258895432e-06, "loss": 0.1651, "step": 535 }, { "epoch": 0.07, "grad_norm": 1.469276267757643, "learning_rate": 9.953352126210202e-06, "loss": 0.2948, "step": 536 }, { "epoch": 0.07, "grad_norm": 1.4309809376229912, "learning_rate": 9.953054048640263e-06, "loss": 0.313, "step": 537 }, { "epoch": 0.07, "grad_norm": 1.3512718411972158, "learning_rate": 9.952755026242478e-06, "loss": 0.2292, "step": 538 }, { "epoch": 0.07, "grad_norm": 1.3653234568109998, "learning_rate": 9.952455059073884e-06, "loss": 0.2299, "step": 539 }, { "epoch": 0.07, "grad_norm": 1.2966123135198702, "learning_rate": 9.952154147191706e-06, "loss": 0.2337, "step": 540 }, { "epoch": 0.07, "grad_norm": 1.6657235058104731, "learning_rate": 9.951852290653341e-06, "loss": 0.2517, "step": 541 }, { "epoch": 0.07, "grad_norm": 1.4625906095348107, "learning_rate": 9.951549489516373e-06, "loss": 0.22, "step": 542 }, { "epoch": 0.07, "grad_norm": 1.0920449971412483, "learning_rate": 9.951245743838563e-06, "loss": 0.1703, "step": 543 }, { "epoch": 0.07, "grad_norm": 1.2135801366959844, "learning_rate": 9.950941053677849e-06, "loss": 0.2434, "step": 544 }, { "epoch": 0.07, "grad_norm": 1.5356452664497233, "learning_rate": 9.950635419092358e-06, "loss": 0.2989, "step": 545 }, { "epoch": 0.07, "grad_norm": 1.123904002911888, "learning_rate": 9.950328840140388e-06, "loss": 0.1793, "step": 546 }, { "epoch": 0.07, "grad_norm": 1.2409260663613944, "learning_rate": 9.950021316880422e-06, "loss": 0.2321, "step": 547 }, { "epoch": 0.07, "grad_norm": 1.3440762418459185, "learning_rate": 9.949712849371123e-06, "loss": 0.2486, "step": 548 }, { "epoch": 0.07, "grad_norm": 1.1638955785293899, "learning_rate": 9.94940343767133e-06, "loss": 0.2338, "step": 549 }, { "epoch": 0.07, "grad_norm": 1.3768457213991032, "learning_rate": 9.949093081840071e-06, "loss": 0.2442, "step": 550 }, { "epoch": 0.07, "grad_norm": 1.3519649303852872, "learning_rate": 9.948781781936543e-06, "loss": 0.2096, "step": 551 }, { "epoch": 0.07, "grad_norm": 1.963785626582525, "learning_rate": 9.94846953802013e-06, "loss": 0.2677, "step": 552 }, { "epoch": 0.07, "grad_norm": 1.3412188590579937, "learning_rate": 9.948156350150398e-06, "loss": 0.2048, "step": 553 }, { "epoch": 0.07, "grad_norm": 1.2142208317758985, "learning_rate": 9.947842218387088e-06, "loss": 0.2215, "step": 554 }, { "epoch": 0.07, "grad_norm": 1.9163946585718983, "learning_rate": 9.947527142790119e-06, "loss": 0.3431, "step": 555 }, { "epoch": 0.07, "grad_norm": 1.2230991066168264, "learning_rate": 9.947211123419596e-06, "loss": 0.154, "step": 556 }, { "epoch": 0.08, "grad_norm": 1.3291539805102057, "learning_rate": 9.946894160335803e-06, "loss": 0.1959, "step": 557 }, { "epoch": 0.08, "grad_norm": 1.272482063481536, "learning_rate": 9.946576253599203e-06, "loss": 0.2036, "step": 558 }, { "epoch": 0.08, "grad_norm": 1.4387337915251277, "learning_rate": 9.946257403270437e-06, "loss": 0.2276, "step": 559 }, { "epoch": 0.08, "grad_norm": 1.0219178736890568, "learning_rate": 9.94593760941033e-06, "loss": 0.2273, "step": 560 }, { "epoch": 0.08, "grad_norm": 1.134101677144825, "learning_rate": 9.945616872079881e-06, "loss": 0.2114, "step": 561 }, { "epoch": 0.08, "grad_norm": 1.0686433405663007, "learning_rate": 9.945295191340276e-06, "loss": 0.1692, "step": 562 }, { "epoch": 0.08, "grad_norm": 1.8580189611736475, "learning_rate": 9.944972567252878e-06, "loss": 0.3008, "step": 563 }, { "epoch": 0.08, "grad_norm": 1.1982399843380407, "learning_rate": 9.944648999879227e-06, "loss": 0.2597, "step": 564 }, { "epoch": 0.08, "grad_norm": 1.2333019405511845, "learning_rate": 9.94432448928105e-06, "loss": 0.2528, "step": 565 }, { "epoch": 0.08, "grad_norm": 0.8045828951814397, "learning_rate": 9.943999035520244e-06, "loss": 0.1814, "step": 566 }, { "epoch": 0.08, "grad_norm": 1.4347831574074827, "learning_rate": 9.943672638658896e-06, "loss": 0.227, "step": 567 }, { "epoch": 0.08, "grad_norm": 1.4393444787471719, "learning_rate": 9.943345298759266e-06, "loss": 0.2224, "step": 568 }, { "epoch": 0.08, "grad_norm": 1.5172413594070782, "learning_rate": 9.943017015883797e-06, "loss": 0.2434, "step": 569 }, { "epoch": 0.08, "grad_norm": 1.0791953084669463, "learning_rate": 9.942687790095111e-06, "loss": 0.1857, "step": 570 }, { "epoch": 0.08, "grad_norm": 1.6134547124222949, "learning_rate": 9.94235762145601e-06, "loss": 0.2739, "step": 571 }, { "epoch": 0.08, "grad_norm": 1.5857039074817099, "learning_rate": 9.942026510029476e-06, "loss": 0.2553, "step": 572 }, { "epoch": 0.08, "grad_norm": 1.4053712953584911, "learning_rate": 9.94169445587867e-06, "loss": 0.2061, "step": 573 }, { "epoch": 0.08, "grad_norm": 1.2680271455162422, "learning_rate": 9.941361459066935e-06, "loss": 0.2002, "step": 574 }, { "epoch": 0.08, "grad_norm": 1.6640549762203467, "learning_rate": 9.94102751965779e-06, "loss": 0.2518, "step": 575 }, { "epoch": 0.08, "grad_norm": 1.8288877834505906, "learning_rate": 9.940692637714939e-06, "loss": 0.2779, "step": 576 }, { "epoch": 0.08, "grad_norm": 1.199146349292955, "learning_rate": 9.94035681330226e-06, "loss": 0.2326, "step": 577 }, { "epoch": 0.08, "grad_norm": 1.194685108787481, "learning_rate": 9.940020046483817e-06, "loss": 0.2248, "step": 578 }, { "epoch": 0.08, "grad_norm": 0.9392705998846206, "learning_rate": 9.939682337323847e-06, "loss": 0.1207, "step": 579 }, { "epoch": 0.08, "grad_norm": 1.7605655254820953, "learning_rate": 9.939343685886775e-06, "loss": 0.3155, "step": 580 }, { "epoch": 0.08, "grad_norm": 1.1717188672102792, "learning_rate": 9.939004092237197e-06, "loss": 0.2233, "step": 581 }, { "epoch": 0.08, "grad_norm": 1.4074979840465214, "learning_rate": 9.938663556439891e-06, "loss": 0.2521, "step": 582 }, { "epoch": 0.08, "grad_norm": 1.4613185352825728, "learning_rate": 9.938322078559822e-06, "loss": 0.2531, "step": 583 }, { "epoch": 0.08, "grad_norm": 1.2812936397922998, "learning_rate": 9.937979658662125e-06, "loss": 0.2641, "step": 584 }, { "epoch": 0.08, "grad_norm": 1.3314233533704027, "learning_rate": 9.937636296812122e-06, "loss": 0.2051, "step": 585 }, { "epoch": 0.08, "grad_norm": 1.035155842878196, "learning_rate": 9.937291993075308e-06, "loss": 0.1797, "step": 586 }, { "epoch": 0.08, "grad_norm": 1.5443165005378425, "learning_rate": 9.936946747517363e-06, "loss": 0.2216, "step": 587 }, { "epoch": 0.08, "grad_norm": 1.2093464749917844, "learning_rate": 9.936600560204146e-06, "loss": 0.2524, "step": 588 }, { "epoch": 0.08, "grad_norm": 1.4371020078945902, "learning_rate": 9.936253431201691e-06, "loss": 0.2556, "step": 589 }, { "epoch": 0.08, "grad_norm": 1.4247386404357398, "learning_rate": 9.935905360576221e-06, "loss": 0.2454, "step": 590 }, { "epoch": 0.08, "grad_norm": 1.7405100771624604, "learning_rate": 9.935556348394128e-06, "loss": 0.2496, "step": 591 }, { "epoch": 0.08, "grad_norm": 1.340030876508446, "learning_rate": 9.93520639472199e-06, "loss": 0.3087, "step": 592 }, { "epoch": 0.08, "grad_norm": 1.5768981352327542, "learning_rate": 9.93485549962656e-06, "loss": 0.2861, "step": 593 }, { "epoch": 0.08, "grad_norm": 1.48982081910507, "learning_rate": 9.934503663174779e-06, "loss": 0.2766, "step": 594 }, { "epoch": 0.08, "grad_norm": 1.2627258465017708, "learning_rate": 9.93415088543376e-06, "loss": 0.2366, "step": 595 }, { "epoch": 0.08, "grad_norm": 1.19478387460378, "learning_rate": 9.933797166470795e-06, "loss": 0.1812, "step": 596 }, { "epoch": 0.08, "grad_norm": 1.1436793171443547, "learning_rate": 9.933442506353362e-06, "loss": 0.2666, "step": 597 }, { "epoch": 0.08, "grad_norm": 1.285177779397736, "learning_rate": 9.933086905149113e-06, "loss": 0.1944, "step": 598 }, { "epoch": 0.08, "grad_norm": 1.2334164441743987, "learning_rate": 9.93273036292588e-06, "loss": 0.2499, "step": 599 }, { "epoch": 0.08, "grad_norm": 1.215831493762732, "learning_rate": 9.932372879751677e-06, "loss": 0.2008, "step": 600 }, { "epoch": 0.08, "grad_norm": 1.3597562975554311, "learning_rate": 9.932014455694699e-06, "loss": 0.2015, "step": 601 }, { "epoch": 0.08, "grad_norm": 1.6916139492949407, "learning_rate": 9.931655090823311e-06, "loss": 0.2819, "step": 602 }, { "epoch": 0.08, "grad_norm": 1.0725272288898027, "learning_rate": 9.931294785206072e-06, "loss": 0.2054, "step": 603 }, { "epoch": 0.08, "grad_norm": 1.570469431017226, "learning_rate": 9.93093353891171e-06, "loss": 0.2634, "step": 604 }, { "epoch": 0.08, "grad_norm": 1.2210578341257852, "learning_rate": 9.93057135200913e-06, "loss": 0.2446, "step": 605 }, { "epoch": 0.08, "grad_norm": 1.4828591456438873, "learning_rate": 9.930208224567429e-06, "loss": 0.3157, "step": 606 }, { "epoch": 0.08, "grad_norm": 1.8447957113626772, "learning_rate": 9.929844156655872e-06, "loss": 0.244, "step": 607 }, { "epoch": 0.08, "grad_norm": 1.44278316798067, "learning_rate": 9.929479148343909e-06, "loss": 0.2402, "step": 608 }, { "epoch": 0.08, "grad_norm": 1.3537252045293708, "learning_rate": 9.929113199701163e-06, "loss": 0.255, "step": 609 }, { "epoch": 0.08, "grad_norm": 1.8292581825301466, "learning_rate": 9.928746310797448e-06, "loss": 0.3316, "step": 610 }, { "epoch": 0.08, "grad_norm": 1.7406541812701393, "learning_rate": 9.928378481702748e-06, "loss": 0.3367, "step": 611 }, { "epoch": 0.08, "grad_norm": 1.1585764439036026, "learning_rate": 9.928009712487227e-06, "loss": 0.1832, "step": 612 }, { "epoch": 0.08, "grad_norm": 1.0131021697123965, "learning_rate": 9.927640003221233e-06, "loss": 0.2053, "step": 613 }, { "epoch": 0.08, "grad_norm": 1.587452217961807, "learning_rate": 9.927269353975288e-06, "loss": 0.2492, "step": 614 }, { "epoch": 0.08, "grad_norm": 1.5450176202870771, "learning_rate": 9.926897764820095e-06, "loss": 0.231, "step": 615 }, { "epoch": 0.08, "grad_norm": 1.276331991312613, "learning_rate": 9.926525235826541e-06, "loss": 0.213, "step": 616 }, { "epoch": 0.08, "grad_norm": 1.3490089278801147, "learning_rate": 9.926151767065687e-06, "loss": 0.2519, "step": 617 }, { "epoch": 0.08, "grad_norm": 1.3577766268488236, "learning_rate": 9.925777358608772e-06, "loss": 0.2294, "step": 618 }, { "epoch": 0.08, "grad_norm": 1.3965352474514439, "learning_rate": 9.92540201052722e-06, "loss": 0.3029, "step": 619 }, { "epoch": 0.08, "grad_norm": 1.4668988675565036, "learning_rate": 9.92502572289263e-06, "loss": 0.2809, "step": 620 }, { "epoch": 0.08, "grad_norm": 1.4182785897197276, "learning_rate": 9.924648495776781e-06, "loss": 0.235, "step": 621 }, { "epoch": 0.08, "grad_norm": 1.7879353436026295, "learning_rate": 9.924270329251631e-06, "loss": 0.2526, "step": 622 }, { "epoch": 0.08, "grad_norm": 1.4340170727350245, "learning_rate": 9.92389122338932e-06, "loss": 0.2627, "step": 623 }, { "epoch": 0.08, "grad_norm": 1.4495712620456707, "learning_rate": 9.923511178262162e-06, "loss": 0.1976, "step": 624 }, { "epoch": 0.08, "grad_norm": 1.3201328963369183, "learning_rate": 9.923130193942654e-06, "loss": 0.1879, "step": 625 }, { "epoch": 0.08, "grad_norm": 0.9826596222639931, "learning_rate": 9.922748270503472e-06, "loss": 0.1525, "step": 626 }, { "epoch": 0.08, "grad_norm": 1.4123216031515593, "learning_rate": 9.922365408017474e-06, "loss": 0.2217, "step": 627 }, { "epoch": 0.08, "grad_norm": 1.411351842093732, "learning_rate": 9.921981606557687e-06, "loss": 0.2963, "step": 628 }, { "epoch": 0.08, "grad_norm": 1.4566795523655254, "learning_rate": 9.921596866197328e-06, "loss": 0.2664, "step": 629 }, { "epoch": 0.08, "grad_norm": 1.0666586395188855, "learning_rate": 9.921211187009785e-06, "loss": 0.181, "step": 630 }, { "epoch": 0.09, "grad_norm": 1.7419180123921258, "learning_rate": 9.920824569068632e-06, "loss": 0.3363, "step": 631 }, { "epoch": 0.09, "grad_norm": 0.8262413847865508, "learning_rate": 9.920437012447619e-06, "loss": 0.1857, "step": 632 }, { "epoch": 0.09, "grad_norm": 1.2688735718624455, "learning_rate": 9.920048517220674e-06, "loss": 0.1995, "step": 633 }, { "epoch": 0.09, "grad_norm": 1.0837230218705434, "learning_rate": 9.919659083461906e-06, "loss": 0.2147, "step": 634 }, { "epoch": 0.09, "grad_norm": 1.6014147321786254, "learning_rate": 9.9192687112456e-06, "loss": 0.2672, "step": 635 }, { "epoch": 0.09, "grad_norm": 1.1902261119324566, "learning_rate": 9.918877400646223e-06, "loss": 0.2272, "step": 636 }, { "epoch": 0.09, "grad_norm": 1.4727548812306739, "learning_rate": 9.918485151738421e-06, "loss": 0.2753, "step": 637 }, { "epoch": 0.09, "grad_norm": 1.1861806379776234, "learning_rate": 9.918091964597017e-06, "loss": 0.2193, "step": 638 }, { "epoch": 0.09, "grad_norm": 1.4378238717315506, "learning_rate": 9.917697839297016e-06, "loss": 0.2722, "step": 639 }, { "epoch": 0.09, "grad_norm": 1.3734205429704252, "learning_rate": 9.917302775913597e-06, "loss": 0.2204, "step": 640 }, { "epoch": 0.09, "grad_norm": 1.075251672002481, "learning_rate": 9.916906774522124e-06, "loss": 0.2302, "step": 641 }, { "epoch": 0.09, "grad_norm": 0.9313942800041353, "learning_rate": 9.916509835198134e-06, "loss": 0.2033, "step": 642 }, { "epoch": 0.09, "grad_norm": 1.5138440003517304, "learning_rate": 9.916111958017347e-06, "loss": 0.3453, "step": 643 }, { "epoch": 0.09, "grad_norm": 1.4876970346769995, "learning_rate": 9.915713143055663e-06, "loss": 0.2611, "step": 644 }, { "epoch": 0.09, "grad_norm": 0.9019295013580393, "learning_rate": 9.915313390389154e-06, "loss": 0.132, "step": 645 }, { "epoch": 0.09, "grad_norm": 1.232873461010102, "learning_rate": 9.91491270009408e-06, "loss": 0.211, "step": 646 }, { "epoch": 0.09, "grad_norm": 1.2242105870174862, "learning_rate": 9.914511072246874e-06, "loss": 0.2195, "step": 647 }, { "epoch": 0.09, "grad_norm": 1.1723127250279795, "learning_rate": 9.914108506924146e-06, "loss": 0.1623, "step": 648 }, { "epoch": 0.09, "grad_norm": 1.2847645249050796, "learning_rate": 9.913705004202691e-06, "loss": 0.2093, "step": 649 }, { "epoch": 0.09, "grad_norm": 1.2100818796137633, "learning_rate": 9.913300564159478e-06, "loss": 0.218, "step": 650 }, { "epoch": 0.09, "grad_norm": 1.4117834035698307, "learning_rate": 9.912895186871661e-06, "loss": 0.3043, "step": 651 }, { "epoch": 0.09, "grad_norm": 1.2024525718147812, "learning_rate": 9.912488872416564e-06, "loss": 0.2557, "step": 652 }, { "epoch": 0.09, "grad_norm": 1.0937660466324213, "learning_rate": 9.912081620871694e-06, "loss": 0.1867, "step": 653 }, { "epoch": 0.09, "grad_norm": 1.335671212454067, "learning_rate": 9.91167343231474e-06, "loss": 0.221, "step": 654 }, { "epoch": 0.09, "grad_norm": 1.500778918794191, "learning_rate": 9.911264306823564e-06, "loss": 0.3304, "step": 655 }, { "epoch": 0.09, "grad_norm": 1.447722629723532, "learning_rate": 9.910854244476212e-06, "loss": 0.2379, "step": 656 }, { "epoch": 0.09, "grad_norm": 1.3577451368808762, "learning_rate": 9.910443245350903e-06, "loss": 0.2562, "step": 657 }, { "epoch": 0.09, "grad_norm": 1.239221737193145, "learning_rate": 9.91003130952604e-06, "loss": 0.2219, "step": 658 }, { "epoch": 0.09, "grad_norm": 1.575373664520173, "learning_rate": 9.909618437080204e-06, "loss": 0.2487, "step": 659 }, { "epoch": 0.09, "grad_norm": 1.8713135806072339, "learning_rate": 9.909204628092148e-06, "loss": 0.2889, "step": 660 }, { "epoch": 0.09, "grad_norm": 0.994423933015644, "learning_rate": 9.908789882640812e-06, "loss": 0.1127, "step": 661 }, { "epoch": 0.09, "grad_norm": 1.3432365402120863, "learning_rate": 9.908374200805313e-06, "loss": 0.1852, "step": 662 }, { "epoch": 0.09, "grad_norm": 1.2326705030986878, "learning_rate": 9.907957582664942e-06, "loss": 0.2637, "step": 663 }, { "epoch": 0.09, "grad_norm": 1.3836664989685823, "learning_rate": 9.907540028299173e-06, "loss": 0.2398, "step": 664 }, { "epoch": 0.09, "grad_norm": 1.0078539116714875, "learning_rate": 9.907121537787659e-06, "loss": 0.2084, "step": 665 }, { "epoch": 0.09, "grad_norm": 1.2166824977817328, "learning_rate": 9.906702111210225e-06, "loss": 0.2963, "step": 666 }, { "epoch": 0.09, "grad_norm": 1.098280563864795, "learning_rate": 9.906281748646884e-06, "loss": 0.1964, "step": 667 }, { "epoch": 0.09, "grad_norm": 1.408726454010625, "learning_rate": 9.905860450177822e-06, "loss": 0.2575, "step": 668 }, { "epoch": 0.09, "grad_norm": 1.5695165704550953, "learning_rate": 9.905438215883403e-06, "loss": 0.2718, "step": 669 }, { "epoch": 0.09, "grad_norm": 1.2913982625894043, "learning_rate": 9.905015045844173e-06, "loss": 0.2343, "step": 670 }, { "epoch": 0.09, "grad_norm": 1.1942448893602566, "learning_rate": 9.904590940140853e-06, "loss": 0.2422, "step": 671 }, { "epoch": 0.09, "grad_norm": 1.1770714221306793, "learning_rate": 9.904165898854343e-06, "loss": 0.1896, "step": 672 }, { "epoch": 0.09, "grad_norm": 1.273254560065629, "learning_rate": 9.903739922065724e-06, "loss": 0.2349, "step": 673 }, { "epoch": 0.09, "grad_norm": 1.3524070450472117, "learning_rate": 9.903313009856254e-06, "loss": 0.2356, "step": 674 }, { "epoch": 0.09, "grad_norm": 1.2385749457678545, "learning_rate": 9.902885162307371e-06, "loss": 0.2297, "step": 675 }, { "epoch": 0.09, "grad_norm": 1.3373734351580295, "learning_rate": 9.902456379500686e-06, "loss": 0.2105, "step": 676 }, { "epoch": 0.09, "grad_norm": 1.146534546096548, "learning_rate": 9.902026661517994e-06, "loss": 0.2157, "step": 677 }, { "epoch": 0.09, "grad_norm": 1.122941485953434, "learning_rate": 9.901596008441267e-06, "loss": 0.1907, "step": 678 }, { "epoch": 0.09, "grad_norm": 1.6369360526090062, "learning_rate": 9.901164420352654e-06, "loss": 0.2686, "step": 679 }, { "epoch": 0.09, "grad_norm": 1.3165986867588195, "learning_rate": 9.900731897334483e-06, "loss": 0.2493, "step": 680 }, { "epoch": 0.09, "grad_norm": 1.7038606771997948, "learning_rate": 9.900298439469262e-06, "loss": 0.342, "step": 681 }, { "epoch": 0.09, "grad_norm": 0.8460932150549558, "learning_rate": 9.899864046839675e-06, "loss": 0.1534, "step": 682 }, { "epoch": 0.09, "grad_norm": 1.8854574392536674, "learning_rate": 9.899428719528585e-06, "loss": 0.336, "step": 683 }, { "epoch": 0.09, "grad_norm": 1.4589351511335327, "learning_rate": 9.898992457619034e-06, "loss": 0.2372, "step": 684 }, { "epoch": 0.09, "grad_norm": 1.5950511263503862, "learning_rate": 9.898555261194243e-06, "loss": 0.2335, "step": 685 }, { "epoch": 0.09, "grad_norm": 1.523853849303764, "learning_rate": 9.898117130337609e-06, "loss": 0.2555, "step": 686 }, { "epoch": 0.09, "grad_norm": 1.3528811583845057, "learning_rate": 9.897678065132707e-06, "loss": 0.2355, "step": 687 }, { "epoch": 0.09, "grad_norm": 1.379029208812336, "learning_rate": 9.897238065663295e-06, "loss": 0.2273, "step": 688 }, { "epoch": 0.09, "grad_norm": 0.9525968053535399, "learning_rate": 9.896797132013304e-06, "loss": 0.2211, "step": 689 }, { "epoch": 0.09, "grad_norm": 1.356113540178845, "learning_rate": 9.896355264266842e-06, "loss": 0.288, "step": 690 }, { "epoch": 0.09, "grad_norm": 1.1505961146115422, "learning_rate": 9.895912462508203e-06, "loss": 0.243, "step": 691 }, { "epoch": 0.09, "grad_norm": 1.2359804736830924, "learning_rate": 9.895468726821853e-06, "loss": 0.2562, "step": 692 }, { "epoch": 0.09, "grad_norm": 1.0749254954782916, "learning_rate": 9.895024057292434e-06, "loss": 0.1717, "step": 693 }, { "epoch": 0.09, "grad_norm": 1.1625281888079761, "learning_rate": 9.894578454004776e-06, "loss": 0.209, "step": 694 }, { "epoch": 0.09, "grad_norm": 1.1234877898849043, "learning_rate": 9.894131917043875e-06, "loss": 0.226, "step": 695 }, { "epoch": 0.09, "grad_norm": 1.4026176130918955, "learning_rate": 9.893684446494915e-06, "loss": 0.2329, "step": 696 }, { "epoch": 0.09, "grad_norm": 1.3071488509371818, "learning_rate": 9.893236042443251e-06, "loss": 0.2288, "step": 697 }, { "epoch": 0.09, "grad_norm": 1.740814301506615, "learning_rate": 9.89278670497442e-06, "loss": 0.3211, "step": 698 }, { "epoch": 0.09, "grad_norm": 1.6100080140773747, "learning_rate": 9.892336434174137e-06, "loss": 0.2779, "step": 699 }, { "epoch": 0.09, "grad_norm": 1.5739983799372084, "learning_rate": 9.891885230128293e-06, "loss": 0.27, "step": 700 }, { "epoch": 0.09, "grad_norm": 1.1492332588535001, "learning_rate": 9.89143309292296e-06, "loss": 0.1918, "step": 701 }, { "epoch": 0.09, "grad_norm": 1.491662962273359, "learning_rate": 9.890980022644383e-06, "loss": 0.3117, "step": 702 }, { "epoch": 0.09, "grad_norm": 1.3329187067395991, "learning_rate": 9.890526019378991e-06, "loss": 0.2777, "step": 703 }, { "epoch": 0.09, "grad_norm": 1.7786210401757554, "learning_rate": 9.890071083213387e-06, "loss": 0.278, "step": 704 }, { "epoch": 0.1, "grad_norm": 1.1918447793002338, "learning_rate": 9.889615214234353e-06, "loss": 0.2169, "step": 705 }, { "epoch": 0.1, "grad_norm": 1.355991216031387, "learning_rate": 9.889158412528849e-06, "loss": 0.224, "step": 706 }, { "epoch": 0.1, "grad_norm": 0.7468971003444869, "learning_rate": 9.888700678184013e-06, "loss": 0.1564, "step": 707 }, { "epoch": 0.1, "grad_norm": 1.1221905921363289, "learning_rate": 9.88824201128716e-06, "loss": 0.223, "step": 708 }, { "epoch": 0.1, "grad_norm": 0.8490894560787874, "learning_rate": 9.887782411925788e-06, "loss": 0.1373, "step": 709 }, { "epoch": 0.1, "grad_norm": 0.9779229138751871, "learning_rate": 9.887321880187562e-06, "loss": 0.1333, "step": 710 }, { "epoch": 0.1, "grad_norm": 1.4421078925966384, "learning_rate": 9.886860416160335e-06, "loss": 0.3177, "step": 711 }, { "epoch": 0.1, "grad_norm": 1.4125661216948424, "learning_rate": 9.886398019932136e-06, "loss": 0.234, "step": 712 }, { "epoch": 0.1, "grad_norm": 1.5627678831897098, "learning_rate": 9.885934691591167e-06, "loss": 0.2612, "step": 713 }, { "epoch": 0.1, "grad_norm": 1.5013963468130973, "learning_rate": 9.885470431225814e-06, "loss": 0.2698, "step": 714 }, { "epoch": 0.1, "grad_norm": 1.4210244586614647, "learning_rate": 9.885005238924635e-06, "loss": 0.2711, "step": 715 }, { "epoch": 0.1, "grad_norm": 1.063439474004361, "learning_rate": 9.884539114776368e-06, "loss": 0.1798, "step": 716 }, { "epoch": 0.1, "grad_norm": 1.175692096695271, "learning_rate": 9.884072058869933e-06, "loss": 0.1912, "step": 717 }, { "epoch": 0.1, "grad_norm": 1.3167100356972794, "learning_rate": 9.88360407129442e-06, "loss": 0.201, "step": 718 }, { "epoch": 0.1, "grad_norm": 1.584476397648018, "learning_rate": 9.883135152139102e-06, "loss": 0.2799, "step": 719 }, { "epoch": 0.1, "grad_norm": 1.4643707722289512, "learning_rate": 9.882665301493431e-06, "loss": 0.1715, "step": 720 }, { "epoch": 0.1, "grad_norm": 1.0940643757664703, "learning_rate": 9.882194519447033e-06, "loss": 0.1753, "step": 721 }, { "epoch": 0.1, "grad_norm": 1.1531218924434834, "learning_rate": 9.881722806089708e-06, "loss": 0.2136, "step": 722 }, { "epoch": 0.1, "grad_norm": 1.4725858477133837, "learning_rate": 9.881250161511446e-06, "loss": 0.2608, "step": 723 }, { "epoch": 0.1, "grad_norm": 1.0346830734273653, "learning_rate": 9.8807765858024e-06, "loss": 0.1849, "step": 724 }, { "epoch": 0.1, "grad_norm": 1.3717132730800796, "learning_rate": 9.880302079052914e-06, "loss": 0.2337, "step": 725 }, { "epoch": 0.1, "grad_norm": 1.3638876844782337, "learning_rate": 9.879826641353498e-06, "loss": 0.2531, "step": 726 }, { "epoch": 0.1, "grad_norm": 1.4574979789104439, "learning_rate": 9.879350272794849e-06, "loss": 0.2993, "step": 727 }, { "epoch": 0.1, "grad_norm": 1.4412365176858606, "learning_rate": 9.878872973467834e-06, "loss": 0.2448, "step": 728 }, { "epoch": 0.1, "grad_norm": 1.1973796680268303, "learning_rate": 9.878394743463503e-06, "loss": 0.2077, "step": 729 }, { "epoch": 0.1, "grad_norm": 1.0449311810702868, "learning_rate": 9.877915582873083e-06, "loss": 0.186, "step": 730 }, { "epoch": 0.1, "grad_norm": 1.1701745930825294, "learning_rate": 9.877435491787975e-06, "loss": 0.2207, "step": 731 }, { "epoch": 0.1, "grad_norm": 1.746230543867163, "learning_rate": 9.87695447029976e-06, "loss": 0.218, "step": 732 }, { "epoch": 0.1, "grad_norm": 1.5795486736361055, "learning_rate": 9.876472518500195e-06, "loss": 0.2475, "step": 733 }, { "epoch": 0.1, "grad_norm": 1.633086625412564, "learning_rate": 9.875989636481216e-06, "loss": 0.2407, "step": 734 }, { "epoch": 0.1, "grad_norm": 1.4946800023750137, "learning_rate": 9.875505824334937e-06, "loss": 0.3, "step": 735 }, { "epoch": 0.1, "grad_norm": 1.171043540317, "learning_rate": 9.875021082153648e-06, "loss": 0.2136, "step": 736 }, { "epoch": 0.1, "grad_norm": 1.6834981915821736, "learning_rate": 9.874535410029816e-06, "loss": 0.2928, "step": 737 }, { "epoch": 0.1, "grad_norm": 1.155357657780537, "learning_rate": 9.874048808056086e-06, "loss": 0.2238, "step": 738 }, { "epoch": 0.1, "grad_norm": 1.4309808424213013, "learning_rate": 9.873561276325281e-06, "loss": 0.1915, "step": 739 }, { "epoch": 0.1, "grad_norm": 1.7160905626446037, "learning_rate": 9.873072814930402e-06, "loss": 0.2826, "step": 740 }, { "epoch": 0.1, "grad_norm": 1.4317567977084604, "learning_rate": 9.872583423964626e-06, "loss": 0.2546, "step": 741 }, { "epoch": 0.1, "grad_norm": 1.4395553787853244, "learning_rate": 9.872093103521306e-06, "loss": 0.3131, "step": 742 }, { "epoch": 0.1, "grad_norm": 1.43303550526125, "learning_rate": 9.871601853693975e-06, "loss": 0.3014, "step": 743 }, { "epoch": 0.1, "grad_norm": 1.4092518769463647, "learning_rate": 9.871109674576342e-06, "loss": 0.2628, "step": 744 }, { "epoch": 0.1, "grad_norm": 1.2856634384919887, "learning_rate": 9.870616566262295e-06, "loss": 0.2752, "step": 745 }, { "epoch": 0.1, "grad_norm": 1.6120764314440594, "learning_rate": 9.870122528845894e-06, "loss": 0.2925, "step": 746 }, { "epoch": 0.1, "grad_norm": 0.8888909676092348, "learning_rate": 9.869627562421381e-06, "loss": 0.1082, "step": 747 }, { "epoch": 0.1, "grad_norm": 0.9450232927683705, "learning_rate": 9.869131667083176e-06, "loss": 0.1426, "step": 748 }, { "epoch": 0.1, "grad_norm": 1.5870156634758588, "learning_rate": 9.868634842925875e-06, "loss": 0.2521, "step": 749 }, { "epoch": 0.1, "grad_norm": 1.2202504166149544, "learning_rate": 9.868137090044248e-06, "loss": 0.2453, "step": 750 }, { "epoch": 0.1, "grad_norm": 1.3532718129332717, "learning_rate": 9.867638408533244e-06, "loss": 0.2679, "step": 751 }, { "epoch": 0.1, "grad_norm": 1.0248696358791918, "learning_rate": 9.867138798487993e-06, "loss": 0.246, "step": 752 }, { "epoch": 0.1, "grad_norm": 1.2940752547195293, "learning_rate": 9.866638260003797e-06, "loss": 0.2097, "step": 753 }, { "epoch": 0.1, "grad_norm": 1.4539954761012326, "learning_rate": 9.866136793176138e-06, "loss": 0.2765, "step": 754 }, { "epoch": 0.1, "grad_norm": 1.0392805415060045, "learning_rate": 9.865634398100671e-06, "loss": 0.1533, "step": 755 }, { "epoch": 0.1, "grad_norm": 1.3003708443437, "learning_rate": 9.865131074873235e-06, "loss": 0.2946, "step": 756 }, { "epoch": 0.1, "grad_norm": 1.0253105350755423, "learning_rate": 9.864626823589842e-06, "loss": 0.214, "step": 757 }, { "epoch": 0.1, "grad_norm": 0.8298477995316634, "learning_rate": 9.864121644346679e-06, "loss": 0.2083, "step": 758 }, { "epoch": 0.1, "grad_norm": 1.0169813901537461, "learning_rate": 9.863615537240114e-06, "loss": 0.3004, "step": 759 }, { "epoch": 0.1, "grad_norm": 1.002662225988702, "learning_rate": 9.86310850236669e-06, "loss": 0.143, "step": 760 }, { "epoch": 0.1, "grad_norm": 1.05250154510306, "learning_rate": 9.862600539823124e-06, "loss": 0.2086, "step": 761 }, { "epoch": 0.1, "grad_norm": 1.2578054528912948, "learning_rate": 9.862091649706321e-06, "loss": 0.2648, "step": 762 }, { "epoch": 0.1, "grad_norm": 1.4830180443986873, "learning_rate": 9.861581832113346e-06, "loss": 0.2861, "step": 763 }, { "epoch": 0.1, "grad_norm": 0.9054683190027109, "learning_rate": 9.861071087141456e-06, "loss": 0.1909, "step": 764 }, { "epoch": 0.1, "grad_norm": 1.0699163733010635, "learning_rate": 9.860559414888077e-06, "loss": 0.177, "step": 765 }, { "epoch": 0.1, "grad_norm": 1.7554394257283226, "learning_rate": 9.860046815450815e-06, "loss": 0.3066, "step": 766 }, { "epoch": 0.1, "grad_norm": 1.4837517215547054, "learning_rate": 9.85953328892745e-06, "loss": 0.2069, "step": 767 }, { "epoch": 0.1, "grad_norm": 1.3506371063912843, "learning_rate": 9.859018835415944e-06, "loss": 0.2625, "step": 768 }, { "epoch": 0.1, "grad_norm": 1.1620344653599775, "learning_rate": 9.858503455014428e-06, "loss": 0.2345, "step": 769 }, { "epoch": 0.1, "grad_norm": 1.6941691416434994, "learning_rate": 9.857987147821216e-06, "loss": 0.2888, "step": 770 }, { "epoch": 0.1, "grad_norm": 0.7607932617007285, "learning_rate": 9.857469913934796e-06, "loss": 0.1361, "step": 771 }, { "epoch": 0.1, "grad_norm": 1.0012161839914138, "learning_rate": 9.856951753453836e-06, "loss": 0.2228, "step": 772 }, { "epoch": 0.1, "grad_norm": 0.940854364497359, "learning_rate": 9.856432666477178e-06, "loss": 0.1932, "step": 773 }, { "epoch": 0.1, "grad_norm": 0.9955982956777913, "learning_rate": 9.855912653103844e-06, "loss": 0.1719, "step": 774 }, { "epoch": 0.1, "grad_norm": 1.0543869179550516, "learning_rate": 9.855391713433023e-06, "loss": 0.1891, "step": 775 }, { "epoch": 0.1, "grad_norm": 0.9513353273754006, "learning_rate": 9.85486984756409e-06, "loss": 0.1705, "step": 776 }, { "epoch": 0.1, "grad_norm": 1.5063972073935952, "learning_rate": 9.8543470555966e-06, "loss": 0.3096, "step": 777 }, { "epoch": 0.1, "grad_norm": 1.259242066828801, "learning_rate": 9.853823337630272e-06, "loss": 0.2131, "step": 778 }, { "epoch": 0.11, "grad_norm": 1.1561683403175882, "learning_rate": 9.853298693765013e-06, "loss": 0.2195, "step": 779 }, { "epoch": 0.11, "grad_norm": 1.1840971540717589, "learning_rate": 9.8527731241009e-06, "loss": 0.2173, "step": 780 }, { "epoch": 0.11, "grad_norm": 0.969902528255275, "learning_rate": 9.85224662873819e-06, "loss": 0.2412, "step": 781 }, { "epoch": 0.11, "grad_norm": 1.2273182799551192, "learning_rate": 9.851719207777317e-06, "loss": 0.2538, "step": 782 }, { "epoch": 0.11, "grad_norm": 1.080744683364228, "learning_rate": 9.851190861318887e-06, "loss": 0.1891, "step": 783 }, { "epoch": 0.11, "grad_norm": 1.3500168406403443, "learning_rate": 9.850661589463686e-06, "loss": 0.2381, "step": 784 }, { "epoch": 0.11, "grad_norm": 1.2214630303456726, "learning_rate": 9.850131392312677e-06, "loss": 0.2096, "step": 785 }, { "epoch": 0.11, "grad_norm": 1.5101176022979277, "learning_rate": 9.849600269967e-06, "loss": 0.2365, "step": 786 }, { "epoch": 0.11, "grad_norm": 1.286184034991567, "learning_rate": 9.849068222527967e-06, "loss": 0.2373, "step": 787 }, { "epoch": 0.11, "grad_norm": 1.0533398670439795, "learning_rate": 9.848535250097072e-06, "loss": 0.1577, "step": 788 }, { "epoch": 0.11, "grad_norm": 1.0015523844747476, "learning_rate": 9.848001352775982e-06, "loss": 0.2164, "step": 789 }, { "epoch": 0.11, "grad_norm": 0.8957864777970989, "learning_rate": 9.847466530666543e-06, "loss": 0.1425, "step": 790 }, { "epoch": 0.11, "grad_norm": 1.3355858604784832, "learning_rate": 9.846930783870774e-06, "loss": 0.2214, "step": 791 }, { "epoch": 0.11, "grad_norm": 1.25510345041101, "learning_rate": 9.846394112490871e-06, "loss": 0.2318, "step": 792 }, { "epoch": 0.11, "grad_norm": 1.4266719292047687, "learning_rate": 9.845856516629213e-06, "loss": 0.247, "step": 793 }, { "epoch": 0.11, "grad_norm": 1.0812169237731417, "learning_rate": 9.845317996388344e-06, "loss": 0.1638, "step": 794 }, { "epoch": 0.11, "grad_norm": 1.6502057875948142, "learning_rate": 9.844778551870992e-06, "loss": 0.2321, "step": 795 }, { "epoch": 0.11, "grad_norm": 1.4217383838341269, "learning_rate": 9.844238183180062e-06, "loss": 0.2641, "step": 796 }, { "epoch": 0.11, "grad_norm": 1.2575093615267035, "learning_rate": 9.84369689041863e-06, "loss": 0.2517, "step": 797 }, { "epoch": 0.11, "grad_norm": 1.0987775412675593, "learning_rate": 9.843154673689953e-06, "loss": 0.2556, "step": 798 }, { "epoch": 0.11, "grad_norm": 1.0005285011072222, "learning_rate": 9.842611533097463e-06, "loss": 0.2598, "step": 799 }, { "epoch": 0.11, "grad_norm": 1.4755578440436372, "learning_rate": 9.842067468744765e-06, "loss": 0.2343, "step": 800 }, { "epoch": 0.11, "grad_norm": 1.1436999502354823, "learning_rate": 9.841522480735644e-06, "loss": 0.2712, "step": 801 }, { "epoch": 0.11, "grad_norm": 1.2223999137877746, "learning_rate": 9.84097656917406e-06, "loss": 0.207, "step": 802 }, { "epoch": 0.11, "grad_norm": 1.1551097329773554, "learning_rate": 9.840429734164153e-06, "loss": 0.2203, "step": 803 }, { "epoch": 0.11, "grad_norm": 1.544712268942903, "learning_rate": 9.839881975810231e-06, "loss": 0.2584, "step": 804 }, { "epoch": 0.11, "grad_norm": 0.8885199426899517, "learning_rate": 9.839333294216782e-06, "loss": 0.1984, "step": 805 }, { "epoch": 0.11, "grad_norm": 1.212914452679541, "learning_rate": 9.838783689488473e-06, "loss": 0.1849, "step": 806 }, { "epoch": 0.11, "grad_norm": 1.401991235653526, "learning_rate": 9.838233161730144e-06, "loss": 0.2886, "step": 807 }, { "epoch": 0.11, "grad_norm": 1.662763325262415, "learning_rate": 9.837681711046812e-06, "loss": 0.2401, "step": 808 }, { "epoch": 0.11, "grad_norm": 1.1519168289997064, "learning_rate": 9.837129337543668e-06, "loss": 0.2296, "step": 809 }, { "epoch": 0.11, "grad_norm": 1.158796796764017, "learning_rate": 9.836576041326085e-06, "loss": 0.2053, "step": 810 }, { "epoch": 0.11, "grad_norm": 1.1710758607349494, "learning_rate": 9.836021822499605e-06, "loss": 0.1699, "step": 811 }, { "epoch": 0.11, "grad_norm": 1.1656773113660077, "learning_rate": 9.835466681169949e-06, "loss": 0.2284, "step": 812 }, { "epoch": 0.11, "grad_norm": 1.1963113871504714, "learning_rate": 9.834910617443014e-06, "loss": 0.257, "step": 813 }, { "epoch": 0.11, "grad_norm": 1.2648441550957625, "learning_rate": 9.834353631424873e-06, "loss": 0.2584, "step": 814 }, { "epoch": 0.11, "grad_norm": 1.1860266223724936, "learning_rate": 9.833795723221776e-06, "loss": 0.2172, "step": 815 }, { "epoch": 0.11, "grad_norm": 1.4647908947115373, "learning_rate": 9.833236892940145e-06, "loss": 0.2721, "step": 816 }, { "epoch": 0.11, "grad_norm": 0.8971858964142536, "learning_rate": 9.832677140686584e-06, "loss": 0.2073, "step": 817 }, { "epoch": 0.11, "grad_norm": 1.8320070041735548, "learning_rate": 9.832116466567867e-06, "loss": 0.3094, "step": 818 }, { "epoch": 0.11, "grad_norm": 0.9734784684759342, "learning_rate": 9.831554870690945e-06, "loss": 0.1967, "step": 819 }, { "epoch": 0.11, "grad_norm": 1.2066272272865906, "learning_rate": 9.830992353162951e-06, "loss": 0.2482, "step": 820 }, { "epoch": 0.11, "grad_norm": 1.0712785566948935, "learning_rate": 9.830428914091185e-06, "loss": 0.2653, "step": 821 }, { "epoch": 0.11, "grad_norm": 1.092713270337398, "learning_rate": 9.829864553583126e-06, "loss": 0.1757, "step": 822 }, { "epoch": 0.11, "grad_norm": 1.188739125067949, "learning_rate": 9.829299271746433e-06, "loss": 0.1831, "step": 823 }, { "epoch": 0.11, "grad_norm": 1.107767761866914, "learning_rate": 9.828733068688936e-06, "loss": 0.2038, "step": 824 }, { "epoch": 0.11, "grad_norm": 1.3932322563323003, "learning_rate": 9.828165944518641e-06, "loss": 0.2558, "step": 825 }, { "epoch": 0.11, "grad_norm": 1.602701236695193, "learning_rate": 9.827597899343731e-06, "loss": 0.2528, "step": 826 }, { "epoch": 0.11, "grad_norm": 1.5483633312155973, "learning_rate": 9.827028933272564e-06, "loss": 0.2741, "step": 827 }, { "epoch": 0.11, "grad_norm": 1.2595605344972771, "learning_rate": 9.826459046413675e-06, "loss": 0.2243, "step": 828 }, { "epoch": 0.11, "grad_norm": 1.2738226875842265, "learning_rate": 9.825888238875772e-06, "loss": 0.2405, "step": 829 }, { "epoch": 0.11, "grad_norm": 1.232813211967336, "learning_rate": 9.825316510767744e-06, "loss": 0.2061, "step": 830 }, { "epoch": 0.11, "grad_norm": 0.8708702154268229, "learning_rate": 9.824743862198647e-06, "loss": 0.156, "step": 831 }, { "epoch": 0.11, "grad_norm": 0.7697069487193199, "learning_rate": 9.824170293277722e-06, "loss": 0.1702, "step": 832 }, { "epoch": 0.11, "grad_norm": 1.0269331444443028, "learning_rate": 9.823595804114379e-06, "loss": 0.2274, "step": 833 }, { "epoch": 0.11, "grad_norm": 1.3441793313584478, "learning_rate": 9.823020394818203e-06, "loss": 0.2539, "step": 834 }, { "epoch": 0.11, "grad_norm": 1.1219864621176474, "learning_rate": 9.822444065498964e-06, "loss": 0.2114, "step": 835 }, { "epoch": 0.11, "grad_norm": 1.5348095754362354, "learning_rate": 9.821866816266595e-06, "loss": 0.2415, "step": 836 }, { "epoch": 0.11, "grad_norm": 1.0384560607647009, "learning_rate": 9.821288647231208e-06, "loss": 0.2024, "step": 837 }, { "epoch": 0.11, "grad_norm": 1.240711652529904, "learning_rate": 9.8207095585031e-06, "loss": 0.265, "step": 838 }, { "epoch": 0.11, "grad_norm": 1.200714928517479, "learning_rate": 9.820129550192731e-06, "loss": 0.2241, "step": 839 }, { "epoch": 0.11, "grad_norm": 1.082931142967991, "learning_rate": 9.819548622410744e-06, "loss": 0.2484, "step": 840 }, { "epoch": 0.11, "grad_norm": 1.5197698625198457, "learning_rate": 9.818966775267951e-06, "loss": 0.2639, "step": 841 }, { "epoch": 0.11, "grad_norm": 1.1842021432539755, "learning_rate": 9.818384008875347e-06, "loss": 0.263, "step": 842 }, { "epoch": 0.11, "grad_norm": 1.4047087277327763, "learning_rate": 9.817800323344097e-06, "loss": 0.2919, "step": 843 }, { "epoch": 0.11, "grad_norm": 1.2213172698031447, "learning_rate": 9.817215718785543e-06, "loss": 0.2173, "step": 844 }, { "epoch": 0.11, "grad_norm": 1.193541635181449, "learning_rate": 9.816630195311202e-06, "loss": 0.2072, "step": 845 }, { "epoch": 0.11, "grad_norm": 1.143664025867426, "learning_rate": 9.816043753032766e-06, "loss": 0.2431, "step": 846 }, { "epoch": 0.11, "grad_norm": 1.6792810241016678, "learning_rate": 9.815456392062104e-06, "loss": 0.3261, "step": 847 }, { "epoch": 0.11, "grad_norm": 1.1828813782502552, "learning_rate": 9.814868112511258e-06, "loss": 0.2736, "step": 848 }, { "epoch": 0.11, "grad_norm": 1.071191895121823, "learning_rate": 9.814278914492447e-06, "loss": 0.2476, "step": 849 }, { "epoch": 0.11, "grad_norm": 0.7559637224922314, "learning_rate": 9.813688798118066e-06, "loss": 0.1669, "step": 850 }, { "epoch": 0.11, "grad_norm": 0.9801631326379127, "learning_rate": 9.813097763500678e-06, "loss": 0.2286, "step": 851 }, { "epoch": 0.11, "grad_norm": 1.2499603650846947, "learning_rate": 9.812505810753034e-06, "loss": 0.1989, "step": 852 }, { "epoch": 0.12, "grad_norm": 0.8842321667236669, "learning_rate": 9.811912939988048e-06, "loss": 0.1448, "step": 853 }, { "epoch": 0.12, "grad_norm": 0.9351363078679387, "learning_rate": 9.811319151318815e-06, "loss": 0.1705, "step": 854 }, { "epoch": 0.12, "grad_norm": 1.6170599638403864, "learning_rate": 9.810724444858604e-06, "loss": 0.2814, "step": 855 }, { "epoch": 0.12, "grad_norm": 1.3209947376836961, "learning_rate": 9.810128820720861e-06, "loss": 0.2616, "step": 856 }, { "epoch": 0.12, "grad_norm": 1.393112412771032, "learning_rate": 9.809532279019203e-06, "loss": 0.216, "step": 857 }, { "epoch": 0.12, "grad_norm": 1.1163247525303877, "learning_rate": 9.808934819867423e-06, "loss": 0.2198, "step": 858 }, { "epoch": 0.12, "grad_norm": 1.0806463986698724, "learning_rate": 9.808336443379495e-06, "loss": 0.2466, "step": 859 }, { "epoch": 0.12, "grad_norm": 0.8010884241627552, "learning_rate": 9.807737149669562e-06, "loss": 0.1953, "step": 860 }, { "epoch": 0.12, "grad_norm": 1.1554627175843275, "learning_rate": 9.80713693885194e-06, "loss": 0.187, "step": 861 }, { "epoch": 0.12, "grad_norm": 0.7409948323291893, "learning_rate": 9.806535811041126e-06, "loss": 0.0877, "step": 862 }, { "epoch": 0.12, "grad_norm": 1.289278690538077, "learning_rate": 9.805933766351787e-06, "loss": 0.2252, "step": 863 }, { "epoch": 0.12, "grad_norm": 1.199904611162512, "learning_rate": 9.80533080489877e-06, "loss": 0.2468, "step": 864 }, { "epoch": 0.12, "grad_norm": 0.7409285219789339, "learning_rate": 9.804726926797092e-06, "loss": 0.1803, "step": 865 }, { "epoch": 0.12, "grad_norm": 1.287679436974793, "learning_rate": 9.804122132161946e-06, "loss": 0.201, "step": 866 }, { "epoch": 0.12, "grad_norm": 1.1389185639084853, "learning_rate": 9.803516421108704e-06, "loss": 0.22, "step": 867 }, { "epoch": 0.12, "grad_norm": 1.258888144967439, "learning_rate": 9.802909793752903e-06, "loss": 0.2444, "step": 868 }, { "epoch": 0.12, "grad_norm": 1.1681034775431713, "learning_rate": 9.802302250210268e-06, "loss": 0.2244, "step": 869 }, { "epoch": 0.12, "grad_norm": 1.0420843480823854, "learning_rate": 9.801693790596688e-06, "loss": 0.1994, "step": 870 }, { "epoch": 0.12, "grad_norm": 1.3536168320724469, "learning_rate": 9.80108441502823e-06, "loss": 0.2707, "step": 871 }, { "epoch": 0.12, "grad_norm": 1.0176590752762766, "learning_rate": 9.800474123621141e-06, "loss": 0.241, "step": 872 }, { "epoch": 0.12, "grad_norm": 1.1653980761953597, "learning_rate": 9.799862916491834e-06, "loss": 0.2152, "step": 873 }, { "epoch": 0.12, "grad_norm": 1.5365545265624991, "learning_rate": 9.799250793756902e-06, "loss": 0.3097, "step": 874 }, { "epoch": 0.12, "grad_norm": 1.238844234980156, "learning_rate": 9.798637755533112e-06, "loss": 0.1747, "step": 875 }, { "epoch": 0.12, "grad_norm": 1.3679210895417944, "learning_rate": 9.798023801937406e-06, "loss": 0.2637, "step": 876 }, { "epoch": 0.12, "grad_norm": 0.9003740919391385, "learning_rate": 9.797408933086896e-06, "loss": 0.21, "step": 877 }, { "epoch": 0.12, "grad_norm": 1.3394407126552694, "learning_rate": 9.796793149098876e-06, "loss": 0.2774, "step": 878 }, { "epoch": 0.12, "grad_norm": 1.504199413751013, "learning_rate": 9.796176450090808e-06, "loss": 0.2513, "step": 879 }, { "epoch": 0.12, "grad_norm": 1.2512253174107395, "learning_rate": 9.795558836180336e-06, "loss": 0.2472, "step": 880 }, { "epoch": 0.12, "grad_norm": 0.9124681054381032, "learning_rate": 9.794940307485268e-06, "loss": 0.2144, "step": 881 }, { "epoch": 0.12, "grad_norm": 1.1111154569435775, "learning_rate": 9.794320864123597e-06, "loss": 0.1822, "step": 882 }, { "epoch": 0.12, "grad_norm": 1.424794709786109, "learning_rate": 9.793700506213484e-06, "loss": 0.2458, "step": 883 }, { "epoch": 0.12, "grad_norm": 0.9637698398180545, "learning_rate": 9.793079233873266e-06, "loss": 0.1494, "step": 884 }, { "epoch": 0.12, "grad_norm": 1.2108863153546066, "learning_rate": 9.792457047221457e-06, "loss": 0.1779, "step": 885 }, { "epoch": 0.12, "grad_norm": 1.263648541641633, "learning_rate": 9.79183394637674e-06, "loss": 0.2196, "step": 886 }, { "epoch": 0.12, "grad_norm": 1.3205428572766154, "learning_rate": 9.791209931457978e-06, "loss": 0.2423, "step": 887 }, { "epoch": 0.12, "grad_norm": 1.4952871923081208, "learning_rate": 9.790585002584206e-06, "loss": 0.2521, "step": 888 }, { "epoch": 0.12, "grad_norm": 1.0746756690598378, "learning_rate": 9.789959159874632e-06, "loss": 0.2286, "step": 889 }, { "epoch": 0.12, "grad_norm": 1.406025524250865, "learning_rate": 9.789332403448639e-06, "loss": 0.2834, "step": 890 }, { "epoch": 0.12, "grad_norm": 1.373034527547828, "learning_rate": 9.788704733425787e-06, "loss": 0.2483, "step": 891 }, { "epoch": 0.12, "grad_norm": 1.2156262392762067, "learning_rate": 9.788076149925807e-06, "loss": 0.2509, "step": 892 }, { "epoch": 0.12, "grad_norm": 1.1731824490871774, "learning_rate": 9.787446653068607e-06, "loss": 0.2632, "step": 893 }, { "epoch": 0.12, "grad_norm": 0.9510618425930021, "learning_rate": 9.786816242974267e-06, "loss": 0.1572, "step": 894 }, { "epoch": 0.12, "grad_norm": 1.106089084668461, "learning_rate": 9.786184919763039e-06, "loss": 0.2064, "step": 895 }, { "epoch": 0.12, "grad_norm": 0.9917485123901718, "learning_rate": 9.785552683555357e-06, "loss": 0.1704, "step": 896 }, { "epoch": 0.12, "grad_norm": 1.4838167878331008, "learning_rate": 9.78491953447182e-06, "loss": 0.2311, "step": 897 }, { "epoch": 0.12, "grad_norm": 1.2159156668043185, "learning_rate": 9.784285472633208e-06, "loss": 0.2057, "step": 898 }, { "epoch": 0.12, "grad_norm": 1.2129663933264117, "learning_rate": 9.783650498160472e-06, "loss": 0.2639, "step": 899 }, { "epoch": 0.12, "grad_norm": 1.1245449020040288, "learning_rate": 9.783014611174737e-06, "loss": 0.1945, "step": 900 }, { "epoch": 0.12, "grad_norm": 1.2982282725613732, "learning_rate": 9.782377811797302e-06, "loss": 0.2238, "step": 901 }, { "epoch": 0.12, "grad_norm": 1.3822367836242817, "learning_rate": 9.781740100149643e-06, "loss": 0.2831, "step": 902 }, { "epoch": 0.12, "grad_norm": 1.3814354882044346, "learning_rate": 9.781101476353406e-06, "loss": 0.235, "step": 903 }, { "epoch": 0.12, "grad_norm": 1.4718602757829804, "learning_rate": 9.780461940530411e-06, "loss": 0.2178, "step": 904 }, { "epoch": 0.12, "grad_norm": 1.0976476185950974, "learning_rate": 9.779821492802657e-06, "loss": 0.2163, "step": 905 }, { "epoch": 0.12, "grad_norm": 1.2666236548734706, "learning_rate": 9.779180133292313e-06, "loss": 0.2304, "step": 906 }, { "epoch": 0.12, "grad_norm": 1.4155081023971583, "learning_rate": 9.778537862121722e-06, "loss": 0.2637, "step": 907 }, { "epoch": 0.12, "grad_norm": 1.3379259876292087, "learning_rate": 9.7778946794134e-06, "loss": 0.2283, "step": 908 }, { "epoch": 0.12, "grad_norm": 1.1669581822274064, "learning_rate": 9.777250585290041e-06, "loss": 0.2285, "step": 909 }, { "epoch": 0.12, "grad_norm": 1.2777471697908045, "learning_rate": 9.776605579874508e-06, "loss": 0.2595, "step": 910 }, { "epoch": 0.12, "grad_norm": 0.9449617591786018, "learning_rate": 9.775959663289843e-06, "loss": 0.1945, "step": 911 }, { "epoch": 0.12, "grad_norm": 1.233866542530544, "learning_rate": 9.775312835659254e-06, "loss": 0.208, "step": 912 }, { "epoch": 0.12, "grad_norm": 1.062821607517932, "learning_rate": 9.774665097106133e-06, "loss": 0.1976, "step": 913 }, { "epoch": 0.12, "grad_norm": 0.9668099889536191, "learning_rate": 9.774016447754037e-06, "loss": 0.2151, "step": 914 }, { "epoch": 0.12, "grad_norm": 1.2473935576485717, "learning_rate": 9.773366887726701e-06, "loss": 0.253, "step": 915 }, { "epoch": 0.12, "grad_norm": 1.0570060466683904, "learning_rate": 9.772716417148032e-06, "loss": 0.1939, "step": 916 }, { "epoch": 0.12, "grad_norm": 1.4695428095874645, "learning_rate": 9.772065036142115e-06, "loss": 0.261, "step": 917 }, { "epoch": 0.12, "grad_norm": 0.9507937485354795, "learning_rate": 9.771412744833202e-06, "loss": 0.2256, "step": 918 }, { "epoch": 0.12, "grad_norm": 1.5058998485172894, "learning_rate": 9.77075954334572e-06, "loss": 0.2404, "step": 919 }, { "epoch": 0.12, "grad_norm": 0.9673202745864136, "learning_rate": 9.770105431804278e-06, "loss": 0.1417, "step": 920 }, { "epoch": 0.12, "grad_norm": 1.2248565671907512, "learning_rate": 9.769450410333646e-06, "loss": 0.2564, "step": 921 }, { "epoch": 0.12, "grad_norm": 1.007215697227723, "learning_rate": 9.768794479058776e-06, "loss": 0.2507, "step": 922 }, { "epoch": 0.12, "grad_norm": 1.3384354490879686, "learning_rate": 9.768137638104793e-06, "loss": 0.1923, "step": 923 }, { "epoch": 0.12, "grad_norm": 1.131798157426711, "learning_rate": 9.76747988759699e-06, "loss": 0.2083, "step": 924 }, { "epoch": 0.12, "grad_norm": 1.3943147741091138, "learning_rate": 9.76682122766084e-06, "loss": 0.2408, "step": 925 }, { "epoch": 0.12, "grad_norm": 1.0155290576271687, "learning_rate": 9.766161658421986e-06, "loss": 0.166, "step": 926 }, { "epoch": 0.12, "grad_norm": 1.1854554786395386, "learning_rate": 9.765501180006246e-06, "loss": 0.1922, "step": 927 }, { "epoch": 0.13, "grad_norm": 1.4516584876698508, "learning_rate": 9.76483979253961e-06, "loss": 0.305, "step": 928 }, { "epoch": 0.13, "grad_norm": 1.2804253635435459, "learning_rate": 9.76417749614824e-06, "loss": 0.2412, "step": 929 }, { "epoch": 0.13, "grad_norm": 0.9816431087333565, "learning_rate": 9.763514290958476e-06, "loss": 0.1854, "step": 930 }, { "epoch": 0.13, "grad_norm": 0.9461225050509303, "learning_rate": 9.762850177096828e-06, "loss": 0.2107, "step": 931 }, { "epoch": 0.13, "grad_norm": 1.0161991894204472, "learning_rate": 9.76218515468998e-06, "loss": 0.2262, "step": 932 }, { "epoch": 0.13, "grad_norm": 1.0522249070480922, "learning_rate": 9.761519223864789e-06, "loss": 0.2229, "step": 933 }, { "epoch": 0.13, "grad_norm": 1.2271895634404208, "learning_rate": 9.760852384748288e-06, "loss": 0.2174, "step": 934 }, { "epoch": 0.13, "grad_norm": 1.2156561510736084, "learning_rate": 9.760184637467678e-06, "loss": 0.2372, "step": 935 }, { "epoch": 0.13, "grad_norm": 1.4637896036886333, "learning_rate": 9.759515982150338e-06, "loss": 0.3021, "step": 936 }, { "epoch": 0.13, "grad_norm": 1.3182802344965028, "learning_rate": 9.758846418923818e-06, "loss": 0.2753, "step": 937 }, { "epoch": 0.13, "grad_norm": 1.257038988888129, "learning_rate": 9.75817594791584e-06, "loss": 0.2585, "step": 938 }, { "epoch": 0.13, "grad_norm": 1.0008764055138328, "learning_rate": 9.757504569254305e-06, "loss": 0.2837, "step": 939 }, { "epoch": 0.13, "grad_norm": 1.1696833468889771, "learning_rate": 9.75683228306728e-06, "loss": 0.1776, "step": 940 }, { "epoch": 0.13, "grad_norm": 0.6825642457408924, "learning_rate": 9.756159089483006e-06, "loss": 0.1596, "step": 941 }, { "epoch": 0.13, "grad_norm": 1.057846852035875, "learning_rate": 9.755484988629901e-06, "loss": 0.1615, "step": 942 }, { "epoch": 0.13, "grad_norm": 1.2012263861791013, "learning_rate": 9.754809980636557e-06, "loss": 0.1944, "step": 943 }, { "epoch": 0.13, "grad_norm": 1.1853019440323378, "learning_rate": 9.754134065631733e-06, "loss": 0.2051, "step": 944 }, { "epoch": 0.13, "grad_norm": 0.8413821630531813, "learning_rate": 9.753457243744366e-06, "loss": 0.1536, "step": 945 }, { "epoch": 0.13, "grad_norm": 1.2698890502929812, "learning_rate": 9.752779515103563e-06, "loss": 0.1938, "step": 946 }, { "epoch": 0.13, "grad_norm": 1.1031617841954355, "learning_rate": 9.752100879838604e-06, "loss": 0.2102, "step": 947 }, { "epoch": 0.13, "grad_norm": 1.026323523227093, "learning_rate": 9.751421338078946e-06, "loss": 0.1589, "step": 948 }, { "epoch": 0.13, "grad_norm": 1.2170136071098372, "learning_rate": 9.750740889954215e-06, "loss": 0.2057, "step": 949 }, { "epoch": 0.13, "grad_norm": 1.4289051545051177, "learning_rate": 9.750059535594211e-06, "loss": 0.2359, "step": 950 }, { "epoch": 0.13, "grad_norm": 1.3628091260997732, "learning_rate": 9.749377275128906e-06, "loss": 0.2159, "step": 951 }, { "epoch": 0.13, "grad_norm": 1.319536677477234, "learning_rate": 9.748694108688446e-06, "loss": 0.2404, "step": 952 }, { "epoch": 0.13, "grad_norm": 1.4389475574857147, "learning_rate": 9.748010036403151e-06, "loss": 0.2416, "step": 953 }, { "epoch": 0.13, "grad_norm": 1.2417447963432469, "learning_rate": 9.747325058403512e-06, "loss": 0.2456, "step": 954 }, { "epoch": 0.13, "grad_norm": 0.9977698423398847, "learning_rate": 9.746639174820191e-06, "loss": 0.1457, "step": 955 }, { "epoch": 0.13, "grad_norm": 1.1785482101248568, "learning_rate": 9.745952385784025e-06, "loss": 0.1999, "step": 956 }, { "epoch": 0.13, "grad_norm": 1.2507402053912173, "learning_rate": 9.745264691426027e-06, "loss": 0.2051, "step": 957 }, { "epoch": 0.13, "grad_norm": 1.050373772005299, "learning_rate": 9.744576091877375e-06, "loss": 0.1739, "step": 958 }, { "epoch": 0.13, "grad_norm": 1.3566177138457165, "learning_rate": 9.743886587269428e-06, "loss": 0.214, "step": 959 }, { "epoch": 0.13, "grad_norm": 1.3217776955259406, "learning_rate": 9.743196177733709e-06, "loss": 0.251, "step": 960 }, { "epoch": 0.13, "grad_norm": 1.5290523638948337, "learning_rate": 9.742504863401923e-06, "loss": 0.2901, "step": 961 }, { "epoch": 0.13, "grad_norm": 1.3211695397373946, "learning_rate": 9.741812644405939e-06, "loss": 0.1842, "step": 962 }, { "epoch": 0.13, "grad_norm": 0.8958521675544511, "learning_rate": 9.741119520877804e-06, "loss": 0.1119, "step": 963 }, { "epoch": 0.13, "grad_norm": 1.27148202666826, "learning_rate": 9.740425492949737e-06, "loss": 0.2224, "step": 964 }, { "epoch": 0.13, "grad_norm": 1.1245190771152203, "learning_rate": 9.739730560754125e-06, "loss": 0.1952, "step": 965 }, { "epoch": 0.13, "grad_norm": 1.1393773542864605, "learning_rate": 9.739034724423535e-06, "loss": 0.1893, "step": 966 }, { "epoch": 0.13, "grad_norm": 1.559109215612849, "learning_rate": 9.738337984090699e-06, "loss": 0.259, "step": 967 }, { "epoch": 0.13, "grad_norm": 0.7983358895919931, "learning_rate": 9.737640339888527e-06, "loss": 0.156, "step": 968 }, { "epoch": 0.13, "grad_norm": 1.3039252362626712, "learning_rate": 9.736941791950097e-06, "loss": 0.2144, "step": 969 }, { "epoch": 0.13, "grad_norm": 1.171530814243301, "learning_rate": 9.736242340408665e-06, "loss": 0.198, "step": 970 }, { "epoch": 0.13, "grad_norm": 1.260244965677875, "learning_rate": 9.735541985397654e-06, "loss": 0.2778, "step": 971 }, { "epoch": 0.13, "grad_norm": 1.1326433473588755, "learning_rate": 9.73484072705066e-06, "loss": 0.2346, "step": 972 }, { "epoch": 0.13, "grad_norm": 1.0145472705277858, "learning_rate": 9.734138565501455e-06, "loss": 0.165, "step": 973 }, { "epoch": 0.13, "grad_norm": 0.9508044762452798, "learning_rate": 9.733435500883982e-06, "loss": 0.1918, "step": 974 }, { "epoch": 0.13, "grad_norm": 1.0911061797576933, "learning_rate": 9.73273153333235e-06, "loss": 0.2078, "step": 975 }, { "epoch": 0.13, "grad_norm": 0.9512423707426831, "learning_rate": 9.73202666298085e-06, "loss": 0.1745, "step": 976 }, { "epoch": 0.13, "grad_norm": 1.2348568077118414, "learning_rate": 9.73132088996394e-06, "loss": 0.2703, "step": 977 }, { "epoch": 0.13, "grad_norm": 1.450752959964214, "learning_rate": 9.730614214416251e-06, "loss": 0.1878, "step": 978 }, { "epoch": 0.13, "grad_norm": 1.027328876612728, "learning_rate": 9.729906636472584e-06, "loss": 0.2085, "step": 979 }, { "epoch": 0.13, "grad_norm": 1.2550651447210104, "learning_rate": 9.729198156267915e-06, "loss": 0.2329, "step": 980 }, { "epoch": 0.13, "grad_norm": 1.2544193131665407, "learning_rate": 9.728488773937394e-06, "loss": 0.2412, "step": 981 }, { "epoch": 0.13, "grad_norm": 1.0816037526033715, "learning_rate": 9.727778489616338e-06, "loss": 0.223, "step": 982 }, { "epoch": 0.13, "grad_norm": 1.003609851622092, "learning_rate": 9.727067303440238e-06, "loss": 0.2003, "step": 983 }, { "epoch": 0.13, "grad_norm": 0.8700162682064565, "learning_rate": 9.726355215544758e-06, "loss": 0.1337, "step": 984 }, { "epoch": 0.13, "grad_norm": 0.9469457222492842, "learning_rate": 9.725642226065735e-06, "loss": 0.1598, "step": 985 }, { "epoch": 0.13, "grad_norm": 1.1783659354800284, "learning_rate": 9.724928335139174e-06, "loss": 0.2245, "step": 986 }, { "epoch": 0.13, "grad_norm": 1.7058645124873606, "learning_rate": 9.724213542901256e-06, "loss": 0.2791, "step": 987 }, { "epoch": 0.13, "grad_norm": 0.9957354935155431, "learning_rate": 9.723497849488331e-06, "loss": 0.1567, "step": 988 }, { "epoch": 0.13, "grad_norm": 1.2752089749538313, "learning_rate": 9.722781255036924e-06, "loss": 0.2548, "step": 989 }, { "epoch": 0.13, "grad_norm": 1.0701268431126436, "learning_rate": 9.722063759683728e-06, "loss": 0.191, "step": 990 }, { "epoch": 0.13, "grad_norm": 1.132315377283826, "learning_rate": 9.721345363565614e-06, "loss": 0.2387, "step": 991 }, { "epoch": 0.13, "grad_norm": 1.2255486120656973, "learning_rate": 9.720626066819616e-06, "loss": 0.2241, "step": 992 }, { "epoch": 0.13, "grad_norm": 1.2666622916266597, "learning_rate": 9.719905869582947e-06, "loss": 0.2435, "step": 993 }, { "epoch": 0.13, "grad_norm": 0.9715254336011716, "learning_rate": 9.719184771992989e-06, "loss": 0.2036, "step": 994 }, { "epoch": 0.13, "grad_norm": 1.2172737665302815, "learning_rate": 9.718462774187297e-06, "loss": 0.2358, "step": 995 }, { "epoch": 0.13, "grad_norm": 1.2305014223774375, "learning_rate": 9.717739876303596e-06, "loss": 0.2055, "step": 996 }, { "epoch": 0.13, "grad_norm": 1.0968907591994232, "learning_rate": 9.717016078479782e-06, "loss": 0.2357, "step": 997 }, { "epoch": 0.13, "grad_norm": 1.1421367810087129, "learning_rate": 9.716291380853929e-06, "loss": 0.1928, "step": 998 }, { "epoch": 0.13, "grad_norm": 1.1747326583658761, "learning_rate": 9.715565783564272e-06, "loss": 0.2512, "step": 999 }, { "epoch": 0.13, "grad_norm": 1.3058131680070366, "learning_rate": 9.714839286749227e-06, "loss": 0.2154, "step": 1000 }, { "epoch": 0.13, "grad_norm": 1.15948288444323, "learning_rate": 9.714111890547377e-06, "loss": 0.1765, "step": 1001 }, { "epoch": 0.14, "grad_norm": 1.1891269750276225, "learning_rate": 9.713383595097479e-06, "loss": 0.2326, "step": 1002 }, { "epoch": 0.14, "grad_norm": 1.1290810002785425, "learning_rate": 9.712654400538457e-06, "loss": 0.169, "step": 1003 }, { "epoch": 0.14, "grad_norm": 1.2892014103498604, "learning_rate": 9.711924307009414e-06, "loss": 0.2111, "step": 1004 }, { "epoch": 0.14, "grad_norm": 1.197118596960319, "learning_rate": 9.711193314649618e-06, "loss": 0.227, "step": 1005 }, { "epoch": 0.14, "grad_norm": 1.2227381394997423, "learning_rate": 9.71046142359851e-06, "loss": 0.2163, "step": 1006 }, { "epoch": 0.14, "grad_norm": 1.1958185386785332, "learning_rate": 9.709728633995704e-06, "loss": 0.2015, "step": 1007 }, { "epoch": 0.14, "grad_norm": 0.9557314441033037, "learning_rate": 9.708994945980983e-06, "loss": 0.1864, "step": 1008 }, { "epoch": 0.14, "grad_norm": 0.9548185436495796, "learning_rate": 9.708260359694303e-06, "loss": 0.193, "step": 1009 }, { "epoch": 0.14, "grad_norm": 1.0842099440211261, "learning_rate": 9.707524875275796e-06, "loss": 0.1946, "step": 1010 }, { "epoch": 0.14, "grad_norm": 0.9210583003036165, "learning_rate": 9.706788492865753e-06, "loss": 0.176, "step": 1011 }, { "epoch": 0.14, "grad_norm": 1.495003267106425, "learning_rate": 9.70605121260465e-06, "loss": 0.2768, "step": 1012 }, { "epoch": 0.14, "grad_norm": 1.407185405310799, "learning_rate": 9.705313034633123e-06, "loss": 0.2287, "step": 1013 }, { "epoch": 0.14, "grad_norm": 1.041477945536473, "learning_rate": 9.704573959091989e-06, "loss": 0.1869, "step": 1014 }, { "epoch": 0.14, "grad_norm": 0.7991879403727413, "learning_rate": 9.70383398612223e-06, "loss": 0.1591, "step": 1015 }, { "epoch": 0.14, "grad_norm": 1.154677799020606, "learning_rate": 9.703093115865e-06, "loss": 0.2197, "step": 1016 }, { "epoch": 0.14, "grad_norm": 1.0661693894018154, "learning_rate": 9.702351348461624e-06, "loss": 0.2371, "step": 1017 }, { "epoch": 0.14, "grad_norm": 1.3540584962101443, "learning_rate": 9.7016086840536e-06, "loss": 0.2413, "step": 1018 }, { "epoch": 0.14, "grad_norm": 0.9273946337523115, "learning_rate": 9.7008651227826e-06, "loss": 0.1905, "step": 1019 }, { "epoch": 0.14, "grad_norm": 0.851235795705078, "learning_rate": 9.700120664790456e-06, "loss": 0.1759, "step": 1020 }, { "epoch": 0.14, "grad_norm": 1.368274937699009, "learning_rate": 9.699375310219182e-06, "loss": 0.3157, "step": 1021 }, { "epoch": 0.14, "grad_norm": 1.5333554326693353, "learning_rate": 9.69862905921096e-06, "loss": 0.3077, "step": 1022 }, { "epoch": 0.14, "grad_norm": 1.0651338696855408, "learning_rate": 9.697881911908141e-06, "loss": 0.2076, "step": 1023 }, { "epoch": 0.14, "grad_norm": 1.2696309353612496, "learning_rate": 9.69713386845325e-06, "loss": 0.2757, "step": 1024 }, { "epoch": 0.14, "grad_norm": 0.7858681698772676, "learning_rate": 9.696384928988979e-06, "loss": 0.168, "step": 1025 }, { "epoch": 0.14, "grad_norm": 1.2807881468736741, "learning_rate": 9.695635093658195e-06, "loss": 0.2921, "step": 1026 }, { "epoch": 0.14, "grad_norm": 1.3593957685474511, "learning_rate": 9.694884362603933e-06, "loss": 0.2383, "step": 1027 }, { "epoch": 0.14, "grad_norm": 1.1141293436430735, "learning_rate": 9.6941327359694e-06, "loss": 0.1906, "step": 1028 }, { "epoch": 0.14, "grad_norm": 0.996447091802241, "learning_rate": 9.693380213897974e-06, "loss": 0.1685, "step": 1029 }, { "epoch": 0.14, "grad_norm": 1.4300124129844893, "learning_rate": 9.692626796533203e-06, "loss": 0.2353, "step": 1030 }, { "epoch": 0.14, "grad_norm": 1.0843082953409242, "learning_rate": 9.691872484018807e-06, "loss": 0.145, "step": 1031 }, { "epoch": 0.14, "grad_norm": 1.4332564992900834, "learning_rate": 9.691117276498675e-06, "loss": 0.2821, "step": 1032 }, { "epoch": 0.14, "grad_norm": 1.439966184825289, "learning_rate": 9.690361174116871e-06, "loss": 0.2474, "step": 1033 }, { "epoch": 0.14, "grad_norm": 1.2126280203982789, "learning_rate": 9.689604177017623e-06, "loss": 0.2172, "step": 1034 }, { "epoch": 0.14, "grad_norm": 0.9951212997777158, "learning_rate": 9.688846285345334e-06, "loss": 0.2112, "step": 1035 }, { "epoch": 0.14, "grad_norm": 1.6605901567663932, "learning_rate": 9.68808749924458e-06, "loss": 0.254, "step": 1036 }, { "epoch": 0.14, "grad_norm": 1.1385312758597268, "learning_rate": 9.6873278188601e-06, "loss": 0.2077, "step": 1037 }, { "epoch": 0.14, "grad_norm": 0.6533924211742017, "learning_rate": 9.686567244336809e-06, "loss": 0.1594, "step": 1038 }, { "epoch": 0.14, "grad_norm": 0.9423383102881253, "learning_rate": 9.685805775819794e-06, "loss": 0.2056, "step": 1039 }, { "epoch": 0.14, "grad_norm": 1.3222088233691038, "learning_rate": 9.685043413454309e-06, "loss": 0.2054, "step": 1040 }, { "epoch": 0.14, "grad_norm": 1.2361435842748218, "learning_rate": 9.684280157385777e-06, "loss": 0.2044, "step": 1041 }, { "epoch": 0.14, "grad_norm": 1.1479849706260885, "learning_rate": 9.6835160077598e-06, "loss": 0.1898, "step": 1042 }, { "epoch": 0.14, "grad_norm": 1.2204179738239862, "learning_rate": 9.682750964722139e-06, "loss": 0.2126, "step": 1043 }, { "epoch": 0.14, "grad_norm": 1.191400635620407, "learning_rate": 9.681985028418733e-06, "loss": 0.226, "step": 1044 }, { "epoch": 0.14, "grad_norm": 1.2117485524237865, "learning_rate": 9.68121819899569e-06, "loss": 0.2214, "step": 1045 }, { "epoch": 0.14, "grad_norm": 1.263817370373972, "learning_rate": 9.680450476599288e-06, "loss": 0.2106, "step": 1046 }, { "epoch": 0.14, "grad_norm": 1.1509589921323664, "learning_rate": 9.679681861375974e-06, "loss": 0.2274, "step": 1047 }, { "epoch": 0.14, "grad_norm": 1.1717580826603413, "learning_rate": 9.678912353472367e-06, "loss": 0.2297, "step": 1048 }, { "epoch": 0.14, "grad_norm": 1.1381281169912367, "learning_rate": 9.678141953035255e-06, "loss": 0.2384, "step": 1049 }, { "epoch": 0.14, "grad_norm": 1.1502519140535346, "learning_rate": 9.677370660211599e-06, "loss": 0.2553, "step": 1050 }, { "epoch": 0.14, "grad_norm": 0.9323126498846268, "learning_rate": 9.676598475148525e-06, "loss": 0.1404, "step": 1051 }, { "epoch": 0.14, "grad_norm": 1.1019815728941744, "learning_rate": 9.675825397993336e-06, "loss": 0.2174, "step": 1052 }, { "epoch": 0.14, "grad_norm": 1.060933607378649, "learning_rate": 9.675051428893499e-06, "loss": 0.1817, "step": 1053 }, { "epoch": 0.14, "grad_norm": 1.196814687062691, "learning_rate": 9.674276567996655e-06, "loss": 0.2253, "step": 1054 }, { "epoch": 0.14, "grad_norm": 0.980535231569342, "learning_rate": 9.673500815450612e-06, "loss": 0.1644, "step": 1055 }, { "epoch": 0.14, "grad_norm": 0.9403965107740396, "learning_rate": 9.672724171403353e-06, "loss": 0.1736, "step": 1056 }, { "epoch": 0.14, "grad_norm": 1.1468239963032874, "learning_rate": 9.671946636003025e-06, "loss": 0.1727, "step": 1057 }, { "epoch": 0.14, "grad_norm": 0.9646642863335593, "learning_rate": 9.671168209397951e-06, "loss": 0.129, "step": 1058 }, { "epoch": 0.14, "grad_norm": 0.9411456480082613, "learning_rate": 9.670388891736619e-06, "loss": 0.1123, "step": 1059 }, { "epoch": 0.14, "grad_norm": 1.4568307691292408, "learning_rate": 9.669608683167688e-06, "loss": 0.2429, "step": 1060 }, { "epoch": 0.14, "grad_norm": 1.317543325774873, "learning_rate": 9.66882758383999e-06, "loss": 0.2369, "step": 1061 }, { "epoch": 0.14, "grad_norm": 1.3857008940508297, "learning_rate": 9.668045593902526e-06, "loss": 0.1898, "step": 1062 }, { "epoch": 0.14, "grad_norm": 0.9374683182173494, "learning_rate": 9.667262713504462e-06, "loss": 0.1904, "step": 1063 }, { "epoch": 0.14, "grad_norm": 1.3298445907356944, "learning_rate": 9.666478942795141e-06, "loss": 0.2614, "step": 1064 }, { "epoch": 0.14, "grad_norm": 1.0358601707257902, "learning_rate": 9.66569428192407e-06, "loss": 0.2323, "step": 1065 }, { "epoch": 0.14, "grad_norm": 1.4773712958550964, "learning_rate": 9.66490873104093e-06, "loss": 0.2465, "step": 1066 }, { "epoch": 0.14, "grad_norm": 0.9298322349758408, "learning_rate": 9.664122290295568e-06, "loss": 0.2154, "step": 1067 }, { "epoch": 0.14, "grad_norm": 1.3128770156509715, "learning_rate": 9.663334959838004e-06, "loss": 0.246, "step": 1068 }, { "epoch": 0.14, "grad_norm": 0.9227606039712569, "learning_rate": 9.662546739818427e-06, "loss": 0.166, "step": 1069 }, { "epoch": 0.14, "grad_norm": 1.0017074248239382, "learning_rate": 9.661757630387197e-06, "loss": 0.1774, "step": 1070 }, { "epoch": 0.14, "grad_norm": 1.3456789286595758, "learning_rate": 9.660967631694837e-06, "loss": 0.222, "step": 1071 }, { "epoch": 0.14, "grad_norm": 1.389175242445496, "learning_rate": 9.66017674389205e-06, "loss": 0.2228, "step": 1072 }, { "epoch": 0.14, "grad_norm": 1.0711348420094444, "learning_rate": 9.659384967129697e-06, "loss": 0.2108, "step": 1073 }, { "epoch": 0.14, "grad_norm": 1.1753401107045678, "learning_rate": 9.65859230155882e-06, "loss": 0.2552, "step": 1074 }, { "epoch": 0.14, "grad_norm": 1.2282517688145893, "learning_rate": 9.657798747330622e-06, "loss": 0.1795, "step": 1075 }, { "epoch": 0.15, "grad_norm": 1.2765197164439281, "learning_rate": 9.65700430459648e-06, "loss": 0.2155, "step": 1076 }, { "epoch": 0.15, "grad_norm": 1.286428884728882, "learning_rate": 9.656208973507939e-06, "loss": 0.2226, "step": 1077 }, { "epoch": 0.15, "grad_norm": 0.9398344652364614, "learning_rate": 9.655412754216714e-06, "loss": 0.2332, "step": 1078 }, { "epoch": 0.15, "grad_norm": 0.7618204014170626, "learning_rate": 9.654615646874688e-06, "loss": 0.1455, "step": 1079 }, { "epoch": 0.15, "grad_norm": 1.3334148024938561, "learning_rate": 9.653817651633917e-06, "loss": 0.2631, "step": 1080 }, { "epoch": 0.15, "grad_norm": 0.8275839503860144, "learning_rate": 9.65301876864662e-06, "loss": 0.146, "step": 1081 }, { "epoch": 0.15, "grad_norm": 1.0333670664282937, "learning_rate": 9.652218998065193e-06, "loss": 0.1933, "step": 1082 }, { "epoch": 0.15, "grad_norm": 0.953891702214587, "learning_rate": 9.651418340042195e-06, "loss": 0.1859, "step": 1083 }, { "epoch": 0.15, "grad_norm": 0.9863329055686668, "learning_rate": 9.650616794730358e-06, "loss": 0.1704, "step": 1084 }, { "epoch": 0.15, "grad_norm": 1.4697676330241034, "learning_rate": 9.649814362282582e-06, "loss": 0.287, "step": 1085 }, { "epoch": 0.15, "grad_norm": 0.8057640066703509, "learning_rate": 9.649011042851934e-06, "loss": 0.1682, "step": 1086 }, { "epoch": 0.15, "grad_norm": 1.1945104427606248, "learning_rate": 9.648206836591658e-06, "loss": 0.2231, "step": 1087 }, { "epoch": 0.15, "grad_norm": 1.181247126916834, "learning_rate": 9.647401743655156e-06, "loss": 0.2209, "step": 1088 }, { "epoch": 0.15, "grad_norm": 0.8948900479012942, "learning_rate": 9.646595764196008e-06, "loss": 0.1786, "step": 1089 }, { "epoch": 0.15, "grad_norm": 1.461964396200488, "learning_rate": 9.645788898367959e-06, "loss": 0.2838, "step": 1090 }, { "epoch": 0.15, "grad_norm": 1.3812613471671893, "learning_rate": 9.644981146324923e-06, "loss": 0.2737, "step": 1091 }, { "epoch": 0.15, "grad_norm": 1.0736571783090652, "learning_rate": 9.644172508220986e-06, "loss": 0.2156, "step": 1092 }, { "epoch": 0.15, "grad_norm": 1.02370578463875, "learning_rate": 9.6433629842104e-06, "loss": 0.2293, "step": 1093 }, { "epoch": 0.15, "grad_norm": 1.2484897476828807, "learning_rate": 9.642552574447587e-06, "loss": 0.239, "step": 1094 }, { "epoch": 0.15, "grad_norm": 1.363928508165632, "learning_rate": 9.641741279087138e-06, "loss": 0.2519, "step": 1095 }, { "epoch": 0.15, "grad_norm": 1.1717482802607935, "learning_rate": 9.640929098283813e-06, "loss": 0.2748, "step": 1096 }, { "epoch": 0.15, "grad_norm": 1.2624343923250165, "learning_rate": 9.640116032192542e-06, "loss": 0.2806, "step": 1097 }, { "epoch": 0.15, "grad_norm": 1.107929779089859, "learning_rate": 9.639302080968421e-06, "loss": 0.2122, "step": 1098 }, { "epoch": 0.15, "grad_norm": 1.0673894770115837, "learning_rate": 9.638487244766718e-06, "loss": 0.1841, "step": 1099 }, { "epoch": 0.15, "grad_norm": 1.331812314870078, "learning_rate": 9.637671523742867e-06, "loss": 0.2589, "step": 1100 }, { "epoch": 0.15, "grad_norm": 1.0740935287024393, "learning_rate": 9.636854918052474e-06, "loss": 0.1818, "step": 1101 }, { "epoch": 0.15, "grad_norm": 1.1409708495515327, "learning_rate": 9.63603742785131e-06, "loss": 0.2251, "step": 1102 }, { "epoch": 0.15, "grad_norm": 0.7849481191889477, "learning_rate": 9.635219053295317e-06, "loss": 0.1437, "step": 1103 }, { "epoch": 0.15, "grad_norm": 0.8381744766641654, "learning_rate": 9.634399794540607e-06, "loss": 0.1993, "step": 1104 }, { "epoch": 0.15, "grad_norm": 1.2585710683605937, "learning_rate": 9.633579651743456e-06, "loss": 0.2095, "step": 1105 }, { "epoch": 0.15, "grad_norm": 0.9295452518760953, "learning_rate": 9.632758625060317e-06, "loss": 0.1973, "step": 1106 }, { "epoch": 0.15, "grad_norm": 1.3496689935773964, "learning_rate": 9.6319367146478e-06, "loss": 0.1945, "step": 1107 }, { "epoch": 0.15, "grad_norm": 0.9370847278454489, "learning_rate": 9.631113920662693e-06, "loss": 0.1424, "step": 1108 }, { "epoch": 0.15, "grad_norm": 1.0077328210611567, "learning_rate": 9.630290243261951e-06, "loss": 0.1674, "step": 1109 }, { "epoch": 0.15, "grad_norm": 1.22663873410408, "learning_rate": 9.629465682602692e-06, "loss": 0.2463, "step": 1110 }, { "epoch": 0.15, "grad_norm": 1.2190325871713967, "learning_rate": 9.62864023884221e-06, "loss": 0.2225, "step": 1111 }, { "epoch": 0.15, "grad_norm": 1.2521482160669737, "learning_rate": 9.627813912137961e-06, "loss": 0.2363, "step": 1112 }, { "epoch": 0.15, "grad_norm": 1.007657500532078, "learning_rate": 9.626986702647573e-06, "loss": 0.1939, "step": 1113 }, { "epoch": 0.15, "grad_norm": 1.507306946492507, "learning_rate": 9.626158610528844e-06, "loss": 0.2493, "step": 1114 }, { "epoch": 0.15, "grad_norm": 1.265076730608744, "learning_rate": 9.625329635939736e-06, "loss": 0.1873, "step": 1115 }, { "epoch": 0.15, "grad_norm": 1.0423688261066402, "learning_rate": 9.624499779038382e-06, "loss": 0.1613, "step": 1116 }, { "epoch": 0.15, "grad_norm": 1.3081869604580505, "learning_rate": 9.623669039983082e-06, "loss": 0.2334, "step": 1117 }, { "epoch": 0.15, "grad_norm": 1.0290613379143476, "learning_rate": 9.622837418932304e-06, "loss": 0.1741, "step": 1118 }, { "epoch": 0.15, "grad_norm": 1.2956490738772053, "learning_rate": 9.622004916044689e-06, "loss": 0.2339, "step": 1119 }, { "epoch": 0.15, "grad_norm": 1.122511907940771, "learning_rate": 9.621171531479038e-06, "loss": 0.2258, "step": 1120 }, { "epoch": 0.15, "grad_norm": 0.9485507819442724, "learning_rate": 9.620337265394328e-06, "loss": 0.1933, "step": 1121 }, { "epoch": 0.15, "grad_norm": 0.9187191224146155, "learning_rate": 9.6195021179497e-06, "loss": 0.1942, "step": 1122 }, { "epoch": 0.15, "grad_norm": 0.9388905660898165, "learning_rate": 9.618666089304463e-06, "loss": 0.1444, "step": 1123 }, { "epoch": 0.15, "grad_norm": 1.272403016925933, "learning_rate": 9.617829179618094e-06, "loss": 0.2142, "step": 1124 }, { "epoch": 0.15, "grad_norm": 1.2261398376817425, "learning_rate": 9.61699138905024e-06, "loss": 0.2442, "step": 1125 }, { "epoch": 0.15, "grad_norm": 0.7676726460402571, "learning_rate": 9.616152717760718e-06, "loss": 0.1406, "step": 1126 }, { "epoch": 0.15, "grad_norm": 1.1891428583031687, "learning_rate": 9.615313165909507e-06, "loss": 0.2242, "step": 1127 }, { "epoch": 0.15, "grad_norm": 1.2589626804370138, "learning_rate": 9.614472733656756e-06, "loss": 0.1748, "step": 1128 }, { "epoch": 0.15, "grad_norm": 1.2477201336789072, "learning_rate": 9.613631421162786e-06, "loss": 0.2565, "step": 1129 }, { "epoch": 0.15, "grad_norm": 1.1320053879678584, "learning_rate": 9.612789228588082e-06, "loss": 0.2435, "step": 1130 }, { "epoch": 0.15, "grad_norm": 1.2149398108545109, "learning_rate": 9.611946156093298e-06, "loss": 0.198, "step": 1131 }, { "epoch": 0.15, "grad_norm": 1.1103558749799733, "learning_rate": 9.611102203839254e-06, "loss": 0.2259, "step": 1132 }, { "epoch": 0.15, "grad_norm": 1.3114409970976793, "learning_rate": 9.610257371986939e-06, "loss": 0.1933, "step": 1133 }, { "epoch": 0.15, "grad_norm": 1.3205481625531033, "learning_rate": 9.609411660697515e-06, "loss": 0.2232, "step": 1134 }, { "epoch": 0.15, "grad_norm": 1.150517183965438, "learning_rate": 9.608565070132301e-06, "loss": 0.2067, "step": 1135 }, { "epoch": 0.15, "grad_norm": 1.2886181428742804, "learning_rate": 9.607717600452796e-06, "loss": 0.2507, "step": 1136 }, { "epoch": 0.15, "grad_norm": 1.2205286027797577, "learning_rate": 9.606869251820654e-06, "loss": 0.2149, "step": 1137 }, { "epoch": 0.15, "grad_norm": 1.5080366893023396, "learning_rate": 9.606020024397707e-06, "loss": 0.2732, "step": 1138 }, { "epoch": 0.15, "grad_norm": 1.612942114421112, "learning_rate": 9.60516991834595e-06, "loss": 0.2884, "step": 1139 }, { "epoch": 0.15, "grad_norm": 1.1736802873624017, "learning_rate": 9.604318933827546e-06, "loss": 0.2205, "step": 1140 }, { "epoch": 0.15, "grad_norm": 1.0511748412453092, "learning_rate": 9.603467071004826e-06, "loss": 0.258, "step": 1141 }, { "epoch": 0.15, "grad_norm": 0.7485823950562321, "learning_rate": 9.60261433004029e-06, "loss": 0.1764, "step": 1142 }, { "epoch": 0.15, "grad_norm": 0.830953394130942, "learning_rate": 9.601760711096601e-06, "loss": 0.1924, "step": 1143 }, { "epoch": 0.15, "grad_norm": 0.7769635942376537, "learning_rate": 9.600906214336596e-06, "loss": 0.1475, "step": 1144 }, { "epoch": 0.15, "grad_norm": 1.6024804249783293, "learning_rate": 9.600050839923274e-06, "loss": 0.2696, "step": 1145 }, { "epoch": 0.15, "grad_norm": 0.9424553875042163, "learning_rate": 9.599194588019803e-06, "loss": 0.1104, "step": 1146 }, { "epoch": 0.15, "grad_norm": 0.9136125548331003, "learning_rate": 9.598337458789518e-06, "loss": 0.2333, "step": 1147 }, { "epoch": 0.15, "grad_norm": 1.0620346451149478, "learning_rate": 9.597479452395923e-06, "loss": 0.2128, "step": 1148 }, { "epoch": 0.15, "grad_norm": 1.503369834751904, "learning_rate": 9.59662056900269e-06, "loss": 0.2896, "step": 1149 }, { "epoch": 0.16, "grad_norm": 1.1686489370250432, "learning_rate": 9.595760808773656e-06, "loss": 0.2381, "step": 1150 }, { "epoch": 0.16, "grad_norm": 1.0561995087935456, "learning_rate": 9.594900171872824e-06, "loss": 0.1653, "step": 1151 }, { "epoch": 0.16, "grad_norm": 1.2786601533029027, "learning_rate": 9.594038658464369e-06, "loss": 0.2571, "step": 1152 }, { "epoch": 0.16, "grad_norm": 1.3732019238916107, "learning_rate": 9.593176268712626e-06, "loss": 0.2827, "step": 1153 }, { "epoch": 0.16, "grad_norm": 0.7625454388022433, "learning_rate": 9.592313002782105e-06, "loss": 0.118, "step": 1154 }, { "epoch": 0.16, "grad_norm": 1.332617884504222, "learning_rate": 9.591448860837481e-06, "loss": 0.2654, "step": 1155 }, { "epoch": 0.16, "grad_norm": 1.0852506726600877, "learning_rate": 9.590583843043591e-06, "loss": 0.2346, "step": 1156 }, { "epoch": 0.16, "grad_norm": 1.1306802305922279, "learning_rate": 9.589717949565447e-06, "loss": 0.1839, "step": 1157 }, { "epoch": 0.16, "grad_norm": 1.0436838721985429, "learning_rate": 9.588851180568219e-06, "loss": 0.1798, "step": 1158 }, { "epoch": 0.16, "grad_norm": 1.1606320148538645, "learning_rate": 9.587983536217254e-06, "loss": 0.2446, "step": 1159 }, { "epoch": 0.16, "grad_norm": 0.8910815432364031, "learning_rate": 9.587115016678056e-06, "loss": 0.1616, "step": 1160 }, { "epoch": 0.16, "grad_norm": 1.2409080482740522, "learning_rate": 9.586245622116304e-06, "loss": 0.2381, "step": 1161 }, { "epoch": 0.16, "grad_norm": 1.3754486225804794, "learning_rate": 9.585375352697841e-06, "loss": 0.2732, "step": 1162 }, { "epoch": 0.16, "grad_norm": 0.8874894145105835, "learning_rate": 9.584504208588676e-06, "loss": 0.1986, "step": 1163 }, { "epoch": 0.16, "grad_norm": 1.2904501063591796, "learning_rate": 9.583632189954984e-06, "loss": 0.1995, "step": 1164 }, { "epoch": 0.16, "grad_norm": 1.0841422014660527, "learning_rate": 9.58275929696311e-06, "loss": 0.2076, "step": 1165 }, { "epoch": 0.16, "grad_norm": 1.2366742455912014, "learning_rate": 9.581885529779565e-06, "loss": 0.1946, "step": 1166 }, { "epoch": 0.16, "grad_norm": 1.329732326807086, "learning_rate": 9.581010888571022e-06, "loss": 0.2725, "step": 1167 }, { "epoch": 0.16, "grad_norm": 0.9193609051332604, "learning_rate": 9.580135373504329e-06, "loss": 0.2018, "step": 1168 }, { "epoch": 0.16, "grad_norm": 1.2031463627541326, "learning_rate": 9.579258984746493e-06, "loss": 0.2622, "step": 1169 }, { "epoch": 0.16, "grad_norm": 0.9628977623325746, "learning_rate": 9.578381722464693e-06, "loss": 0.2112, "step": 1170 }, { "epoch": 0.16, "grad_norm": 1.1827385325824264, "learning_rate": 9.577503586826273e-06, "loss": 0.2019, "step": 1171 }, { "epoch": 0.16, "grad_norm": 0.988483598878411, "learning_rate": 9.576624577998742e-06, "loss": 0.2126, "step": 1172 }, { "epoch": 0.16, "grad_norm": 1.2009479391157616, "learning_rate": 9.575744696149777e-06, "loss": 0.1792, "step": 1173 }, { "epoch": 0.16, "grad_norm": 0.8911087960323849, "learning_rate": 9.57486394144722e-06, "loss": 0.0925, "step": 1174 }, { "epoch": 0.16, "grad_norm": 0.8841993379951325, "learning_rate": 9.573982314059082e-06, "loss": 0.161, "step": 1175 }, { "epoch": 0.16, "grad_norm": 1.195730955197461, "learning_rate": 9.573099814153541e-06, "loss": 0.1889, "step": 1176 }, { "epoch": 0.16, "grad_norm": 1.2660932986371676, "learning_rate": 9.572216441898937e-06, "loss": 0.241, "step": 1177 }, { "epoch": 0.16, "grad_norm": 0.8975349864219017, "learning_rate": 9.57133219746378e-06, "loss": 0.2537, "step": 1178 }, { "epoch": 0.16, "grad_norm": 1.4019651766461163, "learning_rate": 9.570447081016746e-06, "loss": 0.246, "step": 1179 }, { "epoch": 0.16, "grad_norm": 1.332356412132944, "learning_rate": 9.569561092726676e-06, "loss": 0.2303, "step": 1180 }, { "epoch": 0.16, "grad_norm": 1.2350372706886266, "learning_rate": 9.568674232762578e-06, "loss": 0.1765, "step": 1181 }, { "epoch": 0.16, "grad_norm": 1.1132395081821318, "learning_rate": 9.567786501293628e-06, "loss": 0.1933, "step": 1182 }, { "epoch": 0.16, "grad_norm": 1.1112037739761818, "learning_rate": 9.566897898489165e-06, "loss": 0.1725, "step": 1183 }, { "epoch": 0.16, "grad_norm": 1.1456869152498839, "learning_rate": 9.566008424518696e-06, "loss": 0.2347, "step": 1184 }, { "epoch": 0.16, "grad_norm": 1.3266776695053482, "learning_rate": 9.565118079551896e-06, "loss": 0.2207, "step": 1185 }, { "epoch": 0.16, "grad_norm": 1.4163991496145405, "learning_rate": 9.5642268637586e-06, "loss": 0.2735, "step": 1186 }, { "epoch": 0.16, "grad_norm": 0.8547791656359769, "learning_rate": 9.563334777308819e-06, "loss": 0.2054, "step": 1187 }, { "epoch": 0.16, "grad_norm": 1.2827578768680523, "learning_rate": 9.56244182037272e-06, "loss": 0.1815, "step": 1188 }, { "epoch": 0.16, "grad_norm": 1.3105318694060875, "learning_rate": 9.561547993120642e-06, "loss": 0.2212, "step": 1189 }, { "epoch": 0.16, "grad_norm": 1.2758431858877899, "learning_rate": 9.560653295723088e-06, "loss": 0.2979, "step": 1190 }, { "epoch": 0.16, "grad_norm": 0.9732365465609929, "learning_rate": 9.559757728350728e-06, "loss": 0.1916, "step": 1191 }, { "epoch": 0.16, "grad_norm": 0.916624608462773, "learning_rate": 9.558861291174397e-06, "loss": 0.1518, "step": 1192 }, { "epoch": 0.16, "grad_norm": 1.1890458142133697, "learning_rate": 9.557963984365097e-06, "loss": 0.2023, "step": 1193 }, { "epoch": 0.16, "grad_norm": 1.1382104945799958, "learning_rate": 9.557065808093994e-06, "loss": 0.2461, "step": 1194 }, { "epoch": 0.16, "grad_norm": 1.2309075233911893, "learning_rate": 9.556166762532422e-06, "loss": 0.2408, "step": 1195 }, { "epoch": 0.16, "grad_norm": 0.93238723906668, "learning_rate": 9.555266847851881e-06, "loss": 0.1901, "step": 1196 }, { "epoch": 0.16, "grad_norm": 1.118775341971752, "learning_rate": 9.554366064224032e-06, "loss": 0.1738, "step": 1197 }, { "epoch": 0.16, "grad_norm": 0.9333945866797656, "learning_rate": 9.55346441182071e-06, "loss": 0.1787, "step": 1198 }, { "epoch": 0.16, "grad_norm": 0.9603617560606039, "learning_rate": 9.552561890813907e-06, "loss": 0.1439, "step": 1199 }, { "epoch": 0.16, "grad_norm": 1.301470499826816, "learning_rate": 9.551658501375787e-06, "loss": 0.2385, "step": 1200 }, { "epoch": 0.16, "grad_norm": 1.3238337270269869, "learning_rate": 9.55075424367868e-06, "loss": 0.2216, "step": 1201 }, { "epoch": 0.16, "grad_norm": 0.9787082848257809, "learning_rate": 9.549849117895074e-06, "loss": 0.2062, "step": 1202 }, { "epoch": 0.16, "grad_norm": 1.0397382637455896, "learning_rate": 9.54894312419763e-06, "loss": 0.2081, "step": 1203 }, { "epoch": 0.16, "grad_norm": 1.256640869578597, "learning_rate": 9.548036262759172e-06, "loss": 0.2186, "step": 1204 }, { "epoch": 0.16, "grad_norm": 1.1060298605596957, "learning_rate": 9.547128533752693e-06, "loss": 0.1961, "step": 1205 }, { "epoch": 0.16, "grad_norm": 1.1137539924866184, "learning_rate": 9.546219937351344e-06, "loss": 0.1655, "step": 1206 }, { "epoch": 0.16, "grad_norm": 1.1129511486675097, "learning_rate": 9.545310473728447e-06, "loss": 0.2434, "step": 1207 }, { "epoch": 0.16, "grad_norm": 1.0078928606108777, "learning_rate": 9.54440014305749e-06, "loss": 0.1507, "step": 1208 }, { "epoch": 0.16, "grad_norm": 0.7866304939962052, "learning_rate": 9.543488945512121e-06, "loss": 0.2085, "step": 1209 }, { "epoch": 0.16, "grad_norm": 1.2074730519554566, "learning_rate": 9.542576881266162e-06, "loss": 0.205, "step": 1210 }, { "epoch": 0.16, "grad_norm": 0.8731586874153628, "learning_rate": 9.541663950493591e-06, "loss": 0.1499, "step": 1211 }, { "epoch": 0.16, "grad_norm": 1.4535340300532613, "learning_rate": 9.540750153368556e-06, "loss": 0.2777, "step": 1212 }, { "epoch": 0.16, "grad_norm": 1.1220003768123943, "learning_rate": 9.539835490065373e-06, "loss": 0.1722, "step": 1213 }, { "epoch": 0.16, "grad_norm": 0.9293172636826897, "learning_rate": 9.538919960758518e-06, "loss": 0.2165, "step": 1214 }, { "epoch": 0.16, "grad_norm": 0.717767948535582, "learning_rate": 9.538003565622631e-06, "loss": 0.1464, "step": 1215 }, { "epoch": 0.16, "grad_norm": 1.2406611386746451, "learning_rate": 9.537086304832527e-06, "loss": 0.1873, "step": 1216 }, { "epoch": 0.16, "grad_norm": 1.2156588189106918, "learning_rate": 9.536168178563174e-06, "loss": 0.1893, "step": 1217 }, { "epoch": 0.16, "grad_norm": 0.8670583141941463, "learning_rate": 9.535249186989715e-06, "loss": 0.1505, "step": 1218 }, { "epoch": 0.16, "grad_norm": 1.2456195580812153, "learning_rate": 9.53432933028745e-06, "loss": 0.2158, "step": 1219 }, { "epoch": 0.16, "grad_norm": 1.2614697455449384, "learning_rate": 9.533408608631851e-06, "loss": 0.2438, "step": 1220 }, { "epoch": 0.16, "grad_norm": 1.3493355674377996, "learning_rate": 9.53248702219855e-06, "loss": 0.2278, "step": 1221 }, { "epoch": 0.16, "grad_norm": 0.5957818940648346, "learning_rate": 9.531564571163346e-06, "loss": 0.0824, "step": 1222 }, { "epoch": 0.16, "grad_norm": 1.0804789206940453, "learning_rate": 9.530641255702201e-06, "loss": 0.2029, "step": 1223 }, { "epoch": 0.17, "grad_norm": 0.8469354983254268, "learning_rate": 9.529717075991247e-06, "loss": 0.1601, "step": 1224 }, { "epoch": 0.17, "grad_norm": 1.0113073195165758, "learning_rate": 9.528792032206773e-06, "loss": 0.1764, "step": 1225 }, { "epoch": 0.17, "grad_norm": 1.267525467298154, "learning_rate": 9.527866124525242e-06, "loss": 0.2751, "step": 1226 }, { "epoch": 0.17, "grad_norm": 1.2334409523425514, "learning_rate": 9.526939353123274e-06, "loss": 0.1773, "step": 1227 }, { "epoch": 0.17, "grad_norm": 1.3631160607982924, "learning_rate": 9.526011718177657e-06, "loss": 0.2335, "step": 1228 }, { "epoch": 0.17, "grad_norm": 0.8135715870974289, "learning_rate": 9.525083219865345e-06, "loss": 0.239, "step": 1229 }, { "epoch": 0.17, "grad_norm": 1.2681547933166373, "learning_rate": 9.524153858363452e-06, "loss": 0.2378, "step": 1230 }, { "epoch": 0.17, "grad_norm": 1.1566594306675793, "learning_rate": 9.523223633849264e-06, "loss": 0.2192, "step": 1231 }, { "epoch": 0.17, "grad_norm": 0.9816764184940558, "learning_rate": 9.522292546500225e-06, "loss": 0.1815, "step": 1232 }, { "epoch": 0.17, "grad_norm": 1.4757155973825244, "learning_rate": 9.521360596493946e-06, "loss": 0.2151, "step": 1233 }, { "epoch": 0.17, "grad_norm": 1.0479133066838784, "learning_rate": 9.520427784008206e-06, "loss": 0.1298, "step": 1234 }, { "epoch": 0.17, "grad_norm": 1.2207224037624997, "learning_rate": 9.519494109220939e-06, "loss": 0.2178, "step": 1235 }, { "epoch": 0.17, "grad_norm": 1.1355437706837639, "learning_rate": 9.518559572310256e-06, "loss": 0.2018, "step": 1236 }, { "epoch": 0.17, "grad_norm": 1.226501876774218, "learning_rate": 9.51762417345442e-06, "loss": 0.2836, "step": 1237 }, { "epoch": 0.17, "grad_norm": 1.2589881509290475, "learning_rate": 9.51668791283187e-06, "loss": 0.2633, "step": 1238 }, { "epoch": 0.17, "grad_norm": 1.2047667148502224, "learning_rate": 9.515750790621201e-06, "loss": 0.1603, "step": 1239 }, { "epoch": 0.17, "grad_norm": 1.0600494264355125, "learning_rate": 9.514812807001174e-06, "loss": 0.2492, "step": 1240 }, { "epoch": 0.17, "grad_norm": 1.1604624262350882, "learning_rate": 9.51387396215072e-06, "loss": 0.248, "step": 1241 }, { "epoch": 0.17, "grad_norm": 1.1132782973380941, "learning_rate": 9.512934256248927e-06, "loss": 0.1866, "step": 1242 }, { "epoch": 0.17, "grad_norm": 1.3643811330056275, "learning_rate": 9.51199368947505e-06, "loss": 0.2322, "step": 1243 }, { "epoch": 0.17, "grad_norm": 1.0023400245131475, "learning_rate": 9.511052262008507e-06, "loss": 0.1354, "step": 1244 }, { "epoch": 0.17, "grad_norm": 1.2171583129300434, "learning_rate": 9.510109974028885e-06, "loss": 0.1706, "step": 1245 }, { "epoch": 0.17, "grad_norm": 1.1418114862171553, "learning_rate": 9.509166825715929e-06, "loss": 0.2676, "step": 1246 }, { "epoch": 0.17, "grad_norm": 1.4037161666347713, "learning_rate": 9.508222817249554e-06, "loss": 0.2704, "step": 1247 }, { "epoch": 0.17, "grad_norm": 1.1212998726399668, "learning_rate": 9.507277948809833e-06, "loss": 0.2061, "step": 1248 }, { "epoch": 0.17, "grad_norm": 1.5139757886853946, "learning_rate": 9.506332220577005e-06, "loss": 0.3237, "step": 1249 }, { "epoch": 0.17, "grad_norm": 1.4252448651094596, "learning_rate": 9.505385632731477e-06, "loss": 0.2565, "step": 1250 }, { "epoch": 0.17, "grad_norm": 1.188770774749532, "learning_rate": 9.504438185453812e-06, "loss": 0.2199, "step": 1251 }, { "epoch": 0.17, "grad_norm": 1.246840475938388, "learning_rate": 9.503489878924749e-06, "loss": 0.2475, "step": 1252 }, { "epoch": 0.17, "grad_norm": 1.258458838261731, "learning_rate": 9.502540713325178e-06, "loss": 0.2013, "step": 1253 }, { "epoch": 0.17, "grad_norm": 1.1803411937495225, "learning_rate": 9.501590688836161e-06, "loss": 0.2006, "step": 1254 }, { "epoch": 0.17, "grad_norm": 1.074117413298923, "learning_rate": 9.50063980563892e-06, "loss": 0.2209, "step": 1255 }, { "epoch": 0.17, "grad_norm": 1.0576948345794406, "learning_rate": 9.499688063914845e-06, "loss": 0.189, "step": 1256 }, { "epoch": 0.17, "grad_norm": 1.366206019574188, "learning_rate": 9.498735463845486e-06, "loss": 0.2656, "step": 1257 }, { "epoch": 0.17, "grad_norm": 0.8367510786001132, "learning_rate": 9.497782005612555e-06, "loss": 0.1821, "step": 1258 }, { "epoch": 0.17, "grad_norm": 1.0547450207674944, "learning_rate": 9.496827689397933e-06, "loss": 0.1891, "step": 1259 }, { "epoch": 0.17, "grad_norm": 1.3281487196663453, "learning_rate": 9.495872515383661e-06, "loss": 0.22, "step": 1260 }, { "epoch": 0.17, "grad_norm": 1.1931805709169878, "learning_rate": 9.494916483751948e-06, "loss": 0.2292, "step": 1261 }, { "epoch": 0.17, "grad_norm": 1.190429146867582, "learning_rate": 9.493959594685158e-06, "loss": 0.216, "step": 1262 }, { "epoch": 0.17, "grad_norm": 1.1641512939998748, "learning_rate": 9.493001848365828e-06, "loss": 0.2395, "step": 1263 }, { "epoch": 0.17, "grad_norm": 0.8590596009316729, "learning_rate": 9.492043244976652e-06, "loss": 0.1347, "step": 1264 }, { "epoch": 0.17, "grad_norm": 1.396173402516045, "learning_rate": 9.491083784700494e-06, "loss": 0.2496, "step": 1265 }, { "epoch": 0.17, "grad_norm": 1.071488111153972, "learning_rate": 9.49012346772037e-06, "loss": 0.1833, "step": 1266 }, { "epoch": 0.17, "grad_norm": 0.7557918296621187, "learning_rate": 9.489162294219476e-06, "loss": 0.1276, "step": 1267 }, { "epoch": 0.17, "grad_norm": 1.3201148710200241, "learning_rate": 9.488200264381155e-06, "loss": 0.2503, "step": 1268 }, { "epoch": 0.17, "grad_norm": 1.1817747437793256, "learning_rate": 9.487237378388924e-06, "loss": 0.2193, "step": 1269 }, { "epoch": 0.17, "grad_norm": 1.465753168830945, "learning_rate": 9.486273636426459e-06, "loss": 0.2513, "step": 1270 }, { "epoch": 0.17, "grad_norm": 1.2888153592671217, "learning_rate": 9.4853090386776e-06, "loss": 0.1965, "step": 1271 }, { "epoch": 0.17, "grad_norm": 1.4262373215542445, "learning_rate": 9.484343585326349e-06, "loss": 0.2728, "step": 1272 }, { "epoch": 0.17, "grad_norm": 1.1838141194723395, "learning_rate": 9.483377276556876e-06, "loss": 0.188, "step": 1273 }, { "epoch": 0.17, "grad_norm": 1.0814583714173214, "learning_rate": 9.48241011255351e-06, "loss": 0.1941, "step": 1274 }, { "epoch": 0.17, "grad_norm": 1.3408041244275855, "learning_rate": 9.481442093500738e-06, "loss": 0.2715, "step": 1275 }, { "epoch": 0.17, "grad_norm": 1.183992329860293, "learning_rate": 9.480473219583227e-06, "loss": 0.1776, "step": 1276 }, { "epoch": 0.17, "grad_norm": 1.2227876370853041, "learning_rate": 9.479503490985786e-06, "loss": 0.1952, "step": 1277 }, { "epoch": 0.17, "grad_norm": 1.0745458675231347, "learning_rate": 9.478532907893403e-06, "loss": 0.1534, "step": 1278 }, { "epoch": 0.17, "grad_norm": 1.0444229169451351, "learning_rate": 9.47756147049122e-06, "loss": 0.1527, "step": 1279 }, { "epoch": 0.17, "grad_norm": 1.0378638115433003, "learning_rate": 9.476589178964548e-06, "loss": 0.1469, "step": 1280 }, { "epoch": 0.17, "grad_norm": 1.1254333526734879, "learning_rate": 9.475616033498855e-06, "loss": 0.2028, "step": 1281 }, { "epoch": 0.17, "grad_norm": 1.3880828240974858, "learning_rate": 9.474642034279778e-06, "loss": 0.2649, "step": 1282 }, { "epoch": 0.17, "grad_norm": 0.932313714615273, "learning_rate": 9.473667181493111e-06, "loss": 0.221, "step": 1283 }, { "epoch": 0.17, "grad_norm": 1.054437678899713, "learning_rate": 9.472691475324816e-06, "loss": 0.2174, "step": 1284 }, { "epoch": 0.17, "grad_norm": 1.1197854244450405, "learning_rate": 9.471714915961015e-06, "loss": 0.192, "step": 1285 }, { "epoch": 0.17, "grad_norm": 1.1231208230220715, "learning_rate": 9.47073750358799e-06, "loss": 0.2048, "step": 1286 }, { "epoch": 0.17, "grad_norm": 0.7640312112234083, "learning_rate": 9.469759238392192e-06, "loss": 0.156, "step": 1287 }, { "epoch": 0.17, "grad_norm": 1.1534497251578217, "learning_rate": 9.46878012056023e-06, "loss": 0.206, "step": 1288 }, { "epoch": 0.17, "grad_norm": 0.9763029554570702, "learning_rate": 9.46780015027888e-06, "loss": 0.194, "step": 1289 }, { "epoch": 0.17, "grad_norm": 1.1772949888143258, "learning_rate": 9.466819327735076e-06, "loss": 0.255, "step": 1290 }, { "epoch": 0.17, "grad_norm": 1.2895104874750818, "learning_rate": 9.465837653115916e-06, "loss": 0.2381, "step": 1291 }, { "epoch": 0.17, "grad_norm": 0.9183365522803708, "learning_rate": 9.46485512660866e-06, "loss": 0.1361, "step": 1292 }, { "epoch": 0.17, "grad_norm": 1.4469690557016106, "learning_rate": 9.463871748400734e-06, "loss": 0.2646, "step": 1293 }, { "epoch": 0.17, "grad_norm": 0.9987004330807961, "learning_rate": 9.462887518679722e-06, "loss": 0.2046, "step": 1294 }, { "epoch": 0.17, "grad_norm": 1.000827125202876, "learning_rate": 9.461902437633374e-06, "loss": 0.1816, "step": 1295 }, { "epoch": 0.17, "grad_norm": 1.0006403762610545, "learning_rate": 9.460916505449598e-06, "loss": 0.2129, "step": 1296 }, { "epoch": 0.17, "grad_norm": 1.3623446086305673, "learning_rate": 9.45992972231647e-06, "loss": 0.1984, "step": 1297 }, { "epoch": 0.18, "grad_norm": 1.0663855119826988, "learning_rate": 9.458942088422223e-06, "loss": 0.215, "step": 1298 }, { "epoch": 0.18, "grad_norm": 1.1908652701590956, "learning_rate": 9.457953603955257e-06, "loss": 0.2188, "step": 1299 }, { "epoch": 0.18, "grad_norm": 1.2291552627243827, "learning_rate": 9.45696426910413e-06, "loss": 0.2266, "step": 1300 }, { "epoch": 0.18, "grad_norm": 1.0449755274051664, "learning_rate": 9.455974084057564e-06, "loss": 0.1969, "step": 1301 }, { "epoch": 0.18, "grad_norm": 1.2749205153211525, "learning_rate": 9.454983049004448e-06, "loss": 0.2532, "step": 1302 }, { "epoch": 0.18, "grad_norm": 1.312484380921904, "learning_rate": 9.45399116413382e-06, "loss": 0.2761, "step": 1303 }, { "epoch": 0.18, "grad_norm": 0.965773501644433, "learning_rate": 9.452998429634896e-06, "loss": 0.2191, "step": 1304 }, { "epoch": 0.18, "grad_norm": 1.0342458562020738, "learning_rate": 9.452004845697045e-06, "loss": 0.1886, "step": 1305 }, { "epoch": 0.18, "grad_norm": 1.0778722268713659, "learning_rate": 9.451010412509796e-06, "loss": 0.2236, "step": 1306 }, { "epoch": 0.18, "grad_norm": 1.1220508514640701, "learning_rate": 9.450015130262848e-06, "loss": 0.2301, "step": 1307 }, { "epoch": 0.18, "grad_norm": 1.3077897430058296, "learning_rate": 9.449018999146058e-06, "loss": 0.2215, "step": 1308 }, { "epoch": 0.18, "grad_norm": 1.017601901023007, "learning_rate": 9.448022019349441e-06, "loss": 0.1711, "step": 1309 }, { "epoch": 0.18, "grad_norm": 1.2303985657369778, "learning_rate": 9.447024191063179e-06, "loss": 0.2065, "step": 1310 }, { "epoch": 0.18, "grad_norm": 1.065178304903982, "learning_rate": 9.446025514477615e-06, "loss": 0.1802, "step": 1311 }, { "epoch": 0.18, "grad_norm": 0.9152899306754386, "learning_rate": 9.445025989783254e-06, "loss": 0.222, "step": 1312 }, { "epoch": 0.18, "grad_norm": 1.2774730013156146, "learning_rate": 9.44402561717076e-06, "loss": 0.2308, "step": 1313 }, { "epoch": 0.18, "grad_norm": 1.196692227343428, "learning_rate": 9.443024396830961e-06, "loss": 0.2091, "step": 1314 }, { "epoch": 0.18, "grad_norm": 1.1471203990063439, "learning_rate": 9.442022328954848e-06, "loss": 0.2202, "step": 1315 }, { "epoch": 0.18, "grad_norm": 1.46989321066294, "learning_rate": 9.44101941373357e-06, "loss": 0.2879, "step": 1316 }, { "epoch": 0.18, "grad_norm": 0.7453370771669499, "learning_rate": 9.44001565135844e-06, "loss": 0.1792, "step": 1317 }, { "epoch": 0.18, "grad_norm": 1.1165493510540023, "learning_rate": 9.439011042020934e-06, "loss": 0.1985, "step": 1318 }, { "epoch": 0.18, "grad_norm": 0.9617976217890614, "learning_rate": 9.438005585912687e-06, "loss": 0.1939, "step": 1319 }, { "epoch": 0.18, "grad_norm": 0.9814035128469566, "learning_rate": 9.436999283225497e-06, "loss": 0.1772, "step": 1320 }, { "epoch": 0.18, "grad_norm": 1.3091382084549652, "learning_rate": 9.435992134151319e-06, "loss": 0.1855, "step": 1321 }, { "epoch": 0.18, "grad_norm": 1.047718291210123, "learning_rate": 9.43498413888228e-06, "loss": 0.2247, "step": 1322 }, { "epoch": 0.18, "grad_norm": 1.1954986300364343, "learning_rate": 9.433975297610657e-06, "loss": 0.2062, "step": 1323 }, { "epoch": 0.18, "grad_norm": 0.7724632292834033, "learning_rate": 9.432965610528896e-06, "loss": 0.16, "step": 1324 }, { "epoch": 0.18, "grad_norm": 0.9495153348509582, "learning_rate": 9.431955077829596e-06, "loss": 0.2001, "step": 1325 }, { "epoch": 0.18, "grad_norm": 0.8530716164033434, "learning_rate": 9.430943699705531e-06, "loss": 0.14, "step": 1326 }, { "epoch": 0.18, "grad_norm": 1.160185288576289, "learning_rate": 9.429931476349621e-06, "loss": 0.2219, "step": 1327 }, { "epoch": 0.18, "grad_norm": 1.044572332259646, "learning_rate": 9.428918407954959e-06, "loss": 0.1698, "step": 1328 }, { "epoch": 0.18, "grad_norm": 1.3509907544400608, "learning_rate": 9.427904494714791e-06, "loss": 0.222, "step": 1329 }, { "epoch": 0.18, "grad_norm": 0.9826549139048116, "learning_rate": 9.42688973682253e-06, "loss": 0.2254, "step": 1330 }, { "epoch": 0.18, "grad_norm": 1.2831019090207385, "learning_rate": 9.425874134471748e-06, "loss": 0.2337, "step": 1331 }, { "epoch": 0.18, "grad_norm": 0.7867169115187318, "learning_rate": 9.424857687856177e-06, "loss": 0.2099, "step": 1332 }, { "epoch": 0.18, "grad_norm": 0.9799146431161838, "learning_rate": 9.423840397169711e-06, "loss": 0.2259, "step": 1333 }, { "epoch": 0.18, "grad_norm": 1.2401579544574053, "learning_rate": 9.422822262606403e-06, "loss": 0.2345, "step": 1334 }, { "epoch": 0.18, "grad_norm": 1.2175589818755919, "learning_rate": 9.421803284360472e-06, "loss": 0.2124, "step": 1335 }, { "epoch": 0.18, "grad_norm": 1.2379111907003066, "learning_rate": 9.420783462626294e-06, "loss": 0.2068, "step": 1336 }, { "epoch": 0.18, "grad_norm": 1.1213833944311444, "learning_rate": 9.419762797598404e-06, "loss": 0.2174, "step": 1337 }, { "epoch": 0.18, "grad_norm": 0.8353186280369437, "learning_rate": 9.418741289471505e-06, "loss": 0.1776, "step": 1338 }, { "epoch": 0.18, "grad_norm": 1.1434792743099975, "learning_rate": 9.417718938440454e-06, "loss": 0.1929, "step": 1339 }, { "epoch": 0.18, "grad_norm": 0.898059292139711, "learning_rate": 9.41669574470027e-06, "loss": 0.2039, "step": 1340 }, { "epoch": 0.18, "grad_norm": 1.067415795889318, "learning_rate": 9.415671708446139e-06, "loss": 0.2127, "step": 1341 }, { "epoch": 0.18, "grad_norm": 0.9298681812762737, "learning_rate": 9.414646829873397e-06, "loss": 0.1685, "step": 1342 }, { "epoch": 0.18, "grad_norm": 1.2274297835645902, "learning_rate": 9.41362110917755e-06, "loss": 0.2569, "step": 1343 }, { "epoch": 0.18, "grad_norm": 1.0688644385678754, "learning_rate": 9.412594546554257e-06, "loss": 0.2064, "step": 1344 }, { "epoch": 0.18, "grad_norm": 0.9456053379909806, "learning_rate": 9.411567142199345e-06, "loss": 0.1788, "step": 1345 }, { "epoch": 0.18, "grad_norm": 1.0480112315953471, "learning_rate": 9.410538896308799e-06, "loss": 0.1492, "step": 1346 }, { "epoch": 0.18, "grad_norm": 1.0458523819267527, "learning_rate": 9.409509809078761e-06, "loss": 0.1983, "step": 1347 }, { "epoch": 0.18, "grad_norm": 0.6531864764107355, "learning_rate": 9.408479880705538e-06, "loss": 0.1444, "step": 1348 }, { "epoch": 0.18, "grad_norm": 1.2200668029224015, "learning_rate": 9.407449111385596e-06, "loss": 0.2148, "step": 1349 }, { "epoch": 0.18, "grad_norm": 0.6683979689319439, "learning_rate": 9.406417501315559e-06, "loss": 0.1177, "step": 1350 }, { "epoch": 0.18, "grad_norm": 1.0114290606470093, "learning_rate": 9.405385050692213e-06, "loss": 0.1603, "step": 1351 }, { "epoch": 0.18, "grad_norm": 1.323446424933365, "learning_rate": 9.404351759712509e-06, "loss": 0.2489, "step": 1352 }, { "epoch": 0.18, "grad_norm": 1.1651874870976917, "learning_rate": 9.40331762857355e-06, "loss": 0.2225, "step": 1353 }, { "epoch": 0.18, "grad_norm": 1.0863994380675386, "learning_rate": 9.402282657472605e-06, "loss": 0.1835, "step": 1354 }, { "epoch": 0.18, "grad_norm": 1.265904483649397, "learning_rate": 9.401246846607101e-06, "loss": 0.1914, "step": 1355 }, { "epoch": 0.18, "grad_norm": 1.1772677785218657, "learning_rate": 9.400210196174626e-06, "loss": 0.2028, "step": 1356 }, { "epoch": 0.18, "grad_norm": 1.0479202504151102, "learning_rate": 9.39917270637293e-06, "loss": 0.2039, "step": 1357 }, { "epoch": 0.18, "grad_norm": 1.0738800219245066, "learning_rate": 9.398134377399916e-06, "loss": 0.1626, "step": 1358 }, { "epoch": 0.18, "grad_norm": 1.38679200397081, "learning_rate": 9.397095209453657e-06, "loss": 0.2529, "step": 1359 }, { "epoch": 0.18, "grad_norm": 0.8315418187087015, "learning_rate": 9.39605520273238e-06, "loss": 0.1387, "step": 1360 }, { "epoch": 0.18, "grad_norm": 1.3335374890830352, "learning_rate": 9.395014357434472e-06, "loss": 0.192, "step": 1361 }, { "epoch": 0.18, "grad_norm": 0.9926515361930728, "learning_rate": 9.393972673758482e-06, "loss": 0.1701, "step": 1362 }, { "epoch": 0.18, "grad_norm": 0.9599779216582695, "learning_rate": 9.392930151903118e-06, "loss": 0.2065, "step": 1363 }, { "epoch": 0.18, "grad_norm": 1.2312987980528967, "learning_rate": 9.391886792067247e-06, "loss": 0.2031, "step": 1364 }, { "epoch": 0.18, "grad_norm": 0.9135015068916923, "learning_rate": 9.390842594449897e-06, "loss": 0.1704, "step": 1365 }, { "epoch": 0.18, "grad_norm": 1.1146729671723443, "learning_rate": 9.389797559250257e-06, "loss": 0.2196, "step": 1366 }, { "epoch": 0.18, "grad_norm": 0.9898532899813796, "learning_rate": 9.388751686667674e-06, "loss": 0.2002, "step": 1367 }, { "epoch": 0.18, "grad_norm": 1.4150636690251337, "learning_rate": 9.387704976901654e-06, "loss": 0.2687, "step": 1368 }, { "epoch": 0.18, "grad_norm": 1.1073353375084933, "learning_rate": 9.386657430151864e-06, "loss": 0.1936, "step": 1369 }, { "epoch": 0.18, "grad_norm": 0.8192450284298489, "learning_rate": 9.385609046618132e-06, "loss": 0.1967, "step": 1370 }, { "epoch": 0.18, "grad_norm": 1.0186746820774528, "learning_rate": 9.384559826500441e-06, "loss": 0.1832, "step": 1371 }, { "epoch": 0.19, "grad_norm": 1.3577984999649868, "learning_rate": 9.38350976999894e-06, "loss": 0.261, "step": 1372 }, { "epoch": 0.19, "grad_norm": 1.1559700076029154, "learning_rate": 9.382458877313933e-06, "loss": 0.2373, "step": 1373 }, { "epoch": 0.19, "grad_norm": 1.193362364854207, "learning_rate": 9.381407148645885e-06, "loss": 0.1896, "step": 1374 }, { "epoch": 0.19, "grad_norm": 0.9417756543783571, "learning_rate": 9.380354584195419e-06, "loss": 0.1662, "step": 1375 }, { "epoch": 0.19, "grad_norm": 0.8407304748133302, "learning_rate": 9.379301184163321e-06, "loss": 0.2, "step": 1376 }, { "epoch": 0.19, "grad_norm": 1.0321581781479057, "learning_rate": 9.37824694875053e-06, "loss": 0.1887, "step": 1377 }, { "epoch": 0.19, "grad_norm": 0.9503765447870464, "learning_rate": 9.377191878158153e-06, "loss": 0.1865, "step": 1378 }, { "epoch": 0.19, "grad_norm": 0.8824765117006864, "learning_rate": 9.37613597258745e-06, "loss": 0.1717, "step": 1379 }, { "epoch": 0.19, "grad_norm": 1.2134470789987946, "learning_rate": 9.375079232239841e-06, "loss": 0.2536, "step": 1380 }, { "epoch": 0.19, "grad_norm": 1.1066167707987136, "learning_rate": 9.374021657316906e-06, "loss": 0.1451, "step": 1381 }, { "epoch": 0.19, "grad_norm": 0.7892429218553653, "learning_rate": 9.372963248020385e-06, "loss": 0.1567, "step": 1382 }, { "epoch": 0.19, "grad_norm": 0.9244167988713291, "learning_rate": 9.371904004552179e-06, "loss": 0.182, "step": 1383 }, { "epoch": 0.19, "grad_norm": 1.1247757172092325, "learning_rate": 9.370843927114342e-06, "loss": 0.2085, "step": 1384 }, { "epoch": 0.19, "grad_norm": 1.518861098449288, "learning_rate": 9.369783015909091e-06, "loss": 0.2338, "step": 1385 }, { "epoch": 0.19, "grad_norm": 0.879380615805842, "learning_rate": 9.368721271138804e-06, "loss": 0.1949, "step": 1386 }, { "epoch": 0.19, "grad_norm": 1.2165336370192013, "learning_rate": 9.367658693006017e-06, "loss": 0.2301, "step": 1387 }, { "epoch": 0.19, "grad_norm": 0.8402258784828598, "learning_rate": 9.36659528171342e-06, "loss": 0.1479, "step": 1388 }, { "epoch": 0.19, "grad_norm": 1.0787272604799072, "learning_rate": 9.365531037463866e-06, "loss": 0.2418, "step": 1389 }, { "epoch": 0.19, "grad_norm": 1.1124918775863204, "learning_rate": 9.364465960460372e-06, "loss": 0.234, "step": 1390 }, { "epoch": 0.19, "grad_norm": 1.0518881476987085, "learning_rate": 9.363400050906102e-06, "loss": 0.1993, "step": 1391 }, { "epoch": 0.19, "grad_norm": 1.1784561199017143, "learning_rate": 9.362333309004387e-06, "loss": 0.1975, "step": 1392 }, { "epoch": 0.19, "grad_norm": 1.1725914410680207, "learning_rate": 9.361265734958719e-06, "loss": 0.2364, "step": 1393 }, { "epoch": 0.19, "grad_norm": 1.0793480811987783, "learning_rate": 9.36019732897274e-06, "loss": 0.2104, "step": 1394 }, { "epoch": 0.19, "grad_norm": 1.1988204162624065, "learning_rate": 9.359128091250258e-06, "loss": 0.2395, "step": 1395 }, { "epoch": 0.19, "grad_norm": 0.9820427637522303, "learning_rate": 9.358058021995236e-06, "loss": 0.2337, "step": 1396 }, { "epoch": 0.19, "grad_norm": 1.0176308653089274, "learning_rate": 9.356987121411798e-06, "loss": 0.2087, "step": 1397 }, { "epoch": 0.19, "grad_norm": 0.9011558639008038, "learning_rate": 9.355915389704224e-06, "loss": 0.1822, "step": 1398 }, { "epoch": 0.19, "grad_norm": 0.8141417231083634, "learning_rate": 9.354842827076955e-06, "loss": 0.0955, "step": 1399 }, { "epoch": 0.19, "grad_norm": 1.285450697716775, "learning_rate": 9.35376943373459e-06, "loss": 0.2534, "step": 1400 }, { "epoch": 0.19, "grad_norm": 1.014195600701642, "learning_rate": 9.352695209881883e-06, "loss": 0.1814, "step": 1401 }, { "epoch": 0.19, "grad_norm": 1.1940259815316645, "learning_rate": 9.351620155723753e-06, "loss": 0.2302, "step": 1402 }, { "epoch": 0.19, "grad_norm": 1.098156741687081, "learning_rate": 9.35054427146527e-06, "loss": 0.2072, "step": 1403 }, { "epoch": 0.19, "grad_norm": 1.0666109952590201, "learning_rate": 9.34946755731167e-06, "loss": 0.2173, "step": 1404 }, { "epoch": 0.19, "grad_norm": 1.1533726973528506, "learning_rate": 9.34839001346834e-06, "loss": 0.1667, "step": 1405 }, { "epoch": 0.19, "grad_norm": 0.80090677438408, "learning_rate": 9.34731164014083e-06, "loss": 0.194, "step": 1406 }, { "epoch": 0.19, "grad_norm": 1.0462866326412439, "learning_rate": 9.346232437534848e-06, "loss": 0.2093, "step": 1407 }, { "epoch": 0.19, "grad_norm": 0.8836908236450278, "learning_rate": 9.345152405856257e-06, "loss": 0.1829, "step": 1408 }, { "epoch": 0.19, "grad_norm": 1.1611138336085767, "learning_rate": 9.344071545311082e-06, "loss": 0.2438, "step": 1409 }, { "epoch": 0.19, "grad_norm": 1.0376402115577898, "learning_rate": 9.342989856105502e-06, "loss": 0.1692, "step": 1410 }, { "epoch": 0.19, "grad_norm": 1.1248958809118115, "learning_rate": 9.34190733844586e-06, "loss": 0.2018, "step": 1411 }, { "epoch": 0.19, "grad_norm": 0.9432288564211504, "learning_rate": 9.340823992538647e-06, "loss": 0.1911, "step": 1412 }, { "epoch": 0.19, "grad_norm": 1.0098091514659708, "learning_rate": 9.339739818590526e-06, "loss": 0.1461, "step": 1413 }, { "epoch": 0.19, "grad_norm": 0.9916106804922664, "learning_rate": 9.338654816808306e-06, "loss": 0.155, "step": 1414 }, { "epoch": 0.19, "grad_norm": 1.1056666631855174, "learning_rate": 9.337568987398959e-06, "loss": 0.1781, "step": 1415 }, { "epoch": 0.19, "grad_norm": 1.1233792108377931, "learning_rate": 9.336482330569616e-06, "loss": 0.2121, "step": 1416 }, { "epoch": 0.19, "grad_norm": 1.117179746035687, "learning_rate": 9.335394846527559e-06, "loss": 0.213, "step": 1417 }, { "epoch": 0.19, "grad_norm": 0.8189464760452472, "learning_rate": 9.334306535480237e-06, "loss": 0.2013, "step": 1418 }, { "epoch": 0.19, "grad_norm": 1.0223864348414202, "learning_rate": 9.333217397635255e-06, "loss": 0.2242, "step": 1419 }, { "epoch": 0.19, "grad_norm": 1.4267561211107003, "learning_rate": 9.332127433200366e-06, "loss": 0.2931, "step": 1420 }, { "epoch": 0.19, "grad_norm": 0.7233540512927386, "learning_rate": 9.331036642383493e-06, "loss": 0.1328, "step": 1421 }, { "epoch": 0.19, "grad_norm": 1.105557245598516, "learning_rate": 9.329945025392711e-06, "loss": 0.201, "step": 1422 }, { "epoch": 0.19, "grad_norm": 1.032154774483685, "learning_rate": 9.328852582436252e-06, "loss": 0.1795, "step": 1423 }, { "epoch": 0.19, "grad_norm": 1.2256741970481615, "learning_rate": 9.327759313722506e-06, "loss": 0.2498, "step": 1424 }, { "epoch": 0.19, "grad_norm": 1.108223479243863, "learning_rate": 9.326665219460025e-06, "loss": 0.2346, "step": 1425 }, { "epoch": 0.19, "grad_norm": 1.1521235820543951, "learning_rate": 9.32557029985751e-06, "loss": 0.1866, "step": 1426 }, { "epoch": 0.19, "grad_norm": 1.4342270070100993, "learning_rate": 9.324474555123827e-06, "loss": 0.2495, "step": 1427 }, { "epoch": 0.19, "grad_norm": 1.1781587420026451, "learning_rate": 9.323377985467997e-06, "loss": 0.2036, "step": 1428 }, { "epoch": 0.19, "grad_norm": 1.0458754278225715, "learning_rate": 9.322280591099197e-06, "loss": 0.2225, "step": 1429 }, { "epoch": 0.19, "grad_norm": 1.2163620164959257, "learning_rate": 9.32118237222676e-06, "loss": 0.215, "step": 1430 }, { "epoch": 0.19, "grad_norm": 1.5506525727095646, "learning_rate": 9.320083329060183e-06, "loss": 0.1756, "step": 1431 }, { "epoch": 0.19, "grad_norm": 0.7336673036105852, "learning_rate": 9.318983461809112e-06, "loss": 0.1087, "step": 1432 }, { "epoch": 0.19, "grad_norm": 1.135412560313661, "learning_rate": 9.317882770683356e-06, "loss": 0.1871, "step": 1433 }, { "epoch": 0.19, "grad_norm": 1.035614041080044, "learning_rate": 9.31678125589288e-06, "loss": 0.1549, "step": 1434 }, { "epoch": 0.19, "grad_norm": 0.9467089464593578, "learning_rate": 9.315678917647802e-06, "loss": 0.1953, "step": 1435 }, { "epoch": 0.19, "grad_norm": 1.0869166707737419, "learning_rate": 9.3145757561584e-06, "loss": 0.1927, "step": 1436 }, { "epoch": 0.19, "grad_norm": 0.9029920088881618, "learning_rate": 9.313471771635115e-06, "loss": 0.1602, "step": 1437 }, { "epoch": 0.19, "grad_norm": 0.7819406102217891, "learning_rate": 9.312366964288532e-06, "loss": 0.1388, "step": 1438 }, { "epoch": 0.19, "grad_norm": 1.2064241316243431, "learning_rate": 9.311261334329407e-06, "loss": 0.2481, "step": 1439 }, { "epoch": 0.19, "grad_norm": 1.1483740809539507, "learning_rate": 9.310154881968644e-06, "loss": 0.2325, "step": 1440 }, { "epoch": 0.19, "grad_norm": 1.3956731157403215, "learning_rate": 9.309047607417303e-06, "loss": 0.2298, "step": 1441 }, { "epoch": 0.19, "grad_norm": 1.3479740024859597, "learning_rate": 9.307939510886607e-06, "loss": 0.2794, "step": 1442 }, { "epoch": 0.19, "grad_norm": 1.4005919299431968, "learning_rate": 9.306830592587932e-06, "loss": 0.292, "step": 1443 }, { "epoch": 0.19, "grad_norm": 1.0348960776233804, "learning_rate": 9.305720852732811e-06, "loss": 0.1954, "step": 1444 }, { "epoch": 0.19, "grad_norm": 1.3564956036231, "learning_rate": 9.304610291532936e-06, "loss": 0.2469, "step": 1445 }, { "epoch": 0.19, "grad_norm": 1.1481882661372314, "learning_rate": 9.303498909200152e-06, "loss": 0.2169, "step": 1446 }, { "epoch": 0.2, "grad_norm": 1.0851299227960347, "learning_rate": 9.302386705946465e-06, "loss": 0.2376, "step": 1447 }, { "epoch": 0.2, "grad_norm": 1.0103811494054087, "learning_rate": 9.301273681984032e-06, "loss": 0.222, "step": 1448 }, { "epoch": 0.2, "grad_norm": 1.3603348672481066, "learning_rate": 9.30015983752517e-06, "loss": 0.2646, "step": 1449 }, { "epoch": 0.2, "grad_norm": 1.357057166028174, "learning_rate": 9.299045172782356e-06, "loss": 0.2627, "step": 1450 }, { "epoch": 0.2, "grad_norm": 1.2032986124004126, "learning_rate": 9.297929687968216e-06, "loss": 0.2115, "step": 1451 }, { "epoch": 0.2, "grad_norm": 1.1690541548057256, "learning_rate": 9.296813383295535e-06, "loss": 0.208, "step": 1452 }, { "epoch": 0.2, "grad_norm": 1.3489356111233854, "learning_rate": 9.29569625897726e-06, "loss": 0.2267, "step": 1453 }, { "epoch": 0.2, "grad_norm": 1.1269042276837147, "learning_rate": 9.294578315226488e-06, "loss": 0.2406, "step": 1454 }, { "epoch": 0.2, "grad_norm": 1.1341300489118624, "learning_rate": 9.293459552256472e-06, "loss": 0.2395, "step": 1455 }, { "epoch": 0.2, "grad_norm": 1.0983926936213642, "learning_rate": 9.292339970280624e-06, "loss": 0.1633, "step": 1456 }, { "epoch": 0.2, "grad_norm": 1.0619820731874734, "learning_rate": 9.291219569512514e-06, "loss": 0.2141, "step": 1457 }, { "epoch": 0.2, "grad_norm": 1.1884771907148304, "learning_rate": 9.290098350165866e-06, "loss": 0.2073, "step": 1458 }, { "epoch": 0.2, "grad_norm": 1.1053333921613302, "learning_rate": 9.288976312454557e-06, "loss": 0.2137, "step": 1459 }, { "epoch": 0.2, "grad_norm": 1.0685843013684573, "learning_rate": 9.287853456592625e-06, "loss": 0.2382, "step": 1460 }, { "epoch": 0.2, "grad_norm": 1.0467676460029007, "learning_rate": 9.286729782794261e-06, "loss": 0.2281, "step": 1461 }, { "epoch": 0.2, "grad_norm": 1.2481124092015607, "learning_rate": 9.285605291273816e-06, "loss": 0.1586, "step": 1462 }, { "epoch": 0.2, "grad_norm": 1.067062650866249, "learning_rate": 9.28447998224579e-06, "loss": 0.2194, "step": 1463 }, { "epoch": 0.2, "grad_norm": 0.9914626440797804, "learning_rate": 9.283353855924848e-06, "loss": 0.1972, "step": 1464 }, { "epoch": 0.2, "grad_norm": 0.997483680359301, "learning_rate": 9.282226912525803e-06, "loss": 0.1645, "step": 1465 }, { "epoch": 0.2, "grad_norm": 1.202528892051415, "learning_rate": 9.281099152263626e-06, "loss": 0.2102, "step": 1466 }, { "epoch": 0.2, "grad_norm": 0.6830996218593023, "learning_rate": 9.279970575353447e-06, "loss": 0.1127, "step": 1467 }, { "epoch": 0.2, "grad_norm": 1.5524209458759173, "learning_rate": 9.27884118201055e-06, "loss": 0.2597, "step": 1468 }, { "epoch": 0.2, "grad_norm": 1.3585944758009245, "learning_rate": 9.277710972450371e-06, "loss": 0.2423, "step": 1469 }, { "epoch": 0.2, "grad_norm": 0.99674785610715, "learning_rate": 9.276579946888507e-06, "loss": 0.1696, "step": 1470 }, { "epoch": 0.2, "grad_norm": 1.1411090903144085, "learning_rate": 9.275448105540709e-06, "loss": 0.2311, "step": 1471 }, { "epoch": 0.2, "grad_norm": 1.2568637502454902, "learning_rate": 9.274315448622884e-06, "loss": 0.2421, "step": 1472 }, { "epoch": 0.2, "grad_norm": 1.2271374196529397, "learning_rate": 9.273181976351091e-06, "loss": 0.2324, "step": 1473 }, { "epoch": 0.2, "grad_norm": 1.061633215919352, "learning_rate": 9.27204768894155e-06, "loss": 0.218, "step": 1474 }, { "epoch": 0.2, "grad_norm": 1.0123112063172717, "learning_rate": 9.270912586610632e-06, "loss": 0.2069, "step": 1475 }, { "epoch": 0.2, "grad_norm": 1.0417620347663756, "learning_rate": 9.269776669574866e-06, "loss": 0.1956, "step": 1476 }, { "epoch": 0.2, "grad_norm": 1.228347392323195, "learning_rate": 9.268639938050937e-06, "loss": 0.2458, "step": 1477 }, { "epoch": 0.2, "grad_norm": 1.0949542009115703, "learning_rate": 9.26750239225568e-06, "loss": 0.2406, "step": 1478 }, { "epoch": 0.2, "grad_norm": 0.9655796821824766, "learning_rate": 9.266364032406095e-06, "loss": 0.2149, "step": 1479 }, { "epoch": 0.2, "grad_norm": 1.3889475446098083, "learning_rate": 9.265224858719327e-06, "loss": 0.2018, "step": 1480 }, { "epoch": 0.2, "grad_norm": 1.278893994751683, "learning_rate": 9.264084871412684e-06, "loss": 0.2401, "step": 1481 }, { "epoch": 0.2, "grad_norm": 1.2071314287889525, "learning_rate": 9.262944070703623e-06, "loss": 0.2334, "step": 1482 }, { "epoch": 0.2, "grad_norm": 0.7924856171946325, "learning_rate": 9.261802456809763e-06, "loss": 0.1687, "step": 1483 }, { "epoch": 0.2, "grad_norm": 1.388515284947204, "learning_rate": 9.260660029948872e-06, "loss": 0.183, "step": 1484 }, { "epoch": 0.2, "grad_norm": 1.2957531360175716, "learning_rate": 9.259516790338876e-06, "loss": 0.2283, "step": 1485 }, { "epoch": 0.2, "grad_norm": 0.8402747141795965, "learning_rate": 9.258372738197856e-06, "loss": 0.1579, "step": 1486 }, { "epoch": 0.2, "grad_norm": 1.0815791474298202, "learning_rate": 9.257227873744047e-06, "loss": 0.1795, "step": 1487 }, { "epoch": 0.2, "grad_norm": 1.2174924385904429, "learning_rate": 9.25608219719584e-06, "loss": 0.1666, "step": 1488 }, { "epoch": 0.2, "grad_norm": 1.4542752798493963, "learning_rate": 9.254935708771779e-06, "loss": 0.2946, "step": 1489 }, { "epoch": 0.2, "grad_norm": 1.3279757214896084, "learning_rate": 9.253788408690564e-06, "loss": 0.2593, "step": 1490 }, { "epoch": 0.2, "grad_norm": 1.1599835398436547, "learning_rate": 9.252640297171053e-06, "loss": 0.2292, "step": 1491 }, { "epoch": 0.2, "grad_norm": 1.0787593260397648, "learning_rate": 9.251491374432256e-06, "loss": 0.1921, "step": 1492 }, { "epoch": 0.2, "grad_norm": 1.0001353484063347, "learning_rate": 9.250341640693332e-06, "loss": 0.1838, "step": 1493 }, { "epoch": 0.2, "grad_norm": 1.4474775407621767, "learning_rate": 9.249191096173606e-06, "loss": 0.213, "step": 1494 }, { "epoch": 0.2, "grad_norm": 0.8008577839529504, "learning_rate": 9.248039741092551e-06, "loss": 0.1858, "step": 1495 }, { "epoch": 0.2, "grad_norm": 1.412944114565353, "learning_rate": 9.246887575669794e-06, "loss": 0.3169, "step": 1496 }, { "epoch": 0.2, "grad_norm": 0.9085813238258992, "learning_rate": 9.245734600125118e-06, "loss": 0.1648, "step": 1497 }, { "epoch": 0.2, "grad_norm": 1.062201542350698, "learning_rate": 9.244580814678464e-06, "loss": 0.2326, "step": 1498 }, { "epoch": 0.2, "grad_norm": 1.036612523344992, "learning_rate": 9.243426219549918e-06, "loss": 0.187, "step": 1499 }, { "epoch": 0.2, "grad_norm": 1.2754024714383907, "learning_rate": 9.242270814959732e-06, "loss": 0.285, "step": 1500 }, { "epoch": 0.2, "grad_norm": 0.9809321133667331, "learning_rate": 9.241114601128307e-06, "loss": 0.1185, "step": 1501 }, { "epoch": 0.2, "grad_norm": 0.9163020199571161, "learning_rate": 9.239957578276197e-06, "loss": 0.1969, "step": 1502 }, { "epoch": 0.2, "grad_norm": 1.0117550402273363, "learning_rate": 9.238799746624111e-06, "loss": 0.1664, "step": 1503 }, { "epoch": 0.2, "grad_norm": 1.0145394921514055, "learning_rate": 9.237641106392913e-06, "loss": 0.2028, "step": 1504 }, { "epoch": 0.2, "grad_norm": 0.77726701230136, "learning_rate": 9.236481657803624e-06, "loss": 0.162, "step": 1505 }, { "epoch": 0.2, "grad_norm": 0.991207913163313, "learning_rate": 9.235321401077412e-06, "loss": 0.1992, "step": 1506 }, { "epoch": 0.2, "grad_norm": 1.1679195524127397, "learning_rate": 9.23416033643561e-06, "loss": 0.245, "step": 1507 }, { "epoch": 0.2, "grad_norm": 0.6920723924781512, "learning_rate": 9.232998464099691e-06, "loss": 0.1618, "step": 1508 }, { "epoch": 0.2, "grad_norm": 1.2457056435578018, "learning_rate": 9.231835784291297e-06, "loss": 0.2616, "step": 1509 }, { "epoch": 0.2, "grad_norm": 1.1843597443559066, "learning_rate": 9.230672297232211e-06, "loss": 0.2547, "step": 1510 }, { "epoch": 0.2, "grad_norm": 1.352660037686944, "learning_rate": 9.22950800314438e-06, "loss": 0.2892, "step": 1511 }, { "epoch": 0.2, "grad_norm": 1.0153723469288776, "learning_rate": 9.228342902249901e-06, "loss": 0.1706, "step": 1512 }, { "epoch": 0.2, "grad_norm": 1.0956617447586077, "learning_rate": 9.22717699477102e-06, "loss": 0.2138, "step": 1513 }, { "epoch": 0.2, "grad_norm": 1.2507607641191312, "learning_rate": 9.226010280930146e-06, "loss": 0.2213, "step": 1514 }, { "epoch": 0.2, "grad_norm": 1.0453841588862651, "learning_rate": 9.224842760949836e-06, "loss": 0.1786, "step": 1515 }, { "epoch": 0.2, "grad_norm": 1.1921653031415262, "learning_rate": 9.223674435052803e-06, "loss": 0.2396, "step": 1516 }, { "epoch": 0.2, "grad_norm": 1.1977794038756189, "learning_rate": 9.22250530346191e-06, "loss": 0.1819, "step": 1517 }, { "epoch": 0.2, "grad_norm": 1.032771760646993, "learning_rate": 9.221335366400181e-06, "loss": 0.2278, "step": 1518 }, { "epoch": 0.2, "grad_norm": 1.1668377196794233, "learning_rate": 9.220164624090786e-06, "loss": 0.2145, "step": 1519 }, { "epoch": 0.2, "grad_norm": 1.0151769164096038, "learning_rate": 9.218993076757054e-06, "loss": 0.2304, "step": 1520 }, { "epoch": 0.21, "grad_norm": 0.7297381173272683, "learning_rate": 9.217820724622463e-06, "loss": 0.1444, "step": 1521 }, { "epoch": 0.21, "grad_norm": 0.8911880239532375, "learning_rate": 9.216647567910649e-06, "loss": 0.2031, "step": 1522 }, { "epoch": 0.21, "grad_norm": 1.4295469270888457, "learning_rate": 9.215473606845398e-06, "loss": 0.2145, "step": 1523 }, { "epoch": 0.21, "grad_norm": 1.0863938514123614, "learning_rate": 9.214298841650651e-06, "loss": 0.1804, "step": 1524 }, { "epoch": 0.21, "grad_norm": 0.6585070355999181, "learning_rate": 9.2131232725505e-06, "loss": 0.16, "step": 1525 }, { "epoch": 0.21, "grad_norm": 0.9349115268058642, "learning_rate": 9.2119468997692e-06, "loss": 0.2074, "step": 1526 }, { "epoch": 0.21, "grad_norm": 0.7250598654587629, "learning_rate": 9.210769723531146e-06, "loss": 0.1224, "step": 1527 }, { "epoch": 0.21, "grad_norm": 1.143791457099049, "learning_rate": 9.209591744060893e-06, "loss": 0.2088, "step": 1528 }, { "epoch": 0.21, "grad_norm": 1.3786990676010165, "learning_rate": 9.208412961583146e-06, "loss": 0.2447, "step": 1529 }, { "epoch": 0.21, "grad_norm": 1.125852670757636, "learning_rate": 9.207233376322772e-06, "loss": 0.2234, "step": 1530 }, { "epoch": 0.21, "grad_norm": 1.0889978739534267, "learning_rate": 9.206052988504778e-06, "loss": 0.2096, "step": 1531 }, { "epoch": 0.21, "grad_norm": 1.107429994247639, "learning_rate": 9.204871798354336e-06, "loss": 0.149, "step": 1532 }, { "epoch": 0.21, "grad_norm": 0.9539491794290632, "learning_rate": 9.203689806096763e-06, "loss": 0.1936, "step": 1533 }, { "epoch": 0.21, "grad_norm": 1.058257004324509, "learning_rate": 9.202507011957531e-06, "loss": 0.1617, "step": 1534 }, { "epoch": 0.21, "grad_norm": 0.9264789632283602, "learning_rate": 9.201323416162268e-06, "loss": 0.1806, "step": 1535 }, { "epoch": 0.21, "grad_norm": 1.1554469784085288, "learning_rate": 9.200139018936753e-06, "loss": 0.25, "step": 1536 }, { "epoch": 0.21, "grad_norm": 1.1760356701209589, "learning_rate": 9.198953820506915e-06, "loss": 0.2292, "step": 1537 }, { "epoch": 0.21, "grad_norm": 1.2719932284194915, "learning_rate": 9.197767821098841e-06, "loss": 0.2292, "step": 1538 }, { "epoch": 0.21, "grad_norm": 0.9769969255195149, "learning_rate": 9.196581020938767e-06, "loss": 0.2163, "step": 1539 }, { "epoch": 0.21, "grad_norm": 0.9255479970934233, "learning_rate": 9.195393420253082e-06, "loss": 0.2294, "step": 1540 }, { "epoch": 0.21, "grad_norm": 1.0891444047088443, "learning_rate": 9.194205019268332e-06, "loss": 0.176, "step": 1541 }, { "epoch": 0.21, "grad_norm": 0.7313597526813154, "learning_rate": 9.193015818211209e-06, "loss": 0.1676, "step": 1542 }, { "epoch": 0.21, "grad_norm": 1.2646857260496218, "learning_rate": 9.191825817308561e-06, "loss": 0.2066, "step": 1543 }, { "epoch": 0.21, "grad_norm": 0.9920022070468745, "learning_rate": 9.190635016787392e-06, "loss": 0.168, "step": 1544 }, { "epoch": 0.21, "grad_norm": 0.9954050825764155, "learning_rate": 9.189443416874852e-06, "loss": 0.1934, "step": 1545 }, { "epoch": 0.21, "grad_norm": 0.9568026384042294, "learning_rate": 9.188251017798248e-06, "loss": 0.2173, "step": 1546 }, { "epoch": 0.21, "grad_norm": 1.0155756099014628, "learning_rate": 9.187057819785039e-06, "loss": 0.2257, "step": 1547 }, { "epoch": 0.21, "grad_norm": 0.8900524993057212, "learning_rate": 9.18586382306283e-06, "loss": 0.1841, "step": 1548 }, { "epoch": 0.21, "grad_norm": 1.0139633435572508, "learning_rate": 9.18466902785939e-06, "loss": 0.1489, "step": 1549 }, { "epoch": 0.21, "grad_norm": 1.0916149635803472, "learning_rate": 9.183473434402631e-06, "loss": 0.2103, "step": 1550 }, { "epoch": 0.21, "grad_norm": 0.9056649715429842, "learning_rate": 9.182277042920622e-06, "loss": 0.1825, "step": 1551 }, { "epoch": 0.21, "grad_norm": 1.2523603072498686, "learning_rate": 9.181079853641581e-06, "loss": 0.3, "step": 1552 }, { "epoch": 0.21, "grad_norm": 1.3069531770117335, "learning_rate": 9.17988186679388e-06, "loss": 0.1707, "step": 1553 }, { "epoch": 0.21, "grad_norm": 0.8887887824826743, "learning_rate": 9.178683082606045e-06, "loss": 0.1489, "step": 1554 }, { "epoch": 0.21, "grad_norm": 1.034008377655116, "learning_rate": 9.17748350130675e-06, "loss": 0.1826, "step": 1555 }, { "epoch": 0.21, "grad_norm": 1.0665972223031897, "learning_rate": 9.176283123124823e-06, "loss": 0.2066, "step": 1556 }, { "epoch": 0.21, "grad_norm": 1.1053041083521946, "learning_rate": 9.175081948289244e-06, "loss": 0.1984, "step": 1557 }, { "epoch": 0.21, "grad_norm": 0.8047395507716162, "learning_rate": 9.173879977029146e-06, "loss": 0.1762, "step": 1558 }, { "epoch": 0.21, "grad_norm": 0.9204690988605108, "learning_rate": 9.172677209573813e-06, "loss": 0.1483, "step": 1559 }, { "epoch": 0.21, "grad_norm": 1.0725299598409954, "learning_rate": 9.17147364615268e-06, "loss": 0.1883, "step": 1560 }, { "epoch": 0.21, "grad_norm": 1.3420305941357362, "learning_rate": 9.170269286995336e-06, "loss": 0.2486, "step": 1561 }, { "epoch": 0.21, "grad_norm": 0.92476836599532, "learning_rate": 9.16906413233152e-06, "loss": 0.1506, "step": 1562 }, { "epoch": 0.21, "grad_norm": 1.342283669120384, "learning_rate": 9.167858182391121e-06, "loss": 0.2742, "step": 1563 }, { "epoch": 0.21, "grad_norm": 1.1506160989225764, "learning_rate": 9.166651437404184e-06, "loss": 0.2048, "step": 1564 }, { "epoch": 0.21, "grad_norm": 0.8678215340494866, "learning_rate": 9.165443897600905e-06, "loss": 0.1566, "step": 1565 }, { "epoch": 0.21, "grad_norm": 1.34815728049161, "learning_rate": 9.164235563211628e-06, "loss": 0.2302, "step": 1566 }, { "epoch": 0.21, "grad_norm": 0.9018699570832143, "learning_rate": 9.16302643446685e-06, "loss": 0.165, "step": 1567 }, { "epoch": 0.21, "grad_norm": 1.0892486909749626, "learning_rate": 9.161816511597224e-06, "loss": 0.1952, "step": 1568 }, { "epoch": 0.21, "grad_norm": 0.9282420622543063, "learning_rate": 9.160605794833548e-06, "loss": 0.155, "step": 1569 }, { "epoch": 0.21, "grad_norm": 1.3208469642775909, "learning_rate": 9.159394284406775e-06, "loss": 0.2494, "step": 1570 }, { "epoch": 0.21, "grad_norm": 0.8421285165090833, "learning_rate": 9.158181980548006e-06, "loss": 0.1472, "step": 1571 }, { "epoch": 0.21, "grad_norm": 0.9547993556009755, "learning_rate": 9.1569688834885e-06, "loss": 0.1712, "step": 1572 }, { "epoch": 0.21, "grad_norm": 0.9227599170124827, "learning_rate": 9.155754993459662e-06, "loss": 0.1771, "step": 1573 }, { "epoch": 0.21, "grad_norm": 0.889515728208581, "learning_rate": 9.154540310693048e-06, "loss": 0.1513, "step": 1574 }, { "epoch": 0.21, "grad_norm": 1.2994128229710484, "learning_rate": 9.15332483542037e-06, "loss": 0.2505, "step": 1575 }, { "epoch": 0.21, "grad_norm": 1.3941551430689056, "learning_rate": 9.152108567873485e-06, "loss": 0.2854, "step": 1576 }, { "epoch": 0.21, "grad_norm": 1.063348272885827, "learning_rate": 9.150891508284405e-06, "loss": 0.2015, "step": 1577 }, { "epoch": 0.21, "grad_norm": 0.6520257605960184, "learning_rate": 9.149673656885292e-06, "loss": 0.1484, "step": 1578 }, { "epoch": 0.21, "grad_norm": 1.3077789835928049, "learning_rate": 9.14845501390846e-06, "loss": 0.2415, "step": 1579 }, { "epoch": 0.21, "grad_norm": 0.9483367858698818, "learning_rate": 9.14723557958637e-06, "loss": 0.2022, "step": 1580 }, { "epoch": 0.21, "grad_norm": 1.1234036203973645, "learning_rate": 9.146015354151642e-06, "loss": 0.2112, "step": 1581 }, { "epoch": 0.21, "grad_norm": 1.023458632407832, "learning_rate": 9.144794337837039e-06, "loss": 0.1793, "step": 1582 }, { "epoch": 0.21, "grad_norm": 1.2281308611547879, "learning_rate": 9.14357253087548e-06, "loss": 0.2044, "step": 1583 }, { "epoch": 0.21, "grad_norm": 0.9172663928973426, "learning_rate": 9.142349933500032e-06, "loss": 0.1448, "step": 1584 }, { "epoch": 0.21, "grad_norm": 1.1135701143010492, "learning_rate": 9.141126545943913e-06, "loss": 0.2311, "step": 1585 }, { "epoch": 0.21, "grad_norm": 0.9265857512066183, "learning_rate": 9.139902368440491e-06, "loss": 0.2025, "step": 1586 }, { "epoch": 0.21, "grad_norm": 0.7613838750039221, "learning_rate": 9.138677401223288e-06, "loss": 0.1405, "step": 1587 }, { "epoch": 0.21, "grad_norm": 1.017339311414063, "learning_rate": 9.137451644525975e-06, "loss": 0.1818, "step": 1588 }, { "epoch": 0.21, "grad_norm": 0.9707496341302665, "learning_rate": 9.136225098582369e-06, "loss": 0.1869, "step": 1589 }, { "epoch": 0.21, "grad_norm": 1.283264927566998, "learning_rate": 9.134997763626448e-06, "loss": 0.1668, "step": 1590 }, { "epoch": 0.21, "grad_norm": 1.2094704855322616, "learning_rate": 9.133769639892332e-06, "loss": 0.2447, "step": 1591 }, { "epoch": 0.21, "grad_norm": 0.7111338152662267, "learning_rate": 9.13254072761429e-06, "loss": 0.1876, "step": 1592 }, { "epoch": 0.21, "grad_norm": 0.9994489411968169, "learning_rate": 9.13131102702675e-06, "loss": 0.2242, "step": 1593 }, { "epoch": 0.21, "grad_norm": 1.1818575782640182, "learning_rate": 9.130080538364283e-06, "loss": 0.2312, "step": 1594 }, { "epoch": 0.22, "grad_norm": 1.00253945587966, "learning_rate": 9.128849261861615e-06, "loss": 0.1544, "step": 1595 }, { "epoch": 0.22, "grad_norm": 1.091956056308726, "learning_rate": 9.127617197753615e-06, "loss": 0.2115, "step": 1596 }, { "epoch": 0.22, "grad_norm": 1.2567975015434925, "learning_rate": 9.126384346275315e-06, "loss": 0.2477, "step": 1597 }, { "epoch": 0.22, "grad_norm": 0.9268823948931293, "learning_rate": 9.125150707661882e-06, "loss": 0.2155, "step": 1598 }, { "epoch": 0.22, "grad_norm": 1.1833535557873154, "learning_rate": 9.123916282148648e-06, "loss": 0.2198, "step": 1599 }, { "epoch": 0.22, "grad_norm": 0.9306418012061235, "learning_rate": 9.12268106997108e-06, "loss": 0.2227, "step": 1600 }, { "epoch": 0.22, "grad_norm": 0.9105445777926785, "learning_rate": 9.121445071364811e-06, "loss": 0.1902, "step": 1601 }, { "epoch": 0.22, "grad_norm": 1.0554519078304228, "learning_rate": 9.12020828656561e-06, "loss": 0.2145, "step": 1602 }, { "epoch": 0.22, "grad_norm": 0.9444092966975481, "learning_rate": 9.118970715809404e-06, "loss": 0.1907, "step": 1603 }, { "epoch": 0.22, "grad_norm": 1.037022618330147, "learning_rate": 9.117732359332267e-06, "loss": 0.1522, "step": 1604 }, { "epoch": 0.22, "grad_norm": 1.0624352071387126, "learning_rate": 9.116493217370425e-06, "loss": 0.1856, "step": 1605 }, { "epoch": 0.22, "grad_norm": 1.2713046608890548, "learning_rate": 9.115253290160252e-06, "loss": 0.2798, "step": 1606 }, { "epoch": 0.22, "grad_norm": 0.8734310376925507, "learning_rate": 9.114012577938273e-06, "loss": 0.1262, "step": 1607 }, { "epoch": 0.22, "grad_norm": 0.8722204972213148, "learning_rate": 9.112771080941162e-06, "loss": 0.1518, "step": 1608 }, { "epoch": 0.22, "grad_norm": 0.9130812959259934, "learning_rate": 9.11152879940574e-06, "loss": 0.1531, "step": 1609 }, { "epoch": 0.22, "grad_norm": 1.3544448381382297, "learning_rate": 9.110285733568983e-06, "loss": 0.2537, "step": 1610 }, { "epoch": 0.22, "grad_norm": 0.7928535412133841, "learning_rate": 9.109041883668014e-06, "loss": 0.1206, "step": 1611 }, { "epoch": 0.22, "grad_norm": 1.4162297160012143, "learning_rate": 9.107797249940105e-06, "loss": 0.2209, "step": 1612 }, { "epoch": 0.22, "grad_norm": 0.9136038302597628, "learning_rate": 9.106551832622678e-06, "loss": 0.1944, "step": 1613 }, { "epoch": 0.22, "grad_norm": 1.1665382479296638, "learning_rate": 9.105305631953306e-06, "loss": 0.2244, "step": 1614 }, { "epoch": 0.22, "grad_norm": 0.8983300212275168, "learning_rate": 9.104058648169706e-06, "loss": 0.1875, "step": 1615 }, { "epoch": 0.22, "grad_norm": 0.912476907913707, "learning_rate": 9.102810881509754e-06, "loss": 0.2207, "step": 1616 }, { "epoch": 0.22, "grad_norm": 1.0805817817700083, "learning_rate": 9.101562332211467e-06, "loss": 0.2084, "step": 1617 }, { "epoch": 0.22, "grad_norm": 1.343770812533494, "learning_rate": 9.100313000513013e-06, "loss": 0.2621, "step": 1618 }, { "epoch": 0.22, "grad_norm": 0.9379267213928356, "learning_rate": 9.099062886652712e-06, "loss": 0.1757, "step": 1619 }, { "epoch": 0.22, "grad_norm": 0.837849028982546, "learning_rate": 9.09781199086903e-06, "loss": 0.1791, "step": 1620 }, { "epoch": 0.22, "grad_norm": 1.0883387716541542, "learning_rate": 9.096560313400584e-06, "loss": 0.2308, "step": 1621 }, { "epoch": 0.22, "grad_norm": 0.9374119003452883, "learning_rate": 9.095307854486141e-06, "loss": 0.1362, "step": 1622 }, { "epoch": 0.22, "grad_norm": 1.016157972237035, "learning_rate": 9.094054614364617e-06, "loss": 0.1811, "step": 1623 }, { "epoch": 0.22, "grad_norm": 0.9278455014416334, "learning_rate": 9.092800593275073e-06, "loss": 0.1855, "step": 1624 }, { "epoch": 0.22, "grad_norm": 0.9137227419966132, "learning_rate": 9.091545791456723e-06, "loss": 0.2177, "step": 1625 }, { "epoch": 0.22, "grad_norm": 1.036739168446271, "learning_rate": 9.090290209148926e-06, "loss": 0.2102, "step": 1626 }, { "epoch": 0.22, "grad_norm": 0.9805996587175271, "learning_rate": 9.089033846591198e-06, "loss": 0.195, "step": 1627 }, { "epoch": 0.22, "grad_norm": 1.2239230577003823, "learning_rate": 9.087776704023193e-06, "loss": 0.2206, "step": 1628 }, { "epoch": 0.22, "grad_norm": 0.8533110896917859, "learning_rate": 9.086518781684724e-06, "loss": 0.1769, "step": 1629 }, { "epoch": 0.22, "grad_norm": 1.2907361790185108, "learning_rate": 9.085260079815745e-06, "loss": 0.2685, "step": 1630 }, { "epoch": 0.22, "grad_norm": 0.5732095693371093, "learning_rate": 9.084000598656365e-06, "loss": 0.134, "step": 1631 }, { "epoch": 0.22, "grad_norm": 0.9004932746526683, "learning_rate": 9.082740338446833e-06, "loss": 0.1619, "step": 1632 }, { "epoch": 0.22, "grad_norm": 1.104062670699642, "learning_rate": 9.081479299427557e-06, "loss": 0.2272, "step": 1633 }, { "epoch": 0.22, "grad_norm": 0.9303202908122238, "learning_rate": 9.080217481839084e-06, "loss": 0.1628, "step": 1634 }, { "epoch": 0.22, "grad_norm": 1.073285497233567, "learning_rate": 9.07895488592212e-06, "loss": 0.2213, "step": 1635 }, { "epoch": 0.22, "grad_norm": 0.9312347480275127, "learning_rate": 9.077691511917508e-06, "loss": 0.2135, "step": 1636 }, { "epoch": 0.22, "grad_norm": 1.0550920605294838, "learning_rate": 9.076427360066247e-06, "loss": 0.1892, "step": 1637 }, { "epoch": 0.22, "grad_norm": 1.348289882775555, "learning_rate": 9.075162430609484e-06, "loss": 0.252, "step": 1638 }, { "epoch": 0.22, "grad_norm": 1.3306911448458103, "learning_rate": 9.073896723788509e-06, "loss": 0.2476, "step": 1639 }, { "epoch": 0.22, "grad_norm": 0.8946768124950375, "learning_rate": 9.072630239844766e-06, "loss": 0.1922, "step": 1640 }, { "epoch": 0.22, "grad_norm": 0.7606023224135805, "learning_rate": 9.071362979019846e-06, "loss": 0.1951, "step": 1641 }, { "epoch": 0.22, "grad_norm": 0.9060080365525509, "learning_rate": 9.070094941555487e-06, "loss": 0.2019, "step": 1642 }, { "epoch": 0.22, "grad_norm": 1.131297500733499, "learning_rate": 9.068826127693576e-06, "loss": 0.2187, "step": 1643 }, { "epoch": 0.22, "grad_norm": 1.0135871331928492, "learning_rate": 9.067556537676146e-06, "loss": 0.1977, "step": 1644 }, { "epoch": 0.22, "grad_norm": 1.1364504517439749, "learning_rate": 9.06628617174538e-06, "loss": 0.2628, "step": 1645 }, { "epoch": 0.22, "grad_norm": 0.9031845283433102, "learning_rate": 9.065015030143609e-06, "loss": 0.157, "step": 1646 }, { "epoch": 0.22, "grad_norm": 1.0262464734761978, "learning_rate": 9.063743113113312e-06, "loss": 0.1937, "step": 1647 }, { "epoch": 0.22, "grad_norm": 1.2385567951688508, "learning_rate": 9.062470420897115e-06, "loss": 0.2217, "step": 1648 }, { "epoch": 0.22, "grad_norm": 0.8894587791133882, "learning_rate": 9.061196953737794e-06, "loss": 0.1688, "step": 1649 }, { "epoch": 0.22, "grad_norm": 0.9210339349152251, "learning_rate": 9.059922711878271e-06, "loss": 0.1795, "step": 1650 }, { "epoch": 0.22, "grad_norm": 1.0317153572503333, "learning_rate": 9.058647695561615e-06, "loss": 0.2276, "step": 1651 }, { "epoch": 0.22, "grad_norm": 1.0999947326355357, "learning_rate": 9.057371905031043e-06, "loss": 0.2318, "step": 1652 }, { "epoch": 0.22, "grad_norm": 1.1128058522363629, "learning_rate": 9.05609534052992e-06, "loss": 0.241, "step": 1653 }, { "epoch": 0.22, "grad_norm": 1.3208764781744762, "learning_rate": 9.054818002301764e-06, "loss": 0.2457, "step": 1654 }, { "epoch": 0.22, "grad_norm": 0.7762426434722949, "learning_rate": 9.053539890590231e-06, "loss": 0.1546, "step": 1655 }, { "epoch": 0.22, "grad_norm": 0.9800432751187034, "learning_rate": 9.052261005639132e-06, "loss": 0.1798, "step": 1656 }, { "epoch": 0.22, "grad_norm": 1.041569796792994, "learning_rate": 9.05098134769242e-06, "loss": 0.2048, "step": 1657 }, { "epoch": 0.22, "grad_norm": 0.8603717385001127, "learning_rate": 9.0497009169942e-06, "loss": 0.2004, "step": 1658 }, { "epoch": 0.22, "grad_norm": 1.27703549308256, "learning_rate": 9.048419713788724e-06, "loss": 0.269, "step": 1659 }, { "epoch": 0.22, "grad_norm": 1.1514517915833544, "learning_rate": 9.047137738320385e-06, "loss": 0.2553, "step": 1660 }, { "epoch": 0.22, "grad_norm": 1.4410717355223455, "learning_rate": 9.045854990833734e-06, "loss": 0.3021, "step": 1661 }, { "epoch": 0.22, "grad_norm": 0.9592655763708217, "learning_rate": 9.04457147157346e-06, "loss": 0.1424, "step": 1662 }, { "epoch": 0.22, "grad_norm": 1.3038397436504692, "learning_rate": 9.043287180784405e-06, "loss": 0.2445, "step": 1663 }, { "epoch": 0.22, "grad_norm": 1.5619736230225016, "learning_rate": 9.042002118711555e-06, "loss": 0.3153, "step": 1664 }, { "epoch": 0.22, "grad_norm": 1.1656889790933496, "learning_rate": 9.040716285600043e-06, "loss": 0.1967, "step": 1665 }, { "epoch": 0.22, "grad_norm": 0.9898476906497795, "learning_rate": 9.039429681695153e-06, "loss": 0.2176, "step": 1666 }, { "epoch": 0.22, "grad_norm": 1.6083598363933727, "learning_rate": 9.03814230724231e-06, "loss": 0.3286, "step": 1667 }, { "epoch": 0.22, "grad_norm": 0.8116882313098569, "learning_rate": 9.036854162487092e-06, "loss": 0.1889, "step": 1668 }, { "epoch": 0.23, "grad_norm": 1.3090562569557114, "learning_rate": 9.035565247675218e-06, "loss": 0.2685, "step": 1669 }, { "epoch": 0.23, "grad_norm": 0.9131405218324317, "learning_rate": 9.03427556305256e-06, "loss": 0.1978, "step": 1670 }, { "epoch": 0.23, "grad_norm": 1.403551693409035, "learning_rate": 9.032985108865134e-06, "loss": 0.2282, "step": 1671 }, { "epoch": 0.23, "grad_norm": 1.1387621673303578, "learning_rate": 9.031693885359099e-06, "loss": 0.1951, "step": 1672 }, { "epoch": 0.23, "grad_norm": 1.0577291974501468, "learning_rate": 9.030401892780767e-06, "loss": 0.1989, "step": 1673 }, { "epoch": 0.23, "grad_norm": 1.1810830996910942, "learning_rate": 9.029109131376595e-06, "loss": 0.2446, "step": 1674 }, { "epoch": 0.23, "grad_norm": 1.0443640551966644, "learning_rate": 9.027815601393184e-06, "loss": 0.2045, "step": 1675 }, { "epoch": 0.23, "grad_norm": 1.1298762245493001, "learning_rate": 9.026521303077284e-06, "loss": 0.2076, "step": 1676 }, { "epoch": 0.23, "grad_norm": 1.0080256815383242, "learning_rate": 9.025226236675792e-06, "loss": 0.2124, "step": 1677 }, { "epoch": 0.23, "grad_norm": 1.271611665571337, "learning_rate": 9.02393040243575e-06, "loss": 0.2044, "step": 1678 }, { "epoch": 0.23, "grad_norm": 1.0421769777744485, "learning_rate": 9.022633800604345e-06, "loss": 0.1791, "step": 1679 }, { "epoch": 0.23, "grad_norm": 1.0667323177065713, "learning_rate": 9.021336431428916e-06, "loss": 0.2073, "step": 1680 }, { "epoch": 0.23, "grad_norm": 1.2434461170594993, "learning_rate": 9.020038295156942e-06, "loss": 0.2143, "step": 1681 }, { "epoch": 0.23, "grad_norm": 1.091785917136842, "learning_rate": 9.018739392036051e-06, "loss": 0.1987, "step": 1682 }, { "epoch": 0.23, "grad_norm": 0.8788647673599841, "learning_rate": 9.017439722314018e-06, "loss": 0.1383, "step": 1683 }, { "epoch": 0.23, "grad_norm": 0.9130165643816887, "learning_rate": 9.016139286238765e-06, "loss": 0.1728, "step": 1684 }, { "epoch": 0.23, "grad_norm": 0.6299251636380091, "learning_rate": 9.014838084058358e-06, "loss": 0.1121, "step": 1685 }, { "epoch": 0.23, "grad_norm": 1.2400783395006718, "learning_rate": 9.01353611602101e-06, "loss": 0.2913, "step": 1686 }, { "epoch": 0.23, "grad_norm": 1.0287409614362961, "learning_rate": 9.012233382375076e-06, "loss": 0.1995, "step": 1687 }, { "epoch": 0.23, "grad_norm": 1.0932939447740333, "learning_rate": 9.010929883369068e-06, "loss": 0.2069, "step": 1688 }, { "epoch": 0.23, "grad_norm": 0.9295852283749378, "learning_rate": 9.009625619251631e-06, "loss": 0.169, "step": 1689 }, { "epoch": 0.23, "grad_norm": 0.9869629832871204, "learning_rate": 9.008320590271567e-06, "loss": 0.1707, "step": 1690 }, { "epoch": 0.23, "grad_norm": 1.0136131254602732, "learning_rate": 9.007014796677815e-06, "loss": 0.2145, "step": 1691 }, { "epoch": 0.23, "grad_norm": 0.7226709202851849, "learning_rate": 9.005708238719466e-06, "loss": 0.1345, "step": 1692 }, { "epoch": 0.23, "grad_norm": 0.8467489927296917, "learning_rate": 9.004400916645755e-06, "loss": 0.1471, "step": 1693 }, { "epoch": 0.23, "grad_norm": 0.8572863126166351, "learning_rate": 9.00309283070606e-06, "loss": 0.1557, "step": 1694 }, { "epoch": 0.23, "grad_norm": 0.8752086441157216, "learning_rate": 9.001783981149907e-06, "loss": 0.1735, "step": 1695 }, { "epoch": 0.23, "grad_norm": 1.0289172055014502, "learning_rate": 9.000474368226972e-06, "loss": 0.2262, "step": 1696 }, { "epoch": 0.23, "grad_norm": 0.9593185115054929, "learning_rate": 8.999163992187068e-06, "loss": 0.175, "step": 1697 }, { "epoch": 0.23, "grad_norm": 0.873557036347934, "learning_rate": 8.997852853280159e-06, "loss": 0.1979, "step": 1698 }, { "epoch": 0.23, "grad_norm": 1.0493897988597436, "learning_rate": 8.996540951756354e-06, "loss": 0.1742, "step": 1699 }, { "epoch": 0.23, "grad_norm": 1.365144570399527, "learning_rate": 8.995228287865907e-06, "loss": 0.2572, "step": 1700 }, { "epoch": 0.23, "grad_norm": 1.0580859624821533, "learning_rate": 8.993914861859216e-06, "loss": 0.2438, "step": 1701 }, { "epoch": 0.23, "grad_norm": 1.0691545076268338, "learning_rate": 8.992600673986828e-06, "loss": 0.235, "step": 1702 }, { "epoch": 0.23, "grad_norm": 0.8654326544616091, "learning_rate": 8.99128572449943e-06, "loss": 0.1358, "step": 1703 }, { "epoch": 0.23, "grad_norm": 1.096105673009714, "learning_rate": 8.98997001364786e-06, "loss": 0.2291, "step": 1704 }, { "epoch": 0.23, "grad_norm": 0.8704918258579404, "learning_rate": 8.988653541683098e-06, "loss": 0.1918, "step": 1705 }, { "epoch": 0.23, "grad_norm": 1.1349566503199058, "learning_rate": 8.98733630885627e-06, "loss": 0.2093, "step": 1706 }, { "epoch": 0.23, "grad_norm": 1.0959350080402757, "learning_rate": 8.986018315418643e-06, "loss": 0.1891, "step": 1707 }, { "epoch": 0.23, "grad_norm": 0.8020375342466055, "learning_rate": 8.98469956162164e-06, "loss": 0.1718, "step": 1708 }, { "epoch": 0.23, "grad_norm": 0.993906909341975, "learning_rate": 8.983380047716815e-06, "loss": 0.2127, "step": 1709 }, { "epoch": 0.23, "grad_norm": 0.975853827045483, "learning_rate": 8.982059773955879e-06, "loss": 0.1926, "step": 1710 }, { "epoch": 0.23, "grad_norm": 0.9444088510734382, "learning_rate": 8.98073874059068e-06, "loss": 0.1891, "step": 1711 }, { "epoch": 0.23, "grad_norm": 1.2415364240238003, "learning_rate": 8.979416947873217e-06, "loss": 0.2174, "step": 1712 }, { "epoch": 0.23, "grad_norm": 0.8793771998850708, "learning_rate": 8.978094396055626e-06, "loss": 0.15, "step": 1713 }, { "epoch": 0.23, "grad_norm": 1.1458465420405384, "learning_rate": 8.976771085390197e-06, "loss": 0.1505, "step": 1714 }, { "epoch": 0.23, "grad_norm": 0.7257605689769333, "learning_rate": 8.975447016129358e-06, "loss": 0.1331, "step": 1715 }, { "epoch": 0.23, "grad_norm": 0.90159187306588, "learning_rate": 8.974122188525685e-06, "loss": 0.1686, "step": 1716 }, { "epoch": 0.23, "grad_norm": 0.872478248719135, "learning_rate": 8.972796602831897e-06, "loss": 0.226, "step": 1717 }, { "epoch": 0.23, "grad_norm": 1.2480180312914106, "learning_rate": 8.971470259300858e-06, "loss": 0.2406, "step": 1718 }, { "epoch": 0.23, "grad_norm": 1.211797070543537, "learning_rate": 8.970143158185576e-06, "loss": 0.202, "step": 1719 }, { "epoch": 0.23, "grad_norm": 1.1159376023177094, "learning_rate": 8.968815299739206e-06, "loss": 0.2004, "step": 1720 }, { "epoch": 0.23, "grad_norm": 1.124144008312983, "learning_rate": 8.967486684215048e-06, "loss": 0.2127, "step": 1721 }, { "epoch": 0.23, "grad_norm": 0.7163225401869178, "learning_rate": 8.96615731186654e-06, "loss": 0.1402, "step": 1722 }, { "epoch": 0.23, "grad_norm": 1.158452181459997, "learning_rate": 8.964827182947268e-06, "loss": 0.2265, "step": 1723 }, { "epoch": 0.23, "grad_norm": 0.8879215377853503, "learning_rate": 8.963496297710967e-06, "loss": 0.1911, "step": 1724 }, { "epoch": 0.23, "grad_norm": 1.1549441820873323, "learning_rate": 8.96216465641151e-06, "loss": 0.1942, "step": 1725 }, { "epoch": 0.23, "grad_norm": 1.2045666010152052, "learning_rate": 8.960832259302914e-06, "loss": 0.1988, "step": 1726 }, { "epoch": 0.23, "grad_norm": 1.0150747378201295, "learning_rate": 8.95949910663935e-06, "loss": 0.1712, "step": 1727 }, { "epoch": 0.23, "grad_norm": 1.054759872877421, "learning_rate": 8.958165198675116e-06, "loss": 0.221, "step": 1728 }, { "epoch": 0.23, "grad_norm": 1.2959290473577743, "learning_rate": 8.95683053566467e-06, "loss": 0.2347, "step": 1729 }, { "epoch": 0.23, "grad_norm": 1.3781736247461709, "learning_rate": 8.955495117862607e-06, "loss": 0.272, "step": 1730 }, { "epoch": 0.23, "grad_norm": 0.9639719271945713, "learning_rate": 8.954158945523665e-06, "loss": 0.147, "step": 1731 }, { "epoch": 0.23, "grad_norm": 0.8991365230704169, "learning_rate": 8.952822018902731e-06, "loss": 0.19, "step": 1732 }, { "epoch": 0.23, "grad_norm": 1.2374752413004233, "learning_rate": 8.95148433825483e-06, "loss": 0.183, "step": 1733 }, { "epoch": 0.23, "grad_norm": 1.1701562426685816, "learning_rate": 8.950145903835132e-06, "loss": 0.2271, "step": 1734 }, { "epoch": 0.23, "grad_norm": 0.7864694602478843, "learning_rate": 8.948806715898957e-06, "loss": 0.2167, "step": 1735 }, { "epoch": 0.23, "grad_norm": 1.2159161236131628, "learning_rate": 8.947466774701759e-06, "loss": 0.1951, "step": 1736 }, { "epoch": 0.23, "grad_norm": 1.126971441932858, "learning_rate": 8.946126080499142e-06, "loss": 0.2351, "step": 1737 }, { "epoch": 0.23, "grad_norm": 0.8726634675307856, "learning_rate": 8.944784633546855e-06, "loss": 0.1347, "step": 1738 }, { "epoch": 0.23, "grad_norm": 0.8113588119567229, "learning_rate": 8.943442434100784e-06, "loss": 0.143, "step": 1739 }, { "epoch": 0.23, "grad_norm": 1.2422220630915788, "learning_rate": 8.942099482416965e-06, "loss": 0.2742, "step": 1740 }, { "epoch": 0.23, "grad_norm": 0.9803385270471321, "learning_rate": 8.940755778751576e-06, "loss": 0.1924, "step": 1741 }, { "epoch": 0.23, "grad_norm": 1.244847463192927, "learning_rate": 8.939411323360934e-06, "loss": 0.2528, "step": 1742 }, { "epoch": 0.24, "grad_norm": 0.9238350965221264, "learning_rate": 8.938066116501505e-06, "loss": 0.2186, "step": 1743 }, { "epoch": 0.24, "grad_norm": 1.2672046399069874, "learning_rate": 8.936720158429895e-06, "loss": 0.2644, "step": 1744 }, { "epoch": 0.24, "grad_norm": 0.8324730357913355, "learning_rate": 8.935373449402856e-06, "loss": 0.1782, "step": 1745 }, { "epoch": 0.24, "grad_norm": 0.9551310847202754, "learning_rate": 8.93402598967728e-06, "loss": 0.2005, "step": 1746 }, { "epoch": 0.24, "grad_norm": 0.5950512395785004, "learning_rate": 8.932677779510204e-06, "loss": 0.1695, "step": 1747 }, { "epoch": 0.24, "grad_norm": 0.9744679459291212, "learning_rate": 8.93132881915881e-06, "loss": 0.1859, "step": 1748 }, { "epoch": 0.24, "grad_norm": 1.290194305188234, "learning_rate": 8.929979108880417e-06, "loss": 0.2465, "step": 1749 }, { "epoch": 0.24, "grad_norm": 1.0488683721714607, "learning_rate": 8.928628648932496e-06, "loss": 0.2157, "step": 1750 }, { "epoch": 0.24, "grad_norm": 1.0595587497454866, "learning_rate": 8.927277439572653e-06, "loss": 0.1868, "step": 1751 }, { "epoch": 0.24, "grad_norm": 0.9239610722600683, "learning_rate": 8.925925481058641e-06, "loss": 0.1469, "step": 1752 }, { "epoch": 0.24, "grad_norm": 1.005423841454979, "learning_rate": 8.924572773648355e-06, "loss": 0.2021, "step": 1753 }, { "epoch": 0.24, "grad_norm": 0.8131904597505547, "learning_rate": 8.923219317599833e-06, "loss": 0.1518, "step": 1754 }, { "epoch": 0.24, "grad_norm": 0.9714592147511968, "learning_rate": 8.921865113171257e-06, "loss": 0.1648, "step": 1755 }, { "epoch": 0.24, "grad_norm": 0.94210685658369, "learning_rate": 8.920510160620948e-06, "loss": 0.1628, "step": 1756 }, { "epoch": 0.24, "grad_norm": 1.2378089881405154, "learning_rate": 8.919154460207373e-06, "loss": 0.2416, "step": 1757 }, { "epoch": 0.24, "grad_norm": 1.0193037789996382, "learning_rate": 8.917798012189142e-06, "loss": 0.1384, "step": 1758 }, { "epoch": 0.24, "grad_norm": 1.1112441824183277, "learning_rate": 8.916440816825006e-06, "loss": 0.1885, "step": 1759 }, { "epoch": 0.24, "grad_norm": 1.245622822552052, "learning_rate": 8.915082874373856e-06, "loss": 0.2422, "step": 1760 }, { "epoch": 0.24, "grad_norm": 1.429886089399871, "learning_rate": 8.913724185094734e-06, "loss": 0.2795, "step": 1761 }, { "epoch": 0.24, "grad_norm": 1.1396893973097424, "learning_rate": 8.912364749246813e-06, "loss": 0.1998, "step": 1762 }, { "epoch": 0.24, "grad_norm": 0.8863257942787386, "learning_rate": 8.911004567089421e-06, "loss": 0.2137, "step": 1763 }, { "epoch": 0.24, "grad_norm": 1.0796681603424005, "learning_rate": 8.909643638882017e-06, "loss": 0.2222, "step": 1764 }, { "epoch": 0.24, "grad_norm": 1.056746154459645, "learning_rate": 8.908281964884206e-06, "loss": 0.1892, "step": 1765 }, { "epoch": 0.24, "grad_norm": 1.1864278409701805, "learning_rate": 8.90691954535574e-06, "loss": 0.2477, "step": 1766 }, { "epoch": 0.24, "grad_norm": 1.124040906727328, "learning_rate": 8.90555638055651e-06, "loss": 0.2342, "step": 1767 }, { "epoch": 0.24, "grad_norm": 0.9498599383521321, "learning_rate": 8.904192470746546e-06, "loss": 0.1787, "step": 1768 }, { "epoch": 0.24, "grad_norm": 0.9600969735677135, "learning_rate": 8.902827816186024e-06, "loss": 0.2171, "step": 1769 }, { "epoch": 0.24, "grad_norm": 1.3073673183844057, "learning_rate": 8.90146241713526e-06, "loss": 0.2115, "step": 1770 }, { "epoch": 0.24, "grad_norm": 1.0959309449109231, "learning_rate": 8.900096273854712e-06, "loss": 0.2237, "step": 1771 }, { "epoch": 0.24, "grad_norm": 1.235952499760344, "learning_rate": 8.898729386604985e-06, "loss": 0.2607, "step": 1772 }, { "epoch": 0.24, "grad_norm": 1.1339341934683678, "learning_rate": 8.897361755646816e-06, "loss": 0.2234, "step": 1773 }, { "epoch": 0.24, "grad_norm": 0.8440200841492308, "learning_rate": 8.895993381241095e-06, "loss": 0.1547, "step": 1774 }, { "epoch": 0.24, "grad_norm": 0.9098059200146356, "learning_rate": 8.894624263648846e-06, "loss": 0.1691, "step": 1775 }, { "epoch": 0.24, "grad_norm": 0.9078250414345342, "learning_rate": 8.893254403131238e-06, "loss": 0.1883, "step": 1776 }, { "epoch": 0.24, "grad_norm": 0.9731823607225754, "learning_rate": 8.89188379994958e-06, "loss": 0.2027, "step": 1777 }, { "epoch": 0.24, "grad_norm": 1.2211337818984407, "learning_rate": 8.890512454365324e-06, "loss": 0.2202, "step": 1778 }, { "epoch": 0.24, "grad_norm": 0.903602475836941, "learning_rate": 8.889140366640063e-06, "loss": 0.1903, "step": 1779 }, { "epoch": 0.24, "grad_norm": 0.9515947191488106, "learning_rate": 8.887767537035532e-06, "loss": 0.2065, "step": 1780 }, { "epoch": 0.24, "grad_norm": 0.8897690626362017, "learning_rate": 8.886393965813608e-06, "loss": 0.1972, "step": 1781 }, { "epoch": 0.24, "grad_norm": 0.866287751698666, "learning_rate": 8.88501965323631e-06, "loss": 0.1717, "step": 1782 }, { "epoch": 0.24, "grad_norm": 0.9688229332849452, "learning_rate": 8.883644599565793e-06, "loss": 0.224, "step": 1783 }, { "epoch": 0.24, "grad_norm": 0.7637537868613037, "learning_rate": 8.882268805064363e-06, "loss": 0.1401, "step": 1784 }, { "epoch": 0.24, "grad_norm": 1.0480354178462314, "learning_rate": 8.880892269994455e-06, "loss": 0.1747, "step": 1785 }, { "epoch": 0.24, "grad_norm": 1.2409286853010952, "learning_rate": 8.879514994618659e-06, "loss": 0.2266, "step": 1786 }, { "epoch": 0.24, "grad_norm": 0.7918691557617817, "learning_rate": 8.878136979199697e-06, "loss": 0.1988, "step": 1787 }, { "epoch": 0.24, "grad_norm": 1.300292322793181, "learning_rate": 8.876758224000432e-06, "loss": 0.2379, "step": 1788 }, { "epoch": 0.24, "grad_norm": 1.1551226806324038, "learning_rate": 8.875378729283875e-06, "loss": 0.2377, "step": 1789 }, { "epoch": 0.24, "grad_norm": 1.0675824649413215, "learning_rate": 8.87399849531317e-06, "loss": 0.1841, "step": 1790 }, { "epoch": 0.24, "grad_norm": 0.9631439194818042, "learning_rate": 8.872617522351607e-06, "loss": 0.168, "step": 1791 }, { "epoch": 0.24, "grad_norm": 1.2401773522494002, "learning_rate": 8.871235810662618e-06, "loss": 0.2066, "step": 1792 }, { "epoch": 0.24, "grad_norm": 1.1223680402064242, "learning_rate": 8.869853360509771e-06, "loss": 0.2366, "step": 1793 }, { "epoch": 0.24, "grad_norm": 0.8400429973478517, "learning_rate": 8.868470172156778e-06, "loss": 0.1458, "step": 1794 }, { "epoch": 0.24, "grad_norm": 0.9675288250369417, "learning_rate": 8.867086245867492e-06, "loss": 0.1687, "step": 1795 }, { "epoch": 0.24, "grad_norm": 0.842657201019928, "learning_rate": 8.865701581905905e-06, "loss": 0.1188, "step": 1796 }, { "epoch": 0.24, "grad_norm": 1.4207706898255597, "learning_rate": 8.864316180536151e-06, "loss": 0.2974, "step": 1797 }, { "epoch": 0.24, "grad_norm": 0.9302073220861108, "learning_rate": 8.862930042022509e-06, "loss": 0.1388, "step": 1798 }, { "epoch": 0.24, "grad_norm": 0.9828915779217465, "learning_rate": 8.861543166629384e-06, "loss": 0.1649, "step": 1799 }, { "epoch": 0.24, "grad_norm": 0.9953106489722987, "learning_rate": 8.860155554621343e-06, "loss": 0.181, "step": 1800 }, { "epoch": 0.24, "grad_norm": 1.1739925784673515, "learning_rate": 8.858767206263075e-06, "loss": 0.195, "step": 1801 }, { "epoch": 0.24, "grad_norm": 0.7442932122132846, "learning_rate": 8.857378121819417e-06, "loss": 0.13, "step": 1802 }, { "epoch": 0.24, "grad_norm": 0.9310780129335169, "learning_rate": 8.85598830155535e-06, "loss": 0.1507, "step": 1803 }, { "epoch": 0.24, "grad_norm": 0.9818395174880251, "learning_rate": 8.854597745735989e-06, "loss": 0.2031, "step": 1804 }, { "epoch": 0.24, "grad_norm": 1.1219888163785094, "learning_rate": 8.853206454626591e-06, "loss": 0.2086, "step": 1805 }, { "epoch": 0.24, "grad_norm": 1.308051696459148, "learning_rate": 8.851814428492554e-06, "loss": 0.2782, "step": 1806 }, { "epoch": 0.24, "grad_norm": 0.8612334726654575, "learning_rate": 8.850421667599419e-06, "loss": 0.1191, "step": 1807 }, { "epoch": 0.24, "grad_norm": 1.1941704410438456, "learning_rate": 8.84902817221286e-06, "loss": 0.1757, "step": 1808 }, { "epoch": 0.24, "grad_norm": 1.008020334311649, "learning_rate": 8.8476339425987e-06, "loss": 0.1856, "step": 1809 }, { "epoch": 0.24, "grad_norm": 0.9837201377864982, "learning_rate": 8.846238979022894e-06, "loss": 0.1837, "step": 1810 }, { "epoch": 0.24, "grad_norm": 1.0575807730584905, "learning_rate": 8.84484328175154e-06, "loss": 0.1945, "step": 1811 }, { "epoch": 0.24, "grad_norm": 0.9819032925120229, "learning_rate": 8.843446851050882e-06, "loss": 0.2166, "step": 1812 }, { "epoch": 0.24, "grad_norm": 0.9520000938741624, "learning_rate": 8.842049687187293e-06, "loss": 0.2344, "step": 1813 }, { "epoch": 0.24, "grad_norm": 1.0291645268988383, "learning_rate": 8.840651790427293e-06, "loss": 0.1653, "step": 1814 }, { "epoch": 0.24, "grad_norm": 1.0234696864912818, "learning_rate": 8.83925316103754e-06, "loss": 0.1889, "step": 1815 }, { "epoch": 0.24, "grad_norm": 0.9856259821571862, "learning_rate": 8.837853799284833e-06, "loss": 0.1474, "step": 1816 }, { "epoch": 0.25, "grad_norm": 0.8990512697823295, "learning_rate": 8.836453705436107e-06, "loss": 0.2191, "step": 1817 }, { "epoch": 0.25, "grad_norm": 0.78236210230986, "learning_rate": 8.835052879758441e-06, "loss": 0.1246, "step": 1818 }, { "epoch": 0.25, "grad_norm": 0.887687347271875, "learning_rate": 8.83365132251905e-06, "loss": 0.1676, "step": 1819 }, { "epoch": 0.25, "grad_norm": 1.1715786342969685, "learning_rate": 8.832249033985293e-06, "loss": 0.1736, "step": 1820 }, { "epoch": 0.25, "grad_norm": 1.0249348035090398, "learning_rate": 8.830846014424666e-06, "loss": 0.2133, "step": 1821 }, { "epoch": 0.25, "grad_norm": 1.0762346183682316, "learning_rate": 8.829442264104802e-06, "loss": 0.2289, "step": 1822 }, { "epoch": 0.25, "grad_norm": 1.0823730659476294, "learning_rate": 8.828037783293474e-06, "loss": 0.2118, "step": 1823 }, { "epoch": 0.25, "grad_norm": 1.1533912980562724, "learning_rate": 8.826632572258602e-06, "loss": 0.2533, "step": 1824 }, { "epoch": 0.25, "grad_norm": 0.9961405516995437, "learning_rate": 8.825226631268234e-06, "loss": 0.1683, "step": 1825 }, { "epoch": 0.25, "grad_norm": 0.782749525657312, "learning_rate": 8.823819960590564e-06, "loss": 0.1664, "step": 1826 }, { "epoch": 0.25, "grad_norm": 0.7776663888697151, "learning_rate": 8.822412560493925e-06, "loss": 0.1551, "step": 1827 }, { "epoch": 0.25, "grad_norm": 1.372468759344486, "learning_rate": 8.821004431246786e-06, "loss": 0.2106, "step": 1828 }, { "epoch": 0.25, "grad_norm": 1.0753389145784056, "learning_rate": 8.819595573117758e-06, "loss": 0.178, "step": 1829 }, { "epoch": 0.25, "grad_norm": 1.4009344964318713, "learning_rate": 8.81818598637559e-06, "loss": 0.2484, "step": 1830 }, { "epoch": 0.25, "grad_norm": 1.132554327123729, "learning_rate": 8.81677567128917e-06, "loss": 0.2014, "step": 1831 }, { "epoch": 0.25, "grad_norm": 1.2080122424617399, "learning_rate": 8.815364628127525e-06, "loss": 0.2777, "step": 1832 }, { "epoch": 0.25, "grad_norm": 1.0545562913434603, "learning_rate": 8.81395285715982e-06, "loss": 0.182, "step": 1833 }, { "epoch": 0.25, "grad_norm": 1.2514752288493536, "learning_rate": 8.812540358655361e-06, "loss": 0.2494, "step": 1834 }, { "epoch": 0.25, "grad_norm": 1.2469238162075873, "learning_rate": 8.811127132883591e-06, "loss": 0.2306, "step": 1835 }, { "epoch": 0.25, "grad_norm": 1.1433497458377526, "learning_rate": 8.809713180114092e-06, "loss": 0.2331, "step": 1836 }, { "epoch": 0.25, "grad_norm": 1.219632968426858, "learning_rate": 8.808298500616583e-06, "loss": 0.2503, "step": 1837 }, { "epoch": 0.25, "grad_norm": 0.9384817878844491, "learning_rate": 8.806883094660928e-06, "loss": 0.1822, "step": 1838 }, { "epoch": 0.25, "grad_norm": 0.7207512832691771, "learning_rate": 8.80546696251712e-06, "loss": 0.1556, "step": 1839 }, { "epoch": 0.25, "grad_norm": 1.1440072117917917, "learning_rate": 8.8040501044553e-06, "loss": 0.2039, "step": 1840 }, { "epoch": 0.25, "grad_norm": 1.137254648695283, "learning_rate": 8.80263252074574e-06, "loss": 0.2115, "step": 1841 }, { "epoch": 0.25, "grad_norm": 1.1085309625018394, "learning_rate": 8.801214211658852e-06, "loss": 0.2261, "step": 1842 }, { "epoch": 0.25, "grad_norm": 0.7731984425067134, "learning_rate": 8.799795177465195e-06, "loss": 0.1477, "step": 1843 }, { "epoch": 0.25, "grad_norm": 1.076346605049355, "learning_rate": 8.798375418435452e-06, "loss": 0.2098, "step": 1844 }, { "epoch": 0.25, "grad_norm": 0.8995769106676461, "learning_rate": 8.796954934840453e-06, "loss": 0.2196, "step": 1845 }, { "epoch": 0.25, "grad_norm": 1.032966874227555, "learning_rate": 8.795533726951166e-06, "loss": 0.2127, "step": 1846 }, { "epoch": 0.25, "grad_norm": 0.9855966631030988, "learning_rate": 8.794111795038697e-06, "loss": 0.1655, "step": 1847 }, { "epoch": 0.25, "grad_norm": 0.8607357735504841, "learning_rate": 8.792689139374287e-06, "loss": 0.1531, "step": 1848 }, { "epoch": 0.25, "grad_norm": 1.0798402818449206, "learning_rate": 8.791265760229315e-06, "loss": 0.2444, "step": 1849 }, { "epoch": 0.25, "grad_norm": 1.0956873045187245, "learning_rate": 8.789841657875305e-06, "loss": 0.2498, "step": 1850 }, { "epoch": 0.25, "grad_norm": 1.0851445544848286, "learning_rate": 8.78841683258391e-06, "loss": 0.1862, "step": 1851 }, { "epoch": 0.25, "grad_norm": 0.7674252446038955, "learning_rate": 8.786991284626924e-06, "loss": 0.1496, "step": 1852 }, { "epoch": 0.25, "grad_norm": 0.7710955445694133, "learning_rate": 8.785565014276285e-06, "loss": 0.145, "step": 1853 }, { "epoch": 0.25, "grad_norm": 0.9364335854804174, "learning_rate": 8.784138021804058e-06, "loss": 0.1471, "step": 1854 }, { "epoch": 0.25, "grad_norm": 1.1903757625118065, "learning_rate": 8.782710307482456e-06, "loss": 0.2402, "step": 1855 }, { "epoch": 0.25, "grad_norm": 1.0631449535236093, "learning_rate": 8.78128187158382e-06, "loss": 0.2252, "step": 1856 }, { "epoch": 0.25, "grad_norm": 0.9147860470743994, "learning_rate": 8.779852714380636e-06, "loss": 0.1809, "step": 1857 }, { "epoch": 0.25, "grad_norm": 1.1328373490358221, "learning_rate": 8.778422836145529e-06, "loss": 0.2065, "step": 1858 }, { "epoch": 0.25, "grad_norm": 1.0917665395246432, "learning_rate": 8.77699223715125e-06, "loss": 0.2455, "step": 1859 }, { "epoch": 0.25, "grad_norm": 0.8142121591064512, "learning_rate": 8.775560917670701e-06, "loss": 0.1802, "step": 1860 }, { "epoch": 0.25, "grad_norm": 0.9526160718749868, "learning_rate": 8.774128877976914e-06, "loss": 0.1903, "step": 1861 }, { "epoch": 0.25, "grad_norm": 0.8683997736997215, "learning_rate": 8.77269611834306e-06, "loss": 0.1776, "step": 1862 }, { "epoch": 0.25, "grad_norm": 0.8851277310855235, "learning_rate": 8.771262639042446e-06, "loss": 0.1668, "step": 1863 }, { "epoch": 0.25, "grad_norm": 0.9529730888369812, "learning_rate": 8.76982844034852e-06, "loss": 0.1756, "step": 1864 }, { "epoch": 0.25, "grad_norm": 1.2787862705744721, "learning_rate": 8.768393522534864e-06, "loss": 0.2023, "step": 1865 }, { "epoch": 0.25, "grad_norm": 1.3880572232091226, "learning_rate": 8.766957885875198e-06, "loss": 0.2594, "step": 1866 }, { "epoch": 0.25, "grad_norm": 0.8833737871155324, "learning_rate": 8.76552153064338e-06, "loss": 0.1647, "step": 1867 }, { "epoch": 0.25, "grad_norm": 1.0682583149178926, "learning_rate": 8.764084457113403e-06, "loss": 0.2022, "step": 1868 }, { "epoch": 0.25, "grad_norm": 1.3907524870210932, "learning_rate": 8.762646665559398e-06, "loss": 0.2586, "step": 1869 }, { "epoch": 0.25, "grad_norm": 0.8120993716140003, "learning_rate": 8.761208156255634e-06, "loss": 0.1344, "step": 1870 }, { "epoch": 0.25, "grad_norm": 1.0321271546514192, "learning_rate": 8.759768929476517e-06, "loss": 0.2085, "step": 1871 }, { "epoch": 0.25, "grad_norm": 0.81804944960183, "learning_rate": 8.758328985496588e-06, "loss": 0.1313, "step": 1872 }, { "epoch": 0.25, "grad_norm": 1.1861084804274171, "learning_rate": 8.756888324590526e-06, "loss": 0.2204, "step": 1873 }, { "epoch": 0.25, "grad_norm": 0.9807124566694407, "learning_rate": 8.755446947033145e-06, "loss": 0.1749, "step": 1874 }, { "epoch": 0.25, "grad_norm": 0.8498154919278662, "learning_rate": 8.754004853099403e-06, "loss": 0.1233, "step": 1875 }, { "epoch": 0.25, "grad_norm": 1.437072159005827, "learning_rate": 8.752562043064382e-06, "loss": 0.2477, "step": 1876 }, { "epoch": 0.25, "grad_norm": 0.9034275390149012, "learning_rate": 8.751118517203311e-06, "loss": 0.1841, "step": 1877 }, { "epoch": 0.25, "grad_norm": 1.1118832155311673, "learning_rate": 8.749674275791552e-06, "loss": 0.2179, "step": 1878 }, { "epoch": 0.25, "grad_norm": 1.0008249642888791, "learning_rate": 8.748229319104602e-06, "loss": 0.1973, "step": 1879 }, { "epoch": 0.25, "grad_norm": 1.0336907687200303, "learning_rate": 8.746783647418097e-06, "loss": 0.1965, "step": 1880 }, { "epoch": 0.25, "grad_norm": 1.2096776607841795, "learning_rate": 8.74533726100781e-06, "loss": 0.2379, "step": 1881 }, { "epoch": 0.25, "grad_norm": 1.2151504022711428, "learning_rate": 8.74389016014965e-06, "loss": 0.2143, "step": 1882 }, { "epoch": 0.25, "grad_norm": 0.9545738241743099, "learning_rate": 8.742442345119655e-06, "loss": 0.2272, "step": 1883 }, { "epoch": 0.25, "grad_norm": 1.1151360766048921, "learning_rate": 8.74099381619401e-06, "loss": 0.2131, "step": 1884 }, { "epoch": 0.25, "grad_norm": 1.0590814644581865, "learning_rate": 8.73954457364903e-06, "loss": 0.1907, "step": 1885 }, { "epoch": 0.25, "grad_norm": 1.446541782891458, "learning_rate": 8.738094617761168e-06, "loss": 0.2566, "step": 1886 }, { "epoch": 0.25, "grad_norm": 1.004459850897038, "learning_rate": 8.736643948807013e-06, "loss": 0.1722, "step": 1887 }, { "epoch": 0.25, "grad_norm": 0.9459364052886919, "learning_rate": 8.735192567063292e-06, "loss": 0.1839, "step": 1888 }, { "epoch": 0.25, "grad_norm": 0.8841846674886797, "learning_rate": 8.73374047280686e-06, "loss": 0.1882, "step": 1889 }, { "epoch": 0.25, "grad_norm": 1.0066674939560427, "learning_rate": 8.732287666314716e-06, "loss": 0.1951, "step": 1890 }, { "epoch": 0.25, "grad_norm": 0.9812475604665867, "learning_rate": 8.730834147863993e-06, "loss": 0.1816, "step": 1891 }, { "epoch": 0.26, "grad_norm": 1.049854654326019, "learning_rate": 8.729379917731962e-06, "loss": 0.2451, "step": 1892 }, { "epoch": 0.26, "grad_norm": 0.8348921425263717, "learning_rate": 8.727924976196023e-06, "loss": 0.154, "step": 1893 }, { "epoch": 0.26, "grad_norm": 0.8724115884227754, "learning_rate": 8.726469323533715e-06, "loss": 0.1799, "step": 1894 }, { "epoch": 0.26, "grad_norm": 1.2488001813695329, "learning_rate": 8.725012960022719e-06, "loss": 0.2779, "step": 1895 }, { "epoch": 0.26, "grad_norm": 1.0428179929107515, "learning_rate": 8.723555885940839e-06, "loss": 0.1582, "step": 1896 }, { "epoch": 0.26, "grad_norm": 1.0431182978422335, "learning_rate": 8.722098101566029e-06, "loss": 0.2217, "step": 1897 }, { "epoch": 0.26, "grad_norm": 1.2766099546200196, "learning_rate": 8.720639607176365e-06, "loss": 0.2443, "step": 1898 }, { "epoch": 0.26, "grad_norm": 1.0604157596062975, "learning_rate": 8.719180403050065e-06, "loss": 0.1826, "step": 1899 }, { "epoch": 0.26, "grad_norm": 1.1081557475579054, "learning_rate": 8.717720489465484e-06, "loss": 0.2362, "step": 1900 }, { "epoch": 0.26, "grad_norm": 1.1792533709549378, "learning_rate": 8.716259866701111e-06, "loss": 0.2094, "step": 1901 }, { "epoch": 0.26, "grad_norm": 1.055889991862084, "learning_rate": 8.714798535035567e-06, "loss": 0.1728, "step": 1902 }, { "epoch": 0.26, "grad_norm": 1.0006001492307426, "learning_rate": 8.713336494747611e-06, "loss": 0.1988, "step": 1903 }, { "epoch": 0.26, "grad_norm": 0.9868181080040117, "learning_rate": 8.711873746116139e-06, "loss": 0.1923, "step": 1904 }, { "epoch": 0.26, "grad_norm": 0.8987411290605517, "learning_rate": 8.710410289420178e-06, "loss": 0.19, "step": 1905 }, { "epoch": 0.26, "grad_norm": 1.1723215829490277, "learning_rate": 8.708946124938893e-06, "loss": 0.2687, "step": 1906 }, { "epoch": 0.26, "grad_norm": 1.084765681103905, "learning_rate": 8.707481252951582e-06, "loss": 0.1864, "step": 1907 }, { "epoch": 0.26, "grad_norm": 1.0491588838906274, "learning_rate": 8.706015673737679e-06, "loss": 0.2005, "step": 1908 }, { "epoch": 0.26, "grad_norm": 1.130862352649595, "learning_rate": 8.704549387576754e-06, "loss": 0.2186, "step": 1909 }, { "epoch": 0.26, "grad_norm": 1.1814252044334612, "learning_rate": 8.703082394748511e-06, "loss": 0.2081, "step": 1910 }, { "epoch": 0.26, "grad_norm": 0.9149890794380442, "learning_rate": 8.701614695532787e-06, "loss": 0.1571, "step": 1911 }, { "epoch": 0.26, "grad_norm": 1.097814711263668, "learning_rate": 8.700146290209557e-06, "loss": 0.1868, "step": 1912 }, { "epoch": 0.26, "grad_norm": 1.1641004236142978, "learning_rate": 8.698677179058929e-06, "loss": 0.1847, "step": 1913 }, { "epoch": 0.26, "grad_norm": 0.7327648340289978, "learning_rate": 8.697207362361145e-06, "loss": 0.1499, "step": 1914 }, { "epoch": 0.26, "grad_norm": 0.9559676258055121, "learning_rate": 8.69573684039658e-06, "loss": 0.1411, "step": 1915 }, { "epoch": 0.26, "grad_norm": 0.9437080183340508, "learning_rate": 8.69426561344575e-06, "loss": 0.2142, "step": 1916 }, { "epoch": 0.26, "grad_norm": 1.0563519026090458, "learning_rate": 8.6927936817893e-06, "loss": 0.1912, "step": 1917 }, { "epoch": 0.26, "grad_norm": 0.9486421198814015, "learning_rate": 8.691321045708007e-06, "loss": 0.1472, "step": 1918 }, { "epoch": 0.26, "grad_norm": 1.2924385449116031, "learning_rate": 8.689847705482793e-06, "loss": 0.2572, "step": 1919 }, { "epoch": 0.26, "grad_norm": 1.0347564169723253, "learning_rate": 8.6883736613947e-06, "loss": 0.2219, "step": 1920 }, { "epoch": 0.26, "grad_norm": 1.028930330809374, "learning_rate": 8.686898913724917e-06, "loss": 0.1997, "step": 1921 }, { "epoch": 0.26, "grad_norm": 1.0721960554394292, "learning_rate": 8.685423462754761e-06, "loss": 0.1983, "step": 1922 }, { "epoch": 0.26, "grad_norm": 0.8900614353534536, "learning_rate": 8.683947308765681e-06, "loss": 0.1667, "step": 1923 }, { "epoch": 0.26, "grad_norm": 1.3181732470026806, "learning_rate": 8.682470452039268e-06, "loss": 0.2285, "step": 1924 }, { "epoch": 0.26, "grad_norm": 1.0588762854099099, "learning_rate": 8.680992892857237e-06, "loss": 0.2167, "step": 1925 }, { "epoch": 0.26, "grad_norm": 0.8641685474378329, "learning_rate": 8.679514631501447e-06, "loss": 0.1337, "step": 1926 }, { "epoch": 0.26, "grad_norm": 0.9297675419077986, "learning_rate": 8.678035668253882e-06, "loss": 0.1639, "step": 1927 }, { "epoch": 0.26, "grad_norm": 1.0745648250543616, "learning_rate": 8.676556003396665e-06, "loss": 0.2162, "step": 1928 }, { "epoch": 0.26, "grad_norm": 1.0394002832353888, "learning_rate": 8.675075637212055e-06, "loss": 0.2324, "step": 1929 }, { "epoch": 0.26, "grad_norm": 1.052048676507404, "learning_rate": 8.67359456998244e-06, "loss": 0.2149, "step": 1930 }, { "epoch": 0.26, "grad_norm": 0.8435604812344915, "learning_rate": 8.67211280199034e-06, "loss": 0.1526, "step": 1931 }, { "epoch": 0.26, "grad_norm": 1.1599228388798308, "learning_rate": 8.670630333518415e-06, "loss": 0.2624, "step": 1932 }, { "epoch": 0.26, "grad_norm": 1.1861912082637356, "learning_rate": 8.669147164849456e-06, "loss": 0.1739, "step": 1933 }, { "epoch": 0.26, "grad_norm": 0.9537810313852728, "learning_rate": 8.66766329626639e-06, "loss": 0.1912, "step": 1934 }, { "epoch": 0.26, "grad_norm": 0.7773047013699285, "learning_rate": 8.666178728052267e-06, "loss": 0.1276, "step": 1935 }, { "epoch": 0.26, "grad_norm": 0.767773770712797, "learning_rate": 8.664693460490283e-06, "loss": 0.121, "step": 1936 }, { "epoch": 0.26, "grad_norm": 0.9446821880588713, "learning_rate": 8.663207493863764e-06, "loss": 0.2096, "step": 1937 }, { "epoch": 0.26, "grad_norm": 1.2561640882535028, "learning_rate": 8.661720828456166e-06, "loss": 0.1546, "step": 1938 }, { "epoch": 0.26, "grad_norm": 1.2121716214993652, "learning_rate": 8.66023346455108e-06, "loss": 0.2555, "step": 1939 }, { "epoch": 0.26, "grad_norm": 1.2426588328091588, "learning_rate": 8.658745402432233e-06, "loss": 0.2201, "step": 1940 }, { "epoch": 0.26, "grad_norm": 0.9600979345145125, "learning_rate": 8.657256642383477e-06, "loss": 0.2382, "step": 1941 }, { "epoch": 0.26, "grad_norm": 0.9394460171049835, "learning_rate": 8.65576718468881e-06, "loss": 0.1684, "step": 1942 }, { "epoch": 0.26, "grad_norm": 0.9448011875693647, "learning_rate": 8.654277029632351e-06, "loss": 0.1855, "step": 1943 }, { "epoch": 0.26, "grad_norm": 0.9783526008781825, "learning_rate": 8.652786177498361e-06, "loss": 0.1945, "step": 1944 }, { "epoch": 0.26, "grad_norm": 0.8433159021410657, "learning_rate": 8.651294628571222e-06, "loss": 0.1987, "step": 1945 }, { "epoch": 0.26, "grad_norm": 1.186117646086067, "learning_rate": 8.649802383135466e-06, "loss": 0.2137, "step": 1946 }, { "epoch": 0.26, "grad_norm": 1.0116799432590853, "learning_rate": 8.648309441475745e-06, "loss": 0.1986, "step": 1947 }, { "epoch": 0.26, "grad_norm": 1.1603419090399503, "learning_rate": 8.646815803876847e-06, "loss": 0.174, "step": 1948 }, { "epoch": 0.26, "grad_norm": 1.0894123043761723, "learning_rate": 8.645321470623693e-06, "loss": 0.1972, "step": 1949 }, { "epoch": 0.26, "grad_norm": 1.0324607885720332, "learning_rate": 8.643826442001338e-06, "loss": 0.2016, "step": 1950 }, { "epoch": 0.26, "grad_norm": 0.9993722265041053, "learning_rate": 8.642330718294968e-06, "loss": 0.1709, "step": 1951 }, { "epoch": 0.26, "grad_norm": 0.7815244223447455, "learning_rate": 8.640834299789902e-06, "loss": 0.1503, "step": 1952 }, { "epoch": 0.26, "grad_norm": 0.8769201550753022, "learning_rate": 8.639337186771593e-06, "loss": 0.1706, "step": 1953 }, { "epoch": 0.26, "grad_norm": 1.0683517486433702, "learning_rate": 8.637839379525623e-06, "loss": 0.1745, "step": 1954 }, { "epoch": 0.26, "grad_norm": 0.975789593955614, "learning_rate": 8.636340878337713e-06, "loss": 0.2135, "step": 1955 }, { "epoch": 0.26, "grad_norm": 1.067047346306158, "learning_rate": 8.634841683493705e-06, "loss": 0.2474, "step": 1956 }, { "epoch": 0.26, "grad_norm": 0.9407841850405558, "learning_rate": 8.633341795279586e-06, "loss": 0.1382, "step": 1957 }, { "epoch": 0.26, "grad_norm": 0.9364733516242136, "learning_rate": 8.63184121398147e-06, "loss": 0.1892, "step": 1958 }, { "epoch": 0.26, "grad_norm": 0.9712729304765322, "learning_rate": 8.630339939885598e-06, "loss": 0.2025, "step": 1959 }, { "epoch": 0.26, "grad_norm": 0.9572674783973716, "learning_rate": 8.628837973278352e-06, "loss": 0.1624, "step": 1960 }, { "epoch": 0.26, "grad_norm": 0.9110067316133976, "learning_rate": 8.62733531444624e-06, "loss": 0.1993, "step": 1961 }, { "epoch": 0.26, "grad_norm": 1.2090757221040953, "learning_rate": 8.625831963675906e-06, "loss": 0.2754, "step": 1962 }, { "epoch": 0.26, "grad_norm": 0.9427877264373946, "learning_rate": 8.624327921254123e-06, "loss": 0.201, "step": 1963 }, { "epoch": 0.26, "grad_norm": 0.9128591564033987, "learning_rate": 8.622823187467796e-06, "loss": 0.1501, "step": 1964 }, { "epoch": 0.26, "grad_norm": 0.9672717438655641, "learning_rate": 8.621317762603966e-06, "loss": 0.1931, "step": 1965 }, { "epoch": 0.27, "grad_norm": 0.7478757123069414, "learning_rate": 8.619811646949801e-06, "loss": 0.1496, "step": 1966 }, { "epoch": 0.27, "grad_norm": 0.7289749322797419, "learning_rate": 8.618304840792599e-06, "loss": 0.1425, "step": 1967 }, { "epoch": 0.27, "grad_norm": 1.0022097262991652, "learning_rate": 8.616797344419799e-06, "loss": 0.2116, "step": 1968 }, { "epoch": 0.27, "grad_norm": 1.28730974633379, "learning_rate": 8.615289158118964e-06, "loss": 0.1739, "step": 1969 }, { "epoch": 0.27, "grad_norm": 0.9733871915714983, "learning_rate": 8.613780282177789e-06, "loss": 0.1739, "step": 1970 }, { "epoch": 0.27, "grad_norm": 1.0655021264087063, "learning_rate": 8.612270716884104e-06, "loss": 0.2126, "step": 1971 }, { "epoch": 0.27, "grad_norm": 0.8247128256540801, "learning_rate": 8.610760462525866e-06, "loss": 0.1888, "step": 1972 }, { "epoch": 0.27, "grad_norm": 1.0089086875287983, "learning_rate": 8.609249519391169e-06, "loss": 0.2052, "step": 1973 }, { "epoch": 0.27, "grad_norm": 1.0677706674673546, "learning_rate": 8.607737887768232e-06, "loss": 0.169, "step": 1974 }, { "epoch": 0.27, "grad_norm": 1.0543912617421036, "learning_rate": 8.606225567945412e-06, "loss": 0.2111, "step": 1975 }, { "epoch": 0.27, "grad_norm": 1.0363167373524964, "learning_rate": 8.604712560211195e-06, "loss": 0.1893, "step": 1976 }, { "epoch": 0.27, "grad_norm": 1.1224431226546217, "learning_rate": 8.603198864854192e-06, "loss": 0.2441, "step": 1977 }, { "epoch": 0.27, "grad_norm": 1.1950979460651014, "learning_rate": 8.601684482163157e-06, "loss": 0.2205, "step": 1978 }, { "epoch": 0.27, "grad_norm": 1.2166305909591952, "learning_rate": 8.600169412426962e-06, "loss": 0.2011, "step": 1979 }, { "epoch": 0.27, "grad_norm": 1.0030397557716961, "learning_rate": 8.598653655934621e-06, "loss": 0.1916, "step": 1980 }, { "epoch": 0.27, "grad_norm": 1.0109710745038214, "learning_rate": 8.597137212975273e-06, "loss": 0.2411, "step": 1981 }, { "epoch": 0.27, "grad_norm": 1.1673513637569877, "learning_rate": 8.595620083838189e-06, "loss": 0.1876, "step": 1982 }, { "epoch": 0.27, "grad_norm": 1.1041943001430568, "learning_rate": 8.594102268812772e-06, "loss": 0.2009, "step": 1983 }, { "epoch": 0.27, "grad_norm": 1.201295831217475, "learning_rate": 8.592583768188556e-06, "loss": 0.2097, "step": 1984 }, { "epoch": 0.27, "grad_norm": 1.0165686723734277, "learning_rate": 8.591064582255205e-06, "loss": 0.2141, "step": 1985 }, { "epoch": 0.27, "grad_norm": 0.6859249389128687, "learning_rate": 8.589544711302512e-06, "loss": 0.1451, "step": 1986 }, { "epoch": 0.27, "grad_norm": 1.0731701166616474, "learning_rate": 8.588024155620405e-06, "loss": 0.214, "step": 1987 }, { "epoch": 0.27, "grad_norm": 0.8742680880597794, "learning_rate": 8.586502915498938e-06, "loss": 0.205, "step": 1988 }, { "epoch": 0.27, "grad_norm": 0.9843910551220715, "learning_rate": 8.584980991228298e-06, "loss": 0.171, "step": 1989 }, { "epoch": 0.27, "grad_norm": 1.1124452285903583, "learning_rate": 8.583458383098803e-06, "loss": 0.2261, "step": 1990 }, { "epoch": 0.27, "grad_norm": 0.8641976497855357, "learning_rate": 8.581935091400898e-06, "loss": 0.1941, "step": 1991 }, { "epoch": 0.27, "grad_norm": 1.377612764852815, "learning_rate": 8.580411116425167e-06, "loss": 0.2453, "step": 1992 }, { "epoch": 0.27, "grad_norm": 1.1121492712162273, "learning_rate": 8.578886458462312e-06, "loss": 0.2018, "step": 1993 }, { "epoch": 0.27, "grad_norm": 0.868504321229707, "learning_rate": 8.577361117803174e-06, "loss": 0.1581, "step": 1994 }, { "epoch": 0.27, "grad_norm": 1.211883475194471, "learning_rate": 8.575835094738722e-06, "loss": 0.2708, "step": 1995 }, { "epoch": 0.27, "grad_norm": 1.1382046812552828, "learning_rate": 8.574308389560055e-06, "loss": 0.2213, "step": 1996 }, { "epoch": 0.27, "grad_norm": 1.0428991502414753, "learning_rate": 8.572781002558402e-06, "loss": 0.2478, "step": 1997 }, { "epoch": 0.27, "grad_norm": 0.9065203317041074, "learning_rate": 8.571252934025122e-06, "loss": 0.147, "step": 1998 }, { "epoch": 0.27, "grad_norm": 0.894437592196295, "learning_rate": 8.569724184251705e-06, "loss": 0.1581, "step": 1999 }, { "epoch": 0.27, "grad_norm": 0.994408458538145, "learning_rate": 8.568194753529769e-06, "loss": 0.1882, "step": 2000 }, { "epoch": 0.27, "grad_norm": 1.1498868051839164, "learning_rate": 8.566664642151061e-06, "loss": 0.1746, "step": 2001 }, { "epoch": 0.27, "grad_norm": 0.9466504893824678, "learning_rate": 8.565133850407466e-06, "loss": 0.1698, "step": 2002 }, { "epoch": 0.27, "grad_norm": 0.9295187407268806, "learning_rate": 8.563602378590987e-06, "loss": 0.2448, "step": 2003 }, { "epoch": 0.27, "grad_norm": 0.9484081477821281, "learning_rate": 8.562070226993764e-06, "loss": 0.1447, "step": 2004 }, { "epoch": 0.27, "grad_norm": 0.8568356678545455, "learning_rate": 8.560537395908067e-06, "loss": 0.1586, "step": 2005 }, { "epoch": 0.27, "grad_norm": 0.8146675552109632, "learning_rate": 8.55900388562629e-06, "loss": 0.183, "step": 2006 }, { "epoch": 0.27, "grad_norm": 1.136452491548966, "learning_rate": 8.557469696440963e-06, "loss": 0.2342, "step": 2007 }, { "epoch": 0.27, "grad_norm": 1.3164102096800907, "learning_rate": 8.555934828644743e-06, "loss": 0.2111, "step": 2008 }, { "epoch": 0.27, "grad_norm": 1.0685643835260215, "learning_rate": 8.554399282530414e-06, "loss": 0.23, "step": 2009 }, { "epoch": 0.27, "grad_norm": 0.9089065263163626, "learning_rate": 8.552863058390895e-06, "loss": 0.1559, "step": 2010 }, { "epoch": 0.27, "grad_norm": 1.1787899167080997, "learning_rate": 8.551326156519227e-06, "loss": 0.2347, "step": 2011 }, { "epoch": 0.27, "grad_norm": 0.6607855089409741, "learning_rate": 8.549788577208587e-06, "loss": 0.1663, "step": 2012 }, { "epoch": 0.27, "grad_norm": 1.3091561227500434, "learning_rate": 8.548250320752276e-06, "loss": 0.2513, "step": 2013 }, { "epoch": 0.27, "grad_norm": 1.2410518924005798, "learning_rate": 8.54671138744373e-06, "loss": 0.2189, "step": 2014 }, { "epoch": 0.27, "grad_norm": 1.0823444376603235, "learning_rate": 8.545171777576506e-06, "loss": 0.2096, "step": 2015 }, { "epoch": 0.27, "grad_norm": 1.3242166353640032, "learning_rate": 8.543631491444302e-06, "loss": 0.2631, "step": 2016 }, { "epoch": 0.27, "grad_norm": 0.6270691885352067, "learning_rate": 8.54209052934093e-06, "loss": 0.1268, "step": 2017 }, { "epoch": 0.27, "grad_norm": 1.0555907929290167, "learning_rate": 8.540548891560341e-06, "loss": 0.1849, "step": 2018 }, { "epoch": 0.27, "grad_norm": 1.027233791010255, "learning_rate": 8.539006578396616e-06, "loss": 0.1475, "step": 2019 }, { "epoch": 0.27, "grad_norm": 0.6816821754057859, "learning_rate": 8.537463590143958e-06, "loss": 0.135, "step": 2020 }, { "epoch": 0.27, "grad_norm": 0.906843750415452, "learning_rate": 8.535919927096703e-06, "loss": 0.225, "step": 2021 }, { "epoch": 0.27, "grad_norm": 1.1773169306260034, "learning_rate": 8.534375589549315e-06, "loss": 0.2386, "step": 2022 }, { "epoch": 0.27, "grad_norm": 1.3516612463332225, "learning_rate": 8.532830577796389e-06, "loss": 0.2542, "step": 2023 }, { "epoch": 0.27, "grad_norm": 1.0661947811454242, "learning_rate": 8.531284892132643e-06, "loss": 0.2109, "step": 2024 }, { "epoch": 0.27, "grad_norm": 1.2295638840830654, "learning_rate": 8.529738532852928e-06, "loss": 0.2236, "step": 2025 }, { "epoch": 0.27, "grad_norm": 0.9583619106103646, "learning_rate": 8.52819150025222e-06, "loss": 0.213, "step": 2026 }, { "epoch": 0.27, "grad_norm": 0.9743675238617364, "learning_rate": 8.526643794625628e-06, "loss": 0.1763, "step": 2027 }, { "epoch": 0.27, "grad_norm": 1.007182873655389, "learning_rate": 8.52509541626839e-06, "loss": 0.1962, "step": 2028 }, { "epoch": 0.27, "grad_norm": 0.9184664125563002, "learning_rate": 8.523546365475865e-06, "loss": 0.1725, "step": 2029 }, { "epoch": 0.27, "grad_norm": 1.0996527145852992, "learning_rate": 8.521996642543544e-06, "loss": 0.2059, "step": 2030 }, { "epoch": 0.27, "grad_norm": 0.9537688313346405, "learning_rate": 8.52044624776705e-06, "loss": 0.1924, "step": 2031 }, { "epoch": 0.27, "grad_norm": 0.8416750653537654, "learning_rate": 8.518895181442133e-06, "loss": 0.1952, "step": 2032 }, { "epoch": 0.27, "grad_norm": 1.0710569471983904, "learning_rate": 8.517343443864663e-06, "loss": 0.1756, "step": 2033 }, { "epoch": 0.27, "grad_norm": 0.9830754652782011, "learning_rate": 8.515791035330647e-06, "loss": 0.1685, "step": 2034 }, { "epoch": 0.27, "grad_norm": 0.7985965655100863, "learning_rate": 8.51423795613622e-06, "loss": 0.2072, "step": 2035 }, { "epoch": 0.27, "grad_norm": 1.1895126242532097, "learning_rate": 8.512684206577638e-06, "loss": 0.2056, "step": 2036 }, { "epoch": 0.27, "grad_norm": 1.3455448758693533, "learning_rate": 8.51112978695129e-06, "loss": 0.257, "step": 2037 }, { "epoch": 0.27, "grad_norm": 1.224906104691444, "learning_rate": 8.509574697553695e-06, "loss": 0.2635, "step": 2038 }, { "epoch": 0.27, "grad_norm": 0.8814652851647444, "learning_rate": 8.508018938681493e-06, "loss": 0.1572, "step": 2039 }, { "epoch": 0.28, "grad_norm": 0.9537238536317335, "learning_rate": 8.506462510631456e-06, "loss": 0.2155, "step": 2040 }, { "epoch": 0.28, "grad_norm": 1.1775442658689665, "learning_rate": 8.504905413700484e-06, "loss": 0.1812, "step": 2041 }, { "epoch": 0.28, "grad_norm": 0.8891481434014032, "learning_rate": 8.503347648185602e-06, "loss": 0.1823, "step": 2042 }, { "epoch": 0.28, "grad_norm": 1.2306856285310386, "learning_rate": 8.501789214383967e-06, "loss": 0.1866, "step": 2043 }, { "epoch": 0.28, "grad_norm": 0.8751652789540881, "learning_rate": 8.500230112592856e-06, "loss": 0.1902, "step": 2044 }, { "epoch": 0.28, "grad_norm": 1.1031770629966293, "learning_rate": 8.498670343109682e-06, "loss": 0.2309, "step": 2045 }, { "epoch": 0.28, "grad_norm": 1.0852301300940856, "learning_rate": 8.49710990623198e-06, "loss": 0.2058, "step": 2046 }, { "epoch": 0.28, "grad_norm": 0.7939582315932869, "learning_rate": 8.495548802257415e-06, "loss": 0.1644, "step": 2047 }, { "epoch": 0.28, "grad_norm": 1.2004857535959648, "learning_rate": 8.493987031483774e-06, "loss": 0.1791, "step": 2048 }, { "epoch": 0.28, "grad_norm": 0.9220256070986392, "learning_rate": 8.49242459420898e-06, "loss": 0.1692, "step": 2049 }, { "epoch": 0.28, "grad_norm": 1.1306981433521022, "learning_rate": 8.490861490731074e-06, "loss": 0.213, "step": 2050 }, { "epoch": 0.28, "grad_norm": 0.7740709431478887, "learning_rate": 8.48929772134823e-06, "loss": 0.1376, "step": 2051 }, { "epoch": 0.28, "grad_norm": 1.1308567145863244, "learning_rate": 8.48773328635875e-06, "loss": 0.2293, "step": 2052 }, { "epoch": 0.28, "grad_norm": 0.9487192206204771, "learning_rate": 8.486168186061057e-06, "loss": 0.2113, "step": 2053 }, { "epoch": 0.28, "grad_norm": 1.4272480071539562, "learning_rate": 8.484602420753705e-06, "loss": 0.2246, "step": 2054 }, { "epoch": 0.28, "grad_norm": 1.0978506131285124, "learning_rate": 8.483035990735374e-06, "loss": 0.1976, "step": 2055 }, { "epoch": 0.28, "grad_norm": 1.0198435792990055, "learning_rate": 8.48146889630487e-06, "loss": 0.2092, "step": 2056 }, { "epoch": 0.28, "grad_norm": 0.7929565993118171, "learning_rate": 8.479901137761128e-06, "loss": 0.1848, "step": 2057 }, { "epoch": 0.28, "grad_norm": 0.7824889683430029, "learning_rate": 8.47833271540321e-06, "loss": 0.1788, "step": 2058 }, { "epoch": 0.28, "grad_norm": 1.0353188576930867, "learning_rate": 8.4767636295303e-06, "loss": 0.2116, "step": 2059 }, { "epoch": 0.28, "grad_norm": 1.0253872907651664, "learning_rate": 8.475193880441715e-06, "loss": 0.2112, "step": 2060 }, { "epoch": 0.28, "grad_norm": 0.8974391024289713, "learning_rate": 8.473623468436888e-06, "loss": 0.2103, "step": 2061 }, { "epoch": 0.28, "grad_norm": 1.0175711128258655, "learning_rate": 8.472052393815394e-06, "loss": 0.1921, "step": 2062 }, { "epoch": 0.28, "grad_norm": 1.1698863977422596, "learning_rate": 8.47048065687692e-06, "loss": 0.1913, "step": 2063 }, { "epoch": 0.28, "grad_norm": 1.2919025748927975, "learning_rate": 8.468908257921288e-06, "loss": 0.2578, "step": 2064 }, { "epoch": 0.28, "grad_norm": 1.030631987505418, "learning_rate": 8.467335197248442e-06, "loss": 0.1729, "step": 2065 }, { "epoch": 0.28, "grad_norm": 0.9823951551021479, "learning_rate": 8.465761475158456e-06, "loss": 0.1663, "step": 2066 }, { "epoch": 0.28, "grad_norm": 0.6912203852332345, "learning_rate": 8.464187091951524e-06, "loss": 0.1495, "step": 2067 }, { "epoch": 0.28, "grad_norm": 1.005505771682938, "learning_rate": 8.462612047927974e-06, "loss": 0.2236, "step": 2068 }, { "epoch": 0.28, "grad_norm": 0.9854057787743936, "learning_rate": 8.461036343388254e-06, "loss": 0.1968, "step": 2069 }, { "epoch": 0.28, "grad_norm": 1.0842812639358081, "learning_rate": 8.45945997863294e-06, "loss": 0.2443, "step": 2070 }, { "epoch": 0.28, "grad_norm": 1.1883505325115087, "learning_rate": 8.457882953962735e-06, "loss": 0.224, "step": 2071 }, { "epoch": 0.28, "grad_norm": 0.8504447556465586, "learning_rate": 8.456305269678466e-06, "loss": 0.1842, "step": 2072 }, { "epoch": 0.28, "grad_norm": 0.6446930259409397, "learning_rate": 8.45472692608109e-06, "loss": 0.1148, "step": 2073 }, { "epoch": 0.28, "grad_norm": 1.3042396875181173, "learning_rate": 8.453147923471681e-06, "loss": 0.2458, "step": 2074 }, { "epoch": 0.28, "grad_norm": 0.8516262467781878, "learning_rate": 8.451568262151447e-06, "loss": 0.1511, "step": 2075 }, { "epoch": 0.28, "grad_norm": 1.198409002029336, "learning_rate": 8.449987942421718e-06, "loss": 0.2305, "step": 2076 }, { "epoch": 0.28, "grad_norm": 0.8181944075925733, "learning_rate": 8.448406964583953e-06, "loss": 0.1326, "step": 2077 }, { "epoch": 0.28, "grad_norm": 1.0781378172678426, "learning_rate": 8.446825328939731e-06, "loss": 0.2217, "step": 2078 }, { "epoch": 0.28, "grad_norm": 1.204379558193334, "learning_rate": 8.445243035790762e-06, "loss": 0.2787, "step": 2079 }, { "epoch": 0.28, "grad_norm": 0.607389295061974, "learning_rate": 8.443660085438876e-06, "loss": 0.1581, "step": 2080 }, { "epoch": 0.28, "grad_norm": 1.0162316707615406, "learning_rate": 8.442076478186033e-06, "loss": 0.1745, "step": 2081 }, { "epoch": 0.28, "grad_norm": 0.8971027455879684, "learning_rate": 8.440492214334318e-06, "loss": 0.1876, "step": 2082 }, { "epoch": 0.28, "grad_norm": 1.1619001731420728, "learning_rate": 8.438907294185936e-06, "loss": 0.1953, "step": 2083 }, { "epoch": 0.28, "grad_norm": 0.9030736676129841, "learning_rate": 8.437321718043223e-06, "loss": 0.1476, "step": 2084 }, { "epoch": 0.28, "grad_norm": 1.0311547837973922, "learning_rate": 8.43573548620864e-06, "loss": 0.1975, "step": 2085 }, { "epoch": 0.28, "grad_norm": 1.0010982724599153, "learning_rate": 8.434148598984769e-06, "loss": 0.1857, "step": 2086 }, { "epoch": 0.28, "grad_norm": 0.9635341413349177, "learning_rate": 8.432561056674318e-06, "loss": 0.1771, "step": 2087 }, { "epoch": 0.28, "grad_norm": 1.0153793569343534, "learning_rate": 8.430972859580124e-06, "loss": 0.2004, "step": 2088 }, { "epoch": 0.28, "grad_norm": 0.9917662340450992, "learning_rate": 8.429384008005146e-06, "loss": 0.162, "step": 2089 }, { "epoch": 0.28, "grad_norm": 1.4968991991235743, "learning_rate": 8.427794502252466e-06, "loss": 0.3151, "step": 2090 }, { "epoch": 0.28, "grad_norm": 0.9533342554082688, "learning_rate": 8.426204342625295e-06, "loss": 0.1857, "step": 2091 }, { "epoch": 0.28, "grad_norm": 1.1639844035678464, "learning_rate": 8.424613529426964e-06, "loss": 0.196, "step": 2092 }, { "epoch": 0.28, "grad_norm": 0.9316628958281509, "learning_rate": 8.423022062960934e-06, "loss": 0.1961, "step": 2093 }, { "epoch": 0.28, "grad_norm": 1.0092498680004418, "learning_rate": 8.421429943530786e-06, "loss": 0.2343, "step": 2094 }, { "epoch": 0.28, "grad_norm": 1.0292648275758112, "learning_rate": 8.419837171440226e-06, "loss": 0.1882, "step": 2095 }, { "epoch": 0.28, "grad_norm": 1.1290195325447587, "learning_rate": 8.418243746993087e-06, "loss": 0.2062, "step": 2096 }, { "epoch": 0.28, "grad_norm": 1.1402199320248103, "learning_rate": 8.416649670493326e-06, "loss": 0.1929, "step": 2097 }, { "epoch": 0.28, "grad_norm": 0.9069918480382964, "learning_rate": 8.415054942245025e-06, "loss": 0.2142, "step": 2098 }, { "epoch": 0.28, "grad_norm": 1.032761758193651, "learning_rate": 8.413459562552385e-06, "loss": 0.2159, "step": 2099 }, { "epoch": 0.28, "grad_norm": 1.1708123867692994, "learning_rate": 8.411863531719739e-06, "loss": 0.2348, "step": 2100 }, { "epoch": 0.28, "grad_norm": 0.9549449088917619, "learning_rate": 8.410266850051539e-06, "loss": 0.1868, "step": 2101 }, { "epoch": 0.28, "grad_norm": 0.6826461289342649, "learning_rate": 8.40866951785236e-06, "loss": 0.1421, "step": 2102 }, { "epoch": 0.28, "grad_norm": 0.9760367114584734, "learning_rate": 8.407071535426907e-06, "loss": 0.1851, "step": 2103 }, { "epoch": 0.28, "grad_norm": 0.9229048858117024, "learning_rate": 8.405472903080007e-06, "loss": 0.1899, "step": 2104 }, { "epoch": 0.28, "grad_norm": 1.2138508107119301, "learning_rate": 8.403873621116608e-06, "loss": 0.2524, "step": 2105 }, { "epoch": 0.28, "grad_norm": 1.3601868910060384, "learning_rate": 8.40227368984178e-06, "loss": 0.2072, "step": 2106 }, { "epoch": 0.28, "grad_norm": 0.9368571852907944, "learning_rate": 8.400673109560724e-06, "loss": 0.1863, "step": 2107 }, { "epoch": 0.28, "grad_norm": 1.2759749999119687, "learning_rate": 8.399071880578763e-06, "loss": 0.2047, "step": 2108 }, { "epoch": 0.28, "grad_norm": 1.3927423787247357, "learning_rate": 8.397470003201339e-06, "loss": 0.275, "step": 2109 }, { "epoch": 0.28, "grad_norm": 1.0006318089621322, "learning_rate": 8.395867477734023e-06, "loss": 0.2095, "step": 2110 }, { "epoch": 0.28, "grad_norm": 1.3845631014312223, "learning_rate": 8.394264304482505e-06, "loss": 0.2565, "step": 2111 }, { "epoch": 0.28, "grad_norm": 1.1291132255885314, "learning_rate": 8.3926604837526e-06, "loss": 0.1672, "step": 2112 }, { "epoch": 0.28, "grad_norm": 1.1879100933448277, "learning_rate": 8.391056015850251e-06, "loss": 0.1969, "step": 2113 }, { "epoch": 0.29, "grad_norm": 1.157212056844958, "learning_rate": 8.38945090108152e-06, "loss": 0.1953, "step": 2114 }, { "epoch": 0.29, "grad_norm": 0.9121009280373578, "learning_rate": 8.387845139752592e-06, "loss": 0.1784, "step": 2115 }, { "epoch": 0.29, "grad_norm": 1.0503942952325562, "learning_rate": 8.386238732169777e-06, "loss": 0.2226, "step": 2116 }, { "epoch": 0.29, "grad_norm": 0.8481931279410719, "learning_rate": 8.384631678639508e-06, "loss": 0.2121, "step": 2117 }, { "epoch": 0.29, "grad_norm": 1.0845021726862571, "learning_rate": 8.383023979468341e-06, "loss": 0.2349, "step": 2118 }, { "epoch": 0.29, "grad_norm": 1.3135883770682046, "learning_rate": 8.381415634962957e-06, "loss": 0.2246, "step": 2119 }, { "epoch": 0.29, "grad_norm": 1.177236133785637, "learning_rate": 8.379806645430155e-06, "loss": 0.228, "step": 2120 }, { "epoch": 0.29, "grad_norm": 1.1743999799511216, "learning_rate": 8.378197011176863e-06, "loss": 0.2299, "step": 2121 }, { "epoch": 0.29, "grad_norm": 1.0301760326431628, "learning_rate": 8.376586732510127e-06, "loss": 0.2352, "step": 2122 }, { "epoch": 0.29, "grad_norm": 0.9816770757315344, "learning_rate": 8.374975809737121e-06, "loss": 0.2095, "step": 2123 }, { "epoch": 0.29, "grad_norm": 0.9478696137284703, "learning_rate": 8.373364243165138e-06, "loss": 0.2008, "step": 2124 }, { "epoch": 0.29, "grad_norm": 1.0875572578828445, "learning_rate": 8.371752033101594e-06, "loss": 0.1933, "step": 2125 }, { "epoch": 0.29, "grad_norm": 0.9980537759770316, "learning_rate": 8.370139179854032e-06, "loss": 0.2175, "step": 2126 }, { "epoch": 0.29, "grad_norm": 0.9334440613287731, "learning_rate": 8.36852568373011e-06, "loss": 0.1801, "step": 2127 }, { "epoch": 0.29, "grad_norm": 1.2995319118279243, "learning_rate": 8.366911545037617e-06, "loss": 0.2527, "step": 2128 }, { "epoch": 0.29, "grad_norm": 1.0504647150006032, "learning_rate": 8.365296764084458e-06, "loss": 0.1483, "step": 2129 }, { "epoch": 0.29, "grad_norm": 1.1688104086618796, "learning_rate": 8.363681341178665e-06, "loss": 0.216, "step": 2130 }, { "epoch": 0.29, "grad_norm": 1.0654965907335177, "learning_rate": 8.362065276628387e-06, "loss": 0.2468, "step": 2131 }, { "epoch": 0.29, "grad_norm": 1.1184838988447174, "learning_rate": 8.360448570741903e-06, "loss": 0.1713, "step": 2132 }, { "epoch": 0.29, "grad_norm": 0.9377381946019561, "learning_rate": 8.358831223827611e-06, "loss": 0.1949, "step": 2133 }, { "epoch": 0.29, "grad_norm": 1.3360003846949062, "learning_rate": 8.357213236194027e-06, "loss": 0.2802, "step": 2134 }, { "epoch": 0.29, "grad_norm": 1.2956091077720964, "learning_rate": 8.355594608149794e-06, "loss": 0.2756, "step": 2135 }, { "epoch": 0.29, "grad_norm": 0.8561303617245075, "learning_rate": 8.353975340003677e-06, "loss": 0.1839, "step": 2136 }, { "epoch": 0.29, "grad_norm": 1.1084994214187498, "learning_rate": 8.352355432064563e-06, "loss": 0.2635, "step": 2137 }, { "epoch": 0.29, "grad_norm": 0.6413410704482605, "learning_rate": 8.350734884641457e-06, "loss": 0.1375, "step": 2138 }, { "epoch": 0.29, "grad_norm": 0.896328992123526, "learning_rate": 8.349113698043492e-06, "loss": 0.1982, "step": 2139 }, { "epoch": 0.29, "grad_norm": 1.1717254158867774, "learning_rate": 8.347491872579916e-06, "loss": 0.2114, "step": 2140 }, { "epoch": 0.29, "grad_norm": 0.9292744891810452, "learning_rate": 8.34586940856011e-06, "loss": 0.2102, "step": 2141 }, { "epoch": 0.29, "grad_norm": 1.1171861906924359, "learning_rate": 8.344246306293564e-06, "loss": 0.2226, "step": 2142 }, { "epoch": 0.29, "grad_norm": 1.0930157714729685, "learning_rate": 8.342622566089897e-06, "loss": 0.2342, "step": 2143 }, { "epoch": 0.29, "grad_norm": 1.2593278094074942, "learning_rate": 8.340998188258849e-06, "loss": 0.2195, "step": 2144 }, { "epoch": 0.29, "grad_norm": 1.1544048223347412, "learning_rate": 8.33937317311028e-06, "loss": 0.2222, "step": 2145 }, { "epoch": 0.29, "grad_norm": 0.9408133225311955, "learning_rate": 8.337747520954174e-06, "loss": 0.1643, "step": 2146 }, { "epoch": 0.29, "grad_norm": 1.0459033573133925, "learning_rate": 8.336121232100631e-06, "loss": 0.2027, "step": 2147 }, { "epoch": 0.29, "grad_norm": 0.7560717398783314, "learning_rate": 8.334494306859882e-06, "loss": 0.1413, "step": 2148 }, { "epoch": 0.29, "grad_norm": 1.0329919634914546, "learning_rate": 8.332866745542267e-06, "loss": 0.2488, "step": 2149 }, { "epoch": 0.29, "grad_norm": 0.9357437778574349, "learning_rate": 8.33123854845826e-06, "loss": 0.2094, "step": 2150 }, { "epoch": 0.29, "grad_norm": 0.8628132895545215, "learning_rate": 8.329609715918448e-06, "loss": 0.1509, "step": 2151 }, { "epoch": 0.29, "grad_norm": 1.2977898536219303, "learning_rate": 8.327980248233541e-06, "loss": 0.2377, "step": 2152 }, { "epoch": 0.29, "grad_norm": 1.0105442371702493, "learning_rate": 8.326350145714372e-06, "loss": 0.205, "step": 2153 }, { "epoch": 0.29, "grad_norm": 1.1534626907685037, "learning_rate": 8.324719408671893e-06, "loss": 0.2225, "step": 2154 }, { "epoch": 0.29, "grad_norm": 1.095552389974664, "learning_rate": 8.32308803741718e-06, "loss": 0.2437, "step": 2155 }, { "epoch": 0.29, "grad_norm": 1.0364710847571041, "learning_rate": 8.321456032261423e-06, "loss": 0.173, "step": 2156 }, { "epoch": 0.29, "grad_norm": 0.9266516557682718, "learning_rate": 8.319823393515943e-06, "loss": 0.193, "step": 2157 }, { "epoch": 0.29, "grad_norm": 1.0508847823003926, "learning_rate": 8.318190121492174e-06, "loss": 0.1974, "step": 2158 }, { "epoch": 0.29, "grad_norm": 1.11304544034215, "learning_rate": 8.316556216501674e-06, "loss": 0.2162, "step": 2159 }, { "epoch": 0.29, "grad_norm": 1.0514993703075277, "learning_rate": 8.31492167885612e-06, "loss": 0.2244, "step": 2160 }, { "epoch": 0.29, "grad_norm": 1.0838646458291894, "learning_rate": 8.313286508867314e-06, "loss": 0.1417, "step": 2161 }, { "epoch": 0.29, "grad_norm": 1.1047922122552807, "learning_rate": 8.311650706847172e-06, "loss": 0.1906, "step": 2162 }, { "epoch": 0.29, "grad_norm": 0.9452268967783143, "learning_rate": 8.310014273107735e-06, "loss": 0.2163, "step": 2163 }, { "epoch": 0.29, "grad_norm": 0.9063587016980882, "learning_rate": 8.308377207961166e-06, "loss": 0.2081, "step": 2164 }, { "epoch": 0.29, "grad_norm": 0.46291218696199904, "learning_rate": 8.306739511719745e-06, "loss": 0.1256, "step": 2165 }, { "epoch": 0.29, "grad_norm": 1.11655754717091, "learning_rate": 8.30510118469587e-06, "loss": 0.2524, "step": 2166 }, { "epoch": 0.29, "grad_norm": 1.2280038776555644, "learning_rate": 8.303462227202069e-06, "loss": 0.2394, "step": 2167 }, { "epoch": 0.29, "grad_norm": 0.6362539345600617, "learning_rate": 8.301822639550976e-06, "loss": 0.1531, "step": 2168 }, { "epoch": 0.29, "grad_norm": 0.9883033482091266, "learning_rate": 8.300182422055358e-06, "loss": 0.2011, "step": 2169 }, { "epoch": 0.29, "grad_norm": 0.9805711880479415, "learning_rate": 8.298541575028099e-06, "loss": 0.1635, "step": 2170 }, { "epoch": 0.29, "grad_norm": 0.600871032284938, "learning_rate": 8.296900098782197e-06, "loss": 0.1501, "step": 2171 }, { "epoch": 0.29, "grad_norm": 1.2716774357818486, "learning_rate": 8.295257993630778e-06, "loss": 0.2316, "step": 2172 }, { "epoch": 0.29, "grad_norm": 1.0672648696201783, "learning_rate": 8.293615259887085e-06, "loss": 0.2252, "step": 2173 }, { "epoch": 0.29, "grad_norm": 0.7528412316884567, "learning_rate": 8.291971897864475e-06, "loss": 0.1994, "step": 2174 }, { "epoch": 0.29, "grad_norm": 1.2641631015209451, "learning_rate": 8.290327907876437e-06, "loss": 0.2243, "step": 2175 }, { "epoch": 0.29, "grad_norm": 1.0194503091081621, "learning_rate": 8.288683290236567e-06, "loss": 0.1862, "step": 2176 }, { "epoch": 0.29, "grad_norm": 0.8671374328766818, "learning_rate": 8.287038045258591e-06, "loss": 0.1279, "step": 2177 }, { "epoch": 0.29, "grad_norm": 1.1406651779533552, "learning_rate": 8.285392173256348e-06, "loss": 0.1967, "step": 2178 }, { "epoch": 0.29, "grad_norm": 1.027605555908166, "learning_rate": 8.283745674543798e-06, "loss": 0.1675, "step": 2179 }, { "epoch": 0.29, "grad_norm": 1.1386709601053016, "learning_rate": 8.282098549435026e-06, "loss": 0.195, "step": 2180 }, { "epoch": 0.29, "grad_norm": 0.97179890238985, "learning_rate": 8.280450798244227e-06, "loss": 0.1875, "step": 2181 }, { "epoch": 0.29, "grad_norm": 0.8629062351789024, "learning_rate": 8.278802421285726e-06, "loss": 0.1507, "step": 2182 }, { "epoch": 0.29, "grad_norm": 0.8251024530739629, "learning_rate": 8.277153418873956e-06, "loss": 0.1639, "step": 2183 }, { "epoch": 0.29, "grad_norm": 1.0076829897083344, "learning_rate": 8.275503791323476e-06, "loss": 0.1667, "step": 2184 }, { "epoch": 0.29, "grad_norm": 0.9235913960296773, "learning_rate": 8.273853538948969e-06, "loss": 0.1507, "step": 2185 }, { "epoch": 0.29, "grad_norm": 1.2343223398150562, "learning_rate": 8.272202662065225e-06, "loss": 0.2585, "step": 2186 }, { "epoch": 0.29, "grad_norm": 0.8771120730814458, "learning_rate": 8.270551160987161e-06, "loss": 0.142, "step": 2187 }, { "epoch": 0.3, "grad_norm": 1.075361585575739, "learning_rate": 8.268899036029816e-06, "loss": 0.2387, "step": 2188 }, { "epoch": 0.3, "grad_norm": 0.9200708567116188, "learning_rate": 8.26724628750834e-06, "loss": 0.1446, "step": 2189 }, { "epoch": 0.3, "grad_norm": 1.204593667340598, "learning_rate": 8.265592915738006e-06, "loss": 0.1939, "step": 2190 }, { "epoch": 0.3, "grad_norm": 1.079451342683517, "learning_rate": 8.263938921034206e-06, "loss": 0.2049, "step": 2191 }, { "epoch": 0.3, "grad_norm": 1.3314767249926893, "learning_rate": 8.26228430371245e-06, "loss": 0.2524, "step": 2192 }, { "epoch": 0.3, "grad_norm": 1.027950821765022, "learning_rate": 8.26062906408837e-06, "loss": 0.1862, "step": 2193 }, { "epoch": 0.3, "grad_norm": 1.0696516968922556, "learning_rate": 8.25897320247771e-06, "loss": 0.2141, "step": 2194 }, { "epoch": 0.3, "grad_norm": 0.9268463422850302, "learning_rate": 8.25731671919634e-06, "loss": 0.1785, "step": 2195 }, { "epoch": 0.3, "grad_norm": 0.9568213186108524, "learning_rate": 8.255659614560241e-06, "loss": 0.1664, "step": 2196 }, { "epoch": 0.3, "grad_norm": 0.7855335029884315, "learning_rate": 8.25400188888552e-06, "loss": 0.1346, "step": 2197 }, { "epoch": 0.3, "grad_norm": 1.1003398841709715, "learning_rate": 8.252343542488397e-06, "loss": 0.2323, "step": 2198 }, { "epoch": 0.3, "grad_norm": 1.0404913236378173, "learning_rate": 8.250684575685214e-06, "loss": 0.1926, "step": 2199 }, { "epoch": 0.3, "grad_norm": 0.9276251519125138, "learning_rate": 8.249024988792433e-06, "loss": 0.2053, "step": 2200 }, { "epoch": 0.3, "grad_norm": 0.6964253064516434, "learning_rate": 8.247364782126625e-06, "loss": 0.1524, "step": 2201 }, { "epoch": 0.3, "grad_norm": 1.0201232542444862, "learning_rate": 8.24570395600449e-06, "loss": 0.2491, "step": 2202 }, { "epoch": 0.3, "grad_norm": 0.9647316368238521, "learning_rate": 8.244042510742838e-06, "loss": 0.1661, "step": 2203 }, { "epoch": 0.3, "grad_norm": 0.8401868570761994, "learning_rate": 8.242380446658602e-06, "loss": 0.1342, "step": 2204 }, { "epoch": 0.3, "grad_norm": 0.9377789220931526, "learning_rate": 8.240717764068835e-06, "loss": 0.231, "step": 2205 }, { "epoch": 0.3, "grad_norm": 1.1547510107759391, "learning_rate": 8.239054463290701e-06, "loss": 0.1934, "step": 2206 }, { "epoch": 0.3, "grad_norm": 1.148402310315943, "learning_rate": 8.237390544641485e-06, "loss": 0.2161, "step": 2207 }, { "epoch": 0.3, "grad_norm": 1.0783584387212832, "learning_rate": 8.235726008438594e-06, "loss": 0.1838, "step": 2208 }, { "epoch": 0.3, "grad_norm": 0.6728763474999111, "learning_rate": 8.234060854999546e-06, "loss": 0.1338, "step": 2209 }, { "epoch": 0.3, "grad_norm": 1.0256516301263843, "learning_rate": 8.232395084641982e-06, "loss": 0.1528, "step": 2210 }, { "epoch": 0.3, "grad_norm": 0.7140594613291507, "learning_rate": 8.230728697683658e-06, "loss": 0.1651, "step": 2211 }, { "epoch": 0.3, "grad_norm": 0.7403565266502637, "learning_rate": 8.229061694442451e-06, "loss": 0.1834, "step": 2212 }, { "epoch": 0.3, "grad_norm": 0.8781943557943832, "learning_rate": 8.227394075236347e-06, "loss": 0.1308, "step": 2213 }, { "epoch": 0.3, "grad_norm": 0.9632484691072696, "learning_rate": 8.22572584038346e-06, "loss": 0.1681, "step": 2214 }, { "epoch": 0.3, "grad_norm": 0.9865672605209224, "learning_rate": 8.224056990202016e-06, "loss": 0.1585, "step": 2215 }, { "epoch": 0.3, "grad_norm": 0.914609306796269, "learning_rate": 8.222387525010357e-06, "loss": 0.1526, "step": 2216 }, { "epoch": 0.3, "grad_norm": 0.9916696019494328, "learning_rate": 8.22071744512695e-06, "loss": 0.2009, "step": 2217 }, { "epoch": 0.3, "grad_norm": 1.1813677726654375, "learning_rate": 8.219046750870367e-06, "loss": 0.2165, "step": 2218 }, { "epoch": 0.3, "grad_norm": 1.1370880225356776, "learning_rate": 8.21737544255931e-06, "loss": 0.1836, "step": 2219 }, { "epoch": 0.3, "grad_norm": 1.54660953112403, "learning_rate": 8.215703520512588e-06, "loss": 0.3344, "step": 2220 }, { "epoch": 0.3, "grad_norm": 0.9144064997880809, "learning_rate": 8.214030985049131e-06, "loss": 0.1977, "step": 2221 }, { "epoch": 0.3, "grad_norm": 0.9517972335106583, "learning_rate": 8.21235783648799e-06, "loss": 0.1229, "step": 2222 }, { "epoch": 0.3, "grad_norm": 1.1743576627434067, "learning_rate": 8.210684075148325e-06, "loss": 0.2278, "step": 2223 }, { "epoch": 0.3, "grad_norm": 0.8607666934059566, "learning_rate": 8.20900970134942e-06, "loss": 0.1833, "step": 2224 }, { "epoch": 0.3, "grad_norm": 0.5880782276347337, "learning_rate": 8.207334715410671e-06, "loss": 0.1439, "step": 2225 }, { "epoch": 0.3, "grad_norm": 0.8751033401120727, "learning_rate": 8.205659117651594e-06, "loss": 0.1484, "step": 2226 }, { "epoch": 0.3, "grad_norm": 1.0477296152432438, "learning_rate": 8.203982908391821e-06, "loss": 0.2107, "step": 2227 }, { "epoch": 0.3, "grad_norm": 1.06918575689938, "learning_rate": 8.202306087951098e-06, "loss": 0.2067, "step": 2228 }, { "epoch": 0.3, "grad_norm": 1.1483728147898322, "learning_rate": 8.20062865664929e-06, "loss": 0.1955, "step": 2229 }, { "epoch": 0.3, "grad_norm": 0.9731235502300674, "learning_rate": 8.198950614806378e-06, "loss": 0.1819, "step": 2230 }, { "epoch": 0.3, "grad_norm": 0.766139183068051, "learning_rate": 8.197271962742463e-06, "loss": 0.1721, "step": 2231 }, { "epoch": 0.3, "grad_norm": 0.7798250414378838, "learning_rate": 8.195592700777755e-06, "loss": 0.1091, "step": 2232 }, { "epoch": 0.3, "grad_norm": 0.9589274117442927, "learning_rate": 8.193912829232585e-06, "loss": 0.1664, "step": 2233 }, { "epoch": 0.3, "grad_norm": 0.812140528118853, "learning_rate": 8.1922323484274e-06, "loss": 0.156, "step": 2234 }, { "epoch": 0.3, "grad_norm": 1.0299781729968773, "learning_rate": 8.190551258682761e-06, "loss": 0.2161, "step": 2235 }, { "epoch": 0.3, "grad_norm": 1.1274118719317008, "learning_rate": 8.188869560319353e-06, "loss": 0.1999, "step": 2236 }, { "epoch": 0.3, "grad_norm": 0.6314067135791346, "learning_rate": 8.187187253657965e-06, "loss": 0.1386, "step": 2237 }, { "epoch": 0.3, "grad_norm": 0.9813418178525695, "learning_rate": 8.18550433901951e-06, "loss": 0.2126, "step": 2238 }, { "epoch": 0.3, "grad_norm": 1.0693064080366987, "learning_rate": 8.183820816725015e-06, "loss": 0.2138, "step": 2239 }, { "epoch": 0.3, "grad_norm": 1.0156369026062158, "learning_rate": 8.182136687095625e-06, "loss": 0.2357, "step": 2240 }, { "epoch": 0.3, "grad_norm": 1.1322877290276059, "learning_rate": 8.180451950452595e-06, "loss": 0.2096, "step": 2241 }, { "epoch": 0.3, "grad_norm": 1.0856564508924773, "learning_rate": 8.178766607117303e-06, "loss": 0.1944, "step": 2242 }, { "epoch": 0.3, "grad_norm": 0.9933243616832047, "learning_rate": 8.177080657411238e-06, "loss": 0.1857, "step": 2243 }, { "epoch": 0.3, "grad_norm": 0.7110954796848553, "learning_rate": 8.175394101656007e-06, "loss": 0.1443, "step": 2244 }, { "epoch": 0.3, "grad_norm": 0.903329915722252, "learning_rate": 8.173706940173331e-06, "loss": 0.1521, "step": 2245 }, { "epoch": 0.3, "grad_norm": 1.2084116418671134, "learning_rate": 8.172019173285046e-06, "loss": 0.2563, "step": 2246 }, { "epoch": 0.3, "grad_norm": 1.124100392018233, "learning_rate": 8.170330801313106e-06, "loss": 0.1769, "step": 2247 }, { "epoch": 0.3, "grad_norm": 0.9997579735117957, "learning_rate": 8.16864182457958e-06, "loss": 0.2105, "step": 2248 }, { "epoch": 0.3, "grad_norm": 0.838953834420982, "learning_rate": 8.16695224340665e-06, "loss": 0.1167, "step": 2249 }, { "epoch": 0.3, "grad_norm": 0.8022931399414237, "learning_rate": 8.165262058116617e-06, "loss": 0.1419, "step": 2250 }, { "epoch": 0.3, "grad_norm": 0.9496443464727633, "learning_rate": 8.16357126903189e-06, "loss": 0.182, "step": 2251 }, { "epoch": 0.3, "grad_norm": 1.213728402372826, "learning_rate": 8.161879876475006e-06, "loss": 0.2355, "step": 2252 }, { "epoch": 0.3, "grad_norm": 0.9830721799428025, "learning_rate": 8.160187880768602e-06, "loss": 0.1819, "step": 2253 }, { "epoch": 0.3, "grad_norm": 1.1724173131103592, "learning_rate": 8.15849528223544e-06, "loss": 0.2182, "step": 2254 }, { "epoch": 0.3, "grad_norm": 0.9418261562593166, "learning_rate": 8.156802081198396e-06, "loss": 0.2115, "step": 2255 }, { "epoch": 0.3, "grad_norm": 1.057334616417378, "learning_rate": 8.155108277980456e-06, "loss": 0.1848, "step": 2256 }, { "epoch": 0.3, "grad_norm": 0.8250724999191061, "learning_rate": 8.153413872904728e-06, "loss": 0.1884, "step": 2257 }, { "epoch": 0.3, "grad_norm": 1.001520118704844, "learning_rate": 8.151718866294429e-06, "loss": 0.1916, "step": 2258 }, { "epoch": 0.3, "grad_norm": 1.0537827331386813, "learning_rate": 8.150023258472893e-06, "loss": 0.2099, "step": 2259 }, { "epoch": 0.3, "grad_norm": 1.0813018022532674, "learning_rate": 8.148327049763569e-06, "loss": 0.1562, "step": 2260 }, { "epoch": 0.3, "grad_norm": 0.8069208663461622, "learning_rate": 8.146630240490018e-06, "loss": 0.2044, "step": 2261 }, { "epoch": 0.31, "grad_norm": 0.9459465749749674, "learning_rate": 8.14493283097592e-06, "loss": 0.2164, "step": 2262 }, { "epoch": 0.31, "grad_norm": 0.84640600809592, "learning_rate": 8.143234821545063e-06, "loss": 0.1931, "step": 2263 }, { "epoch": 0.31, "grad_norm": 0.9519293490357946, "learning_rate": 8.14153621252136e-06, "loss": 0.227, "step": 2264 }, { "epoch": 0.31, "grad_norm": 1.0084275134722331, "learning_rate": 8.139837004228828e-06, "loss": 0.1993, "step": 2265 }, { "epoch": 0.31, "grad_norm": 1.1504973097506954, "learning_rate": 8.138137196991602e-06, "loss": 0.1814, "step": 2266 }, { "epoch": 0.31, "grad_norm": 1.1267452024544347, "learning_rate": 8.136436791133932e-06, "loss": 0.2006, "step": 2267 }, { "epoch": 0.31, "grad_norm": 0.8861560806565022, "learning_rate": 8.13473578698018e-06, "loss": 0.1969, "step": 2268 }, { "epoch": 0.31, "grad_norm": 1.0763100572931472, "learning_rate": 8.133034184854828e-06, "loss": 0.1674, "step": 2269 }, { "epoch": 0.31, "grad_norm": 0.9844768190056123, "learning_rate": 8.131331985082466e-06, "loss": 0.1822, "step": 2270 }, { "epoch": 0.31, "grad_norm": 1.235466299357091, "learning_rate": 8.129629187987799e-06, "loss": 0.2015, "step": 2271 }, { "epoch": 0.31, "grad_norm": 1.060476478597213, "learning_rate": 8.127925793895647e-06, "loss": 0.1992, "step": 2272 }, { "epoch": 0.31, "grad_norm": 1.0310623923537072, "learning_rate": 8.126221803130944e-06, "loss": 0.2116, "step": 2273 }, { "epoch": 0.31, "grad_norm": 1.1994031270474312, "learning_rate": 8.124517216018738e-06, "loss": 0.2211, "step": 2274 }, { "epoch": 0.31, "grad_norm": 0.8271171869109314, "learning_rate": 8.122812032884188e-06, "loss": 0.1795, "step": 2275 }, { "epoch": 0.31, "grad_norm": 1.0834028244877305, "learning_rate": 8.121106254052571e-06, "loss": 0.186, "step": 2276 }, { "epoch": 0.31, "grad_norm": 1.0151854003796195, "learning_rate": 8.119399879849277e-06, "loss": 0.1585, "step": 2277 }, { "epoch": 0.31, "grad_norm": 0.8269065084743129, "learning_rate": 8.117692910599805e-06, "loss": 0.1746, "step": 2278 }, { "epoch": 0.31, "grad_norm": 0.9140710048085571, "learning_rate": 8.115985346629774e-06, "loss": 0.1376, "step": 2279 }, { "epoch": 0.31, "grad_norm": 1.0229528488935287, "learning_rate": 8.114277188264909e-06, "loss": 0.1979, "step": 2280 }, { "epoch": 0.31, "grad_norm": 0.8182465394457685, "learning_rate": 8.112568435831057e-06, "loss": 0.1588, "step": 2281 }, { "epoch": 0.31, "grad_norm": 0.8713364449779797, "learning_rate": 8.11085908965417e-06, "loss": 0.1513, "step": 2282 }, { "epoch": 0.31, "grad_norm": 0.950597702958833, "learning_rate": 8.10914915006032e-06, "loss": 0.1809, "step": 2283 }, { "epoch": 0.31, "grad_norm": 0.9336158360753143, "learning_rate": 8.107438617375687e-06, "loss": 0.1326, "step": 2284 }, { "epoch": 0.31, "grad_norm": 1.0020485437097177, "learning_rate": 8.105727491926568e-06, "loss": 0.2028, "step": 2285 }, { "epoch": 0.31, "grad_norm": 0.9294228417072202, "learning_rate": 8.10401577403937e-06, "loss": 0.2101, "step": 2286 }, { "epoch": 0.31, "grad_norm": 1.0551059935350728, "learning_rate": 8.102303464040615e-06, "loss": 0.1721, "step": 2287 }, { "epoch": 0.31, "grad_norm": 0.8002456182535358, "learning_rate": 8.100590562256941e-06, "loss": 0.1718, "step": 2288 }, { "epoch": 0.31, "grad_norm": 0.685898954815334, "learning_rate": 8.09887706901509e-06, "loss": 0.1881, "step": 2289 }, { "epoch": 0.31, "grad_norm": 0.9223903361797338, "learning_rate": 8.097162984641927e-06, "loss": 0.1012, "step": 2290 }, { "epoch": 0.31, "grad_norm": 1.0657433509121568, "learning_rate": 8.095448309464417e-06, "loss": 0.2535, "step": 2291 }, { "epoch": 0.31, "grad_norm": 0.7552544638302718, "learning_rate": 8.093733043809656e-06, "loss": 0.1296, "step": 2292 }, { "epoch": 0.31, "grad_norm": 1.0508381071221844, "learning_rate": 8.092017188004837e-06, "loss": 0.2258, "step": 2293 }, { "epoch": 0.31, "grad_norm": 0.8986133331940236, "learning_rate": 8.090300742377269e-06, "loss": 0.1528, "step": 2294 }, { "epoch": 0.31, "grad_norm": 0.6762325784355255, "learning_rate": 8.088583707254379e-06, "loss": 0.1496, "step": 2295 }, { "epoch": 0.31, "grad_norm": 1.0918771696850438, "learning_rate": 8.086866082963702e-06, "loss": 0.1697, "step": 2296 }, { "epoch": 0.31, "grad_norm": 0.8786738060378387, "learning_rate": 8.085147869832884e-06, "loss": 0.1807, "step": 2297 }, { "epoch": 0.31, "grad_norm": 1.1191804226890123, "learning_rate": 8.083429068189688e-06, "loss": 0.2155, "step": 2298 }, { "epoch": 0.31, "grad_norm": 1.341506085910093, "learning_rate": 8.081709678361986e-06, "loss": 0.312, "step": 2299 }, { "epoch": 0.31, "grad_norm": 1.2956057374765844, "learning_rate": 8.079989700677762e-06, "loss": 0.2137, "step": 2300 }, { "epoch": 0.31, "grad_norm": 1.0679446230736978, "learning_rate": 8.078269135465114e-06, "loss": 0.1658, "step": 2301 }, { "epoch": 0.31, "grad_norm": 0.8978706476749345, "learning_rate": 8.076547983052252e-06, "loss": 0.2017, "step": 2302 }, { "epoch": 0.31, "grad_norm": 0.8762232580870323, "learning_rate": 8.074826243767497e-06, "loss": 0.1929, "step": 2303 }, { "epoch": 0.31, "grad_norm": 1.2023799051507404, "learning_rate": 8.07310391793928e-06, "loss": 0.2257, "step": 2304 }, { "epoch": 0.31, "grad_norm": 0.8930059827167441, "learning_rate": 8.07138100589615e-06, "loss": 0.1609, "step": 2305 }, { "epoch": 0.31, "grad_norm": 1.062902497664649, "learning_rate": 8.069657507966759e-06, "loss": 0.2483, "step": 2306 }, { "epoch": 0.31, "grad_norm": 1.205468803581842, "learning_rate": 8.067933424479882e-06, "loss": 0.2504, "step": 2307 }, { "epoch": 0.31, "grad_norm": 1.0787037869005491, "learning_rate": 8.066208755764391e-06, "loss": 0.2036, "step": 2308 }, { "epoch": 0.31, "grad_norm": 1.162873988601957, "learning_rate": 8.064483502149285e-06, "loss": 0.2216, "step": 2309 }, { "epoch": 0.31, "grad_norm": 1.1635037976133231, "learning_rate": 8.062757663963665e-06, "loss": 0.1649, "step": 2310 }, { "epoch": 0.31, "grad_norm": 1.035798078766979, "learning_rate": 8.061031241536745e-06, "loss": 0.2032, "step": 2311 }, { "epoch": 0.31, "grad_norm": 1.114509863269153, "learning_rate": 8.059304235197853e-06, "loss": 0.2279, "step": 2312 }, { "epoch": 0.31, "grad_norm": 1.0494255806148676, "learning_rate": 8.057576645276428e-06, "loss": 0.2005, "step": 2313 }, { "epoch": 0.31, "grad_norm": 0.9523983752832498, "learning_rate": 8.055848472102015e-06, "loss": 0.1826, "step": 2314 }, { "epoch": 0.31, "grad_norm": 1.0752978697612667, "learning_rate": 8.054119716004279e-06, "loss": 0.1732, "step": 2315 }, { "epoch": 0.31, "grad_norm": 0.7778857077184407, "learning_rate": 8.05239037731299e-06, "loss": 0.1735, "step": 2316 }, { "epoch": 0.31, "grad_norm": 0.706122550806684, "learning_rate": 8.050660456358031e-06, "loss": 0.1329, "step": 2317 }, { "epoch": 0.31, "grad_norm": 0.9050315004334504, "learning_rate": 8.048929953469394e-06, "loss": 0.2022, "step": 2318 }, { "epoch": 0.31, "grad_norm": 1.0546961326635733, "learning_rate": 8.047198868977187e-06, "loss": 0.1789, "step": 2319 }, { "epoch": 0.31, "grad_norm": 0.8795432928772389, "learning_rate": 8.045467203211623e-06, "loss": 0.1523, "step": 2320 }, { "epoch": 0.31, "grad_norm": 1.1423464595753097, "learning_rate": 8.04373495650303e-06, "loss": 0.2373, "step": 2321 }, { "epoch": 0.31, "grad_norm": 1.2220184216622463, "learning_rate": 8.042002129181844e-06, "loss": 0.2425, "step": 2322 }, { "epoch": 0.31, "grad_norm": 0.8469105471789272, "learning_rate": 8.040268721578619e-06, "loss": 0.1132, "step": 2323 }, { "epoch": 0.31, "grad_norm": 1.021835515055419, "learning_rate": 8.038534734024008e-06, "loss": 0.2258, "step": 2324 }, { "epoch": 0.31, "grad_norm": 0.9587618183238347, "learning_rate": 8.036800166848781e-06, "loss": 0.2198, "step": 2325 }, { "epoch": 0.31, "grad_norm": 1.075340271406541, "learning_rate": 8.03506502038382e-06, "loss": 0.2288, "step": 2326 }, { "epoch": 0.31, "grad_norm": 0.6019555696347267, "learning_rate": 8.033329294960116e-06, "loss": 0.1051, "step": 2327 }, { "epoch": 0.31, "grad_norm": 1.053513761483285, "learning_rate": 8.031592990908769e-06, "loss": 0.2033, "step": 2328 }, { "epoch": 0.31, "grad_norm": 0.9490452921000121, "learning_rate": 8.02985610856099e-06, "loss": 0.1382, "step": 2329 }, { "epoch": 0.31, "grad_norm": 1.094057100785234, "learning_rate": 8.028118648248102e-06, "loss": 0.2374, "step": 2330 }, { "epoch": 0.31, "grad_norm": 0.8460114292550422, "learning_rate": 8.026380610301537e-06, "loss": 0.1802, "step": 2331 }, { "epoch": 0.31, "grad_norm": 0.8565540692119484, "learning_rate": 8.024641995052837e-06, "loss": 0.1708, "step": 2332 }, { "epoch": 0.31, "grad_norm": 1.0548190483370201, "learning_rate": 8.022902802833655e-06, "loss": 0.2069, "step": 2333 }, { "epoch": 0.31, "grad_norm": 0.8647164956064042, "learning_rate": 8.02116303397575e-06, "loss": 0.1465, "step": 2334 }, { "epoch": 0.31, "grad_norm": 0.9850540199981374, "learning_rate": 8.019422688811e-06, "loss": 0.2614, "step": 2335 }, { "epoch": 0.31, "grad_norm": 1.3592602964900535, "learning_rate": 8.017681767671382e-06, "loss": 0.2582, "step": 2336 }, { "epoch": 0.32, "grad_norm": 1.1340579184908186, "learning_rate": 8.015940270888992e-06, "loss": 0.2205, "step": 2337 }, { "epoch": 0.32, "grad_norm": 1.076511436789397, "learning_rate": 8.01419819879603e-06, "loss": 0.1516, "step": 2338 }, { "epoch": 0.32, "grad_norm": 1.0307741744241292, "learning_rate": 8.01245555172481e-06, "loss": 0.2178, "step": 2339 }, { "epoch": 0.32, "grad_norm": 0.8162032751032494, "learning_rate": 8.010712330007752e-06, "loss": 0.1367, "step": 2340 }, { "epoch": 0.32, "grad_norm": 0.9088378723933039, "learning_rate": 8.008968533977386e-06, "loss": 0.1502, "step": 2341 }, { "epoch": 0.32, "grad_norm": 1.1280857538919171, "learning_rate": 8.007224163966353e-06, "loss": 0.2112, "step": 2342 }, { "epoch": 0.32, "grad_norm": 1.0862308970500194, "learning_rate": 8.005479220307405e-06, "loss": 0.2046, "step": 2343 }, { "epoch": 0.32, "grad_norm": 1.3863605349702721, "learning_rate": 8.003733703333402e-06, "loss": 0.2646, "step": 2344 }, { "epoch": 0.32, "grad_norm": 0.9706236513545669, "learning_rate": 8.00198761337731e-06, "loss": 0.2118, "step": 2345 }, { "epoch": 0.32, "grad_norm": 0.9871121520876748, "learning_rate": 8.000240950772208e-06, "loss": 0.1787, "step": 2346 }, { "epoch": 0.32, "grad_norm": 0.7819099274809233, "learning_rate": 7.998493715851283e-06, "loss": 0.1564, "step": 2347 }, { "epoch": 0.32, "grad_norm": 0.8724080813077161, "learning_rate": 7.996745908947834e-06, "loss": 0.1769, "step": 2348 }, { "epoch": 0.32, "grad_norm": 1.2031448214622709, "learning_rate": 7.994997530395266e-06, "loss": 0.2669, "step": 2349 }, { "epoch": 0.32, "grad_norm": 0.8780463009364584, "learning_rate": 7.993248580527093e-06, "loss": 0.2003, "step": 2350 }, { "epoch": 0.32, "grad_norm": 0.9711861978962493, "learning_rate": 7.991499059676937e-06, "loss": 0.1751, "step": 2351 }, { "epoch": 0.32, "grad_norm": 1.1273470995505042, "learning_rate": 7.989748968178536e-06, "loss": 0.2431, "step": 2352 }, { "epoch": 0.32, "grad_norm": 1.0611702101247467, "learning_rate": 7.987998306365725e-06, "loss": 0.2012, "step": 2353 }, { "epoch": 0.32, "grad_norm": 0.9225746742320041, "learning_rate": 7.986247074572457e-06, "loss": 0.2135, "step": 2354 }, { "epoch": 0.32, "grad_norm": 0.7666784183249867, "learning_rate": 7.984495273132795e-06, "loss": 0.1356, "step": 2355 }, { "epoch": 0.32, "grad_norm": 0.8857298692556056, "learning_rate": 7.982742902380901e-06, "loss": 0.1745, "step": 2356 }, { "epoch": 0.32, "grad_norm": 1.0413502694543366, "learning_rate": 7.980989962651054e-06, "loss": 0.1803, "step": 2357 }, { "epoch": 0.32, "grad_norm": 0.8361358731319376, "learning_rate": 7.979236454277637e-06, "loss": 0.137, "step": 2358 }, { "epoch": 0.32, "grad_norm": 0.9238188085228582, "learning_rate": 7.977482377595147e-06, "loss": 0.1766, "step": 2359 }, { "epoch": 0.32, "grad_norm": 0.9224114790572554, "learning_rate": 7.975727732938183e-06, "loss": 0.1404, "step": 2360 }, { "epoch": 0.32, "grad_norm": 0.9194154670896808, "learning_rate": 7.973972520641454e-06, "loss": 0.1854, "step": 2361 }, { "epoch": 0.32, "grad_norm": 0.6780653290262262, "learning_rate": 7.972216741039781e-06, "loss": 0.1223, "step": 2362 }, { "epoch": 0.32, "grad_norm": 0.8782508209568837, "learning_rate": 7.970460394468088e-06, "loss": 0.1621, "step": 2363 }, { "epoch": 0.32, "grad_norm": 0.9262643382055853, "learning_rate": 7.968703481261414e-06, "loss": 0.1461, "step": 2364 }, { "epoch": 0.32, "grad_norm": 0.730793152333362, "learning_rate": 7.966946001754899e-06, "loss": 0.1729, "step": 2365 }, { "epoch": 0.32, "grad_norm": 1.0646549153935234, "learning_rate": 7.965187956283791e-06, "loss": 0.2345, "step": 2366 }, { "epoch": 0.32, "grad_norm": 1.3440770745420596, "learning_rate": 7.963429345183455e-06, "loss": 0.2437, "step": 2367 }, { "epoch": 0.32, "grad_norm": 1.079135911373999, "learning_rate": 7.961670168789353e-06, "loss": 0.1833, "step": 2368 }, { "epoch": 0.32, "grad_norm": 0.8013734107679173, "learning_rate": 7.95991042743706e-06, "loss": 0.1649, "step": 2369 }, { "epoch": 0.32, "grad_norm": 1.1632545482994772, "learning_rate": 7.95815012146226e-06, "loss": 0.2167, "step": 2370 }, { "epoch": 0.32, "grad_norm": 1.1449168561195702, "learning_rate": 7.956389251200742e-06, "loss": 0.2141, "step": 2371 }, { "epoch": 0.32, "grad_norm": 1.0665605408437047, "learning_rate": 7.954627816988405e-06, "loss": 0.2012, "step": 2372 }, { "epoch": 0.32, "grad_norm": 1.0630924267351818, "learning_rate": 7.952865819161253e-06, "loss": 0.2267, "step": 2373 }, { "epoch": 0.32, "grad_norm": 1.364600007342432, "learning_rate": 7.951103258055398e-06, "loss": 0.2459, "step": 2374 }, { "epoch": 0.32, "grad_norm": 1.098092256682196, "learning_rate": 7.949340134007061e-06, "loss": 0.2045, "step": 2375 }, { "epoch": 0.32, "grad_norm": 0.8059605805311861, "learning_rate": 7.947576447352571e-06, "loss": 0.1748, "step": 2376 }, { "epoch": 0.32, "grad_norm": 0.8900642732706798, "learning_rate": 7.945812198428361e-06, "loss": 0.2014, "step": 2377 }, { "epoch": 0.32, "grad_norm": 0.9777438876192389, "learning_rate": 7.944047387570974e-06, "loss": 0.2243, "step": 2378 }, { "epoch": 0.32, "grad_norm": 0.8400652998211581, "learning_rate": 7.94228201511706e-06, "loss": 0.1356, "step": 2379 }, { "epoch": 0.32, "grad_norm": 1.0358595069992182, "learning_rate": 7.940516081403373e-06, "loss": 0.1934, "step": 2380 }, { "epoch": 0.32, "grad_norm": 1.1807714733736934, "learning_rate": 7.938749586766778e-06, "loss": 0.23, "step": 2381 }, { "epoch": 0.32, "grad_norm": 0.9246765439690922, "learning_rate": 7.936982531544248e-06, "loss": 0.2262, "step": 2382 }, { "epoch": 0.32, "grad_norm": 1.1946005861886884, "learning_rate": 7.935214916072856e-06, "loss": 0.2211, "step": 2383 }, { "epoch": 0.32, "grad_norm": 1.2115717190331012, "learning_rate": 7.93344674068979e-06, "loss": 0.2157, "step": 2384 }, { "epoch": 0.32, "grad_norm": 0.8679539857092076, "learning_rate": 7.93167800573234e-06, "loss": 0.1702, "step": 2385 }, { "epoch": 0.32, "grad_norm": 1.0388874595553887, "learning_rate": 7.929908711537902e-06, "loss": 0.2076, "step": 2386 }, { "epoch": 0.32, "grad_norm": 1.0734999866485089, "learning_rate": 7.928138858443984e-06, "loss": 0.195, "step": 2387 }, { "epoch": 0.32, "grad_norm": 1.0982874320957747, "learning_rate": 7.926368446788192e-06, "loss": 0.2459, "step": 2388 }, { "epoch": 0.32, "grad_norm": 0.857893707861552, "learning_rate": 7.92459747690825e-06, "loss": 0.1578, "step": 2389 }, { "epoch": 0.32, "grad_norm": 0.9246761479352688, "learning_rate": 7.92282594914198e-06, "loss": 0.17, "step": 2390 }, { "epoch": 0.32, "grad_norm": 0.876576562840626, "learning_rate": 7.921053863827311e-06, "loss": 0.2148, "step": 2391 }, { "epoch": 0.32, "grad_norm": 0.8330634864455151, "learning_rate": 7.919281221302279e-06, "loss": 0.1767, "step": 2392 }, { "epoch": 0.32, "grad_norm": 0.8507800714330225, "learning_rate": 7.917508021905033e-06, "loss": 0.1756, "step": 2393 }, { "epoch": 0.32, "grad_norm": 0.9635351455820275, "learning_rate": 7.915734265973818e-06, "loss": 0.2405, "step": 2394 }, { "epoch": 0.32, "grad_norm": 0.995774633850403, "learning_rate": 7.913959953846989e-06, "loss": 0.1982, "step": 2395 }, { "epoch": 0.32, "grad_norm": 1.322627333630704, "learning_rate": 7.912185085863009e-06, "loss": 0.2548, "step": 2396 }, { "epoch": 0.32, "grad_norm": 1.0782000776442393, "learning_rate": 7.910409662360448e-06, "loss": 0.1819, "step": 2397 }, { "epoch": 0.32, "grad_norm": 1.0835774968155412, "learning_rate": 7.908633683677976e-06, "loss": 0.2026, "step": 2398 }, { "epoch": 0.32, "grad_norm": 0.8758797004249126, "learning_rate": 7.906857150154377e-06, "loss": 0.1612, "step": 2399 }, { "epoch": 0.32, "grad_norm": 1.1798346223098863, "learning_rate": 7.905080062128532e-06, "loss": 0.2192, "step": 2400 }, { "epoch": 0.32, "grad_norm": 1.1152870256172602, "learning_rate": 7.903302419939436e-06, "loss": 0.242, "step": 2401 }, { "epoch": 0.32, "grad_norm": 1.0363314680886466, "learning_rate": 7.901524223926183e-06, "loss": 0.1709, "step": 2402 }, { "epoch": 0.32, "grad_norm": 0.8856845752633457, "learning_rate": 7.899745474427978e-06, "loss": 0.1769, "step": 2403 }, { "epoch": 0.32, "grad_norm": 0.8133988891410937, "learning_rate": 7.897966171784128e-06, "loss": 0.1444, "step": 2404 }, { "epoch": 0.32, "grad_norm": 1.007848332468057, "learning_rate": 7.896186316334047e-06, "loss": 0.1799, "step": 2405 }, { "epoch": 0.32, "grad_norm": 0.8777306739989317, "learning_rate": 7.894405908417256e-06, "loss": 0.1858, "step": 2406 }, { "epoch": 0.32, "grad_norm": 1.1212644400814125, "learning_rate": 7.892624948373375e-06, "loss": 0.1715, "step": 2407 }, { "epoch": 0.32, "grad_norm": 1.1128796950923365, "learning_rate": 7.890843436542138e-06, "loss": 0.2235, "step": 2408 }, { "epoch": 0.32, "grad_norm": 1.30961176536662, "learning_rate": 7.88906137326338e-06, "loss": 0.2296, "step": 2409 }, { "epoch": 0.32, "grad_norm": 1.041836199233003, "learning_rate": 7.88727875887704e-06, "loss": 0.1866, "step": 2410 }, { "epoch": 0.33, "grad_norm": 1.085873013193842, "learning_rate": 7.885495593723163e-06, "loss": 0.1861, "step": 2411 }, { "epoch": 0.33, "grad_norm": 1.2958323815465607, "learning_rate": 7.8837118781419e-06, "loss": 0.2487, "step": 2412 }, { "epoch": 0.33, "grad_norm": 1.0399667242019999, "learning_rate": 7.881927612473509e-06, "loss": 0.1882, "step": 2413 }, { "epoch": 0.33, "grad_norm": 0.7861500464382131, "learning_rate": 7.880142797058348e-06, "loss": 0.1713, "step": 2414 }, { "epoch": 0.33, "grad_norm": 0.9544938127528406, "learning_rate": 7.87835743223688e-06, "loss": 0.1975, "step": 2415 }, { "epoch": 0.33, "grad_norm": 0.770325580265094, "learning_rate": 7.876571518349682e-06, "loss": 0.143, "step": 2416 }, { "epoch": 0.33, "grad_norm": 0.9653385614901939, "learning_rate": 7.874785055737423e-06, "loss": 0.168, "step": 2417 }, { "epoch": 0.33, "grad_norm": 1.359363866876407, "learning_rate": 7.872998044740885e-06, "loss": 0.2481, "step": 2418 }, { "epoch": 0.33, "grad_norm": 0.7732545603607058, "learning_rate": 7.871210485700951e-06, "loss": 0.1155, "step": 2419 }, { "epoch": 0.33, "grad_norm": 0.9649554525538463, "learning_rate": 7.86942237895861e-06, "loss": 0.1834, "step": 2420 }, { "epoch": 0.33, "grad_norm": 0.8836140144086708, "learning_rate": 7.867633724854956e-06, "loss": 0.1338, "step": 2421 }, { "epoch": 0.33, "grad_norm": 0.9654234300512068, "learning_rate": 7.865844523731187e-06, "loss": 0.178, "step": 2422 }, { "epoch": 0.33, "grad_norm": 1.310502499659962, "learning_rate": 7.864054775928603e-06, "loss": 0.2716, "step": 2423 }, { "epoch": 0.33, "grad_norm": 0.6593168715655524, "learning_rate": 7.862264481788611e-06, "loss": 0.1438, "step": 2424 }, { "epoch": 0.33, "grad_norm": 1.164295588456566, "learning_rate": 7.860473641652724e-06, "loss": 0.2481, "step": 2425 }, { "epoch": 0.33, "grad_norm": 1.2452690442280814, "learning_rate": 7.85868225586255e-06, "loss": 0.2451, "step": 2426 }, { "epoch": 0.33, "grad_norm": 1.2856937980412066, "learning_rate": 7.856890324759817e-06, "loss": 0.2424, "step": 2427 }, { "epoch": 0.33, "grad_norm": 1.031847928911392, "learning_rate": 7.855097848686341e-06, "loss": 0.1917, "step": 2428 }, { "epoch": 0.33, "grad_norm": 1.1224779322492175, "learning_rate": 7.853304827984051e-06, "loss": 0.2252, "step": 2429 }, { "epoch": 0.33, "grad_norm": 0.9316150177559973, "learning_rate": 7.851511262994976e-06, "loss": 0.1541, "step": 2430 }, { "epoch": 0.33, "grad_norm": 0.8562839689554356, "learning_rate": 7.849717154061251e-06, "loss": 0.1818, "step": 2431 }, { "epoch": 0.33, "grad_norm": 0.9779225566524464, "learning_rate": 7.847922501525116e-06, "loss": 0.1886, "step": 2432 }, { "epoch": 0.33, "grad_norm": 1.1815189638810069, "learning_rate": 7.846127305728912e-06, "loss": 0.258, "step": 2433 }, { "epoch": 0.33, "grad_norm": 0.8194029884223379, "learning_rate": 7.844331567015083e-06, "loss": 0.1618, "step": 2434 }, { "epoch": 0.33, "grad_norm": 0.8702348737614874, "learning_rate": 7.842535285726179e-06, "loss": 0.1549, "step": 2435 }, { "epoch": 0.33, "grad_norm": 0.7491022052123658, "learning_rate": 7.840738462204853e-06, "loss": 0.1481, "step": 2436 }, { "epoch": 0.33, "grad_norm": 0.9417048621843412, "learning_rate": 7.83894109679386e-06, "loss": 0.2116, "step": 2437 }, { "epoch": 0.33, "grad_norm": 1.0390017376825342, "learning_rate": 7.837143189836061e-06, "loss": 0.2019, "step": 2438 }, { "epoch": 0.33, "grad_norm": 0.8601324744269842, "learning_rate": 7.835344741674415e-06, "loss": 0.1431, "step": 2439 }, { "epoch": 0.33, "grad_norm": 1.1263137880611092, "learning_rate": 7.833545752651993e-06, "loss": 0.2028, "step": 2440 }, { "epoch": 0.33, "grad_norm": 0.9948550994010357, "learning_rate": 7.83174622311196e-06, "loss": 0.1856, "step": 2441 }, { "epoch": 0.33, "grad_norm": 0.6580242504923062, "learning_rate": 7.829946153397592e-06, "loss": 0.131, "step": 2442 }, { "epoch": 0.33, "grad_norm": 1.180366312068499, "learning_rate": 7.82814554385226e-06, "loss": 0.1874, "step": 2443 }, { "epoch": 0.33, "grad_norm": 0.8961627144953566, "learning_rate": 7.826344394819443e-06, "loss": 0.162, "step": 2444 }, { "epoch": 0.33, "grad_norm": 1.072847049721341, "learning_rate": 7.824542706642724e-06, "loss": 0.2431, "step": 2445 }, { "epoch": 0.33, "grad_norm": 1.0991997437991172, "learning_rate": 7.822740479665786e-06, "loss": 0.1866, "step": 2446 }, { "epoch": 0.33, "grad_norm": 0.7879630480765036, "learning_rate": 7.820937714232418e-06, "loss": 0.1429, "step": 2447 }, { "epoch": 0.33, "grad_norm": 1.2823115464355592, "learning_rate": 7.819134410686505e-06, "loss": 0.2042, "step": 2448 }, { "epoch": 0.33, "grad_norm": 0.8585351912113849, "learning_rate": 7.817330569372043e-06, "loss": 0.1615, "step": 2449 }, { "epoch": 0.33, "grad_norm": 0.9708759188821241, "learning_rate": 7.815526190633124e-06, "loss": 0.2139, "step": 2450 }, { "epoch": 0.33, "grad_norm": 0.9014206615267103, "learning_rate": 7.813721274813946e-06, "loss": 0.2002, "step": 2451 }, { "epoch": 0.33, "grad_norm": 0.8781584006723137, "learning_rate": 7.81191582225881e-06, "loss": 0.2308, "step": 2452 }, { "epoch": 0.33, "grad_norm": 1.2886630193981286, "learning_rate": 7.810109833312116e-06, "loss": 0.2479, "step": 2453 }, { "epoch": 0.33, "grad_norm": 0.9824594627943716, "learning_rate": 7.808303308318371e-06, "loss": 0.2109, "step": 2454 }, { "epoch": 0.33, "grad_norm": 0.8041269171736151, "learning_rate": 7.806496247622177e-06, "loss": 0.1653, "step": 2455 }, { "epoch": 0.33, "grad_norm": 1.0361157348056695, "learning_rate": 7.804688651568247e-06, "loss": 0.2104, "step": 2456 }, { "epoch": 0.33, "grad_norm": 0.7954897603283365, "learning_rate": 7.80288052050139e-06, "loss": 0.1723, "step": 2457 }, { "epoch": 0.33, "grad_norm": 0.9374887656538512, "learning_rate": 7.80107185476652e-06, "loss": 0.1753, "step": 2458 }, { "epoch": 0.33, "grad_norm": 0.9988695309556025, "learning_rate": 7.799262654708653e-06, "loss": 0.2163, "step": 2459 }, { "epoch": 0.33, "grad_norm": 1.1854913497685624, "learning_rate": 7.797452920672902e-06, "loss": 0.1923, "step": 2460 }, { "epoch": 0.33, "grad_norm": 1.0419563787207728, "learning_rate": 7.79564265300449e-06, "loss": 0.1895, "step": 2461 }, { "epoch": 0.33, "grad_norm": 0.9349675776844178, "learning_rate": 7.793831852048734e-06, "loss": 0.2049, "step": 2462 }, { "epoch": 0.33, "grad_norm": 0.8004645074821134, "learning_rate": 7.79202051815106e-06, "loss": 0.1809, "step": 2463 }, { "epoch": 0.33, "grad_norm": 1.0267446778740834, "learning_rate": 7.790208651656987e-06, "loss": 0.2442, "step": 2464 }, { "epoch": 0.33, "grad_norm": 1.1536855438143556, "learning_rate": 7.788396252912145e-06, "loss": 0.1884, "step": 2465 }, { "epoch": 0.33, "grad_norm": 1.3533316708917116, "learning_rate": 7.78658332226226e-06, "loss": 0.2455, "step": 2466 }, { "epoch": 0.33, "grad_norm": 0.9499356237530023, "learning_rate": 7.784769860053159e-06, "loss": 0.2064, "step": 2467 }, { "epoch": 0.33, "grad_norm": 0.47378579721710057, "learning_rate": 7.782955866630774e-06, "loss": 0.1129, "step": 2468 }, { "epoch": 0.33, "grad_norm": 0.9835908834264799, "learning_rate": 7.781141342341133e-06, "loss": 0.1872, "step": 2469 }, { "epoch": 0.33, "grad_norm": 1.1579940062304819, "learning_rate": 7.779326287530373e-06, "loss": 0.2701, "step": 2470 }, { "epoch": 0.33, "grad_norm": 0.9682420808355241, "learning_rate": 7.777510702544724e-06, "loss": 0.1305, "step": 2471 }, { "epoch": 0.33, "grad_norm": 1.1596157892822583, "learning_rate": 7.775694587730523e-06, "loss": 0.2176, "step": 2472 }, { "epoch": 0.33, "grad_norm": 0.9137857395443876, "learning_rate": 7.773877943434205e-06, "loss": 0.1829, "step": 2473 }, { "epoch": 0.33, "grad_norm": 0.9883442688172587, "learning_rate": 7.772060770002308e-06, "loss": 0.1933, "step": 2474 }, { "epoch": 0.33, "grad_norm": 0.5980186989299213, "learning_rate": 7.770243067781469e-06, "loss": 0.1355, "step": 2475 }, { "epoch": 0.33, "grad_norm": 0.9600509703899313, "learning_rate": 7.768424837118424e-06, "loss": 0.2112, "step": 2476 }, { "epoch": 0.33, "grad_norm": 0.9939658712215663, "learning_rate": 7.766606078360017e-06, "loss": 0.2051, "step": 2477 }, { "epoch": 0.33, "grad_norm": 1.0415575628258684, "learning_rate": 7.764786791853186e-06, "loss": 0.1862, "step": 2478 }, { "epoch": 0.33, "grad_norm": 0.8762371063755043, "learning_rate": 7.762966977944973e-06, "loss": 0.1658, "step": 2479 }, { "epoch": 0.33, "grad_norm": 0.8200253622766234, "learning_rate": 7.761146636982519e-06, "loss": 0.1752, "step": 2480 }, { "epoch": 0.33, "grad_norm": 0.9059075406472024, "learning_rate": 7.759325769313067e-06, "loss": 0.1797, "step": 2481 }, { "epoch": 0.33, "grad_norm": 0.8094196182599945, "learning_rate": 7.757504375283957e-06, "loss": 0.1921, "step": 2482 }, { "epoch": 0.33, "grad_norm": 0.9849032873751608, "learning_rate": 7.755682455242633e-06, "loss": 0.2204, "step": 2483 }, { "epoch": 0.33, "grad_norm": 1.1001115290016754, "learning_rate": 7.75386000953664e-06, "loss": 0.2091, "step": 2484 }, { "epoch": 0.34, "grad_norm": 1.205398953900868, "learning_rate": 7.752037038513622e-06, "loss": 0.2292, "step": 2485 }, { "epoch": 0.34, "grad_norm": 0.8101228537554984, "learning_rate": 7.75021354252132e-06, "loss": 0.1969, "step": 2486 }, { "epoch": 0.34, "grad_norm": 0.8939558279673628, "learning_rate": 7.748389521907577e-06, "loss": 0.1607, "step": 2487 }, { "epoch": 0.34, "grad_norm": 0.9562953946297904, "learning_rate": 7.746564977020344e-06, "loss": 0.2412, "step": 2488 }, { "epoch": 0.34, "grad_norm": 1.238919997073506, "learning_rate": 7.744739908207656e-06, "loss": 0.2395, "step": 2489 }, { "epoch": 0.34, "grad_norm": 1.1140703436645192, "learning_rate": 7.742914315817664e-06, "loss": 0.185, "step": 2490 }, { "epoch": 0.34, "grad_norm": 0.8930378043819944, "learning_rate": 7.741088200198607e-06, "loss": 0.1884, "step": 2491 }, { "epoch": 0.34, "grad_norm": 1.0059960383454092, "learning_rate": 7.739261561698832e-06, "loss": 0.1869, "step": 2492 }, { "epoch": 0.34, "grad_norm": 0.9719317329139144, "learning_rate": 7.73743440066678e-06, "loss": 0.222, "step": 2493 }, { "epoch": 0.34, "grad_norm": 0.9562090029074628, "learning_rate": 7.735606717450997e-06, "loss": 0.1777, "step": 2494 }, { "epoch": 0.34, "grad_norm": 1.2855231228690032, "learning_rate": 7.73377851240012e-06, "loss": 0.2286, "step": 2495 }, { "epoch": 0.34, "grad_norm": 0.8089886642491668, "learning_rate": 7.7319497858629e-06, "loss": 0.1379, "step": 2496 }, { "epoch": 0.34, "grad_norm": 0.8640550098811857, "learning_rate": 7.73012053818817e-06, "loss": 0.173, "step": 2497 }, { "epoch": 0.34, "grad_norm": 0.975382125194293, "learning_rate": 7.728290769724876e-06, "loss": 0.1813, "step": 2498 }, { "epoch": 0.34, "grad_norm": 1.073200586785624, "learning_rate": 7.726460480822056e-06, "loss": 0.2006, "step": 2499 }, { "epoch": 0.34, "grad_norm": 0.999287784824526, "learning_rate": 7.724629671828854e-06, "loss": 0.1787, "step": 2500 }, { "epoch": 0.34, "grad_norm": 0.9606467072950612, "learning_rate": 7.722798343094505e-06, "loss": 0.1795, "step": 2501 }, { "epoch": 0.34, "grad_norm": 1.1447331574703805, "learning_rate": 7.720966494968346e-06, "loss": 0.2321, "step": 2502 }, { "epoch": 0.34, "grad_norm": 1.0243077474869624, "learning_rate": 7.719134127799816e-06, "loss": 0.2035, "step": 2503 }, { "epoch": 0.34, "grad_norm": 1.111762978453211, "learning_rate": 7.717301241938454e-06, "loss": 0.2222, "step": 2504 }, { "epoch": 0.34, "grad_norm": 0.7328318129312789, "learning_rate": 7.71546783773389e-06, "loss": 0.1522, "step": 2505 }, { "epoch": 0.34, "grad_norm": 0.7357700770475664, "learning_rate": 7.713633915535861e-06, "loss": 0.1701, "step": 2506 }, { "epoch": 0.34, "grad_norm": 1.6482438673463664, "learning_rate": 7.7117994756942e-06, "loss": 0.1475, "step": 2507 }, { "epoch": 0.34, "grad_norm": 0.7675869469893206, "learning_rate": 7.709964518558838e-06, "loss": 0.1589, "step": 2508 }, { "epoch": 0.34, "grad_norm": 0.9246215895707957, "learning_rate": 7.708129044479803e-06, "loss": 0.2047, "step": 2509 }, { "epoch": 0.34, "grad_norm": 1.04740206502982, "learning_rate": 7.706293053807228e-06, "loss": 0.1984, "step": 2510 }, { "epoch": 0.34, "grad_norm": 1.0223840910741568, "learning_rate": 7.704456546891337e-06, "loss": 0.2114, "step": 2511 }, { "epoch": 0.34, "grad_norm": 1.1702729816678852, "learning_rate": 7.702619524082458e-06, "loss": 0.2127, "step": 2512 }, { "epoch": 0.34, "grad_norm": 1.2106842197786745, "learning_rate": 7.700781985731013e-06, "loss": 0.1856, "step": 2513 }, { "epoch": 0.34, "grad_norm": 1.05742884850698, "learning_rate": 7.698943932187529e-06, "loss": 0.1629, "step": 2514 }, { "epoch": 0.34, "grad_norm": 0.9543189824954512, "learning_rate": 7.69710536380262e-06, "loss": 0.1658, "step": 2515 }, { "epoch": 0.34, "grad_norm": 0.931006677028311, "learning_rate": 7.69526628092701e-06, "loss": 0.1789, "step": 2516 }, { "epoch": 0.34, "grad_norm": 0.9665479026613295, "learning_rate": 7.693426683911516e-06, "loss": 0.1331, "step": 2517 }, { "epoch": 0.34, "grad_norm": 1.209468372287929, "learning_rate": 7.691586573107052e-06, "loss": 0.2349, "step": 2518 }, { "epoch": 0.34, "grad_norm": 1.0077037192925118, "learning_rate": 7.68974594886463e-06, "loss": 0.2031, "step": 2519 }, { "epoch": 0.34, "grad_norm": 0.8565203666738854, "learning_rate": 7.687904811535364e-06, "loss": 0.1976, "step": 2520 }, { "epoch": 0.34, "grad_norm": 1.100673202083589, "learning_rate": 7.686063161470463e-06, "loss": 0.2424, "step": 2521 }, { "epoch": 0.34, "grad_norm": 0.9843797711327092, "learning_rate": 7.684220999021234e-06, "loss": 0.1626, "step": 2522 }, { "epoch": 0.34, "grad_norm": 1.0694359829884565, "learning_rate": 7.682378324539079e-06, "loss": 0.2256, "step": 2523 }, { "epoch": 0.34, "grad_norm": 0.7594254125221145, "learning_rate": 7.680535138375503e-06, "loss": 0.1359, "step": 2524 }, { "epoch": 0.34, "grad_norm": 0.9914422327557196, "learning_rate": 7.678691440882104e-06, "loss": 0.1795, "step": 2525 }, { "epoch": 0.34, "grad_norm": 0.9188916301211716, "learning_rate": 7.676847232410582e-06, "loss": 0.2012, "step": 2526 }, { "epoch": 0.34, "grad_norm": 1.0503455759255365, "learning_rate": 7.675002513312727e-06, "loss": 0.1516, "step": 2527 }, { "epoch": 0.34, "grad_norm": 1.0184026136579512, "learning_rate": 7.673157283940438e-06, "loss": 0.1897, "step": 2528 }, { "epoch": 0.34, "grad_norm": 1.240114721121259, "learning_rate": 7.671311544645703e-06, "loss": 0.2263, "step": 2529 }, { "epoch": 0.34, "grad_norm": 1.0715436494192168, "learning_rate": 7.669465295780606e-06, "loss": 0.2006, "step": 2530 }, { "epoch": 0.34, "grad_norm": 1.277136500757425, "learning_rate": 7.667618537697332e-06, "loss": 0.265, "step": 2531 }, { "epoch": 0.34, "grad_norm": 0.9510023216211945, "learning_rate": 7.665771270748164e-06, "loss": 0.1278, "step": 2532 }, { "epoch": 0.34, "grad_norm": 0.6893838613359251, "learning_rate": 7.66392349528548e-06, "loss": 0.1868, "step": 2533 }, { "epoch": 0.34, "grad_norm": 1.1528883679483053, "learning_rate": 7.662075211661754e-06, "loss": 0.2378, "step": 2534 }, { "epoch": 0.34, "grad_norm": 0.9263070456631116, "learning_rate": 7.66022642022956e-06, "loss": 0.183, "step": 2535 }, { "epoch": 0.34, "grad_norm": 0.9432598708294494, "learning_rate": 7.658377121341564e-06, "loss": 0.1496, "step": 2536 }, { "epoch": 0.34, "grad_norm": 1.07269210671933, "learning_rate": 7.656527315350537e-06, "loss": 0.211, "step": 2537 }, { "epoch": 0.34, "grad_norm": 0.9576703157438666, "learning_rate": 7.65467700260934e-06, "loss": 0.1944, "step": 2538 }, { "epoch": 0.34, "grad_norm": 0.8313235216984847, "learning_rate": 7.65282618347093e-06, "loss": 0.1741, "step": 2539 }, { "epoch": 0.34, "grad_norm": 0.6107254389504282, "learning_rate": 7.650974858288364e-06, "loss": 0.0949, "step": 2540 }, { "epoch": 0.34, "grad_norm": 0.876466824418133, "learning_rate": 7.649123027414798e-06, "loss": 0.1882, "step": 2541 }, { "epoch": 0.34, "grad_norm": 1.331145621185843, "learning_rate": 7.647270691203476e-06, "loss": 0.2622, "step": 2542 }, { "epoch": 0.34, "grad_norm": 0.8132225013980352, "learning_rate": 7.645417850007745e-06, "loss": 0.1595, "step": 2543 }, { "epoch": 0.34, "grad_norm": 1.194206073998705, "learning_rate": 7.643564504181048e-06, "loss": 0.2148, "step": 2544 }, { "epoch": 0.34, "grad_norm": 1.0384158068720195, "learning_rate": 7.641710654076923e-06, "loss": 0.1786, "step": 2545 }, { "epoch": 0.34, "grad_norm": 1.0498450185285226, "learning_rate": 7.639856300049003e-06, "loss": 0.1814, "step": 2546 }, { "epoch": 0.34, "grad_norm": 0.9108888163836882, "learning_rate": 7.638001442451019e-06, "loss": 0.1616, "step": 2547 }, { "epoch": 0.34, "grad_norm": 0.876002484252067, "learning_rate": 7.636146081636797e-06, "loss": 0.1753, "step": 2548 }, { "epoch": 0.34, "grad_norm": 0.9401535284846605, "learning_rate": 7.634290217960258e-06, "loss": 0.1383, "step": 2549 }, { "epoch": 0.34, "grad_norm": 0.9151891918955153, "learning_rate": 7.632433851775422e-06, "loss": 0.2064, "step": 2550 }, { "epoch": 0.34, "grad_norm": 0.8338386369618562, "learning_rate": 7.630576983436404e-06, "loss": 0.1826, "step": 2551 }, { "epoch": 0.34, "grad_norm": 0.8272508605159983, "learning_rate": 7.628719613297414e-06, "loss": 0.1644, "step": 2552 }, { "epoch": 0.34, "grad_norm": 0.7178547010180232, "learning_rate": 7.626861741712755e-06, "loss": 0.1957, "step": 2553 }, { "epoch": 0.34, "grad_norm": 1.0128268462691266, "learning_rate": 7.625003369036832e-06, "loss": 0.1944, "step": 2554 }, { "epoch": 0.34, "grad_norm": 1.1889103136996255, "learning_rate": 7.623144495624136e-06, "loss": 0.2347, "step": 2555 }, { "epoch": 0.34, "grad_norm": 0.8464214105221063, "learning_rate": 7.621285121829265e-06, "loss": 0.1684, "step": 2556 }, { "epoch": 0.34, "grad_norm": 1.0335943302274293, "learning_rate": 7.6194252480069045e-06, "loss": 0.184, "step": 2557 }, { "epoch": 0.34, "grad_norm": 0.98082314055913, "learning_rate": 7.617564874511838e-06, "loss": 0.1962, "step": 2558 }, { "epoch": 0.35, "grad_norm": 1.0580456606074862, "learning_rate": 7.615704001698945e-06, "loss": 0.2451, "step": 2559 }, { "epoch": 0.35, "grad_norm": 1.1633424605025564, "learning_rate": 7.613842629923198e-06, "loss": 0.2612, "step": 2560 }, { "epoch": 0.35, "grad_norm": 1.0140774725516295, "learning_rate": 7.611980759539668e-06, "loss": 0.1979, "step": 2561 }, { "epoch": 0.35, "grad_norm": 1.0550838689990871, "learning_rate": 7.610118390903516e-06, "loss": 0.1756, "step": 2562 }, { "epoch": 0.35, "grad_norm": 0.7981994426478345, "learning_rate": 7.608255524370003e-06, "loss": 0.1762, "step": 2563 }, { "epoch": 0.35, "grad_norm": 0.9620810987387195, "learning_rate": 7.606392160294482e-06, "loss": 0.1868, "step": 2564 }, { "epoch": 0.35, "grad_norm": 0.7018145732163338, "learning_rate": 7.604528299032404e-06, "loss": 0.1336, "step": 2565 }, { "epoch": 0.35, "grad_norm": 0.8391379780983679, "learning_rate": 7.602663940939311e-06, "loss": 0.182, "step": 2566 }, { "epoch": 0.35, "grad_norm": 0.868863975097558, "learning_rate": 7.6007990863708425e-06, "loss": 0.1623, "step": 2567 }, { "epoch": 0.35, "grad_norm": 0.9993743731898556, "learning_rate": 7.598933735682734e-06, "loss": 0.1897, "step": 2568 }, { "epoch": 0.35, "grad_norm": 0.9438514593779093, "learning_rate": 7.59706788923081e-06, "loss": 0.1729, "step": 2569 }, { "epoch": 0.35, "grad_norm": 0.8399660724030051, "learning_rate": 7.595201547370994e-06, "loss": 0.1697, "step": 2570 }, { "epoch": 0.35, "grad_norm": 1.047928184905739, "learning_rate": 7.593334710459303e-06, "loss": 0.2097, "step": 2571 }, { "epoch": 0.35, "grad_norm": 0.9105123551228184, "learning_rate": 7.591467378851849e-06, "loss": 0.1166, "step": 2572 }, { "epoch": 0.35, "grad_norm": 1.115158614275299, "learning_rate": 7.589599552904838e-06, "loss": 0.1892, "step": 2573 }, { "epoch": 0.35, "grad_norm": 1.0174802669537286, "learning_rate": 7.5877312329745716e-06, "loss": 0.2179, "step": 2574 }, { "epoch": 0.35, "grad_norm": 0.5441411414479186, "learning_rate": 7.5858624194174405e-06, "loss": 0.1544, "step": 2575 }, { "epoch": 0.35, "grad_norm": 1.180834028711788, "learning_rate": 7.583993112589936e-06, "loss": 0.254, "step": 2576 }, { "epoch": 0.35, "grad_norm": 0.9386216527378254, "learning_rate": 7.58212331284864e-06, "loss": 0.1998, "step": 2577 }, { "epoch": 0.35, "grad_norm": 0.8712095677125203, "learning_rate": 7.58025302055023e-06, "loss": 0.1754, "step": 2578 }, { "epoch": 0.35, "grad_norm": 1.0638647082924035, "learning_rate": 7.578382236051475e-06, "loss": 0.2037, "step": 2579 }, { "epoch": 0.35, "grad_norm": 1.1141024608596102, "learning_rate": 7.57651095970924e-06, "loss": 0.193, "step": 2580 }, { "epoch": 0.35, "grad_norm": 1.0274854679701937, "learning_rate": 7.574639191880484e-06, "loss": 0.1559, "step": 2581 }, { "epoch": 0.35, "grad_norm": 0.9601825319530732, "learning_rate": 7.57276693292226e-06, "loss": 0.1929, "step": 2582 }, { "epoch": 0.35, "grad_norm": 0.9691596642201638, "learning_rate": 7.570894183191712e-06, "loss": 0.1762, "step": 2583 }, { "epoch": 0.35, "grad_norm": 1.178211482974723, "learning_rate": 7.5690209430460796e-06, "loss": 0.2046, "step": 2584 }, { "epoch": 0.35, "grad_norm": 0.9295834643493733, "learning_rate": 7.567147212842696e-06, "loss": 0.2085, "step": 2585 }, { "epoch": 0.35, "grad_norm": 1.1300474099619744, "learning_rate": 7.565272992938988e-06, "loss": 0.1832, "step": 2586 }, { "epoch": 0.35, "grad_norm": 1.1065597797776203, "learning_rate": 7.563398283692476e-06, "loss": 0.2165, "step": 2587 }, { "epoch": 0.35, "grad_norm": 1.0194839714007786, "learning_rate": 7.561523085460772e-06, "loss": 0.2294, "step": 2588 }, { "epoch": 0.35, "grad_norm": 1.4386574438949562, "learning_rate": 7.559647398601583e-06, "loss": 0.2714, "step": 2589 }, { "epoch": 0.35, "grad_norm": 0.9339560502056736, "learning_rate": 7.557771223472708e-06, "loss": 0.1884, "step": 2590 }, { "epoch": 0.35, "grad_norm": 0.9678356966229497, "learning_rate": 7.555894560432042e-06, "loss": 0.2285, "step": 2591 }, { "epoch": 0.35, "grad_norm": 0.9143553629843998, "learning_rate": 7.554017409837568e-06, "loss": 0.1906, "step": 2592 }, { "epoch": 0.35, "grad_norm": 0.9094212983032927, "learning_rate": 7.5521397720473686e-06, "loss": 0.1748, "step": 2593 }, { "epoch": 0.35, "grad_norm": 1.0366436712368505, "learning_rate": 7.55026164741961e-06, "loss": 0.194, "step": 2594 }, { "epoch": 0.35, "grad_norm": 1.040735690519662, "learning_rate": 7.548383036312562e-06, "loss": 0.1928, "step": 2595 }, { "epoch": 0.35, "grad_norm": 0.8730764345811022, "learning_rate": 7.546503939084579e-06, "loss": 0.1573, "step": 2596 }, { "epoch": 0.35, "grad_norm": 0.9159069061819339, "learning_rate": 7.544624356094114e-06, "loss": 0.1638, "step": 2597 }, { "epoch": 0.35, "grad_norm": 0.9789703359615398, "learning_rate": 7.542744287699709e-06, "loss": 0.203, "step": 2598 }, { "epoch": 0.35, "grad_norm": 0.9084077706314987, "learning_rate": 7.5408637342599985e-06, "loss": 0.1921, "step": 2599 }, { "epoch": 0.35, "grad_norm": 1.1678234074291303, "learning_rate": 7.538982696133711e-06, "loss": 0.2358, "step": 2600 }, { "epoch": 0.35, "grad_norm": 0.8439686827284687, "learning_rate": 7.537101173679666e-06, "loss": 0.1361, "step": 2601 }, { "epoch": 0.35, "grad_norm": 0.892220325827293, "learning_rate": 7.535219167256777e-06, "loss": 0.1863, "step": 2602 }, { "epoch": 0.35, "grad_norm": 1.085641616118169, "learning_rate": 7.533336677224052e-06, "loss": 0.2005, "step": 2603 }, { "epoch": 0.35, "grad_norm": 0.7900143796401147, "learning_rate": 7.5314537039405845e-06, "loss": 0.156, "step": 2604 }, { "epoch": 0.35, "grad_norm": 0.8510175523316104, "learning_rate": 7.5295702477655645e-06, "loss": 0.1729, "step": 2605 }, { "epoch": 0.35, "grad_norm": 1.0385889819201126, "learning_rate": 7.527686309058275e-06, "loss": 0.2239, "step": 2606 }, { "epoch": 0.35, "grad_norm": 0.9185186059073092, "learning_rate": 7.525801888178092e-06, "loss": 0.1697, "step": 2607 }, { "epoch": 0.35, "grad_norm": 1.0684996563701876, "learning_rate": 7.523916985484476e-06, "loss": 0.2369, "step": 2608 }, { "epoch": 0.35, "grad_norm": 1.0601243042219135, "learning_rate": 7.522031601336989e-06, "loss": 0.1807, "step": 2609 }, { "epoch": 0.35, "grad_norm": 0.7682957881539859, "learning_rate": 7.520145736095279e-06, "loss": 0.1309, "step": 2610 }, { "epoch": 0.35, "grad_norm": 1.1431996573673888, "learning_rate": 7.518259390119086e-06, "loss": 0.1952, "step": 2611 }, { "epoch": 0.35, "grad_norm": 1.1547805498478023, "learning_rate": 7.516372563768245e-06, "loss": 0.2543, "step": 2612 }, { "epoch": 0.35, "grad_norm": 0.9550897761116337, "learning_rate": 7.5144852574026795e-06, "loss": 0.1711, "step": 2613 }, { "epoch": 0.35, "grad_norm": 1.1845490838761463, "learning_rate": 7.512597471382407e-06, "loss": 0.2194, "step": 2614 }, { "epoch": 0.35, "grad_norm": 1.2203786834387687, "learning_rate": 7.5107092060675344e-06, "loss": 0.2548, "step": 2615 }, { "epoch": 0.35, "grad_norm": 0.8838482905758169, "learning_rate": 7.5088204618182605e-06, "loss": 0.179, "step": 2616 }, { "epoch": 0.35, "grad_norm": 1.1673088095845232, "learning_rate": 7.506931238994874e-06, "loss": 0.2095, "step": 2617 }, { "epoch": 0.35, "grad_norm": 0.8705596296202388, "learning_rate": 7.50504153795776e-06, "loss": 0.1546, "step": 2618 }, { "epoch": 0.35, "grad_norm": 1.1176213864981543, "learning_rate": 7.503151359067391e-06, "loss": 0.2487, "step": 2619 }, { "epoch": 0.35, "grad_norm": 0.8976275374931851, "learning_rate": 7.5012607026843285e-06, "loss": 0.1284, "step": 2620 }, { "epoch": 0.35, "grad_norm": 1.1229061196841248, "learning_rate": 7.49936956916923e-06, "loss": 0.2343, "step": 2621 }, { "epoch": 0.35, "grad_norm": 1.0080071042698007, "learning_rate": 7.497477958882842e-06, "loss": 0.1977, "step": 2622 }, { "epoch": 0.35, "grad_norm": 1.0956943288520335, "learning_rate": 7.495585872185999e-06, "loss": 0.1962, "step": 2623 }, { "epoch": 0.35, "grad_norm": 0.5996312025599517, "learning_rate": 7.493693309439632e-06, "loss": 0.1823, "step": 2624 }, { "epoch": 0.35, "grad_norm": 0.8617816885565124, "learning_rate": 7.49180027100476e-06, "loss": 0.1205, "step": 2625 }, { "epoch": 0.35, "grad_norm": 1.072888112315538, "learning_rate": 7.48990675724249e-06, "loss": 0.2404, "step": 2626 }, { "epoch": 0.35, "grad_norm": 1.1813452293401616, "learning_rate": 7.4880127685140235e-06, "loss": 0.2427, "step": 2627 }, { "epoch": 0.35, "grad_norm": 0.9918270552104747, "learning_rate": 7.486118305180651e-06, "loss": 0.1963, "step": 2628 }, { "epoch": 0.35, "grad_norm": 1.0121884147973859, "learning_rate": 7.484223367603756e-06, "loss": 0.18, "step": 2629 }, { "epoch": 0.35, "grad_norm": 0.940394237770484, "learning_rate": 7.4823279561448084e-06, "loss": 0.1772, "step": 2630 }, { "epoch": 0.35, "grad_norm": 1.1976762776029029, "learning_rate": 7.480432071165371e-06, "loss": 0.2134, "step": 2631 }, { "epoch": 0.35, "grad_norm": 0.8629697001646478, "learning_rate": 7.478535713027096e-06, "loss": 0.1702, "step": 2632 }, { "epoch": 0.36, "grad_norm": 0.937806260569262, "learning_rate": 7.476638882091726e-06, "loss": 0.2156, "step": 2633 }, { "epoch": 0.36, "grad_norm": 1.0177175766746294, "learning_rate": 7.474741578721096e-06, "loss": 0.2663, "step": 2634 }, { "epoch": 0.36, "grad_norm": 0.9719514674203783, "learning_rate": 7.472843803277127e-06, "loss": 0.1967, "step": 2635 }, { "epoch": 0.36, "grad_norm": 0.988372687042928, "learning_rate": 7.4709455561218326e-06, "loss": 0.216, "step": 2636 }, { "epoch": 0.36, "grad_norm": 1.0427128649289727, "learning_rate": 7.469046837617317e-06, "loss": 0.1827, "step": 2637 }, { "epoch": 0.36, "grad_norm": 1.0299670364643276, "learning_rate": 7.4671476481257735e-06, "loss": 0.1781, "step": 2638 }, { "epoch": 0.36, "grad_norm": 0.9359275926224101, "learning_rate": 7.465247988009483e-06, "loss": 0.1782, "step": 2639 }, { "epoch": 0.36, "grad_norm": 0.8590425076333492, "learning_rate": 7.4633478576308215e-06, "loss": 0.1356, "step": 2640 }, { "epoch": 0.36, "grad_norm": 1.0392574998344555, "learning_rate": 7.461447257352248e-06, "loss": 0.2054, "step": 2641 }, { "epoch": 0.36, "grad_norm": 0.6095565101615245, "learning_rate": 7.459546187536317e-06, "loss": 0.1642, "step": 2642 }, { "epoch": 0.36, "grad_norm": 0.8665951128038286, "learning_rate": 7.4576446485456685e-06, "loss": 0.1842, "step": 2643 }, { "epoch": 0.36, "grad_norm": 0.9310647190406239, "learning_rate": 7.455742640743035e-06, "loss": 0.2072, "step": 2644 }, { "epoch": 0.36, "grad_norm": 0.7071920475707153, "learning_rate": 7.453840164491237e-06, "loss": 0.1623, "step": 2645 }, { "epoch": 0.36, "grad_norm": 0.7085179005563707, "learning_rate": 7.451937220153184e-06, "loss": 0.1002, "step": 2646 }, { "epoch": 0.36, "grad_norm": 0.9480402329426247, "learning_rate": 7.450033808091875e-06, "loss": 0.2103, "step": 2647 }, { "epoch": 0.36, "grad_norm": 1.189882415968913, "learning_rate": 7.448129928670398e-06, "loss": 0.2023, "step": 2648 }, { "epoch": 0.36, "grad_norm": 1.1590569803886348, "learning_rate": 7.446225582251931e-06, "loss": 0.2547, "step": 2649 }, { "epoch": 0.36, "grad_norm": 0.9269129268625055, "learning_rate": 7.444320769199741e-06, "loss": 0.2276, "step": 2650 }, { "epoch": 0.36, "grad_norm": 0.9200451653064249, "learning_rate": 7.442415489877185e-06, "loss": 0.1465, "step": 2651 }, { "epoch": 0.36, "grad_norm": 0.9142825964743402, "learning_rate": 7.440509744647705e-06, "loss": 0.1617, "step": 2652 }, { "epoch": 0.36, "grad_norm": 1.1524469593005653, "learning_rate": 7.438603533874838e-06, "loss": 0.2208, "step": 2653 }, { "epoch": 0.36, "grad_norm": 0.7951901338314104, "learning_rate": 7.436696857922202e-06, "loss": 0.1382, "step": 2654 }, { "epoch": 0.36, "grad_norm": 0.8366729372350902, "learning_rate": 7.434789717153511e-06, "loss": 0.1449, "step": 2655 }, { "epoch": 0.36, "grad_norm": 0.8451299783316879, "learning_rate": 7.4328821119325626e-06, "loss": 0.1756, "step": 2656 }, { "epoch": 0.36, "grad_norm": 1.215529346473779, "learning_rate": 7.430974042623247e-06, "loss": 0.2188, "step": 2657 }, { "epoch": 0.36, "grad_norm": 0.8145132734898775, "learning_rate": 7.429065509589542e-06, "loss": 0.1312, "step": 2658 }, { "epoch": 0.36, "grad_norm": 1.0450251483880602, "learning_rate": 7.4271565131955095e-06, "loss": 0.2146, "step": 2659 }, { "epoch": 0.36, "grad_norm": 1.1057627756843322, "learning_rate": 7.4252470538053055e-06, "loss": 0.2056, "step": 2660 }, { "epoch": 0.36, "grad_norm": 0.9327537691834462, "learning_rate": 7.4233371317831724e-06, "loss": 0.1889, "step": 2661 }, { "epoch": 0.36, "grad_norm": 1.0580558438645953, "learning_rate": 7.42142674749344e-06, "loss": 0.1799, "step": 2662 }, { "epoch": 0.36, "grad_norm": 0.9108535474492828, "learning_rate": 7.419515901300525e-06, "loss": 0.1745, "step": 2663 }, { "epoch": 0.36, "grad_norm": 1.011387211307849, "learning_rate": 7.417604593568937e-06, "loss": 0.1925, "step": 2664 }, { "epoch": 0.36, "grad_norm": 0.9297701707330855, "learning_rate": 7.415692824663268e-06, "loss": 0.1782, "step": 2665 }, { "epoch": 0.36, "grad_norm": 0.8586044735486273, "learning_rate": 7.4137805949482015e-06, "loss": 0.1751, "step": 2666 }, { "epoch": 0.36, "grad_norm": 0.7387672942194006, "learning_rate": 7.411867904788508e-06, "loss": 0.1029, "step": 2667 }, { "epoch": 0.36, "grad_norm": 0.5927426200859575, "learning_rate": 7.409954754549047e-06, "loss": 0.1503, "step": 2668 }, { "epoch": 0.36, "grad_norm": 1.2459352474757537, "learning_rate": 7.408041144594762e-06, "loss": 0.2391, "step": 2669 }, { "epoch": 0.36, "grad_norm": 1.003457646383541, "learning_rate": 7.406127075290689e-06, "loss": 0.1548, "step": 2670 }, { "epoch": 0.36, "grad_norm": 0.9950229489911964, "learning_rate": 7.404212547001947e-06, "loss": 0.1806, "step": 2671 }, { "epoch": 0.36, "grad_norm": 1.1000395128001494, "learning_rate": 7.4022975600937495e-06, "loss": 0.2231, "step": 2672 }, { "epoch": 0.36, "grad_norm": 0.9348175619357479, "learning_rate": 7.400382114931387e-06, "loss": 0.1921, "step": 2673 }, { "epoch": 0.36, "grad_norm": 0.9483779726318815, "learning_rate": 7.3984662118802476e-06, "loss": 0.1815, "step": 2674 }, { "epoch": 0.36, "grad_norm": 0.8079023278176141, "learning_rate": 7.396549851305801e-06, "loss": 0.1832, "step": 2675 }, { "epoch": 0.36, "grad_norm": 0.829226325825035, "learning_rate": 7.394633033573606e-06, "loss": 0.0962, "step": 2676 }, { "epoch": 0.36, "grad_norm": 0.8811599127993811, "learning_rate": 7.39271575904931e-06, "loss": 0.1639, "step": 2677 }, { "epoch": 0.36, "grad_norm": 1.0218311878295687, "learning_rate": 7.390798028098642e-06, "loss": 0.1972, "step": 2678 }, { "epoch": 0.36, "grad_norm": 0.9791714054033586, "learning_rate": 7.388879841087423e-06, "loss": 0.2034, "step": 2679 }, { "epoch": 0.36, "grad_norm": 1.052687023791769, "learning_rate": 7.386961198381561e-06, "loss": 0.1702, "step": 2680 }, { "epoch": 0.36, "grad_norm": 1.2486607113159174, "learning_rate": 7.385042100347051e-06, "loss": 0.2177, "step": 2681 }, { "epoch": 0.36, "grad_norm": 0.8972531995035304, "learning_rate": 7.383122547349971e-06, "loss": 0.213, "step": 2682 }, { "epoch": 0.36, "grad_norm": 1.0894071882315572, "learning_rate": 7.38120253975649e-06, "loss": 0.238, "step": 2683 }, { "epoch": 0.36, "grad_norm": 1.0654505217830808, "learning_rate": 7.379282077932861e-06, "loss": 0.209, "step": 2684 }, { "epoch": 0.36, "grad_norm": 1.2011635132466219, "learning_rate": 7.377361162245426e-06, "loss": 0.1969, "step": 2685 }, { "epoch": 0.36, "grad_norm": 0.8927070653561099, "learning_rate": 7.375439793060612e-06, "loss": 0.2034, "step": 2686 }, { "epoch": 0.36, "grad_norm": 1.2019835001932984, "learning_rate": 7.3735179707449345e-06, "loss": 0.2547, "step": 2687 }, { "epoch": 0.36, "grad_norm": 1.0678120136642224, "learning_rate": 7.37159569566499e-06, "loss": 0.165, "step": 2688 }, { "epoch": 0.36, "grad_norm": 1.2629334168732944, "learning_rate": 7.369672968187467e-06, "loss": 0.2776, "step": 2689 }, { "epoch": 0.36, "grad_norm": 1.0780240414125095, "learning_rate": 7.3677497886791395e-06, "loss": 0.1925, "step": 2690 }, { "epoch": 0.36, "grad_norm": 0.9636990177683012, "learning_rate": 7.365826157506867e-06, "loss": 0.1784, "step": 2691 }, { "epoch": 0.36, "grad_norm": 0.9938288531534171, "learning_rate": 7.363902075037593e-06, "loss": 0.193, "step": 2692 }, { "epoch": 0.36, "grad_norm": 0.6085504516766802, "learning_rate": 7.361977541638351e-06, "loss": 0.1737, "step": 2693 }, { "epoch": 0.36, "grad_norm": 1.0031558787978274, "learning_rate": 7.360052557676256e-06, "loss": 0.1642, "step": 2694 }, { "epoch": 0.36, "grad_norm": 1.3160336370116197, "learning_rate": 7.358127123518512e-06, "loss": 0.2513, "step": 2695 }, { "epoch": 0.36, "grad_norm": 0.8763638163722971, "learning_rate": 7.356201239532411e-06, "loss": 0.1858, "step": 2696 }, { "epoch": 0.36, "grad_norm": 0.8460616937347156, "learning_rate": 7.354274906085326e-06, "loss": 0.1826, "step": 2697 }, { "epoch": 0.36, "grad_norm": 1.2492336016752206, "learning_rate": 7.352348123544718e-06, "loss": 0.2221, "step": 2698 }, { "epoch": 0.36, "grad_norm": 0.9340956185363404, "learning_rate": 7.350420892278134e-06, "loss": 0.1933, "step": 2699 }, { "epoch": 0.36, "grad_norm": 0.8012318036069901, "learning_rate": 7.348493212653205e-06, "loss": 0.1762, "step": 2700 }, { "epoch": 0.36, "grad_norm": 1.1329743225676785, "learning_rate": 7.346565085037649e-06, "loss": 0.2316, "step": 2701 }, { "epoch": 0.36, "grad_norm": 0.8713737314225705, "learning_rate": 7.34463650979927e-06, "loss": 0.195, "step": 2702 }, { "epoch": 0.36, "grad_norm": 1.215901840007839, "learning_rate": 7.3427074873059565e-06, "loss": 0.1884, "step": 2703 }, { "epoch": 0.36, "grad_norm": 1.043664831776346, "learning_rate": 7.340778017925682e-06, "loss": 0.2306, "step": 2704 }, { "epoch": 0.36, "grad_norm": 1.0659591615573485, "learning_rate": 7.338848102026505e-06, "loss": 0.2442, "step": 2705 }, { "epoch": 0.36, "grad_norm": 0.8956181104476681, "learning_rate": 7.336917739976569e-06, "loss": 0.1579, "step": 2706 }, { "epoch": 0.37, "grad_norm": 1.1084214566570678, "learning_rate": 7.334986932144106e-06, "loss": 0.2179, "step": 2707 }, { "epoch": 0.37, "grad_norm": 0.9027785613774786, "learning_rate": 7.333055678897427e-06, "loss": 0.161, "step": 2708 }, { "epoch": 0.37, "grad_norm": 0.9563652116865602, "learning_rate": 7.331123980604935e-06, "loss": 0.1697, "step": 2709 }, { "epoch": 0.37, "grad_norm": 0.9383880512529885, "learning_rate": 7.329191837635112e-06, "loss": 0.2144, "step": 2710 }, { "epoch": 0.37, "grad_norm": 0.9010243625838051, "learning_rate": 7.327259250356527e-06, "loss": 0.192, "step": 2711 }, { "epoch": 0.37, "grad_norm": 1.173620739140971, "learning_rate": 7.325326219137834e-06, "loss": 0.2282, "step": 2712 }, { "epoch": 0.37, "grad_norm": 1.0881105950119698, "learning_rate": 7.323392744347772e-06, "loss": 0.2031, "step": 2713 }, { "epoch": 0.37, "grad_norm": 0.5319768162693915, "learning_rate": 7.321458826355165e-06, "loss": 0.1093, "step": 2714 }, { "epoch": 0.37, "grad_norm": 1.0326215125686249, "learning_rate": 7.319524465528919e-06, "loss": 0.2107, "step": 2715 }, { "epoch": 0.37, "grad_norm": 0.8686601073940099, "learning_rate": 7.3175896622380264e-06, "loss": 0.2022, "step": 2716 }, { "epoch": 0.37, "grad_norm": 1.0046749804651958, "learning_rate": 7.315654416851563e-06, "loss": 0.1689, "step": 2717 }, { "epoch": 0.37, "grad_norm": 1.1597134738240868, "learning_rate": 7.313718729738691e-06, "loss": 0.2287, "step": 2718 }, { "epoch": 0.37, "grad_norm": 1.0039250971401603, "learning_rate": 7.311782601268657e-06, "loss": 0.1987, "step": 2719 }, { "epoch": 0.37, "grad_norm": 0.8185078175876662, "learning_rate": 7.309846031810786e-06, "loss": 0.1778, "step": 2720 }, { "epoch": 0.37, "grad_norm": 1.1540555545885667, "learning_rate": 7.3079090217344965e-06, "loss": 0.1929, "step": 2721 }, { "epoch": 0.37, "grad_norm": 0.7936481355298155, "learning_rate": 7.305971571409282e-06, "loss": 0.1548, "step": 2722 }, { "epoch": 0.37, "grad_norm": 1.0821870267562106, "learning_rate": 7.3040336812047266e-06, "loss": 0.1578, "step": 2723 }, { "epoch": 0.37, "grad_norm": 0.6118649227202061, "learning_rate": 7.302095351490494e-06, "loss": 0.1376, "step": 2724 }, { "epoch": 0.37, "grad_norm": 1.0865644822533227, "learning_rate": 7.300156582636333e-06, "loss": 0.1773, "step": 2725 }, { "epoch": 0.37, "grad_norm": 0.8664032367578841, "learning_rate": 7.298217375012078e-06, "loss": 0.196, "step": 2726 }, { "epoch": 0.37, "grad_norm": 1.0083995878762835, "learning_rate": 7.296277728987646e-06, "loss": 0.1936, "step": 2727 }, { "epoch": 0.37, "grad_norm": 0.8180917164235116, "learning_rate": 7.294337644933036e-06, "loss": 0.1532, "step": 2728 }, { "epoch": 0.37, "grad_norm": 0.7553023933183168, "learning_rate": 7.292397123218331e-06, "loss": 0.1336, "step": 2729 }, { "epoch": 0.37, "grad_norm": 0.8630473947228512, "learning_rate": 7.290456164213703e-06, "loss": 0.1402, "step": 2730 }, { "epoch": 0.37, "grad_norm": 1.0236945597128244, "learning_rate": 7.288514768289399e-06, "loss": 0.2022, "step": 2731 }, { "epoch": 0.37, "grad_norm": 0.9376513467265303, "learning_rate": 7.2865729358157525e-06, "loss": 0.1398, "step": 2732 }, { "epoch": 0.37, "grad_norm": 0.8698204350713303, "learning_rate": 7.284630667163182e-06, "loss": 0.1755, "step": 2733 }, { "epoch": 0.37, "grad_norm": 1.2695084710492324, "learning_rate": 7.282687962702186e-06, "loss": 0.2712, "step": 2734 }, { "epoch": 0.37, "grad_norm": 0.9791852103069936, "learning_rate": 7.280744822803354e-06, "loss": 0.1649, "step": 2735 }, { "epoch": 0.37, "grad_norm": 1.0470760898277545, "learning_rate": 7.2788012478373485e-06, "loss": 0.1629, "step": 2736 }, { "epoch": 0.37, "grad_norm": 0.9877600544879925, "learning_rate": 7.276857238174919e-06, "loss": 0.2083, "step": 2737 }, { "epoch": 0.37, "grad_norm": 1.117237266092997, "learning_rate": 7.274912794186901e-06, "loss": 0.231, "step": 2738 }, { "epoch": 0.37, "grad_norm": 1.0488783028268864, "learning_rate": 7.272967916244207e-06, "loss": 0.2339, "step": 2739 }, { "epoch": 0.37, "grad_norm": 0.9629223250136234, "learning_rate": 7.2710226047178365e-06, "loss": 0.1412, "step": 2740 }, { "epoch": 0.37, "grad_norm": 1.138001985788664, "learning_rate": 7.269076859978871e-06, "loss": 0.1741, "step": 2741 }, { "epoch": 0.37, "grad_norm": 1.064022334743869, "learning_rate": 7.2671306823984735e-06, "loss": 0.2137, "step": 2742 }, { "epoch": 0.37, "grad_norm": 1.1860953225508009, "learning_rate": 7.265184072347892e-06, "loss": 0.2244, "step": 2743 }, { "epoch": 0.37, "grad_norm": 0.922972681029589, "learning_rate": 7.263237030198452e-06, "loss": 0.1954, "step": 2744 }, { "epoch": 0.37, "grad_norm": 0.853011478695407, "learning_rate": 7.261289556321568e-06, "loss": 0.1733, "step": 2745 }, { "epoch": 0.37, "grad_norm": 0.5584877527973753, "learning_rate": 7.2593416510887315e-06, "loss": 0.061, "step": 2746 }, { "epoch": 0.37, "grad_norm": 1.1782317943747938, "learning_rate": 7.257393314871519e-06, "loss": 0.2158, "step": 2747 }, { "epoch": 0.37, "grad_norm": 1.0486316731373484, "learning_rate": 7.255444548041589e-06, "loss": 0.2071, "step": 2748 }, { "epoch": 0.37, "grad_norm": 1.0880253570475447, "learning_rate": 7.253495350970681e-06, "loss": 0.2317, "step": 2749 }, { "epoch": 0.37, "grad_norm": 0.8652014969424182, "learning_rate": 7.251545724030616e-06, "loss": 0.1329, "step": 2750 }, { "epoch": 0.37, "grad_norm": 1.0012366374789825, "learning_rate": 7.249595667593301e-06, "loss": 0.1917, "step": 2751 }, { "epoch": 0.37, "grad_norm": 1.0347779136617856, "learning_rate": 7.247645182030721e-06, "loss": 0.1979, "step": 2752 }, { "epoch": 0.37, "grad_norm": 0.8519484463376209, "learning_rate": 7.245694267714944e-06, "loss": 0.1688, "step": 2753 }, { "epoch": 0.37, "grad_norm": 0.888454717560395, "learning_rate": 7.2437429250181176e-06, "loss": 0.2073, "step": 2754 }, { "epoch": 0.37, "grad_norm": 0.919418845263843, "learning_rate": 7.241791154312478e-06, "loss": 0.2175, "step": 2755 }, { "epoch": 0.37, "grad_norm": 1.2105743836358647, "learning_rate": 7.239838955970334e-06, "loss": 0.2471, "step": 2756 }, { "epoch": 0.37, "grad_norm": 0.9854519528984398, "learning_rate": 7.237886330364081e-06, "loss": 0.163, "step": 2757 }, { "epoch": 0.37, "grad_norm": 0.8434060377395088, "learning_rate": 7.235933277866199e-06, "loss": 0.2062, "step": 2758 }, { "epoch": 0.37, "grad_norm": 0.801148253053177, "learning_rate": 7.233979798849242e-06, "loss": 0.2152, "step": 2759 }, { "epoch": 0.37, "grad_norm": 1.2377591861193946, "learning_rate": 7.23202589368585e-06, "loss": 0.2283, "step": 2760 }, { "epoch": 0.37, "grad_norm": 1.0436460856549974, "learning_rate": 7.230071562748744e-06, "loss": 0.2089, "step": 2761 }, { "epoch": 0.37, "grad_norm": 0.9751278395948033, "learning_rate": 7.228116806410724e-06, "loss": 0.1838, "step": 2762 }, { "epoch": 0.37, "grad_norm": 0.8676482934145258, "learning_rate": 7.2261616250446755e-06, "loss": 0.1723, "step": 2763 }, { "epoch": 0.37, "grad_norm": 1.1258572357180048, "learning_rate": 7.2242060190235595e-06, "loss": 0.2306, "step": 2764 }, { "epoch": 0.37, "grad_norm": 0.8403180800865869, "learning_rate": 7.222249988720422e-06, "loss": 0.149, "step": 2765 }, { "epoch": 0.37, "grad_norm": 0.798826997520314, "learning_rate": 7.220293534508388e-06, "loss": 0.1623, "step": 2766 }, { "epoch": 0.37, "grad_norm": 0.7741823776863653, "learning_rate": 7.218336656760665e-06, "loss": 0.1718, "step": 2767 }, { "epoch": 0.37, "grad_norm": 1.134801839652925, "learning_rate": 7.21637935585054e-06, "loss": 0.2516, "step": 2768 }, { "epoch": 0.37, "grad_norm": 0.9307998581259873, "learning_rate": 7.214421632151381e-06, "loss": 0.182, "step": 2769 }, { "epoch": 0.37, "grad_norm": 0.9005250334228009, "learning_rate": 7.212463486036638e-06, "loss": 0.189, "step": 2770 }, { "epoch": 0.37, "grad_norm": 0.7913732667796961, "learning_rate": 7.210504917879838e-06, "loss": 0.1202, "step": 2771 }, { "epoch": 0.37, "grad_norm": 1.0188081260976039, "learning_rate": 7.208545928054593e-06, "loss": 0.2049, "step": 2772 }, { "epoch": 0.37, "grad_norm": 0.856023487802795, "learning_rate": 7.206586516934592e-06, "loss": 0.1718, "step": 2773 }, { "epoch": 0.37, "grad_norm": 0.7758001558136339, "learning_rate": 7.204626684893604e-06, "loss": 0.1609, "step": 2774 }, { "epoch": 0.37, "grad_norm": 0.8878690489166294, "learning_rate": 7.202666432305484e-06, "loss": 0.1956, "step": 2775 }, { "epoch": 0.37, "grad_norm": 1.1032415434084213, "learning_rate": 7.200705759544161e-06, "loss": 0.1775, "step": 2776 }, { "epoch": 0.37, "grad_norm": 1.074081021756156, "learning_rate": 7.198744666983647e-06, "loss": 0.2409, "step": 2777 }, { "epoch": 0.37, "grad_norm": 0.7767089501299029, "learning_rate": 7.196783154998033e-06, "loss": 0.1204, "step": 2778 }, { "epoch": 0.37, "grad_norm": 0.9086291781981162, "learning_rate": 7.194821223961488e-06, "loss": 0.1961, "step": 2779 }, { "epoch": 0.37, "grad_norm": 1.0298667781206245, "learning_rate": 7.192858874248266e-06, "loss": 0.1673, "step": 2780 }, { "epoch": 0.38, "grad_norm": 0.9351582019539361, "learning_rate": 7.190896106232699e-06, "loss": 0.1979, "step": 2781 }, { "epoch": 0.38, "grad_norm": 1.0890907562292997, "learning_rate": 7.1889329202891956e-06, "loss": 0.2229, "step": 2782 }, { "epoch": 0.38, "grad_norm": 0.7587788165515045, "learning_rate": 7.186969316792249e-06, "loss": 0.1987, "step": 2783 }, { "epoch": 0.38, "grad_norm": 1.0255569032810912, "learning_rate": 7.185005296116426e-06, "loss": 0.2279, "step": 2784 }, { "epoch": 0.38, "grad_norm": 0.9858384904547729, "learning_rate": 7.183040858636381e-06, "loss": 0.2104, "step": 2785 }, { "epoch": 0.38, "grad_norm": 0.8846652280864897, "learning_rate": 7.181076004726839e-06, "loss": 0.1741, "step": 2786 }, { "epoch": 0.38, "grad_norm": 0.556424178365562, "learning_rate": 7.179110734762613e-06, "loss": 0.1478, "step": 2787 }, { "epoch": 0.38, "grad_norm": 1.0168726503288958, "learning_rate": 7.177145049118588e-06, "loss": 0.1704, "step": 2788 }, { "epoch": 0.38, "grad_norm": 0.825857242061566, "learning_rate": 7.175178948169734e-06, "loss": 0.1472, "step": 2789 }, { "epoch": 0.38, "grad_norm": 1.0014680941382086, "learning_rate": 7.173212432291094e-06, "loss": 0.1691, "step": 2790 }, { "epoch": 0.38, "grad_norm": 0.7998621605864867, "learning_rate": 7.171245501857799e-06, "loss": 0.1672, "step": 2791 }, { "epoch": 0.38, "grad_norm": 0.9210752601488422, "learning_rate": 7.169278157245052e-06, "loss": 0.1598, "step": 2792 }, { "epoch": 0.38, "grad_norm": 0.984752022541704, "learning_rate": 7.167310398828137e-06, "loss": 0.1862, "step": 2793 }, { "epoch": 0.38, "grad_norm": 1.086644149445141, "learning_rate": 7.1653422269824145e-06, "loss": 0.1649, "step": 2794 }, { "epoch": 0.38, "grad_norm": 1.1799648414837443, "learning_rate": 7.163373642083329e-06, "loss": 0.2698, "step": 2795 }, { "epoch": 0.38, "grad_norm": 0.7729011761438327, "learning_rate": 7.1614046445064e-06, "loss": 0.1326, "step": 2796 }, { "epoch": 0.38, "grad_norm": 1.0821751389848597, "learning_rate": 7.159435234627228e-06, "loss": 0.1795, "step": 2797 }, { "epoch": 0.38, "grad_norm": 0.8317269274413615, "learning_rate": 7.157465412821489e-06, "loss": 0.1404, "step": 2798 }, { "epoch": 0.38, "grad_norm": 0.6391754175153338, "learning_rate": 7.155495179464942e-06, "loss": 0.1097, "step": 2799 }, { "epoch": 0.38, "grad_norm": 1.1180526082457025, "learning_rate": 7.153524534933422e-06, "loss": 0.1954, "step": 2800 }, { "epoch": 0.38, "grad_norm": 1.0744716088156947, "learning_rate": 7.15155347960284e-06, "loss": 0.1868, "step": 2801 }, { "epoch": 0.38, "grad_norm": 0.9555889436631148, "learning_rate": 7.149582013849188e-06, "loss": 0.1825, "step": 2802 }, { "epoch": 0.38, "grad_norm": 1.12863015007667, "learning_rate": 7.1476101380485385e-06, "loss": 0.2075, "step": 2803 }, { "epoch": 0.38, "grad_norm": 0.9877433234813672, "learning_rate": 7.145637852577039e-06, "loss": 0.2038, "step": 2804 }, { "epoch": 0.38, "grad_norm": 0.778052805408713, "learning_rate": 7.143665157810915e-06, "loss": 0.1217, "step": 2805 }, { "epoch": 0.38, "grad_norm": 1.0982075865548544, "learning_rate": 7.141692054126471e-06, "loss": 0.1939, "step": 2806 }, { "epoch": 0.38, "grad_norm": 0.9504200155091456, "learning_rate": 7.139718541900092e-06, "loss": 0.1934, "step": 2807 }, { "epoch": 0.38, "grad_norm": 0.7898779068252555, "learning_rate": 7.137744621508236e-06, "loss": 0.1518, "step": 2808 }, { "epoch": 0.38, "grad_norm": 1.0224221739190253, "learning_rate": 7.135770293327442e-06, "loss": 0.211, "step": 2809 }, { "epoch": 0.38, "grad_norm": 0.8099584618949003, "learning_rate": 7.133795557734328e-06, "loss": 0.1414, "step": 2810 }, { "epoch": 0.38, "grad_norm": 0.7658183556844463, "learning_rate": 7.1318204151055845e-06, "loss": 0.1249, "step": 2811 }, { "epoch": 0.38, "grad_norm": 1.027533614789803, "learning_rate": 7.129844865817985e-06, "loss": 0.1396, "step": 2812 }, { "epoch": 0.38, "grad_norm": 0.9030160185312495, "learning_rate": 7.127868910248379e-06, "loss": 0.176, "step": 2813 }, { "epoch": 0.38, "grad_norm": 1.051055711081802, "learning_rate": 7.125892548773692e-06, "loss": 0.1961, "step": 2814 }, { "epoch": 0.38, "grad_norm": 0.6462817239216077, "learning_rate": 7.1239157817709305e-06, "loss": 0.1369, "step": 2815 }, { "epoch": 0.38, "grad_norm": 0.9505198612567022, "learning_rate": 7.121938609617172e-06, "loss": 0.1583, "step": 2816 }, { "epoch": 0.38, "grad_norm": 0.6790097296434044, "learning_rate": 7.119961032689578e-06, "loss": 0.1118, "step": 2817 }, { "epoch": 0.38, "grad_norm": 1.0541508734680638, "learning_rate": 7.117983051365383e-06, "loss": 0.2218, "step": 2818 }, { "epoch": 0.38, "grad_norm": 1.1762786719431721, "learning_rate": 7.116004666021901e-06, "loss": 0.216, "step": 2819 }, { "epoch": 0.38, "grad_norm": 0.989249192189872, "learning_rate": 7.114025877036521e-06, "loss": 0.1701, "step": 2820 }, { "epoch": 0.38, "grad_norm": 0.9328224224485978, "learning_rate": 7.112046684786711e-06, "loss": 0.1803, "step": 2821 }, { "epoch": 0.38, "grad_norm": 0.993176370718209, "learning_rate": 7.110067089650014e-06, "loss": 0.2287, "step": 2822 }, { "epoch": 0.38, "grad_norm": 1.0185224442678311, "learning_rate": 7.108087092004055e-06, "loss": 0.2133, "step": 2823 }, { "epoch": 0.38, "grad_norm": 1.1139947017788174, "learning_rate": 7.106106692226524e-06, "loss": 0.2159, "step": 2824 }, { "epoch": 0.38, "grad_norm": 0.9044509267413214, "learning_rate": 7.104125890695202e-06, "loss": 0.1839, "step": 2825 }, { "epoch": 0.38, "grad_norm": 1.1066428048181574, "learning_rate": 7.102144687787938e-06, "loss": 0.2008, "step": 2826 }, { "epoch": 0.38, "grad_norm": 1.1504339430860684, "learning_rate": 7.1001630838826586e-06, "loss": 0.1682, "step": 2827 }, { "epoch": 0.38, "grad_norm": 0.9700492029440559, "learning_rate": 7.098181079357369e-06, "loss": 0.1854, "step": 2828 }, { "epoch": 0.38, "grad_norm": 0.8170891740808521, "learning_rate": 7.096198674590149e-06, "loss": 0.1964, "step": 2829 }, { "epoch": 0.38, "grad_norm": 0.84965904751481, "learning_rate": 7.094215869959156e-06, "loss": 0.1815, "step": 2830 }, { "epoch": 0.38, "grad_norm": 0.9371978547907245, "learning_rate": 7.092232665842622e-06, "loss": 0.1775, "step": 2831 }, { "epoch": 0.38, "grad_norm": 1.1499387756174637, "learning_rate": 7.090249062618857e-06, "loss": 0.2169, "step": 2832 }, { "epoch": 0.38, "grad_norm": 0.8132025131645627, "learning_rate": 7.088265060666248e-06, "loss": 0.1625, "step": 2833 }, { "epoch": 0.38, "grad_norm": 1.1101108388000513, "learning_rate": 7.086280660363254e-06, "loss": 0.2594, "step": 2834 }, { "epoch": 0.38, "grad_norm": 0.970081632859375, "learning_rate": 7.084295862088411e-06, "loss": 0.1942, "step": 2835 }, { "epoch": 0.38, "grad_norm": 0.813404403069584, "learning_rate": 7.082310666220338e-06, "loss": 0.1365, "step": 2836 }, { "epoch": 0.38, "grad_norm": 1.129870665620912, "learning_rate": 7.08032507313772e-06, "loss": 0.2402, "step": 2837 }, { "epoch": 0.38, "grad_norm": 0.8400915460274699, "learning_rate": 7.078339083219326e-06, "loss": 0.1758, "step": 2838 }, { "epoch": 0.38, "grad_norm": 0.741939149347158, "learning_rate": 7.076352696843991e-06, "loss": 0.1583, "step": 2839 }, { "epoch": 0.38, "grad_norm": 0.8369529016931115, "learning_rate": 7.074365914390635e-06, "loss": 0.1461, "step": 2840 }, { "epoch": 0.38, "grad_norm": 1.337776723759854, "learning_rate": 7.072378736238248e-06, "loss": 0.2083, "step": 2841 }, { "epoch": 0.38, "grad_norm": 1.1259350838398972, "learning_rate": 7.070391162765901e-06, "loss": 0.1974, "step": 2842 }, { "epoch": 0.38, "grad_norm": 0.9584881293509669, "learning_rate": 7.068403194352732e-06, "loss": 0.2256, "step": 2843 }, { "epoch": 0.38, "grad_norm": 0.9477145862029993, "learning_rate": 7.066414831377964e-06, "loss": 0.1702, "step": 2844 }, { "epoch": 0.38, "grad_norm": 0.889255040360095, "learning_rate": 7.064426074220886e-06, "loss": 0.1417, "step": 2845 }, { "epoch": 0.38, "grad_norm": 0.9420488382907629, "learning_rate": 7.062436923260869e-06, "loss": 0.2064, "step": 2846 }, { "epoch": 0.38, "grad_norm": 0.8989272085046384, "learning_rate": 7.060447378877355e-06, "loss": 0.1583, "step": 2847 }, { "epoch": 0.38, "grad_norm": 1.1716611281239868, "learning_rate": 7.058457441449866e-06, "loss": 0.2516, "step": 2848 }, { "epoch": 0.38, "grad_norm": 0.9675166465901386, "learning_rate": 7.056467111357993e-06, "loss": 0.1709, "step": 2849 }, { "epoch": 0.38, "grad_norm": 1.0346359160140688, "learning_rate": 7.054476388981405e-06, "loss": 0.238, "step": 2850 }, { "epoch": 0.38, "grad_norm": 1.4584853158872184, "learning_rate": 7.052485274699846e-06, "loss": 0.3076, "step": 2851 }, { "epoch": 0.38, "grad_norm": 1.1393295004309818, "learning_rate": 7.050493768893134e-06, "loss": 0.1631, "step": 2852 }, { "epoch": 0.38, "grad_norm": 0.7674628908317231, "learning_rate": 7.048501871941162e-06, "loss": 0.1744, "step": 2853 }, { "epoch": 0.38, "grad_norm": 0.880584332822167, "learning_rate": 7.046509584223898e-06, "loss": 0.2052, "step": 2854 }, { "epoch": 0.38, "grad_norm": 0.8070513095180055, "learning_rate": 7.044516906121383e-06, "loss": 0.1731, "step": 2855 }, { "epoch": 0.39, "grad_norm": 0.6885669016341891, "learning_rate": 7.042523838013735e-06, "loss": 0.108, "step": 2856 }, { "epoch": 0.39, "grad_norm": 0.9217754862347629, "learning_rate": 7.040530380281143e-06, "loss": 0.1671, "step": 2857 }, { "epoch": 0.39, "grad_norm": 0.8636524456549796, "learning_rate": 7.038536533303872e-06, "loss": 0.1315, "step": 2858 }, { "epoch": 0.39, "grad_norm": 1.0658283510525652, "learning_rate": 7.036542297462265e-06, "loss": 0.2262, "step": 2859 }, { "epoch": 0.39, "grad_norm": 0.7175160708565577, "learning_rate": 7.034547673136732e-06, "loss": 0.1226, "step": 2860 }, { "epoch": 0.39, "grad_norm": 0.9805406438942226, "learning_rate": 7.0325526607077655e-06, "loss": 0.2068, "step": 2861 }, { "epoch": 0.39, "grad_norm": 1.1180522990040893, "learning_rate": 7.030557260555922e-06, "loss": 0.1739, "step": 2862 }, { "epoch": 0.39, "grad_norm": 1.118433914176929, "learning_rate": 7.028561473061837e-06, "loss": 0.2718, "step": 2863 }, { "epoch": 0.39, "grad_norm": 0.6396540916319086, "learning_rate": 7.026565298606225e-06, "loss": 0.1642, "step": 2864 }, { "epoch": 0.39, "grad_norm": 0.7081923472164935, "learning_rate": 7.024568737569867e-06, "loss": 0.1417, "step": 2865 }, { "epoch": 0.39, "grad_norm": 0.93850699025464, "learning_rate": 7.02257179033362e-06, "loss": 0.1701, "step": 2866 }, { "epoch": 0.39, "grad_norm": 0.9833184400010689, "learning_rate": 7.020574457278415e-06, "loss": 0.1927, "step": 2867 }, { "epoch": 0.39, "grad_norm": 0.877230376376586, "learning_rate": 7.018576738785257e-06, "loss": 0.1635, "step": 2868 }, { "epoch": 0.39, "grad_norm": 0.9393283849737951, "learning_rate": 7.016578635235224e-06, "loss": 0.16, "step": 2869 }, { "epoch": 0.39, "grad_norm": 0.9939834480529591, "learning_rate": 7.014580147009468e-06, "loss": 0.1711, "step": 2870 }, { "epoch": 0.39, "grad_norm": 0.8760258837471128, "learning_rate": 7.0125812744892115e-06, "loss": 0.1686, "step": 2871 }, { "epoch": 0.39, "grad_norm": 1.0448838889030816, "learning_rate": 7.010582018055755e-06, "loss": 0.18, "step": 2872 }, { "epoch": 0.39, "grad_norm": 0.919708924601207, "learning_rate": 7.0085823780904686e-06, "loss": 0.1706, "step": 2873 }, { "epoch": 0.39, "grad_norm": 0.8481001388344491, "learning_rate": 7.006582354974798e-06, "loss": 0.1347, "step": 2874 }, { "epoch": 0.39, "grad_norm": 0.9539333328141497, "learning_rate": 7.004581949090259e-06, "loss": 0.1829, "step": 2875 }, { "epoch": 0.39, "grad_norm": 0.8341259559261798, "learning_rate": 7.0025811608184455e-06, "loss": 0.1644, "step": 2876 }, { "epoch": 0.39, "grad_norm": 0.7910408779009669, "learning_rate": 7.000579990541018e-06, "loss": 0.1325, "step": 2877 }, { "epoch": 0.39, "grad_norm": 1.1647263590579806, "learning_rate": 6.998578438639714e-06, "loss": 0.2263, "step": 2878 }, { "epoch": 0.39, "grad_norm": 0.9871788933486256, "learning_rate": 6.996576505496343e-06, "loss": 0.1895, "step": 2879 }, { "epoch": 0.39, "grad_norm": 0.8692491822281139, "learning_rate": 6.994574191492787e-06, "loss": 0.1562, "step": 2880 }, { "epoch": 0.39, "grad_norm": 1.3034315834724852, "learning_rate": 6.9925714970110004e-06, "loss": 0.2972, "step": 2881 }, { "epoch": 0.39, "grad_norm": 0.9329556778081658, "learning_rate": 6.9905684224330105e-06, "loss": 0.1345, "step": 2882 }, { "epoch": 0.39, "grad_norm": 0.9438017013342324, "learning_rate": 6.988564968140917e-06, "loss": 0.2564, "step": 2883 }, { "epoch": 0.39, "grad_norm": 0.8979809361139897, "learning_rate": 6.9865611345168925e-06, "loss": 0.1554, "step": 2884 }, { "epoch": 0.39, "grad_norm": 1.1246530243337567, "learning_rate": 6.984556921943182e-06, "loss": 0.241, "step": 2885 }, { "epoch": 0.39, "grad_norm": 1.0119557251660558, "learning_rate": 6.982552330802099e-06, "loss": 0.1954, "step": 2886 }, { "epoch": 0.39, "grad_norm": 0.6141870874103676, "learning_rate": 6.980547361476038e-06, "loss": 0.1195, "step": 2887 }, { "epoch": 0.39, "grad_norm": 0.8194115048596414, "learning_rate": 6.978542014347455e-06, "loss": 0.1648, "step": 2888 }, { "epoch": 0.39, "grad_norm": 0.8412103290858174, "learning_rate": 6.976536289798887e-06, "loss": 0.1687, "step": 2889 }, { "epoch": 0.39, "grad_norm": 1.1062817569658148, "learning_rate": 6.974530188212935e-06, "loss": 0.1737, "step": 2890 }, { "epoch": 0.39, "grad_norm": 0.7864506657385418, "learning_rate": 6.972523709972282e-06, "loss": 0.1954, "step": 2891 }, { "epoch": 0.39, "grad_norm": 1.027787022420036, "learning_rate": 6.970516855459672e-06, "loss": 0.2233, "step": 2892 }, { "epoch": 0.39, "grad_norm": 0.7401855640069765, "learning_rate": 6.9685096250579285e-06, "loss": 0.0964, "step": 2893 }, { "epoch": 0.39, "grad_norm": 0.8324107826310021, "learning_rate": 6.966502019149943e-06, "loss": 0.1566, "step": 2894 }, { "epoch": 0.39, "grad_norm": 0.8452283238561933, "learning_rate": 6.96449403811868e-06, "loss": 0.1243, "step": 2895 }, { "epoch": 0.39, "grad_norm": 0.9165541232586821, "learning_rate": 6.9624856823471744e-06, "loss": 0.1541, "step": 2896 }, { "epoch": 0.39, "grad_norm": 1.0645909805726692, "learning_rate": 6.960476952218534e-06, "loss": 0.2146, "step": 2897 }, { "epoch": 0.39, "grad_norm": 0.9414614311718337, "learning_rate": 6.958467848115937e-06, "loss": 0.1456, "step": 2898 }, { "epoch": 0.39, "grad_norm": 0.926994627224411, "learning_rate": 6.956458370422633e-06, "loss": 0.2041, "step": 2899 }, { "epoch": 0.39, "grad_norm": 1.055531593361823, "learning_rate": 6.954448519521946e-06, "loss": 0.1935, "step": 2900 }, { "epoch": 0.39, "grad_norm": 0.7523909029697885, "learning_rate": 6.952438295797263e-06, "loss": 0.1699, "step": 2901 }, { "epoch": 0.39, "grad_norm": 1.0094516059182563, "learning_rate": 6.950427699632053e-06, "loss": 0.1743, "step": 2902 }, { "epoch": 0.39, "grad_norm": 0.7877864259291313, "learning_rate": 6.948416731409846e-06, "loss": 0.1689, "step": 2903 }, { "epoch": 0.39, "grad_norm": 0.8057412781629671, "learning_rate": 6.9464053915142505e-06, "loss": 0.2074, "step": 2904 }, { "epoch": 0.39, "grad_norm": 1.0179582644553282, "learning_rate": 6.944393680328943e-06, "loss": 0.157, "step": 2905 }, { "epoch": 0.39, "grad_norm": 0.8376374286140917, "learning_rate": 6.942381598237668e-06, "loss": 0.1687, "step": 2906 }, { "epoch": 0.39, "grad_norm": 1.2517122417528066, "learning_rate": 6.940369145624246e-06, "loss": 0.2199, "step": 2907 }, { "epoch": 0.39, "grad_norm": 1.1882674718139281, "learning_rate": 6.938356322872565e-06, "loss": 0.2508, "step": 2908 }, { "epoch": 0.39, "grad_norm": 1.2064464014460519, "learning_rate": 6.936343130366584e-06, "loss": 0.1984, "step": 2909 }, { "epoch": 0.39, "grad_norm": 1.1546286514481559, "learning_rate": 6.934329568490333e-06, "loss": 0.1998, "step": 2910 }, { "epoch": 0.39, "grad_norm": 1.1324126170230275, "learning_rate": 6.932315637627912e-06, "loss": 0.2611, "step": 2911 }, { "epoch": 0.39, "grad_norm": 1.1401017054023181, "learning_rate": 6.930301338163492e-06, "loss": 0.2523, "step": 2912 }, { "epoch": 0.39, "grad_norm": 0.9933525314945522, "learning_rate": 6.928286670481313e-06, "loss": 0.1907, "step": 2913 }, { "epoch": 0.39, "grad_norm": 1.1065107877657505, "learning_rate": 6.926271634965689e-06, "loss": 0.2612, "step": 2914 }, { "epoch": 0.39, "grad_norm": 0.929530038909208, "learning_rate": 6.924256232000997e-06, "loss": 0.1703, "step": 2915 }, { "epoch": 0.39, "grad_norm": 0.9653478057768324, "learning_rate": 6.922240461971692e-06, "loss": 0.1605, "step": 2916 }, { "epoch": 0.39, "grad_norm": 0.731206766045542, "learning_rate": 6.920224325262295e-06, "loss": 0.1276, "step": 2917 }, { "epoch": 0.39, "grad_norm": 1.023316894150658, "learning_rate": 6.9182078222573946e-06, "loss": 0.1878, "step": 2918 }, { "epoch": 0.39, "grad_norm": 1.0568490380109097, "learning_rate": 6.916190953341655e-06, "loss": 0.1678, "step": 2919 }, { "epoch": 0.39, "grad_norm": 0.899038714875204, "learning_rate": 6.914173718899806e-06, "loss": 0.1736, "step": 2920 }, { "epoch": 0.39, "grad_norm": 1.1317873222020984, "learning_rate": 6.91215611931665e-06, "loss": 0.1663, "step": 2921 }, { "epoch": 0.39, "grad_norm": 0.7414298663674406, "learning_rate": 6.910138154977055e-06, "loss": 0.1423, "step": 2922 }, { "epoch": 0.39, "grad_norm": 1.1464918269092335, "learning_rate": 6.908119826265965e-06, "loss": 0.2, "step": 2923 }, { "epoch": 0.39, "grad_norm": 0.9123432236325203, "learning_rate": 6.906101133568386e-06, "loss": 0.1601, "step": 2924 }, { "epoch": 0.39, "grad_norm": 0.9179272703222926, "learning_rate": 6.904082077269396e-06, "loss": 0.1869, "step": 2925 }, { "epoch": 0.39, "grad_norm": 1.0750666378523566, "learning_rate": 6.902062657754146e-06, "loss": 0.2021, "step": 2926 }, { "epoch": 0.39, "grad_norm": 1.0583145954619708, "learning_rate": 6.900042875407854e-06, "loss": 0.191, "step": 2927 }, { "epoch": 0.39, "grad_norm": 1.042590289503633, "learning_rate": 6.898022730615806e-06, "loss": 0.2275, "step": 2928 }, { "epoch": 0.39, "grad_norm": 1.0953014149067084, "learning_rate": 6.896002223763357e-06, "loss": 0.1724, "step": 2929 }, { "epoch": 0.4, "grad_norm": 0.8321575693539154, "learning_rate": 6.893981355235933e-06, "loss": 0.1473, "step": 2930 }, { "epoch": 0.4, "grad_norm": 0.9845530108149722, "learning_rate": 6.891960125419028e-06, "loss": 0.205, "step": 2931 }, { "epoch": 0.4, "grad_norm": 0.9733599977196662, "learning_rate": 6.889938534698205e-06, "loss": 0.154, "step": 2932 }, { "epoch": 0.4, "grad_norm": 0.8920965491359834, "learning_rate": 6.8879165834590955e-06, "loss": 0.1787, "step": 2933 }, { "epoch": 0.4, "grad_norm": 0.9212048032528944, "learning_rate": 6.885894272087401e-06, "loss": 0.207, "step": 2934 }, { "epoch": 0.4, "grad_norm": 0.7664378406562191, "learning_rate": 6.88387160096889e-06, "loss": 0.1404, "step": 2935 }, { "epoch": 0.4, "grad_norm": 0.9394642699440696, "learning_rate": 6.881848570489399e-06, "loss": 0.1808, "step": 2936 }, { "epoch": 0.4, "grad_norm": 1.117116742614876, "learning_rate": 6.879825181034837e-06, "loss": 0.1786, "step": 2937 }, { "epoch": 0.4, "grad_norm": 0.8278171551980723, "learning_rate": 6.877801432991179e-06, "loss": 0.162, "step": 2938 }, { "epoch": 0.4, "grad_norm": 0.8707528097508889, "learning_rate": 6.875777326744466e-06, "loss": 0.1595, "step": 2939 }, { "epoch": 0.4, "grad_norm": 1.1267357970394287, "learning_rate": 6.873752862680812e-06, "loss": 0.25, "step": 2940 }, { "epoch": 0.4, "grad_norm": 0.7841947268558386, "learning_rate": 6.871728041186394e-06, "loss": 0.1904, "step": 2941 }, { "epoch": 0.4, "grad_norm": 0.8066871071538689, "learning_rate": 6.869702862647462e-06, "loss": 0.2043, "step": 2942 }, { "epoch": 0.4, "grad_norm": 1.2029568169129454, "learning_rate": 6.867677327450333e-06, "loss": 0.2773, "step": 2943 }, { "epoch": 0.4, "grad_norm": 1.2089548359321944, "learning_rate": 6.86565143598139e-06, "loss": 0.2066, "step": 2944 }, { "epoch": 0.4, "grad_norm": 1.0737955625112643, "learning_rate": 6.863625188627086e-06, "loss": 0.252, "step": 2945 }, { "epoch": 0.4, "grad_norm": 1.277628799328283, "learning_rate": 6.861598585773941e-06, "loss": 0.2596, "step": 2946 }, { "epoch": 0.4, "grad_norm": 1.247528570719736, "learning_rate": 6.859571627808542e-06, "loss": 0.2395, "step": 2947 }, { "epoch": 0.4, "grad_norm": 1.1984305463614513, "learning_rate": 6.8575443151175435e-06, "loss": 0.2499, "step": 2948 }, { "epoch": 0.4, "grad_norm": 1.0630063599987645, "learning_rate": 6.855516648087672e-06, "loss": 0.2051, "step": 2949 }, { "epoch": 0.4, "grad_norm": 1.0785230138100959, "learning_rate": 6.853488627105717e-06, "loss": 0.1789, "step": 2950 }, { "epoch": 0.4, "grad_norm": 0.9544349496276712, "learning_rate": 6.851460252558538e-06, "loss": 0.2119, "step": 2951 }, { "epoch": 0.4, "grad_norm": 0.8823248608840746, "learning_rate": 6.849431524833059e-06, "loss": 0.1947, "step": 2952 }, { "epoch": 0.4, "grad_norm": 1.1070445494670988, "learning_rate": 6.8474024443162735e-06, "loss": 0.2168, "step": 2953 }, { "epoch": 0.4, "grad_norm": 0.8212053230484287, "learning_rate": 6.845373011395242e-06, "loss": 0.1915, "step": 2954 }, { "epoch": 0.4, "grad_norm": 1.2452558469833603, "learning_rate": 6.843343226457095e-06, "loss": 0.2495, "step": 2955 }, { "epoch": 0.4, "grad_norm": 0.8350448156365683, "learning_rate": 6.841313089889023e-06, "loss": 0.1854, "step": 2956 }, { "epoch": 0.4, "grad_norm": 0.8095924917861316, "learning_rate": 6.839282602078291e-06, "loss": 0.1607, "step": 2957 }, { "epoch": 0.4, "grad_norm": 0.7413163142299141, "learning_rate": 6.837251763412227e-06, "loss": 0.1321, "step": 2958 }, { "epoch": 0.4, "grad_norm": 0.9643295845123688, "learning_rate": 6.835220574278226e-06, "loss": 0.2052, "step": 2959 }, { "epoch": 0.4, "grad_norm": 0.8131501535441809, "learning_rate": 6.833189035063754e-06, "loss": 0.1241, "step": 2960 }, { "epoch": 0.4, "grad_norm": 0.8746853003732196, "learning_rate": 6.831157146156339e-06, "loss": 0.1548, "step": 2961 }, { "epoch": 0.4, "grad_norm": 0.8375224400253533, "learning_rate": 6.829124907943574e-06, "loss": 0.2028, "step": 2962 }, { "epoch": 0.4, "grad_norm": 1.0405662663422648, "learning_rate": 6.8270923208131266e-06, "loss": 0.1785, "step": 2963 }, { "epoch": 0.4, "grad_norm": 1.0703558494197594, "learning_rate": 6.825059385152722e-06, "loss": 0.2144, "step": 2964 }, { "epoch": 0.4, "grad_norm": 0.9421078255885694, "learning_rate": 6.8230261013501595e-06, "loss": 0.1492, "step": 2965 }, { "epoch": 0.4, "grad_norm": 0.6692618870621301, "learning_rate": 6.8209924697933015e-06, "loss": 0.1574, "step": 2966 }, { "epoch": 0.4, "grad_norm": 0.9487285719444921, "learning_rate": 6.818958490870074e-06, "loss": 0.1874, "step": 2967 }, { "epoch": 0.4, "grad_norm": 0.7104009706208959, "learning_rate": 6.8169241649684745e-06, "loss": 0.1442, "step": 2968 }, { "epoch": 0.4, "grad_norm": 0.8181998588986615, "learning_rate": 6.814889492476563e-06, "loss": 0.1369, "step": 2969 }, { "epoch": 0.4, "grad_norm": 0.9236266648588737, "learning_rate": 6.812854473782466e-06, "loss": 0.2001, "step": 2970 }, { "epoch": 0.4, "grad_norm": 0.85946903493574, "learning_rate": 6.810819109274378e-06, "loss": 0.1465, "step": 2971 }, { "epoch": 0.4, "grad_norm": 0.9158532040761805, "learning_rate": 6.8087833993405584e-06, "loss": 0.1724, "step": 2972 }, { "epoch": 0.4, "grad_norm": 0.766436283723811, "learning_rate": 6.806747344369331e-06, "loss": 0.1821, "step": 2973 }, { "epoch": 0.4, "grad_norm": 0.761649141837633, "learning_rate": 6.804710944749088e-06, "loss": 0.1664, "step": 2974 }, { "epoch": 0.4, "grad_norm": 0.9615800262667212, "learning_rate": 6.8026742008682855e-06, "loss": 0.1774, "step": 2975 }, { "epoch": 0.4, "grad_norm": 1.174871544361149, "learning_rate": 6.800637113115447e-06, "loss": 0.2022, "step": 2976 }, { "epoch": 0.4, "grad_norm": 0.9850914930807271, "learning_rate": 6.798599681879158e-06, "loss": 0.2016, "step": 2977 }, { "epoch": 0.4, "grad_norm": 1.0458121334795112, "learning_rate": 6.796561907548074e-06, "loss": 0.1926, "step": 2978 }, { "epoch": 0.4, "grad_norm": 1.285775352787307, "learning_rate": 6.794523790510914e-06, "loss": 0.219, "step": 2979 }, { "epoch": 0.4, "grad_norm": 0.5841518793607494, "learning_rate": 6.7924853311564606e-06, "loss": 0.0992, "step": 2980 }, { "epoch": 0.4, "grad_norm": 0.6383140835166666, "learning_rate": 6.7904465298735645e-06, "loss": 0.1364, "step": 2981 }, { "epoch": 0.4, "grad_norm": 1.166691541845335, "learning_rate": 6.788407387051142e-06, "loss": 0.237, "step": 2982 }, { "epoch": 0.4, "grad_norm": 0.6802958828412328, "learning_rate": 6.786367903078172e-06, "loss": 0.1625, "step": 2983 }, { "epoch": 0.4, "grad_norm": 1.2097546111382464, "learning_rate": 6.784328078343698e-06, "loss": 0.2436, "step": 2984 }, { "epoch": 0.4, "grad_norm": 1.0694658114999311, "learning_rate": 6.782287913236834e-06, "loss": 0.1702, "step": 2985 }, { "epoch": 0.4, "grad_norm": 0.9117792311727552, "learning_rate": 6.780247408146749e-06, "loss": 0.1752, "step": 2986 }, { "epoch": 0.4, "grad_norm": 0.9305753342230348, "learning_rate": 6.778206563462687e-06, "loss": 0.1243, "step": 2987 }, { "epoch": 0.4, "grad_norm": 0.9986068371416488, "learning_rate": 6.776165379573952e-06, "loss": 0.1658, "step": 2988 }, { "epoch": 0.4, "grad_norm": 0.8556606970389373, "learning_rate": 6.774123856869914e-06, "loss": 0.1514, "step": 2989 }, { "epoch": 0.4, "grad_norm": 0.9970678330310709, "learning_rate": 6.772081995740005e-06, "loss": 0.186, "step": 2990 }, { "epoch": 0.4, "grad_norm": 1.119860311378756, "learning_rate": 6.770039796573726e-06, "loss": 0.1942, "step": 2991 }, { "epoch": 0.4, "grad_norm": 1.1243302541543778, "learning_rate": 6.767997259760638e-06, "loss": 0.1801, "step": 2992 }, { "epoch": 0.4, "grad_norm": 0.7905715954099999, "learning_rate": 6.76595438569037e-06, "loss": 0.1386, "step": 2993 }, { "epoch": 0.4, "grad_norm": 0.9734907664699802, "learning_rate": 6.763911174752611e-06, "loss": 0.1523, "step": 2994 }, { "epoch": 0.4, "grad_norm": 0.6362326796257797, "learning_rate": 6.761867627337121e-06, "loss": 0.1568, "step": 2995 }, { "epoch": 0.4, "grad_norm": 1.1337440944033976, "learning_rate": 6.759823743833717e-06, "loss": 0.2207, "step": 2996 }, { "epoch": 0.4, "grad_norm": 1.114691917700905, "learning_rate": 6.7577795246322844e-06, "loss": 0.2137, "step": 2997 }, { "epoch": 0.4, "grad_norm": 0.9947965469478168, "learning_rate": 6.755734970122771e-06, "loss": 0.2162, "step": 2998 }, { "epoch": 0.4, "grad_norm": 0.8846923540542035, "learning_rate": 6.753690080695191e-06, "loss": 0.1778, "step": 2999 }, { "epoch": 0.4, "grad_norm": 0.9392159712679259, "learning_rate": 6.751644856739619e-06, "loss": 0.191, "step": 3000 }, { "epoch": 0.4, "grad_norm": 0.8113072543948711, "learning_rate": 6.749599298646194e-06, "loss": 0.1708, "step": 3001 }, { "epoch": 0.4, "grad_norm": 0.865778935668425, "learning_rate": 6.747553406805122e-06, "loss": 0.2109, "step": 3002 }, { "epoch": 0.4, "grad_norm": 1.0526157083314227, "learning_rate": 6.7455071816066684e-06, "loss": 0.2082, "step": 3003 }, { "epoch": 0.41, "grad_norm": 0.9210919764190071, "learning_rate": 6.743460623441165e-06, "loss": 0.162, "step": 3004 }, { "epoch": 0.41, "grad_norm": 1.1733098495534304, "learning_rate": 6.741413732699008e-06, "loss": 0.219, "step": 3005 }, { "epoch": 0.41, "grad_norm": 0.9209955209949069, "learning_rate": 6.739366509770653e-06, "loss": 0.1732, "step": 3006 }, { "epoch": 0.41, "grad_norm": 0.7518408105939033, "learning_rate": 6.7373189550466225e-06, "loss": 0.119, "step": 3007 }, { "epoch": 0.41, "grad_norm": 0.9877969225774159, "learning_rate": 6.735271068917501e-06, "loss": 0.1598, "step": 3008 }, { "epoch": 0.41, "grad_norm": 1.0054963748448682, "learning_rate": 6.733222851773936e-06, "loss": 0.1793, "step": 3009 }, { "epoch": 0.41, "grad_norm": 0.9789385138449563, "learning_rate": 6.731174304006639e-06, "loss": 0.1911, "step": 3010 }, { "epoch": 0.41, "grad_norm": 0.9767866526534039, "learning_rate": 6.7291254260063834e-06, "loss": 0.2146, "step": 3011 }, { "epoch": 0.41, "grad_norm": 1.0601824227108267, "learning_rate": 6.727076218164006e-06, "loss": 0.1893, "step": 3012 }, { "epoch": 0.41, "grad_norm": 0.8476407948737271, "learning_rate": 6.725026680870408e-06, "loss": 0.1663, "step": 3013 }, { "epoch": 0.41, "grad_norm": 0.872910910536, "learning_rate": 6.722976814516551e-06, "loss": 0.1414, "step": 3014 }, { "epoch": 0.41, "grad_norm": 1.0320594542964807, "learning_rate": 6.720926619493462e-06, "loss": 0.1772, "step": 3015 }, { "epoch": 0.41, "grad_norm": 0.8575952230239846, "learning_rate": 6.718876096192227e-06, "loss": 0.2103, "step": 3016 }, { "epoch": 0.41, "grad_norm": 1.026575439565605, "learning_rate": 6.7168252450039995e-06, "loss": 0.2229, "step": 3017 }, { "epoch": 0.41, "grad_norm": 0.852995007619263, "learning_rate": 6.7147740663199915e-06, "loss": 0.1554, "step": 3018 }, { "epoch": 0.41, "grad_norm": 1.0170994145803802, "learning_rate": 6.712722560531477e-06, "loss": 0.1728, "step": 3019 }, { "epoch": 0.41, "grad_norm": 0.9133571944333275, "learning_rate": 6.7106707280297974e-06, "loss": 0.157, "step": 3020 }, { "epoch": 0.41, "grad_norm": 0.8279952386447424, "learning_rate": 6.708618569206353e-06, "loss": 0.1983, "step": 3021 }, { "epoch": 0.41, "grad_norm": 0.7067678449263621, "learning_rate": 6.706566084452606e-06, "loss": 0.1686, "step": 3022 }, { "epoch": 0.41, "grad_norm": 0.8687815703140261, "learning_rate": 6.704513274160083e-06, "loss": 0.0997, "step": 3023 }, { "epoch": 0.41, "grad_norm": 0.8409064513204234, "learning_rate": 6.702460138720368e-06, "loss": 0.1668, "step": 3024 }, { "epoch": 0.41, "grad_norm": 1.0277330861220255, "learning_rate": 6.700406678525112e-06, "loss": 0.215, "step": 3025 }, { "epoch": 0.41, "grad_norm": 0.815628860162803, "learning_rate": 6.698352893966024e-06, "loss": 0.1507, "step": 3026 }, { "epoch": 0.41, "grad_norm": 0.6447312410092424, "learning_rate": 6.696298785434882e-06, "loss": 0.0989, "step": 3027 }, { "epoch": 0.41, "grad_norm": 0.7591551999948479, "learning_rate": 6.694244353323515e-06, "loss": 0.1612, "step": 3028 }, { "epoch": 0.41, "grad_norm": 0.8322987307531645, "learning_rate": 6.6921895980238235e-06, "loss": 0.151, "step": 3029 }, { "epoch": 0.41, "grad_norm": 0.8665384232727259, "learning_rate": 6.6901345199277654e-06, "loss": 0.154, "step": 3030 }, { "epoch": 0.41, "grad_norm": 0.9829853119133568, "learning_rate": 6.68807911942736e-06, "loss": 0.1735, "step": 3031 }, { "epoch": 0.41, "grad_norm": 1.052781063938126, "learning_rate": 6.686023396914685e-06, "loss": 0.2465, "step": 3032 }, { "epoch": 0.41, "grad_norm": 0.823553537011791, "learning_rate": 6.6839673527818885e-06, "loss": 0.1271, "step": 3033 }, { "epoch": 0.41, "grad_norm": 1.183682889146376, "learning_rate": 6.681910987421171e-06, "loss": 0.2422, "step": 3034 }, { "epoch": 0.41, "grad_norm": 1.1038292405504577, "learning_rate": 6.6798543012247995e-06, "loss": 0.2116, "step": 3035 }, { "epoch": 0.41, "grad_norm": 0.7603105969540861, "learning_rate": 6.6777972945851e-06, "loss": 0.1274, "step": 3036 }, { "epoch": 0.41, "grad_norm": 0.9096959970608113, "learning_rate": 6.675739967894459e-06, "loss": 0.1939, "step": 3037 }, { "epoch": 0.41, "grad_norm": 1.0984147219450568, "learning_rate": 6.673682321545327e-06, "loss": 0.2195, "step": 3038 }, { "epoch": 0.41, "grad_norm": 1.1918852920321084, "learning_rate": 6.671624355930213e-06, "loss": 0.2065, "step": 3039 }, { "epoch": 0.41, "grad_norm": 0.8923877238172716, "learning_rate": 6.669566071441689e-06, "loss": 0.1601, "step": 3040 }, { "epoch": 0.41, "grad_norm": 0.8093376093984918, "learning_rate": 6.667507468472383e-06, "loss": 0.1589, "step": 3041 }, { "epoch": 0.41, "grad_norm": 0.9620745317995796, "learning_rate": 6.665448547414989e-06, "loss": 0.1565, "step": 3042 }, { "epoch": 0.41, "grad_norm": 0.8302782451028086, "learning_rate": 6.663389308662259e-06, "loss": 0.121, "step": 3043 }, { "epoch": 0.41, "grad_norm": 1.0164228874551233, "learning_rate": 6.661329752607009e-06, "loss": 0.2625, "step": 3044 }, { "epoch": 0.41, "grad_norm": 1.0075950466742682, "learning_rate": 6.65926987964211e-06, "loss": 0.19, "step": 3045 }, { "epoch": 0.41, "grad_norm": 1.1246571145313804, "learning_rate": 6.6572096901605e-06, "loss": 0.1953, "step": 3046 }, { "epoch": 0.41, "grad_norm": 0.8080117946579581, "learning_rate": 6.65514918455517e-06, "loss": 0.1394, "step": 3047 }, { "epoch": 0.41, "grad_norm": 0.9763179521569438, "learning_rate": 6.653088363219176e-06, "loss": 0.1276, "step": 3048 }, { "epoch": 0.41, "grad_norm": 1.0841816232013195, "learning_rate": 6.651027226545632e-06, "loss": 0.2294, "step": 3049 }, { "epoch": 0.41, "grad_norm": 0.9941460346052649, "learning_rate": 6.648965774927718e-06, "loss": 0.1795, "step": 3050 }, { "epoch": 0.41, "grad_norm": 1.0205581860352413, "learning_rate": 6.646904008758665e-06, "loss": 0.16, "step": 3051 }, { "epoch": 0.41, "grad_norm": 0.999116724118137, "learning_rate": 6.6448419284317724e-06, "loss": 0.1655, "step": 3052 }, { "epoch": 0.41, "grad_norm": 0.7336061821796505, "learning_rate": 6.642779534340392e-06, "loss": 0.1794, "step": 3053 }, { "epoch": 0.41, "grad_norm": 0.7483707492800282, "learning_rate": 6.640716826877942e-06, "loss": 0.1264, "step": 3054 }, { "epoch": 0.41, "grad_norm": 0.8022215437295233, "learning_rate": 6.638653806437896e-06, "loss": 0.1562, "step": 3055 }, { "epoch": 0.41, "grad_norm": 0.8070237671602603, "learning_rate": 6.636590473413787e-06, "loss": 0.1389, "step": 3056 }, { "epoch": 0.41, "grad_norm": 0.9454622740243044, "learning_rate": 6.634526828199215e-06, "loss": 0.1232, "step": 3057 }, { "epoch": 0.41, "grad_norm": 0.6794980668659582, "learning_rate": 6.632462871187828e-06, "loss": 0.1051, "step": 3058 }, { "epoch": 0.41, "grad_norm": 1.3513143506105916, "learning_rate": 6.630398602773343e-06, "loss": 0.2614, "step": 3059 }, { "epoch": 0.41, "grad_norm": 0.9299288828350463, "learning_rate": 6.62833402334953e-06, "loss": 0.1616, "step": 3060 }, { "epoch": 0.41, "grad_norm": 0.8487399598034975, "learning_rate": 6.626269133310225e-06, "loss": 0.1431, "step": 3061 }, { "epoch": 0.41, "grad_norm": 0.9327027928806635, "learning_rate": 6.624203933049317e-06, "loss": 0.1914, "step": 3062 }, { "epoch": 0.41, "grad_norm": 1.0267771777727248, "learning_rate": 6.622138422960757e-06, "loss": 0.2149, "step": 3063 }, { "epoch": 0.41, "grad_norm": 0.8495181354812115, "learning_rate": 6.620072603438554e-06, "loss": 0.1597, "step": 3064 }, { "epoch": 0.41, "grad_norm": 1.0092282568647621, "learning_rate": 6.618006474876777e-06, "loss": 0.2309, "step": 3065 }, { "epoch": 0.41, "grad_norm": 1.2464759878972203, "learning_rate": 6.615940037669556e-06, "loss": 0.2731, "step": 3066 }, { "epoch": 0.41, "grad_norm": 0.7394008210723984, "learning_rate": 6.613873292211075e-06, "loss": 0.1687, "step": 3067 }, { "epoch": 0.41, "grad_norm": 1.0411981196525806, "learning_rate": 6.61180623889558e-06, "loss": 0.2074, "step": 3068 }, { "epoch": 0.41, "grad_norm": 0.6723385587942562, "learning_rate": 6.609738878117375e-06, "loss": 0.1012, "step": 3069 }, { "epoch": 0.41, "grad_norm": 0.982299292429497, "learning_rate": 6.607671210270825e-06, "loss": 0.1167, "step": 3070 }, { "epoch": 0.41, "grad_norm": 1.077047381238657, "learning_rate": 6.605603235750347e-06, "loss": 0.2173, "step": 3071 }, { "epoch": 0.41, "grad_norm": 0.7403292922914633, "learning_rate": 6.603534954950424e-06, "loss": 0.1457, "step": 3072 }, { "epoch": 0.41, "grad_norm": 0.9024872702704261, "learning_rate": 6.601466368265594e-06, "loss": 0.146, "step": 3073 }, { "epoch": 0.41, "grad_norm": 1.1887648840455118, "learning_rate": 6.599397476090453e-06, "loss": 0.2579, "step": 3074 }, { "epoch": 0.41, "grad_norm": 0.9022487276526236, "learning_rate": 6.597328278819656e-06, "loss": 0.1816, "step": 3075 }, { "epoch": 0.41, "grad_norm": 1.2543591397034792, "learning_rate": 6.5952587768479144e-06, "loss": 0.2208, "step": 3076 }, { "epoch": 0.41, "grad_norm": 0.7368583334943896, "learning_rate": 6.5931889705700035e-06, "loss": 0.1164, "step": 3077 }, { "epoch": 0.42, "grad_norm": 0.7210733045168832, "learning_rate": 6.591118860380749e-06, "loss": 0.1097, "step": 3078 }, { "epoch": 0.42, "grad_norm": 0.8690324109290877, "learning_rate": 6.589048446675039e-06, "loss": 0.1732, "step": 3079 }, { "epoch": 0.42, "grad_norm": 0.9207789749019689, "learning_rate": 6.586977729847819e-06, "loss": 0.1352, "step": 3080 }, { "epoch": 0.42, "grad_norm": 1.11996486674473, "learning_rate": 6.584906710294091e-06, "loss": 0.1892, "step": 3081 }, { "epoch": 0.42, "grad_norm": 0.7029128308466434, "learning_rate": 6.582835388408916e-06, "loss": 0.1797, "step": 3082 }, { "epoch": 0.42, "grad_norm": 1.2667043908027833, "learning_rate": 6.580763764587413e-06, "loss": 0.2033, "step": 3083 }, { "epoch": 0.42, "grad_norm": 1.0988051521094353, "learning_rate": 6.57869183922476e-06, "loss": 0.2053, "step": 3084 }, { "epoch": 0.42, "grad_norm": 0.9595496511353603, "learning_rate": 6.576619612716188e-06, "loss": 0.2144, "step": 3085 }, { "epoch": 0.42, "grad_norm": 1.122062178089135, "learning_rate": 6.574547085456986e-06, "loss": 0.1984, "step": 3086 }, { "epoch": 0.42, "grad_norm": 1.0903514947869972, "learning_rate": 6.572474257842505e-06, "loss": 0.216, "step": 3087 }, { "epoch": 0.42, "grad_norm": 0.7404673971277069, "learning_rate": 6.570401130268149e-06, "loss": 0.1199, "step": 3088 }, { "epoch": 0.42, "grad_norm": 1.1490859753640137, "learning_rate": 6.568327703129383e-06, "loss": 0.2018, "step": 3089 }, { "epoch": 0.42, "grad_norm": 1.0664369992867675, "learning_rate": 6.566253976821727e-06, "loss": 0.1567, "step": 3090 }, { "epoch": 0.42, "grad_norm": 1.2102328779050444, "learning_rate": 6.564179951740755e-06, "loss": 0.2677, "step": 3091 }, { "epoch": 0.42, "grad_norm": 1.1449624343249254, "learning_rate": 6.562105628282105e-06, "loss": 0.2047, "step": 3092 }, { "epoch": 0.42, "grad_norm": 1.034287200447329, "learning_rate": 6.5600310068414645e-06, "loss": 0.1834, "step": 3093 }, { "epoch": 0.42, "grad_norm": 0.9128639337966944, "learning_rate": 6.557956087814582e-06, "loss": 0.173, "step": 3094 }, { "epoch": 0.42, "grad_norm": 1.1981185354578547, "learning_rate": 6.555880871597262e-06, "loss": 0.2893, "step": 3095 }, { "epoch": 0.42, "grad_norm": 1.0378468531253984, "learning_rate": 6.5538053585853675e-06, "loss": 0.1732, "step": 3096 }, { "epoch": 0.42, "grad_norm": 0.9059727783602584, "learning_rate": 6.5517295491748156e-06, "loss": 0.1393, "step": 3097 }, { "epoch": 0.42, "grad_norm": 0.8135352457100078, "learning_rate": 6.54965344376158e-06, "loss": 0.1286, "step": 3098 }, { "epoch": 0.42, "grad_norm": 0.6981272573325141, "learning_rate": 6.547577042741691e-06, "loss": 0.1231, "step": 3099 }, { "epoch": 0.42, "grad_norm": 0.9277897284640803, "learning_rate": 6.545500346511237e-06, "loss": 0.19, "step": 3100 }, { "epoch": 0.42, "grad_norm": 0.7219501191161909, "learning_rate": 6.54342335546636e-06, "loss": 0.1165, "step": 3101 }, { "epoch": 0.42, "grad_norm": 0.91798715992262, "learning_rate": 6.541346070003264e-06, "loss": 0.1691, "step": 3102 }, { "epoch": 0.42, "grad_norm": 1.1345008245160655, "learning_rate": 6.5392684905182e-06, "loss": 0.2647, "step": 3103 }, { "epoch": 0.42, "grad_norm": 1.0150583204392196, "learning_rate": 6.537190617407481e-06, "loss": 0.2069, "step": 3104 }, { "epoch": 0.42, "grad_norm": 0.8799646467853974, "learning_rate": 6.535112451067477e-06, "loss": 0.1421, "step": 3105 }, { "epoch": 0.42, "grad_norm": 0.9975182551088886, "learning_rate": 6.533033991894611e-06, "loss": 0.2068, "step": 3106 }, { "epoch": 0.42, "grad_norm": 0.8601102252767973, "learning_rate": 6.530955240285363e-06, "loss": 0.1209, "step": 3107 }, { "epoch": 0.42, "grad_norm": 0.9486104553120993, "learning_rate": 6.528876196636269e-06, "loss": 0.1452, "step": 3108 }, { "epoch": 0.42, "grad_norm": 0.669236983589855, "learning_rate": 6.526796861343919e-06, "loss": 0.1083, "step": 3109 }, { "epoch": 0.42, "grad_norm": 0.843903289842928, "learning_rate": 6.52471723480496e-06, "loss": 0.1853, "step": 3110 }, { "epoch": 0.42, "grad_norm": 1.1105002875056555, "learning_rate": 6.522637317416096e-06, "loss": 0.1788, "step": 3111 }, { "epoch": 0.42, "grad_norm": 1.1154644017555144, "learning_rate": 6.520557109574086e-06, "loss": 0.1731, "step": 3112 }, { "epoch": 0.42, "grad_norm": 0.9149793705074292, "learning_rate": 6.5184766116757395e-06, "loss": 0.1828, "step": 3113 }, { "epoch": 0.42, "grad_norm": 1.315416256418724, "learning_rate": 6.516395824117929e-06, "loss": 0.2449, "step": 3114 }, { "epoch": 0.42, "grad_norm": 0.5897856719683376, "learning_rate": 6.514314747297577e-06, "loss": 0.1892, "step": 3115 }, { "epoch": 0.42, "grad_norm": 0.7363202344202495, "learning_rate": 6.512233381611661e-06, "loss": 0.1445, "step": 3116 }, { "epoch": 0.42, "grad_norm": 0.9619264098219557, "learning_rate": 6.510151727457218e-06, "loss": 0.1822, "step": 3117 }, { "epoch": 0.42, "grad_norm": 0.7629273649985496, "learning_rate": 6.508069785231337e-06, "loss": 0.1351, "step": 3118 }, { "epoch": 0.42, "grad_norm": 1.1457145939117543, "learning_rate": 6.50598755533116e-06, "loss": 0.2144, "step": 3119 }, { "epoch": 0.42, "grad_norm": 0.9702039425430392, "learning_rate": 6.5039050381538885e-06, "loss": 0.2159, "step": 3120 }, { "epoch": 0.42, "grad_norm": 0.974481151521882, "learning_rate": 6.501822234096775e-06, "loss": 0.1904, "step": 3121 }, { "epoch": 0.42, "grad_norm": 1.1051738228320551, "learning_rate": 6.499739143557129e-06, "loss": 0.1661, "step": 3122 }, { "epoch": 0.42, "grad_norm": 0.8783267186236835, "learning_rate": 6.497655766932315e-06, "loss": 0.15, "step": 3123 }, { "epoch": 0.42, "grad_norm": 0.8328408529756012, "learning_rate": 6.495572104619748e-06, "loss": 0.178, "step": 3124 }, { "epoch": 0.42, "grad_norm": 1.0262172016194633, "learning_rate": 6.493488157016902e-06, "loss": 0.2029, "step": 3125 }, { "epoch": 0.42, "grad_norm": 0.9145841654124343, "learning_rate": 6.491403924521304e-06, "loss": 0.1664, "step": 3126 }, { "epoch": 0.42, "grad_norm": 1.0540380610790152, "learning_rate": 6.489319407530534e-06, "loss": 0.2608, "step": 3127 }, { "epoch": 0.42, "grad_norm": 0.8157890300327244, "learning_rate": 6.487234606442229e-06, "loss": 0.1712, "step": 3128 }, { "epoch": 0.42, "grad_norm": 1.1537489368689404, "learning_rate": 6.4851495216540775e-06, "loss": 0.2353, "step": 3129 }, { "epoch": 0.42, "grad_norm": 1.043304312279388, "learning_rate": 6.483064153563823e-06, "loss": 0.2176, "step": 3130 }, { "epoch": 0.42, "grad_norm": 0.9719012995334656, "learning_rate": 6.480978502569265e-06, "loss": 0.2101, "step": 3131 }, { "epoch": 0.42, "grad_norm": 0.9941662181695049, "learning_rate": 6.4788925690682535e-06, "loss": 0.1958, "step": 3132 }, { "epoch": 0.42, "grad_norm": 1.0184819384139774, "learning_rate": 6.476806353458693e-06, "loss": 0.1644, "step": 3133 }, { "epoch": 0.42, "grad_norm": 0.933720378297325, "learning_rate": 6.4747198561385475e-06, "loss": 0.1644, "step": 3134 }, { "epoch": 0.42, "grad_norm": 1.0198794401712061, "learning_rate": 6.472633077505825e-06, "loss": 0.2173, "step": 3135 }, { "epoch": 0.42, "grad_norm": 0.7720681560124554, "learning_rate": 6.470546017958595e-06, "loss": 0.1483, "step": 3136 }, { "epoch": 0.42, "grad_norm": 0.9234719440508313, "learning_rate": 6.468458677894978e-06, "loss": 0.2168, "step": 3137 }, { "epoch": 0.42, "grad_norm": 0.8731125730561147, "learning_rate": 6.4663710577131475e-06, "loss": 0.1496, "step": 3138 }, { "epoch": 0.42, "grad_norm": 0.7133018834072532, "learning_rate": 6.464283157811332e-06, "loss": 0.192, "step": 3139 }, { "epoch": 0.42, "grad_norm": 0.9478875144991992, "learning_rate": 6.4621949785878095e-06, "loss": 0.1897, "step": 3140 }, { "epoch": 0.42, "grad_norm": 0.9234501224793876, "learning_rate": 6.460106520440916e-06, "loss": 0.1793, "step": 3141 }, { "epoch": 0.42, "grad_norm": 1.275388513478614, "learning_rate": 6.458017783769039e-06, "loss": 0.2261, "step": 3142 }, { "epoch": 0.42, "grad_norm": 0.9351882337062494, "learning_rate": 6.455928768970616e-06, "loss": 0.128, "step": 3143 }, { "epoch": 0.42, "grad_norm": 0.6758761449362894, "learning_rate": 6.453839476444144e-06, "loss": 0.1116, "step": 3144 }, { "epoch": 0.42, "grad_norm": 0.652408591624659, "learning_rate": 6.451749906588167e-06, "loss": 0.0957, "step": 3145 }, { "epoch": 0.42, "grad_norm": 0.9902393711918263, "learning_rate": 6.449660059801287e-06, "loss": 0.1756, "step": 3146 }, { "epoch": 0.42, "grad_norm": 1.2262582621791105, "learning_rate": 6.447569936482153e-06, "loss": 0.2533, "step": 3147 }, { "epoch": 0.42, "grad_norm": 0.6369617198018447, "learning_rate": 6.445479537029471e-06, "loss": 0.1269, "step": 3148 }, { "epoch": 0.42, "grad_norm": 0.8571201294891237, "learning_rate": 6.443388861841999e-06, "loss": 0.214, "step": 3149 }, { "epoch": 0.42, "grad_norm": 0.9400961394428352, "learning_rate": 6.441297911318547e-06, "loss": 0.1712, "step": 3150 }, { "epoch": 0.42, "grad_norm": 1.2155052068391083, "learning_rate": 6.439206685857977e-06, "loss": 0.2392, "step": 3151 }, { "epoch": 0.43, "grad_norm": 0.9397064237215956, "learning_rate": 6.437115185859205e-06, "loss": 0.1667, "step": 3152 }, { "epoch": 0.43, "grad_norm": 0.7441595639003105, "learning_rate": 6.435023411721198e-06, "loss": 0.1432, "step": 3153 }, { "epoch": 0.43, "grad_norm": 0.9702298470033125, "learning_rate": 6.432931363842977e-06, "loss": 0.142, "step": 3154 }, { "epoch": 0.43, "grad_norm": 1.0389588059233288, "learning_rate": 6.430839042623611e-06, "loss": 0.1845, "step": 3155 }, { "epoch": 0.43, "grad_norm": 0.9474660732826402, "learning_rate": 6.428746448462227e-06, "loss": 0.2112, "step": 3156 }, { "epoch": 0.43, "grad_norm": 0.8404674681924468, "learning_rate": 6.426653581757999e-06, "loss": 0.1571, "step": 3157 }, { "epoch": 0.43, "grad_norm": 1.0974076042202654, "learning_rate": 6.424560442910157e-06, "loss": 0.2028, "step": 3158 }, { "epoch": 0.43, "grad_norm": 0.6893662914329438, "learning_rate": 6.42246703231798e-06, "loss": 0.1211, "step": 3159 }, { "epoch": 0.43, "grad_norm": 1.0103800331200004, "learning_rate": 6.420373350380801e-06, "loss": 0.2142, "step": 3160 }, { "epoch": 0.43, "grad_norm": 0.8669178276982972, "learning_rate": 6.418279397498003e-06, "loss": 0.1621, "step": 3161 }, { "epoch": 0.43, "grad_norm": 1.149950752686713, "learning_rate": 6.416185174069021e-06, "loss": 0.2446, "step": 3162 }, { "epoch": 0.43, "grad_norm": 0.8330580007977934, "learning_rate": 6.414090680493343e-06, "loss": 0.1887, "step": 3163 }, { "epoch": 0.43, "grad_norm": 1.1142009063432725, "learning_rate": 6.411995917170506e-06, "loss": 0.1946, "step": 3164 }, { "epoch": 0.43, "grad_norm": 0.8317125772656634, "learning_rate": 6.4099008845001e-06, "loss": 0.178, "step": 3165 }, { "epoch": 0.43, "grad_norm": 0.8496933442779565, "learning_rate": 6.407805582881767e-06, "loss": 0.123, "step": 3166 }, { "epoch": 0.43, "grad_norm": 0.9015630409193259, "learning_rate": 6.405710012715201e-06, "loss": 0.2069, "step": 3167 }, { "epoch": 0.43, "grad_norm": 0.7810509124176188, "learning_rate": 6.403614174400146e-06, "loss": 0.1474, "step": 3168 }, { "epoch": 0.43, "grad_norm": 1.005090876531031, "learning_rate": 6.401518068336395e-06, "loss": 0.1809, "step": 3169 }, { "epoch": 0.43, "grad_norm": 0.9439538727412466, "learning_rate": 6.399421694923794e-06, "loss": 0.1459, "step": 3170 }, { "epoch": 0.43, "grad_norm": 1.096665581492917, "learning_rate": 6.397325054562242e-06, "loss": 0.2199, "step": 3171 }, { "epoch": 0.43, "grad_norm": 0.7173492886723163, "learning_rate": 6.395228147651684e-06, "loss": 0.1691, "step": 3172 }, { "epoch": 0.43, "grad_norm": 0.8992081054397256, "learning_rate": 6.393130974592122e-06, "loss": 0.1695, "step": 3173 }, { "epoch": 0.43, "grad_norm": 0.879774215171381, "learning_rate": 6.391033535783605e-06, "loss": 0.1332, "step": 3174 }, { "epoch": 0.43, "grad_norm": 0.7958426263139131, "learning_rate": 6.388935831626233e-06, "loss": 0.1531, "step": 3175 }, { "epoch": 0.43, "grad_norm": 1.0395378287743053, "learning_rate": 6.386837862520157e-06, "loss": 0.1972, "step": 3176 }, { "epoch": 0.43, "grad_norm": 0.8495339211390418, "learning_rate": 6.384739628865577e-06, "loss": 0.1799, "step": 3177 }, { "epoch": 0.43, "grad_norm": 1.1973147417487078, "learning_rate": 6.382641131062746e-06, "loss": 0.22, "step": 3178 }, { "epoch": 0.43, "grad_norm": 0.7974096219881709, "learning_rate": 6.380542369511969e-06, "loss": 0.1547, "step": 3179 }, { "epoch": 0.43, "grad_norm": 0.9149559970354038, "learning_rate": 6.378443344613594e-06, "loss": 0.166, "step": 3180 }, { "epoch": 0.43, "grad_norm": 0.7729138486873252, "learning_rate": 6.376344056768027e-06, "loss": 0.133, "step": 3181 }, { "epoch": 0.43, "grad_norm": 0.9320895492331024, "learning_rate": 6.374244506375721e-06, "loss": 0.2099, "step": 3182 }, { "epoch": 0.43, "grad_norm": 0.9757005024531279, "learning_rate": 6.372144693837176e-06, "loss": 0.1584, "step": 3183 }, { "epoch": 0.43, "grad_norm": 0.7500559195082059, "learning_rate": 6.37004461955295e-06, "loss": 0.1658, "step": 3184 }, { "epoch": 0.43, "grad_norm": 0.946418606680315, "learning_rate": 6.367944283923643e-06, "loss": 0.1924, "step": 3185 }, { "epoch": 0.43, "grad_norm": 0.9955776210512474, "learning_rate": 6.3658436873499075e-06, "loss": 0.1468, "step": 3186 }, { "epoch": 0.43, "grad_norm": 0.8866849383918626, "learning_rate": 6.363742830232448e-06, "loss": 0.1612, "step": 3187 }, { "epoch": 0.43, "grad_norm": 1.041940689948413, "learning_rate": 6.361641712972015e-06, "loss": 0.2201, "step": 3188 }, { "epoch": 0.43, "grad_norm": 1.0182517128253659, "learning_rate": 6.3595403359694116e-06, "loss": 0.2153, "step": 3189 }, { "epoch": 0.43, "grad_norm": 0.48577499589136536, "learning_rate": 6.357438699625491e-06, "loss": 0.0922, "step": 3190 }, { "epoch": 0.43, "grad_norm": 1.0380890331912611, "learning_rate": 6.355336804341152e-06, "loss": 0.1965, "step": 3191 }, { "epoch": 0.43, "grad_norm": 1.098037431184755, "learning_rate": 6.3532346505173446e-06, "loss": 0.1988, "step": 3192 }, { "epoch": 0.43, "grad_norm": 0.9535285909672399, "learning_rate": 6.351132238555072e-06, "loss": 0.2235, "step": 3193 }, { "epoch": 0.43, "grad_norm": 0.9586077648381052, "learning_rate": 6.349029568855378e-06, "loss": 0.1942, "step": 3194 }, { "epoch": 0.43, "grad_norm": 0.8785998059537523, "learning_rate": 6.346926641819365e-06, "loss": 0.1759, "step": 3195 }, { "epoch": 0.43, "grad_norm": 0.9040083521858789, "learning_rate": 6.3448234578481795e-06, "loss": 0.2132, "step": 3196 }, { "epoch": 0.43, "grad_norm": 0.8513994277572755, "learning_rate": 6.342720017343016e-06, "loss": 0.1086, "step": 3197 }, { "epoch": 0.43, "grad_norm": 1.0127605506266084, "learning_rate": 6.340616320705122e-06, "loss": 0.1928, "step": 3198 }, { "epoch": 0.43, "grad_norm": 0.8748286185770402, "learning_rate": 6.33851236833579e-06, "loss": 0.167, "step": 3199 }, { "epoch": 0.43, "grad_norm": 0.8442037215042029, "learning_rate": 6.336408160636362e-06, "loss": 0.1632, "step": 3200 }, { "epoch": 0.43, "grad_norm": 0.7571573309013994, "learning_rate": 6.334303698008232e-06, "loss": 0.147, "step": 3201 }, { "epoch": 0.43, "grad_norm": 0.6103247896306828, "learning_rate": 6.332198980852839e-06, "loss": 0.1227, "step": 3202 }, { "epoch": 0.43, "grad_norm": 1.0538364909114044, "learning_rate": 6.33009400957167e-06, "loss": 0.2016, "step": 3203 }, { "epoch": 0.43, "grad_norm": 1.1418110440038336, "learning_rate": 6.3279887845662654e-06, "loss": 0.2359, "step": 3204 }, { "epoch": 0.43, "grad_norm": 0.8874391328612645, "learning_rate": 6.3258833062382095e-06, "loss": 0.1564, "step": 3205 }, { "epoch": 0.43, "grad_norm": 0.7384562764113297, "learning_rate": 6.323777574989134e-06, "loss": 0.1646, "step": 3206 }, { "epoch": 0.43, "grad_norm": 0.7860922331959276, "learning_rate": 6.321671591220726e-06, "loss": 0.1584, "step": 3207 }, { "epoch": 0.43, "grad_norm": 0.9882339050904325, "learning_rate": 6.319565355334711e-06, "loss": 0.2031, "step": 3208 }, { "epoch": 0.43, "grad_norm": 0.9070373092173576, "learning_rate": 6.31745886773287e-06, "loss": 0.1636, "step": 3209 }, { "epoch": 0.43, "grad_norm": 1.1089091586300088, "learning_rate": 6.315352128817028e-06, "loss": 0.2171, "step": 3210 }, { "epoch": 0.43, "grad_norm": 1.358575334740753, "learning_rate": 6.31324513898906e-06, "loss": 0.2446, "step": 3211 }, { "epoch": 0.43, "grad_norm": 1.0614493587244402, "learning_rate": 6.311137898650889e-06, "loss": 0.2196, "step": 3212 }, { "epoch": 0.43, "grad_norm": 0.8344592747267526, "learning_rate": 6.309030408204485e-06, "loss": 0.1681, "step": 3213 }, { "epoch": 0.43, "grad_norm": 0.6853671376791425, "learning_rate": 6.306922668051866e-06, "loss": 0.0898, "step": 3214 }, { "epoch": 0.43, "grad_norm": 0.8140198350958752, "learning_rate": 6.304814678595095e-06, "loss": 0.1649, "step": 3215 }, { "epoch": 0.43, "grad_norm": 0.8154762882992758, "learning_rate": 6.3027064402362895e-06, "loss": 0.1224, "step": 3216 }, { "epoch": 0.43, "grad_norm": 1.1082399035884154, "learning_rate": 6.300597953377603e-06, "loss": 0.2424, "step": 3217 }, { "epoch": 0.43, "grad_norm": 0.698776638810563, "learning_rate": 6.2984892184212485e-06, "loss": 0.1321, "step": 3218 }, { "epoch": 0.43, "grad_norm": 1.0330229169657499, "learning_rate": 6.296380235769481e-06, "loss": 0.1802, "step": 3219 }, { "epoch": 0.43, "grad_norm": 0.7941812242908346, "learning_rate": 6.2942710058246024e-06, "loss": 0.1556, "step": 3220 }, { "epoch": 0.43, "grad_norm": 0.8781860050548086, "learning_rate": 6.29216152898896e-06, "loss": 0.1762, "step": 3221 }, { "epoch": 0.43, "grad_norm": 1.1152958759066267, "learning_rate": 6.290051805664954e-06, "loss": 0.2214, "step": 3222 }, { "epoch": 0.43, "grad_norm": 1.0505611252046057, "learning_rate": 6.287941836255026e-06, "loss": 0.1928, "step": 3223 }, { "epoch": 0.43, "grad_norm": 1.022246526100619, "learning_rate": 6.285831621161667e-06, "loss": 0.1984, "step": 3224 }, { "epoch": 0.43, "grad_norm": 1.0928716636630766, "learning_rate": 6.283721160787413e-06, "loss": 0.2173, "step": 3225 }, { "epoch": 0.44, "grad_norm": 1.1801422693931947, "learning_rate": 6.281610455534851e-06, "loss": 0.2435, "step": 3226 }, { "epoch": 0.44, "grad_norm": 0.8526243218292492, "learning_rate": 6.27949950580661e-06, "loss": 0.1333, "step": 3227 }, { "epoch": 0.44, "grad_norm": 0.8762899224509515, "learning_rate": 6.277388312005368e-06, "loss": 0.1614, "step": 3228 }, { "epoch": 0.44, "grad_norm": 0.8136621310251485, "learning_rate": 6.2752768745338515e-06, "loss": 0.1584, "step": 3229 }, { "epoch": 0.44, "grad_norm": 1.4661037769864387, "learning_rate": 6.2731651937948275e-06, "loss": 0.2837, "step": 3230 }, { "epoch": 0.44, "grad_norm": 0.8384090450569323, "learning_rate": 6.271053270191116e-06, "loss": 0.1535, "step": 3231 }, { "epoch": 0.44, "grad_norm": 1.1515773360088255, "learning_rate": 6.26894110412558e-06, "loss": 0.1912, "step": 3232 }, { "epoch": 0.44, "grad_norm": 0.9748707898642016, "learning_rate": 6.266828696001127e-06, "loss": 0.1657, "step": 3233 }, { "epoch": 0.44, "grad_norm": 0.969840420886193, "learning_rate": 6.264716046220713e-06, "loss": 0.19, "step": 3234 }, { "epoch": 0.44, "grad_norm": 0.9146987653775734, "learning_rate": 6.262603155187343e-06, "loss": 0.1914, "step": 3235 }, { "epoch": 0.44, "grad_norm": 0.755970438236634, "learning_rate": 6.260490023304062e-06, "loss": 0.1444, "step": 3236 }, { "epoch": 0.44, "grad_norm": 1.0530455404770793, "learning_rate": 6.258376650973967e-06, "loss": 0.1953, "step": 3237 }, { "epoch": 0.44, "grad_norm": 1.0468988556836076, "learning_rate": 6.2562630386001945e-06, "loss": 0.1759, "step": 3238 }, { "epoch": 0.44, "grad_norm": 1.0048260346700242, "learning_rate": 6.25414918658593e-06, "loss": 0.2248, "step": 3239 }, { "epoch": 0.44, "grad_norm": 0.7784600736379296, "learning_rate": 6.252035095334408e-06, "loss": 0.1842, "step": 3240 }, { "epoch": 0.44, "grad_norm": 1.117263520778145, "learning_rate": 6.249920765248902e-06, "loss": 0.2218, "step": 3241 }, { "epoch": 0.44, "grad_norm": 0.8141482493264715, "learning_rate": 6.2478061967327374e-06, "loss": 0.1396, "step": 3242 }, { "epoch": 0.44, "grad_norm": 1.0544008417555442, "learning_rate": 6.245691390189281e-06, "loss": 0.1967, "step": 3243 }, { "epoch": 0.44, "grad_norm": 1.036777434246522, "learning_rate": 6.243576346021944e-06, "loss": 0.1957, "step": 3244 }, { "epoch": 0.44, "grad_norm": 0.907210428274371, "learning_rate": 6.241461064634188e-06, "loss": 0.2092, "step": 3245 }, { "epoch": 0.44, "grad_norm": 0.8202021985326615, "learning_rate": 6.239345546429516e-06, "loss": 0.2088, "step": 3246 }, { "epoch": 0.44, "grad_norm": 0.9421030026234818, "learning_rate": 6.2372297918114766e-06, "loss": 0.1584, "step": 3247 }, { "epoch": 0.44, "grad_norm": 1.2771513306557223, "learning_rate": 6.235113801183666e-06, "loss": 0.2175, "step": 3248 }, { "epoch": 0.44, "grad_norm": 0.9777460722568628, "learning_rate": 6.23299757494972e-06, "loss": 0.2155, "step": 3249 }, { "epoch": 0.44, "grad_norm": 0.9418388407293709, "learning_rate": 6.230881113513324e-06, "loss": 0.1428, "step": 3250 }, { "epoch": 0.44, "grad_norm": 0.9366562075291531, "learning_rate": 6.228764417278207e-06, "loss": 0.1853, "step": 3251 }, { "epoch": 0.44, "grad_norm": 0.9238510611002327, "learning_rate": 6.226647486648145e-06, "loss": 0.1851, "step": 3252 }, { "epoch": 0.44, "grad_norm": 1.1358496677076082, "learning_rate": 6.224530322026953e-06, "loss": 0.2264, "step": 3253 }, { "epoch": 0.44, "grad_norm": 0.8445795314999224, "learning_rate": 6.222412923818499e-06, "loss": 0.1509, "step": 3254 }, { "epoch": 0.44, "grad_norm": 1.0928240628292558, "learning_rate": 6.220295292426688e-06, "loss": 0.2233, "step": 3255 }, { "epoch": 0.44, "grad_norm": 0.807558862476007, "learning_rate": 6.218177428255468e-06, "loss": 0.1593, "step": 3256 }, { "epoch": 0.44, "grad_norm": 1.091893390667862, "learning_rate": 6.2160593317088425e-06, "loss": 0.2033, "step": 3257 }, { "epoch": 0.44, "grad_norm": 1.0335697964133073, "learning_rate": 6.213941003190849e-06, "loss": 0.1872, "step": 3258 }, { "epoch": 0.44, "grad_norm": 0.8752295923552332, "learning_rate": 6.211822443105574e-06, "loss": 0.2291, "step": 3259 }, { "epoch": 0.44, "grad_norm": 1.0028598999200582, "learning_rate": 6.2097036518571455e-06, "loss": 0.1928, "step": 3260 }, { "epoch": 0.44, "grad_norm": 0.8902364307556637, "learning_rate": 6.207584629849736e-06, "loss": 0.1318, "step": 3261 }, { "epoch": 0.44, "grad_norm": 0.676730178379467, "learning_rate": 6.205465377487568e-06, "loss": 0.1325, "step": 3262 }, { "epoch": 0.44, "grad_norm": 1.0604459455284045, "learning_rate": 6.203345895174896e-06, "loss": 0.226, "step": 3263 }, { "epoch": 0.44, "grad_norm": 0.9938779881987492, "learning_rate": 6.20122618331603e-06, "loss": 0.1804, "step": 3264 }, { "epoch": 0.44, "grad_norm": 0.811637721543829, "learning_rate": 6.199106242315318e-06, "loss": 0.1362, "step": 3265 }, { "epoch": 0.44, "grad_norm": 1.2252292115625805, "learning_rate": 6.1969860725771534e-06, "loss": 0.2746, "step": 3266 }, { "epoch": 0.44, "grad_norm": 1.1973550313027639, "learning_rate": 6.19486567450597e-06, "loss": 0.2543, "step": 3267 }, { "epoch": 0.44, "grad_norm": 1.1427337608490478, "learning_rate": 6.19274504850625e-06, "loss": 0.1829, "step": 3268 }, { "epoch": 0.44, "grad_norm": 0.6934810617580925, "learning_rate": 6.1906241949825175e-06, "loss": 0.1694, "step": 3269 }, { "epoch": 0.44, "grad_norm": 0.8417920479019366, "learning_rate": 6.1885031143393385e-06, "loss": 0.1898, "step": 3270 }, { "epoch": 0.44, "grad_norm": 0.907939599129123, "learning_rate": 6.186381806981322e-06, "loss": 0.1712, "step": 3271 }, { "epoch": 0.44, "grad_norm": 0.9938534226632566, "learning_rate": 6.184260273313122e-06, "loss": 0.1889, "step": 3272 }, { "epoch": 0.44, "grad_norm": 0.8656495916331595, "learning_rate": 6.182138513739435e-06, "loss": 0.1392, "step": 3273 }, { "epoch": 0.44, "grad_norm": 0.7593072252560845, "learning_rate": 6.180016528665001e-06, "loss": 0.1464, "step": 3274 }, { "epoch": 0.44, "grad_norm": 0.8451109131116119, "learning_rate": 6.177894318494605e-06, "loss": 0.165, "step": 3275 }, { "epoch": 0.44, "grad_norm": 1.0367230918314014, "learning_rate": 6.175771883633068e-06, "loss": 0.1483, "step": 3276 }, { "epoch": 0.44, "grad_norm": 0.9459202562486363, "learning_rate": 6.173649224485263e-06, "loss": 0.1947, "step": 3277 }, { "epoch": 0.44, "grad_norm": 1.156306073093026, "learning_rate": 6.1715263414561e-06, "loss": 0.1971, "step": 3278 }, { "epoch": 0.44, "grad_norm": 0.978807194383666, "learning_rate": 6.16940323495053e-06, "loss": 0.1689, "step": 3279 }, { "epoch": 0.44, "grad_norm": 0.9377976077008201, "learning_rate": 6.167279905373552e-06, "loss": 0.1619, "step": 3280 }, { "epoch": 0.44, "grad_norm": 1.2181670783949765, "learning_rate": 6.165156353130205e-06, "loss": 0.229, "step": 3281 }, { "epoch": 0.44, "grad_norm": 0.959410654098788, "learning_rate": 6.1630325786255714e-06, "loss": 0.2091, "step": 3282 }, { "epoch": 0.44, "grad_norm": 1.0828707901862433, "learning_rate": 6.160908582264774e-06, "loss": 0.1983, "step": 3283 }, { "epoch": 0.44, "grad_norm": 0.7544465965181328, "learning_rate": 6.158784364452981e-06, "loss": 0.1259, "step": 3284 }, { "epoch": 0.44, "grad_norm": 0.8585498771228145, "learning_rate": 6.156659925595398e-06, "loss": 0.1669, "step": 3285 }, { "epoch": 0.44, "grad_norm": 0.8356258326594794, "learning_rate": 6.154535266097279e-06, "loss": 0.1623, "step": 3286 }, { "epoch": 0.44, "grad_norm": 0.9345842485267477, "learning_rate": 6.152410386363914e-06, "loss": 0.1983, "step": 3287 }, { "epoch": 0.44, "grad_norm": 0.8491441595775413, "learning_rate": 6.1502852868006405e-06, "loss": 0.1462, "step": 3288 }, { "epoch": 0.44, "grad_norm": 1.0405429724258248, "learning_rate": 6.148159967812832e-06, "loss": 0.19, "step": 3289 }, { "epoch": 0.44, "grad_norm": 0.9914221982149378, "learning_rate": 6.146034429805909e-06, "loss": 0.1713, "step": 3290 }, { "epoch": 0.44, "grad_norm": 0.8849530232950932, "learning_rate": 6.143908673185334e-06, "loss": 0.1881, "step": 3291 }, { "epoch": 0.44, "grad_norm": 0.9651160561799463, "learning_rate": 6.141782698356608e-06, "loss": 0.1984, "step": 3292 }, { "epoch": 0.44, "grad_norm": 0.8428327490060369, "learning_rate": 6.139656505725274e-06, "loss": 0.2204, "step": 3293 }, { "epoch": 0.44, "grad_norm": 1.0634207486619869, "learning_rate": 6.1375300956969155e-06, "loss": 0.2225, "step": 3294 }, { "epoch": 0.44, "grad_norm": 0.947504243051404, "learning_rate": 6.135403468677161e-06, "loss": 0.2107, "step": 3295 }, { "epoch": 0.44, "grad_norm": 1.0071402801850007, "learning_rate": 6.133276625071681e-06, "loss": 0.1999, "step": 3296 }, { "epoch": 0.44, "grad_norm": 0.8808533446606809, "learning_rate": 6.131149565286181e-06, "loss": 0.1452, "step": 3297 }, { "epoch": 0.44, "grad_norm": 1.0262015458721239, "learning_rate": 6.129022289726414e-06, "loss": 0.1841, "step": 3298 }, { "epoch": 0.44, "grad_norm": 1.1915867719121067, "learning_rate": 6.126894798798171e-06, "loss": 0.2212, "step": 3299 }, { "epoch": 0.44, "grad_norm": 1.1755675540861534, "learning_rate": 6.1247670929072865e-06, "loss": 0.2615, "step": 3300 }, { "epoch": 0.45, "grad_norm": 0.6129509821274504, "learning_rate": 6.122639172459632e-06, "loss": 0.0969, "step": 3301 }, { "epoch": 0.45, "grad_norm": 0.7186439506384219, "learning_rate": 6.120511037861125e-06, "loss": 0.155, "step": 3302 }, { "epoch": 0.45, "grad_norm": 0.9208245301955248, "learning_rate": 6.11838268951772e-06, "loss": 0.1577, "step": 3303 }, { "epoch": 0.45, "grad_norm": 1.0075952199636242, "learning_rate": 6.116254127835414e-06, "loss": 0.2378, "step": 3304 }, { "epoch": 0.45, "grad_norm": 0.9416757596762488, "learning_rate": 6.114125353220243e-06, "loss": 0.1997, "step": 3305 }, { "epoch": 0.45, "grad_norm": 1.095988362022771, "learning_rate": 6.111996366078287e-06, "loss": 0.2312, "step": 3306 }, { "epoch": 0.45, "grad_norm": 1.1062634398152513, "learning_rate": 6.109867166815662e-06, "loss": 0.2267, "step": 3307 }, { "epoch": 0.45, "grad_norm": 0.7353189762814795, "learning_rate": 6.107737755838528e-06, "loss": 0.1695, "step": 3308 }, { "epoch": 0.45, "grad_norm": 0.6736289133911205, "learning_rate": 6.105608133553085e-06, "loss": 0.0942, "step": 3309 }, { "epoch": 0.45, "grad_norm": 1.0988892944173225, "learning_rate": 6.103478300365571e-06, "loss": 0.1762, "step": 3310 }, { "epoch": 0.45, "grad_norm": 0.9982331097239802, "learning_rate": 6.1013482566822665e-06, "loss": 0.1868, "step": 3311 }, { "epoch": 0.45, "grad_norm": 1.1033585598295905, "learning_rate": 6.0992180029094905e-06, "loss": 0.1837, "step": 3312 }, { "epoch": 0.45, "grad_norm": 0.9230524013037473, "learning_rate": 6.097087539453606e-06, "loss": 0.191, "step": 3313 }, { "epoch": 0.45, "grad_norm": 0.8006442531279105, "learning_rate": 6.094956866721009e-06, "loss": 0.1616, "step": 3314 }, { "epoch": 0.45, "grad_norm": 1.0082843954955896, "learning_rate": 6.0928259851181425e-06, "loss": 0.1721, "step": 3315 }, { "epoch": 0.45, "grad_norm": 0.8367880088812952, "learning_rate": 6.090694895051487e-06, "loss": 0.1624, "step": 3316 }, { "epoch": 0.45, "grad_norm": 0.9517207767367818, "learning_rate": 6.088563596927558e-06, "loss": 0.1464, "step": 3317 }, { "epoch": 0.45, "grad_norm": 1.0533126685617096, "learning_rate": 6.086432091152916e-06, "loss": 0.2215, "step": 3318 }, { "epoch": 0.45, "grad_norm": 0.9148148021045074, "learning_rate": 6.084300378134162e-06, "loss": 0.1832, "step": 3319 }, { "epoch": 0.45, "grad_norm": 0.890135743934907, "learning_rate": 6.082168458277934e-06, "loss": 0.1397, "step": 3320 }, { "epoch": 0.45, "grad_norm": 0.7563020776969095, "learning_rate": 6.080036331990908e-06, "loss": 0.1379, "step": 3321 }, { "epoch": 0.45, "grad_norm": 1.209909100923266, "learning_rate": 6.077903999679803e-06, "loss": 0.2266, "step": 3322 }, { "epoch": 0.45, "grad_norm": 0.8347384910449455, "learning_rate": 6.075771461751373e-06, "loss": 0.1092, "step": 3323 }, { "epoch": 0.45, "grad_norm": 0.8906773496251132, "learning_rate": 6.0736387186124165e-06, "loss": 0.2051, "step": 3324 }, { "epoch": 0.45, "grad_norm": 0.9031451536572365, "learning_rate": 6.071505770669767e-06, "loss": 0.168, "step": 3325 }, { "epoch": 0.45, "grad_norm": 1.1253120663352307, "learning_rate": 6.0693726183303e-06, "loss": 0.1972, "step": 3326 }, { "epoch": 0.45, "grad_norm": 0.9662984308244261, "learning_rate": 6.067239262000925e-06, "loss": 0.1667, "step": 3327 }, { "epoch": 0.45, "grad_norm": 1.0568590880684343, "learning_rate": 6.065105702088598e-06, "loss": 0.2093, "step": 3328 }, { "epoch": 0.45, "grad_norm": 0.4768588235973381, "learning_rate": 6.062971939000306e-06, "loss": 0.1162, "step": 3329 }, { "epoch": 0.45, "grad_norm": 0.7271376836980898, "learning_rate": 6.060837973143081e-06, "loss": 0.163, "step": 3330 }, { "epoch": 0.45, "grad_norm": 0.9003031905984398, "learning_rate": 6.058703804923991e-06, "loss": 0.1516, "step": 3331 }, { "epoch": 0.45, "grad_norm": 1.2390316680863758, "learning_rate": 6.056569434750141e-06, "loss": 0.2279, "step": 3332 }, { "epoch": 0.45, "grad_norm": 0.987951646498775, "learning_rate": 6.054434863028678e-06, "loss": 0.1788, "step": 3333 }, { "epoch": 0.45, "grad_norm": 0.9749979595469361, "learning_rate": 6.052300090166783e-06, "loss": 0.2129, "step": 3334 }, { "epoch": 0.45, "grad_norm": 1.2458130090999744, "learning_rate": 6.0501651165716805e-06, "loss": 0.2349, "step": 3335 }, { "epoch": 0.45, "grad_norm": 0.8377324767223108, "learning_rate": 6.0480299426506305e-06, "loss": 0.1617, "step": 3336 }, { "epoch": 0.45, "grad_norm": 1.0796883814722067, "learning_rate": 6.045894568810931e-06, "loss": 0.1575, "step": 3337 }, { "epoch": 0.45, "grad_norm": 0.7923449145015939, "learning_rate": 6.0437589954599195e-06, "loss": 0.1777, "step": 3338 }, { "epoch": 0.45, "grad_norm": 0.790525949052376, "learning_rate": 6.041623223004971e-06, "loss": 0.1509, "step": 3339 }, { "epoch": 0.45, "grad_norm": 0.5623605670827303, "learning_rate": 6.039487251853497e-06, "loss": 0.137, "step": 3340 }, { "epoch": 0.45, "grad_norm": 0.8631358474132035, "learning_rate": 6.037351082412947e-06, "loss": 0.1575, "step": 3341 }, { "epoch": 0.45, "grad_norm": 0.8369383934935082, "learning_rate": 6.035214715090813e-06, "loss": 0.1711, "step": 3342 }, { "epoch": 0.45, "grad_norm": 1.0527460644566973, "learning_rate": 6.033078150294619e-06, "loss": 0.193, "step": 3343 }, { "epoch": 0.45, "grad_norm": 1.1259633895198504, "learning_rate": 6.030941388431929e-06, "loss": 0.2406, "step": 3344 }, { "epoch": 0.45, "grad_norm": 0.9119085219525361, "learning_rate": 6.028804429910344e-06, "loss": 0.1881, "step": 3345 }, { "epoch": 0.45, "grad_norm": 1.106247758885928, "learning_rate": 6.026667275137506e-06, "loss": 0.2116, "step": 3346 }, { "epoch": 0.45, "grad_norm": 0.6796696194191811, "learning_rate": 6.024529924521086e-06, "loss": 0.1591, "step": 3347 }, { "epoch": 0.45, "grad_norm": 0.8716255751392579, "learning_rate": 6.0223923784688025e-06, "loss": 0.1544, "step": 3348 }, { "epoch": 0.45, "grad_norm": 0.7966828345076515, "learning_rate": 6.020254637388404e-06, "loss": 0.1606, "step": 3349 }, { "epoch": 0.45, "grad_norm": 1.0461448368220103, "learning_rate": 6.018116701687681e-06, "loss": 0.211, "step": 3350 }, { "epoch": 0.45, "grad_norm": 0.9087212703392943, "learning_rate": 6.015978571774457e-06, "loss": 0.1925, "step": 3351 }, { "epoch": 0.45, "grad_norm": 1.2270801509199503, "learning_rate": 6.013840248056593e-06, "loss": 0.2107, "step": 3352 }, { "epoch": 0.45, "grad_norm": 1.1386582345480818, "learning_rate": 6.011701730941993e-06, "loss": 0.1966, "step": 3353 }, { "epoch": 0.45, "grad_norm": 1.1724820084542635, "learning_rate": 6.00956302083859e-06, "loss": 0.2075, "step": 3354 }, { "epoch": 0.45, "grad_norm": 0.6908129495302547, "learning_rate": 6.007424118154358e-06, "loss": 0.1551, "step": 3355 }, { "epoch": 0.45, "grad_norm": 1.2055353979840329, "learning_rate": 6.0052850232973055e-06, "loss": 0.2548, "step": 3356 }, { "epoch": 0.45, "grad_norm": 0.8446655394623233, "learning_rate": 6.00314573667548e-06, "loss": 0.1494, "step": 3357 }, { "epoch": 0.45, "grad_norm": 0.6865029532985428, "learning_rate": 6.001006258696965e-06, "loss": 0.1518, "step": 3358 }, { "epoch": 0.45, "grad_norm": 0.9859646115012686, "learning_rate": 5.99886658976988e-06, "loss": 0.1715, "step": 3359 }, { "epoch": 0.45, "grad_norm": 0.6313181510783413, "learning_rate": 5.996726730302382e-06, "loss": 0.1222, "step": 3360 }, { "epoch": 0.45, "grad_norm": 0.7224993636454411, "learning_rate": 5.994586680702662e-06, "loss": 0.1057, "step": 3361 }, { "epoch": 0.45, "grad_norm": 0.8797065089029054, "learning_rate": 5.992446441378947e-06, "loss": 0.2099, "step": 3362 }, { "epoch": 0.45, "grad_norm": 0.8856091589844136, "learning_rate": 5.990306012739507e-06, "loss": 0.1794, "step": 3363 }, { "epoch": 0.45, "grad_norm": 1.112422567603893, "learning_rate": 5.988165395192635e-06, "loss": 0.1954, "step": 3364 }, { "epoch": 0.45, "grad_norm": 0.975403435551824, "learning_rate": 5.9860245891466766e-06, "loss": 0.1831, "step": 3365 }, { "epoch": 0.45, "grad_norm": 1.25480607765012, "learning_rate": 5.983883595009999e-06, "loss": 0.2417, "step": 3366 }, { "epoch": 0.45, "grad_norm": 1.07932013571442, "learning_rate": 5.981742413191012e-06, "loss": 0.2012, "step": 3367 }, { "epoch": 0.45, "grad_norm": 1.172080856597703, "learning_rate": 5.97960104409816e-06, "loss": 0.2295, "step": 3368 }, { "epoch": 0.45, "grad_norm": 1.0427404658680373, "learning_rate": 5.977459488139928e-06, "loss": 0.1703, "step": 3369 }, { "epoch": 0.45, "grad_norm": 1.005622558011316, "learning_rate": 5.975317745724824e-06, "loss": 0.1878, "step": 3370 }, { "epoch": 0.45, "grad_norm": 0.826504269738225, "learning_rate": 5.973175817261406e-06, "loss": 0.1785, "step": 3371 }, { "epoch": 0.45, "grad_norm": 1.0656157342927082, "learning_rate": 5.971033703158259e-06, "loss": 0.1942, "step": 3372 }, { "epoch": 0.45, "grad_norm": 0.8582720261194278, "learning_rate": 5.968891403824005e-06, "loss": 0.1619, "step": 3373 }, { "epoch": 0.45, "grad_norm": 0.8584579104281521, "learning_rate": 5.9667489196673e-06, "loss": 0.1846, "step": 3374 }, { "epoch": 0.46, "grad_norm": 0.8812579972450608, "learning_rate": 5.964606251096839e-06, "loss": 0.1837, "step": 3375 }, { "epoch": 0.46, "grad_norm": 0.46964319241777563, "learning_rate": 5.962463398521352e-06, "loss": 0.1181, "step": 3376 }, { "epoch": 0.46, "grad_norm": 1.0995901025326582, "learning_rate": 5.960320362349602e-06, "loss": 0.2252, "step": 3377 }, { "epoch": 0.46, "grad_norm": 0.9039656002653291, "learning_rate": 5.958177142990383e-06, "loss": 0.1678, "step": 3378 }, { "epoch": 0.46, "grad_norm": 1.078142143884205, "learning_rate": 5.956033740852533e-06, "loss": 0.2231, "step": 3379 }, { "epoch": 0.46, "grad_norm": 1.0115649333866812, "learning_rate": 5.953890156344917e-06, "loss": 0.2078, "step": 3380 }, { "epoch": 0.46, "grad_norm": 0.7571236754799984, "learning_rate": 5.951746389876439e-06, "loss": 0.1199, "step": 3381 }, { "epoch": 0.46, "grad_norm": 1.0831773621972127, "learning_rate": 5.949602441856037e-06, "loss": 0.1853, "step": 3382 }, { "epoch": 0.46, "grad_norm": 1.077967017736181, "learning_rate": 5.947458312692685e-06, "loss": 0.2256, "step": 3383 }, { "epoch": 0.46, "grad_norm": 0.7838964518273219, "learning_rate": 5.945314002795386e-06, "loss": 0.1402, "step": 3384 }, { "epoch": 0.46, "grad_norm": 1.0168246789638522, "learning_rate": 5.9431695125731834e-06, "loss": 0.2297, "step": 3385 }, { "epoch": 0.46, "grad_norm": 1.1366750209201903, "learning_rate": 5.941024842435154e-06, "loss": 0.204, "step": 3386 }, { "epoch": 0.46, "grad_norm": 0.905751476764097, "learning_rate": 5.9388799927904065e-06, "loss": 0.1456, "step": 3387 }, { "epoch": 0.46, "grad_norm": 0.8254184780875954, "learning_rate": 5.936734964048084e-06, "loss": 0.1579, "step": 3388 }, { "epoch": 0.46, "grad_norm": 1.232852402382801, "learning_rate": 5.934589756617367e-06, "loss": 0.248, "step": 3389 }, { "epoch": 0.46, "grad_norm": 0.861346015141579, "learning_rate": 5.932444370907468e-06, "loss": 0.1925, "step": 3390 }, { "epoch": 0.46, "grad_norm": 0.7959080958853717, "learning_rate": 5.93029880732763e-06, "loss": 0.1387, "step": 3391 }, { "epoch": 0.46, "grad_norm": 0.974713205882901, "learning_rate": 5.92815306628714e-06, "loss": 0.1759, "step": 3392 }, { "epoch": 0.46, "grad_norm": 0.7665304076323795, "learning_rate": 5.926007148195306e-06, "loss": 0.1498, "step": 3393 }, { "epoch": 0.46, "grad_norm": 1.0439176547709133, "learning_rate": 5.923861053461479e-06, "loss": 0.1856, "step": 3394 }, { "epoch": 0.46, "grad_norm": 0.9464594381826548, "learning_rate": 5.921714782495042e-06, "loss": 0.1662, "step": 3395 }, { "epoch": 0.46, "grad_norm": 1.1606229869197155, "learning_rate": 5.919568335705406e-06, "loss": 0.234, "step": 3396 }, { "epoch": 0.46, "grad_norm": 0.8714370032392162, "learning_rate": 5.917421713502024e-06, "loss": 0.1501, "step": 3397 }, { "epoch": 0.46, "grad_norm": 1.0085117231184364, "learning_rate": 5.915274916294378e-06, "loss": 0.1489, "step": 3398 }, { "epoch": 0.46, "grad_norm": 1.0277831506818609, "learning_rate": 5.913127944491982e-06, "loss": 0.2049, "step": 3399 }, { "epoch": 0.46, "grad_norm": 1.1072906931107513, "learning_rate": 5.910980798504387e-06, "loss": 0.1971, "step": 3400 }, { "epoch": 0.46, "grad_norm": 1.174948373592888, "learning_rate": 5.908833478741174e-06, "loss": 0.1654, "step": 3401 }, { "epoch": 0.46, "grad_norm": 0.8569093777644314, "learning_rate": 5.906685985611956e-06, "loss": 0.1155, "step": 3402 }, { "epoch": 0.46, "grad_norm": 0.896487264301202, "learning_rate": 5.9045383195263875e-06, "loss": 0.1578, "step": 3403 }, { "epoch": 0.46, "grad_norm": 0.9111904935233098, "learning_rate": 5.9023904808941456e-06, "loss": 0.1792, "step": 3404 }, { "epoch": 0.46, "grad_norm": 1.2068602476397683, "learning_rate": 5.900242470124946e-06, "loss": 0.2156, "step": 3405 }, { "epoch": 0.46, "grad_norm": 1.0187064813004947, "learning_rate": 5.898094287628536e-06, "loss": 0.1733, "step": 3406 }, { "epoch": 0.46, "grad_norm": 1.206483445024679, "learning_rate": 5.895945933814695e-06, "loss": 0.2133, "step": 3407 }, { "epoch": 0.46, "grad_norm": 0.8002046889662504, "learning_rate": 5.893797409093237e-06, "loss": 0.1699, "step": 3408 }, { "epoch": 0.46, "grad_norm": 1.152532995683105, "learning_rate": 5.891648713874006e-06, "loss": 0.2068, "step": 3409 }, { "epoch": 0.46, "grad_norm": 1.0174355993760777, "learning_rate": 5.889499848566882e-06, "loss": 0.2109, "step": 3410 }, { "epoch": 0.46, "grad_norm": 0.945765294753759, "learning_rate": 5.887350813581772e-06, "loss": 0.1225, "step": 3411 }, { "epoch": 0.46, "grad_norm": 0.8979048334893208, "learning_rate": 5.885201609328621e-06, "loss": 0.1729, "step": 3412 }, { "epoch": 0.46, "grad_norm": 0.9649419404678922, "learning_rate": 5.883052236217402e-06, "loss": 0.2008, "step": 3413 }, { "epoch": 0.46, "grad_norm": 0.9774624926271177, "learning_rate": 5.880902694658124e-06, "loss": 0.1634, "step": 3414 }, { "epoch": 0.46, "grad_norm": 0.944022093926397, "learning_rate": 5.878752985060826e-06, "loss": 0.2112, "step": 3415 }, { "epoch": 0.46, "grad_norm": 1.0409287926688275, "learning_rate": 5.87660310783558e-06, "loss": 0.1682, "step": 3416 }, { "epoch": 0.46, "grad_norm": 0.8826181799019032, "learning_rate": 5.874453063392488e-06, "loss": 0.1855, "step": 3417 }, { "epoch": 0.46, "grad_norm": 0.6332883384192426, "learning_rate": 5.872302852141684e-06, "loss": 0.1356, "step": 3418 }, { "epoch": 0.46, "grad_norm": 1.1492705512768928, "learning_rate": 5.870152474493338e-06, "loss": 0.2152, "step": 3419 }, { "epoch": 0.46, "grad_norm": 1.0209834107862066, "learning_rate": 5.868001930857647e-06, "loss": 0.1903, "step": 3420 }, { "epoch": 0.46, "grad_norm": 0.832543458960036, "learning_rate": 5.865851221644842e-06, "loss": 0.1644, "step": 3421 }, { "epoch": 0.46, "grad_norm": 1.0130905976110307, "learning_rate": 5.863700347265184e-06, "loss": 0.2182, "step": 3422 }, { "epoch": 0.46, "grad_norm": 0.986883211102315, "learning_rate": 5.861549308128968e-06, "loss": 0.1526, "step": 3423 }, { "epoch": 0.46, "grad_norm": 0.9745417863152303, "learning_rate": 5.859398104646518e-06, "loss": 0.1785, "step": 3424 }, { "epoch": 0.46, "grad_norm": 0.8796932929504703, "learning_rate": 5.857246737228191e-06, "loss": 0.1858, "step": 3425 }, { "epoch": 0.46, "grad_norm": 1.2282872135063234, "learning_rate": 5.855095206284373e-06, "loss": 0.2, "step": 3426 }, { "epoch": 0.46, "grad_norm": 0.7609843425897445, "learning_rate": 5.852943512225486e-06, "loss": 0.1014, "step": 3427 }, { "epoch": 0.46, "grad_norm": 1.1201751468037568, "learning_rate": 5.850791655461977e-06, "loss": 0.1907, "step": 3428 }, { "epoch": 0.46, "grad_norm": 1.1219976404566703, "learning_rate": 5.848639636404327e-06, "loss": 0.1784, "step": 3429 }, { "epoch": 0.46, "grad_norm": 1.3295595309451895, "learning_rate": 5.846487455463049e-06, "loss": 0.2374, "step": 3430 }, { "epoch": 0.46, "grad_norm": 0.9031893786804409, "learning_rate": 5.844335113048686e-06, "loss": 0.1992, "step": 3431 }, { "epoch": 0.46, "grad_norm": 1.0104859606776093, "learning_rate": 5.84218260957181e-06, "loss": 0.1799, "step": 3432 }, { "epoch": 0.46, "grad_norm": 0.9610803000226441, "learning_rate": 5.8400299454430275e-06, "loss": 0.218, "step": 3433 }, { "epoch": 0.46, "grad_norm": 0.9674117964998696, "learning_rate": 5.837877121072971e-06, "loss": 0.1808, "step": 3434 }, { "epoch": 0.46, "grad_norm": 0.9152081319901623, "learning_rate": 5.835724136872307e-06, "loss": 0.1239, "step": 3435 }, { "epoch": 0.46, "grad_norm": 1.1017368095735576, "learning_rate": 5.833570993251732e-06, "loss": 0.192, "step": 3436 }, { "epoch": 0.46, "grad_norm": 0.9452022223160129, "learning_rate": 5.831417690621972e-06, "loss": 0.2092, "step": 3437 }, { "epoch": 0.46, "grad_norm": 1.3397028462663014, "learning_rate": 5.829264229393784e-06, "loss": 0.232, "step": 3438 }, { "epoch": 0.46, "grad_norm": 0.8516797931249739, "learning_rate": 5.827110609977956e-06, "loss": 0.1621, "step": 3439 }, { "epoch": 0.46, "grad_norm": 0.6796373735735737, "learning_rate": 5.824956832785303e-06, "loss": 0.1052, "step": 3440 }, { "epoch": 0.46, "grad_norm": 0.6973093236612845, "learning_rate": 5.822802898226672e-06, "loss": 0.0876, "step": 3441 }, { "epoch": 0.46, "grad_norm": 0.4420564962796288, "learning_rate": 5.820648806712943e-06, "loss": 0.1051, "step": 3442 }, { "epoch": 0.46, "grad_norm": 0.9989974211910538, "learning_rate": 5.818494558655021e-06, "loss": 0.1588, "step": 3443 }, { "epoch": 0.46, "grad_norm": 0.9414351017342197, "learning_rate": 5.816340154463844e-06, "loss": 0.1848, "step": 3444 }, { "epoch": 0.46, "grad_norm": 1.053950370544144, "learning_rate": 5.814185594550378e-06, "loss": 0.227, "step": 3445 }, { "epoch": 0.46, "grad_norm": 0.9555570914021073, "learning_rate": 5.812030879325621e-06, "loss": 0.2126, "step": 3446 }, { "epoch": 0.46, "grad_norm": 0.8261788758088368, "learning_rate": 5.809876009200598e-06, "loss": 0.1686, "step": 3447 }, { "epoch": 0.46, "grad_norm": 1.0161307547515817, "learning_rate": 5.807720984586365e-06, "loss": 0.2288, "step": 3448 }, { "epoch": 0.47, "grad_norm": 0.7652965973486272, "learning_rate": 5.805565805894008e-06, "loss": 0.1144, "step": 3449 }, { "epoch": 0.47, "grad_norm": 1.268288047150215, "learning_rate": 5.803410473534641e-06, "loss": 0.22, "step": 3450 }, { "epoch": 0.47, "grad_norm": 0.8874696309029468, "learning_rate": 5.801254987919407e-06, "loss": 0.1448, "step": 3451 }, { "epoch": 0.47, "grad_norm": 0.9118862006062535, "learning_rate": 5.799099349459479e-06, "loss": 0.1593, "step": 3452 }, { "epoch": 0.47, "grad_norm": 0.9883780123556914, "learning_rate": 5.796943558566061e-06, "loss": 0.1875, "step": 3453 }, { "epoch": 0.47, "grad_norm": 0.9168870402365901, "learning_rate": 5.794787615650385e-06, "loss": 0.1468, "step": 3454 }, { "epoch": 0.47, "grad_norm": 1.0651034336346525, "learning_rate": 5.7926315211237106e-06, "loss": 0.1798, "step": 3455 }, { "epoch": 0.47, "grad_norm": 0.7640150744641651, "learning_rate": 5.790475275397325e-06, "loss": 0.1276, "step": 3456 }, { "epoch": 0.47, "grad_norm": 0.9717278698413699, "learning_rate": 5.788318878882548e-06, "loss": 0.178, "step": 3457 }, { "epoch": 0.47, "grad_norm": 0.95918783654714, "learning_rate": 5.786162331990727e-06, "loss": 0.1876, "step": 3458 }, { "epoch": 0.47, "grad_norm": 1.0583575613688803, "learning_rate": 5.784005635133236e-06, "loss": 0.1994, "step": 3459 }, { "epoch": 0.47, "grad_norm": 0.9978603621660672, "learning_rate": 5.781848788721481e-06, "loss": 0.214, "step": 3460 }, { "epoch": 0.47, "grad_norm": 0.9068990319223217, "learning_rate": 5.7796917931668935e-06, "loss": 0.183, "step": 3461 }, { "epoch": 0.47, "grad_norm": 0.8576320988105547, "learning_rate": 5.777534648880937e-06, "loss": 0.1232, "step": 3462 }, { "epoch": 0.47, "grad_norm": 0.9242432098619917, "learning_rate": 5.775377356275098e-06, "loss": 0.18, "step": 3463 }, { "epoch": 0.47, "grad_norm": 1.1420291410840144, "learning_rate": 5.773219915760895e-06, "loss": 0.2143, "step": 3464 }, { "epoch": 0.47, "grad_norm": 0.9821646769287372, "learning_rate": 5.771062327749875e-06, "loss": 0.2108, "step": 3465 }, { "epoch": 0.47, "grad_norm": 1.132330467662958, "learning_rate": 5.768904592653611e-06, "loss": 0.21, "step": 3466 }, { "epoch": 0.47, "grad_norm": 1.0367406959968277, "learning_rate": 5.766746710883707e-06, "loss": 0.2163, "step": 3467 }, { "epoch": 0.47, "grad_norm": 1.069365472975599, "learning_rate": 5.764588682851792e-06, "loss": 0.2063, "step": 3468 }, { "epoch": 0.47, "grad_norm": 1.4059140781470958, "learning_rate": 5.762430508969524e-06, "loss": 0.2294, "step": 3469 }, { "epoch": 0.47, "grad_norm": 0.8690181201396947, "learning_rate": 5.7602721896485904e-06, "loss": 0.1404, "step": 3470 }, { "epoch": 0.47, "grad_norm": 1.1767129945936163, "learning_rate": 5.758113725300703e-06, "loss": 0.2304, "step": 3471 }, { "epoch": 0.47, "grad_norm": 1.0449196273100205, "learning_rate": 5.755955116337605e-06, "loss": 0.1663, "step": 3472 }, { "epoch": 0.47, "grad_norm": 0.8802353986374349, "learning_rate": 5.753796363171064e-06, "loss": 0.1401, "step": 3473 }, { "epoch": 0.47, "grad_norm": 0.9828952353613137, "learning_rate": 5.751637466212876e-06, "loss": 0.1768, "step": 3474 }, { "epoch": 0.47, "grad_norm": 1.0202902730213153, "learning_rate": 5.7494784258748645e-06, "loss": 0.1622, "step": 3475 }, { "epoch": 0.47, "grad_norm": 1.0494429595613133, "learning_rate": 5.7473192425688836e-06, "loss": 0.182, "step": 3476 }, { "epoch": 0.47, "grad_norm": 1.2112512140926057, "learning_rate": 5.745159916706812e-06, "loss": 0.2316, "step": 3477 }, { "epoch": 0.47, "grad_norm": 0.8499543257192338, "learning_rate": 5.743000448700552e-06, "loss": 0.1744, "step": 3478 }, { "epoch": 0.47, "grad_norm": 0.9798604614172947, "learning_rate": 5.7408408389620374e-06, "loss": 0.1954, "step": 3479 }, { "epoch": 0.47, "grad_norm": 0.740024776831007, "learning_rate": 5.73868108790323e-06, "loss": 0.1811, "step": 3480 }, { "epoch": 0.47, "grad_norm": 0.8411731337569097, "learning_rate": 5.736521195936112e-06, "loss": 0.1642, "step": 3481 }, { "epoch": 0.47, "grad_norm": 0.9827423000380965, "learning_rate": 5.734361163472702e-06, "loss": 0.1428, "step": 3482 }, { "epoch": 0.47, "grad_norm": 1.0718480288965377, "learning_rate": 5.732200990925041e-06, "loss": 0.1754, "step": 3483 }, { "epoch": 0.47, "grad_norm": 1.0104395877320353, "learning_rate": 5.730040678705192e-06, "loss": 0.2213, "step": 3484 }, { "epoch": 0.47, "grad_norm": 0.8563384272814186, "learning_rate": 5.727880227225251e-06, "loss": 0.1581, "step": 3485 }, { "epoch": 0.47, "grad_norm": 0.9713886251759807, "learning_rate": 5.725719636897339e-06, "loss": 0.2047, "step": 3486 }, { "epoch": 0.47, "grad_norm": 0.7865898213250888, "learning_rate": 5.723558908133602e-06, "loss": 0.1424, "step": 3487 }, { "epoch": 0.47, "grad_norm": 1.0459945121199914, "learning_rate": 5.721398041346214e-06, "loss": 0.2041, "step": 3488 }, { "epoch": 0.47, "grad_norm": 0.9818061042867089, "learning_rate": 5.719237036947374e-06, "loss": 0.1833, "step": 3489 }, { "epoch": 0.47, "grad_norm": 0.9383920255726135, "learning_rate": 5.717075895349306e-06, "loss": 0.1783, "step": 3490 }, { "epoch": 0.47, "grad_norm": 0.9153731933072574, "learning_rate": 5.714914616964266e-06, "loss": 0.1674, "step": 3491 }, { "epoch": 0.47, "grad_norm": 1.0002359131655705, "learning_rate": 5.7127532022045305e-06, "loss": 0.1623, "step": 3492 }, { "epoch": 0.47, "grad_norm": 1.0672114663469872, "learning_rate": 5.710591651482403e-06, "loss": 0.2196, "step": 3493 }, { "epoch": 0.47, "grad_norm": 1.1761748709537494, "learning_rate": 5.7084299652102145e-06, "loss": 0.1666, "step": 3494 }, { "epoch": 0.47, "grad_norm": 0.9682970959143533, "learning_rate": 5.706268143800321e-06, "loss": 0.1886, "step": 3495 }, { "epoch": 0.47, "grad_norm": 1.0405008994674394, "learning_rate": 5.704106187665103e-06, "loss": 0.1921, "step": 3496 }, { "epoch": 0.47, "grad_norm": 0.9823288383911443, "learning_rate": 5.701944097216968e-06, "loss": 0.1859, "step": 3497 }, { "epoch": 0.47, "grad_norm": 1.2018100143826287, "learning_rate": 5.699781872868351e-06, "loss": 0.2162, "step": 3498 }, { "epoch": 0.47, "grad_norm": 1.0653882520636286, "learning_rate": 5.6976195150317096e-06, "loss": 0.2067, "step": 3499 }, { "epoch": 0.47, "grad_norm": 1.006553990943175, "learning_rate": 5.695457024119527e-06, "loss": 0.1979, "step": 3500 }, { "epoch": 0.47, "grad_norm": 0.9987735884761776, "learning_rate": 5.693294400544314e-06, "loss": 0.2431, "step": 3501 }, { "epoch": 0.47, "grad_norm": 1.1472477857111023, "learning_rate": 5.691131644718604e-06, "loss": 0.1962, "step": 3502 }, { "epoch": 0.47, "grad_norm": 1.118904099982939, "learning_rate": 5.6889687570549565e-06, "loss": 0.2026, "step": 3503 }, { "epoch": 0.47, "grad_norm": 1.0023062985956346, "learning_rate": 5.68680573796596e-06, "loss": 0.1666, "step": 3504 }, { "epoch": 0.47, "grad_norm": 0.7332298206860999, "learning_rate": 5.684642587864222e-06, "loss": 0.1377, "step": 3505 }, { "epoch": 0.47, "grad_norm": 0.872897716315616, "learning_rate": 5.682479307162378e-06, "loss": 0.1795, "step": 3506 }, { "epoch": 0.47, "grad_norm": 1.0498055256281973, "learning_rate": 5.680315896273087e-06, "loss": 0.2436, "step": 3507 }, { "epoch": 0.47, "grad_norm": 0.7710224397211426, "learning_rate": 5.678152355609038e-06, "loss": 0.1479, "step": 3508 }, { "epoch": 0.47, "grad_norm": 0.7713355257680096, "learning_rate": 5.6759886855829364e-06, "loss": 0.1228, "step": 3509 }, { "epoch": 0.47, "grad_norm": 1.024104911431396, "learning_rate": 5.673824886607519e-06, "loss": 0.2385, "step": 3510 }, { "epoch": 0.47, "grad_norm": 1.0452074848857618, "learning_rate": 5.6716609590955426e-06, "loss": 0.2264, "step": 3511 }, { "epoch": 0.47, "grad_norm": 0.9575451708936266, "learning_rate": 5.669496903459793e-06, "loss": 0.1708, "step": 3512 }, { "epoch": 0.47, "grad_norm": 0.9148167455801122, "learning_rate": 5.667332720113078e-06, "loss": 0.167, "step": 3513 }, { "epoch": 0.47, "grad_norm": 0.9583671232379307, "learning_rate": 5.665168409468227e-06, "loss": 0.1463, "step": 3514 }, { "epoch": 0.47, "grad_norm": 0.9675381448157362, "learning_rate": 5.663003971938102e-06, "loss": 0.1839, "step": 3515 }, { "epoch": 0.47, "grad_norm": 0.8912918651877492, "learning_rate": 5.660839407935578e-06, "loss": 0.1565, "step": 3516 }, { "epoch": 0.47, "grad_norm": 0.9098675355042668, "learning_rate": 5.658674717873562e-06, "loss": 0.1542, "step": 3517 }, { "epoch": 0.47, "grad_norm": 0.8562216402505095, "learning_rate": 5.656509902164985e-06, "loss": 0.1687, "step": 3518 }, { "epoch": 0.47, "grad_norm": 0.8166566002912837, "learning_rate": 5.654344961222798e-06, "loss": 0.1579, "step": 3519 }, { "epoch": 0.47, "grad_norm": 0.8779603615139941, "learning_rate": 5.652179895459976e-06, "loss": 0.1615, "step": 3520 }, { "epoch": 0.47, "grad_norm": 0.9783442337998776, "learning_rate": 5.650014705289523e-06, "loss": 0.1674, "step": 3521 }, { "epoch": 0.47, "grad_norm": 0.8359311917075843, "learning_rate": 5.647849391124463e-06, "loss": 0.1732, "step": 3522 }, { "epoch": 0.48, "grad_norm": 0.750251097841947, "learning_rate": 5.645683953377842e-06, "loss": 0.1439, "step": 3523 }, { "epoch": 0.48, "grad_norm": 1.3414916255840943, "learning_rate": 5.643518392462734e-06, "loss": 0.2307, "step": 3524 }, { "epoch": 0.48, "grad_norm": 0.8661671365989168, "learning_rate": 5.641352708792231e-06, "loss": 0.1508, "step": 3525 }, { "epoch": 0.48, "grad_norm": 1.1720625715883748, "learning_rate": 5.639186902779454e-06, "loss": 0.2176, "step": 3526 }, { "epoch": 0.48, "grad_norm": 1.2107444841320558, "learning_rate": 5.637020974837544e-06, "loss": 0.2586, "step": 3527 }, { "epoch": 0.48, "grad_norm": 1.1563562794642448, "learning_rate": 5.634854925379667e-06, "loss": 0.2224, "step": 3528 }, { "epoch": 0.48, "grad_norm": 1.3538301554865513, "learning_rate": 5.632688754819009e-06, "loss": 0.2352, "step": 3529 }, { "epoch": 0.48, "grad_norm": 0.6698463280136914, "learning_rate": 5.630522463568784e-06, "loss": 0.0969, "step": 3530 }, { "epoch": 0.48, "grad_norm": 0.5825995724031865, "learning_rate": 5.628356052042225e-06, "loss": 0.182, "step": 3531 }, { "epoch": 0.48, "grad_norm": 1.0570636785900962, "learning_rate": 5.6261895206525895e-06, "loss": 0.2011, "step": 3532 }, { "epoch": 0.48, "grad_norm": 1.0346633675418975, "learning_rate": 5.624022869813157e-06, "loss": 0.1854, "step": 3533 }, { "epoch": 0.48, "grad_norm": 0.7079188610213504, "learning_rate": 5.621856099937232e-06, "loss": 0.1534, "step": 3534 }, { "epoch": 0.48, "grad_norm": 0.9565611353916732, "learning_rate": 5.619689211438139e-06, "loss": 0.1768, "step": 3535 }, { "epoch": 0.48, "grad_norm": 1.0409529826638535, "learning_rate": 5.617522204729228e-06, "loss": 0.16, "step": 3536 }, { "epoch": 0.48, "grad_norm": 0.8864473133418783, "learning_rate": 5.615355080223867e-06, "loss": 0.224, "step": 3537 }, { "epoch": 0.48, "grad_norm": 0.8605921679849728, "learning_rate": 5.613187838335451e-06, "loss": 0.1604, "step": 3538 }, { "epoch": 0.48, "grad_norm": 1.0585880444371085, "learning_rate": 5.611020479477398e-06, "loss": 0.173, "step": 3539 }, { "epoch": 0.48, "grad_norm": 1.1705045711506876, "learning_rate": 5.608853004063142e-06, "loss": 0.1437, "step": 3540 }, { "epoch": 0.48, "grad_norm": 0.6917047579809597, "learning_rate": 5.6066854125061454e-06, "loss": 0.1492, "step": 3541 }, { "epoch": 0.48, "grad_norm": 1.0525906773472247, "learning_rate": 5.604517705219889e-06, "loss": 0.2108, "step": 3542 }, { "epoch": 0.48, "grad_norm": 0.5690129273301658, "learning_rate": 5.60234988261788e-06, "loss": 0.1008, "step": 3543 }, { "epoch": 0.48, "grad_norm": 1.0554368231834732, "learning_rate": 5.600181945113643e-06, "loss": 0.2277, "step": 3544 }, { "epoch": 0.48, "grad_norm": 1.219987602931922, "learning_rate": 5.5980138931207275e-06, "loss": 0.186, "step": 3545 }, { "epoch": 0.48, "grad_norm": 0.6476746608851938, "learning_rate": 5.5958457270527035e-06, "loss": 0.1195, "step": 3546 }, { "epoch": 0.48, "grad_norm": 0.863197560501459, "learning_rate": 5.593677447323164e-06, "loss": 0.1515, "step": 3547 }, { "epoch": 0.48, "grad_norm": 1.0093536425221958, "learning_rate": 5.591509054345717e-06, "loss": 0.2203, "step": 3548 }, { "epoch": 0.48, "grad_norm": 1.2076913122341653, "learning_rate": 5.589340548534006e-06, "loss": 0.2349, "step": 3549 }, { "epoch": 0.48, "grad_norm": 0.9770073218949222, "learning_rate": 5.587171930301683e-06, "loss": 0.2134, "step": 3550 }, { "epoch": 0.48, "grad_norm": 0.9633065094530128, "learning_rate": 5.585003200062428e-06, "loss": 0.2161, "step": 3551 }, { "epoch": 0.48, "grad_norm": 1.1004270477163367, "learning_rate": 5.5828343582299394e-06, "loss": 0.2013, "step": 3552 }, { "epoch": 0.48, "grad_norm": 0.9172014899471503, "learning_rate": 5.580665405217939e-06, "loss": 0.1759, "step": 3553 }, { "epoch": 0.48, "grad_norm": 1.0785858772116448, "learning_rate": 5.5784963414401685e-06, "loss": 0.1881, "step": 3554 }, { "epoch": 0.48, "grad_norm": 0.9529210909153586, "learning_rate": 5.576327167310391e-06, "loss": 0.1577, "step": 3555 }, { "epoch": 0.48, "grad_norm": 1.1016918245233671, "learning_rate": 5.574157883242393e-06, "loss": 0.2161, "step": 3556 }, { "epoch": 0.48, "grad_norm": 0.8375801257149773, "learning_rate": 5.571988489649977e-06, "loss": 0.1568, "step": 3557 }, { "epoch": 0.48, "grad_norm": 0.7903594821874299, "learning_rate": 5.56981898694697e-06, "loss": 0.1231, "step": 3558 }, { "epoch": 0.48, "grad_norm": 0.9616428325672117, "learning_rate": 5.567649375547218e-06, "loss": 0.199, "step": 3559 }, { "epoch": 0.48, "grad_norm": 1.0260416540005144, "learning_rate": 5.565479655864592e-06, "loss": 0.1592, "step": 3560 }, { "epoch": 0.48, "grad_norm": 0.8274255003576445, "learning_rate": 5.563309828312977e-06, "loss": 0.1441, "step": 3561 }, { "epoch": 0.48, "grad_norm": 0.873425399756485, "learning_rate": 5.561139893306285e-06, "loss": 0.1654, "step": 3562 }, { "epoch": 0.48, "grad_norm": 1.076896211540425, "learning_rate": 5.558969851258442e-06, "loss": 0.2073, "step": 3563 }, { "epoch": 0.48, "grad_norm": 0.7734796029700642, "learning_rate": 5.556799702583401e-06, "loss": 0.1732, "step": 3564 }, { "epoch": 0.48, "grad_norm": 0.9450725383948545, "learning_rate": 5.554629447695129e-06, "loss": 0.1438, "step": 3565 }, { "epoch": 0.48, "grad_norm": 0.9867616177966336, "learning_rate": 5.55245908700762e-06, "loss": 0.1274, "step": 3566 }, { "epoch": 0.48, "grad_norm": 1.150831198088945, "learning_rate": 5.550288620934883e-06, "loss": 0.2226, "step": 3567 }, { "epoch": 0.48, "grad_norm": 1.1526198774374081, "learning_rate": 5.548118049890948e-06, "loss": 0.1491, "step": 3568 }, { "epoch": 0.48, "grad_norm": 0.8210917956231526, "learning_rate": 5.545947374289867e-06, "loss": 0.192, "step": 3569 }, { "epoch": 0.48, "grad_norm": 0.9097368862353339, "learning_rate": 5.543776594545711e-06, "loss": 0.1675, "step": 3570 }, { "epoch": 0.48, "grad_norm": 0.994815847288984, "learning_rate": 5.541605711072569e-06, "loss": 0.1621, "step": 3571 }, { "epoch": 0.48, "grad_norm": 0.90247976551592, "learning_rate": 5.539434724284554e-06, "loss": 0.1753, "step": 3572 }, { "epoch": 0.48, "grad_norm": 1.0932026141492677, "learning_rate": 5.537263634595793e-06, "loss": 0.1748, "step": 3573 }, { "epoch": 0.48, "grad_norm": 1.0586148184682165, "learning_rate": 5.5350924424204376e-06, "loss": 0.1795, "step": 3574 }, { "epoch": 0.48, "grad_norm": 1.0978524270490575, "learning_rate": 5.532921148172657e-06, "loss": 0.2164, "step": 3575 }, { "epoch": 0.48, "grad_norm": 0.7344578482323675, "learning_rate": 5.530749752266638e-06, "loss": 0.1357, "step": 3576 }, { "epoch": 0.48, "grad_norm": 1.0858664837549474, "learning_rate": 5.528578255116593e-06, "loss": 0.1707, "step": 3577 }, { "epoch": 0.48, "grad_norm": 0.8141633051537962, "learning_rate": 5.526406657136745e-06, "loss": 0.1826, "step": 3578 }, { "epoch": 0.48, "grad_norm": 0.953478881051072, "learning_rate": 5.524234958741342e-06, "loss": 0.1829, "step": 3579 }, { "epoch": 0.48, "grad_norm": 1.2412610533315311, "learning_rate": 5.522063160344652e-06, "loss": 0.2241, "step": 3580 }, { "epoch": 0.48, "grad_norm": 0.7641880083277478, "learning_rate": 5.5198912623609565e-06, "loss": 0.1526, "step": 3581 }, { "epoch": 0.48, "grad_norm": 0.8451557498550003, "learning_rate": 5.517719265204562e-06, "loss": 0.1702, "step": 3582 }, { "epoch": 0.48, "grad_norm": 1.0683457251297876, "learning_rate": 5.51554716928979e-06, "loss": 0.2204, "step": 3583 }, { "epoch": 0.48, "grad_norm": 0.7782831607068591, "learning_rate": 5.513374975030984e-06, "loss": 0.1166, "step": 3584 }, { "epoch": 0.48, "grad_norm": 0.6882473252756633, "learning_rate": 5.511202682842504e-06, "loss": 0.1231, "step": 3585 }, { "epoch": 0.48, "grad_norm": 0.731788561585918, "learning_rate": 5.509030293138726e-06, "loss": 0.1519, "step": 3586 }, { "epoch": 0.48, "grad_norm": 0.8978042605443396, "learning_rate": 5.50685780633405e-06, "loss": 0.2028, "step": 3587 }, { "epoch": 0.48, "grad_norm": 1.101982744367731, "learning_rate": 5.504685222842893e-06, "loss": 0.2011, "step": 3588 }, { "epoch": 0.48, "grad_norm": 0.9894173848913205, "learning_rate": 5.5025125430796886e-06, "loss": 0.193, "step": 3589 }, { "epoch": 0.48, "grad_norm": 0.874257003577039, "learning_rate": 5.50033976745889e-06, "loss": 0.1716, "step": 3590 }, { "epoch": 0.48, "grad_norm": 0.9554569606149926, "learning_rate": 5.49816689639497e-06, "loss": 0.1345, "step": 3591 }, { "epoch": 0.48, "grad_norm": 0.9690039012165691, "learning_rate": 5.495993930302415e-06, "loss": 0.2328, "step": 3592 }, { "epoch": 0.48, "grad_norm": 0.7868152736489606, "learning_rate": 5.493820869595735e-06, "loss": 0.1647, "step": 3593 }, { "epoch": 0.48, "grad_norm": 1.0677968468792531, "learning_rate": 5.491647714689455e-06, "loss": 0.1914, "step": 3594 }, { "epoch": 0.48, "grad_norm": 0.9199206398593596, "learning_rate": 5.4894744659981205e-06, "loss": 0.1807, "step": 3595 }, { "epoch": 0.48, "grad_norm": 0.9858784024893309, "learning_rate": 5.487301123936291e-06, "loss": 0.1808, "step": 3596 }, { "epoch": 0.49, "grad_norm": 1.0113468206303298, "learning_rate": 5.485127688918544e-06, "loss": 0.1922, "step": 3597 }, { "epoch": 0.49, "grad_norm": 1.1257980626667028, "learning_rate": 5.482954161359481e-06, "loss": 0.2273, "step": 3598 }, { "epoch": 0.49, "grad_norm": 1.1095887966812763, "learning_rate": 5.480780541673714e-06, "loss": 0.1972, "step": 3599 }, { "epoch": 0.49, "grad_norm": 0.7861910991763744, "learning_rate": 5.478606830275877e-06, "loss": 0.1727, "step": 3600 }, { "epoch": 0.49, "grad_norm": 0.823223273321448, "learning_rate": 5.476433027580618e-06, "loss": 0.1025, "step": 3601 }, { "epoch": 0.49, "grad_norm": 0.771205468139475, "learning_rate": 5.474259134002605e-06, "loss": 0.1306, "step": 3602 }, { "epoch": 0.49, "grad_norm": 0.9217017574273735, "learning_rate": 5.4720851499565246e-06, "loss": 0.1909, "step": 3603 }, { "epoch": 0.49, "grad_norm": 0.8471343248957393, "learning_rate": 5.469911075857073e-06, "loss": 0.1802, "step": 3604 }, { "epoch": 0.49, "grad_norm": 0.9858553612101635, "learning_rate": 5.467736912118976e-06, "loss": 0.1903, "step": 3605 }, { "epoch": 0.49, "grad_norm": 1.0232034184852092, "learning_rate": 5.465562659156965e-06, "loss": 0.1862, "step": 3606 }, { "epoch": 0.49, "grad_norm": 0.9546579788799846, "learning_rate": 5.463388317385797e-06, "loss": 0.2129, "step": 3607 }, { "epoch": 0.49, "grad_norm": 0.9828461738442722, "learning_rate": 5.461213887220239e-06, "loss": 0.1434, "step": 3608 }, { "epoch": 0.49, "grad_norm": 1.136746714894649, "learning_rate": 5.45903936907508e-06, "loss": 0.2442, "step": 3609 }, { "epoch": 0.49, "grad_norm": 0.913525818024531, "learning_rate": 5.456864763365121e-06, "loss": 0.1826, "step": 3610 }, { "epoch": 0.49, "grad_norm": 1.0639287550147942, "learning_rate": 5.454690070505184e-06, "loss": 0.2229, "step": 3611 }, { "epoch": 0.49, "grad_norm": 1.0898767119185941, "learning_rate": 5.4525152909101065e-06, "loss": 0.2151, "step": 3612 }, { "epoch": 0.49, "grad_norm": 0.6596871108882049, "learning_rate": 5.450340424994742e-06, "loss": 0.0761, "step": 3613 }, { "epoch": 0.49, "grad_norm": 1.1620018399351548, "learning_rate": 5.4481654731739604e-06, "loss": 0.2256, "step": 3614 }, { "epoch": 0.49, "grad_norm": 1.2004976997978007, "learning_rate": 5.445990435862648e-06, "loss": 0.247, "step": 3615 }, { "epoch": 0.49, "grad_norm": 1.2953551982854867, "learning_rate": 5.4438153134757075e-06, "loss": 0.1979, "step": 3616 }, { "epoch": 0.49, "grad_norm": 1.1004544514733288, "learning_rate": 5.441640106428059e-06, "loss": 0.2173, "step": 3617 }, { "epoch": 0.49, "grad_norm": 0.9157738947571697, "learning_rate": 5.439464815134635e-06, "loss": 0.1735, "step": 3618 }, { "epoch": 0.49, "grad_norm": 0.8834613291696289, "learning_rate": 5.437289440010391e-06, "loss": 0.1813, "step": 3619 }, { "epoch": 0.49, "grad_norm": 0.917445412228481, "learning_rate": 5.435113981470289e-06, "loss": 0.2123, "step": 3620 }, { "epoch": 0.49, "grad_norm": 1.001847692815748, "learning_rate": 5.432938439929314e-06, "loss": 0.1834, "step": 3621 }, { "epoch": 0.49, "grad_norm": 0.9311612093486108, "learning_rate": 5.430762815802467e-06, "loss": 0.193, "step": 3622 }, { "epoch": 0.49, "grad_norm": 1.0095907547155138, "learning_rate": 5.428587109504763e-06, "loss": 0.1906, "step": 3623 }, { "epoch": 0.49, "grad_norm": 0.8904411173708802, "learning_rate": 5.426411321451229e-06, "loss": 0.1827, "step": 3624 }, { "epoch": 0.49, "grad_norm": 0.95546484305356, "learning_rate": 5.424235452056914e-06, "loss": 0.1916, "step": 3625 }, { "epoch": 0.49, "grad_norm": 0.8915963164099806, "learning_rate": 5.422059501736876e-06, "loss": 0.1683, "step": 3626 }, { "epoch": 0.49, "grad_norm": 0.902984218582121, "learning_rate": 5.419883470906196e-06, "loss": 0.1799, "step": 3627 }, { "epoch": 0.49, "grad_norm": 1.013722642294324, "learning_rate": 5.417707359979964e-06, "loss": 0.1396, "step": 3628 }, { "epoch": 0.49, "grad_norm": 0.9526072924273594, "learning_rate": 5.415531169373287e-06, "loss": 0.1862, "step": 3629 }, { "epoch": 0.49, "grad_norm": 1.1492951153146773, "learning_rate": 5.41335489950129e-06, "loss": 0.2321, "step": 3630 }, { "epoch": 0.49, "grad_norm": 0.9263173518589936, "learning_rate": 5.41117855077911e-06, "loss": 0.1991, "step": 3631 }, { "epoch": 0.49, "grad_norm": 0.9393622572011336, "learning_rate": 5.409002123621898e-06, "loss": 0.1253, "step": 3632 }, { "epoch": 0.49, "grad_norm": 1.0788788497014692, "learning_rate": 5.4068256184448245e-06, "loss": 0.1899, "step": 3633 }, { "epoch": 0.49, "grad_norm": 0.7325966349210554, "learning_rate": 5.404649035663071e-06, "loss": 0.1411, "step": 3634 }, { "epoch": 0.49, "grad_norm": 1.1083288581585722, "learning_rate": 5.402472375691835e-06, "loss": 0.2095, "step": 3635 }, { "epoch": 0.49, "grad_norm": 0.825090232017793, "learning_rate": 5.40029563894633e-06, "loss": 0.1585, "step": 3636 }, { "epoch": 0.49, "grad_norm": 0.8654264730502506, "learning_rate": 5.398118825841781e-06, "loss": 0.1895, "step": 3637 }, { "epoch": 0.49, "grad_norm": 0.8677014734881309, "learning_rate": 5.395941936793432e-06, "loss": 0.1913, "step": 3638 }, { "epoch": 0.49, "grad_norm": 0.9238181196349637, "learning_rate": 5.393764972216537e-06, "loss": 0.1694, "step": 3639 }, { "epoch": 0.49, "grad_norm": 0.8607202517942578, "learning_rate": 5.391587932526366e-06, "loss": 0.1615, "step": 3640 }, { "epoch": 0.49, "grad_norm": 1.1336732293067702, "learning_rate": 5.389410818138206e-06, "loss": 0.1825, "step": 3641 }, { "epoch": 0.49, "grad_norm": 1.111317686288547, "learning_rate": 5.387233629467354e-06, "loss": 0.247, "step": 3642 }, { "epoch": 0.49, "grad_norm": 0.9329492786221599, "learning_rate": 5.385056366929121e-06, "loss": 0.1915, "step": 3643 }, { "epoch": 0.49, "grad_norm": 1.0109906236856956, "learning_rate": 5.3828790309388404e-06, "loss": 0.1855, "step": 3644 }, { "epoch": 0.49, "grad_norm": 0.8784089201853924, "learning_rate": 5.3807016219118494e-06, "loss": 0.139, "step": 3645 }, { "epoch": 0.49, "grad_norm": 0.8642312616730615, "learning_rate": 5.378524140263504e-06, "loss": 0.1415, "step": 3646 }, { "epoch": 0.49, "grad_norm": 0.8109232383264376, "learning_rate": 5.3763465864091736e-06, "loss": 0.1561, "step": 3647 }, { "epoch": 0.49, "grad_norm": 0.9199860566185367, "learning_rate": 5.37416896076424e-06, "loss": 0.1925, "step": 3648 }, { "epoch": 0.49, "grad_norm": 1.0143839731739712, "learning_rate": 5.3719912637440995e-06, "loss": 0.196, "step": 3649 }, { "epoch": 0.49, "grad_norm": 0.6673311928997985, "learning_rate": 5.369813495764164e-06, "loss": 0.132, "step": 3650 }, { "epoch": 0.49, "grad_norm": 0.8274403623802135, "learning_rate": 5.367635657239856e-06, "loss": 0.1591, "step": 3651 }, { "epoch": 0.49, "grad_norm": 0.9311121873508922, "learning_rate": 5.365457748586612e-06, "loss": 0.1952, "step": 3652 }, { "epoch": 0.49, "grad_norm": 1.0232417178665827, "learning_rate": 5.363279770219885e-06, "loss": 0.1705, "step": 3653 }, { "epoch": 0.49, "grad_norm": 1.2514753980658904, "learning_rate": 5.3611017225551356e-06, "loss": 0.2134, "step": 3654 }, { "epoch": 0.49, "grad_norm": 1.0356607703978868, "learning_rate": 5.358923606007842e-06, "loss": 0.1886, "step": 3655 }, { "epoch": 0.49, "grad_norm": 0.7772378604546438, "learning_rate": 5.356745420993496e-06, "loss": 0.1378, "step": 3656 }, { "epoch": 0.49, "grad_norm": 0.967248535294205, "learning_rate": 5.354567167927598e-06, "loss": 0.1642, "step": 3657 }, { "epoch": 0.49, "grad_norm": 0.9944168615049148, "learning_rate": 5.352388847225666e-06, "loss": 0.1788, "step": 3658 }, { "epoch": 0.49, "grad_norm": 1.0562655031963255, "learning_rate": 5.350210459303229e-06, "loss": 0.1878, "step": 3659 }, { "epoch": 0.49, "grad_norm": 1.103791685514638, "learning_rate": 5.348032004575827e-06, "loss": 0.2362, "step": 3660 }, { "epoch": 0.49, "grad_norm": 0.9687019007317865, "learning_rate": 5.345853483459018e-06, "loss": 0.146, "step": 3661 }, { "epoch": 0.49, "grad_norm": 1.0309800366024215, "learning_rate": 5.3436748963683685e-06, "loss": 0.2167, "step": 3662 }, { "epoch": 0.49, "grad_norm": 0.9517773809528843, "learning_rate": 5.341496243719456e-06, "loss": 0.1981, "step": 3663 }, { "epoch": 0.49, "grad_norm": 1.0066908507659251, "learning_rate": 5.339317525927875e-06, "loss": 0.1887, "step": 3664 }, { "epoch": 0.49, "grad_norm": 0.9379421897708567, "learning_rate": 5.337138743409229e-06, "loss": 0.1673, "step": 3665 }, { "epoch": 0.49, "grad_norm": 0.9654508273396873, "learning_rate": 5.334959896579136e-06, "loss": 0.1692, "step": 3666 }, { "epoch": 0.49, "grad_norm": 0.8836846997517235, "learning_rate": 5.3327809858532256e-06, "loss": 0.1808, "step": 3667 }, { "epoch": 0.49, "grad_norm": 0.7502306371591951, "learning_rate": 5.33060201164714e-06, "loss": 0.1621, "step": 3668 }, { "epoch": 0.49, "grad_norm": 0.8971966074958593, "learning_rate": 5.328422974376532e-06, "loss": 0.1834, "step": 3669 }, { "epoch": 0.49, "grad_norm": 0.99092551374883, "learning_rate": 5.326243874457068e-06, "loss": 0.1652, "step": 3670 }, { "epoch": 0.5, "grad_norm": 0.8206882418584414, "learning_rate": 5.324064712304423e-06, "loss": 0.1789, "step": 3671 }, { "epoch": 0.5, "grad_norm": 1.0086199857677698, "learning_rate": 5.3218854883342906e-06, "loss": 0.2028, "step": 3672 }, { "epoch": 0.5, "grad_norm": 1.1661750999284348, "learning_rate": 5.31970620296237e-06, "loss": 0.2369, "step": 3673 }, { "epoch": 0.5, "grad_norm": 0.8977260028841173, "learning_rate": 5.317526856604376e-06, "loss": 0.1753, "step": 3674 }, { "epoch": 0.5, "grad_norm": 0.8885258793688319, "learning_rate": 5.315347449676032e-06, "loss": 0.1552, "step": 3675 }, { "epoch": 0.5, "grad_norm": 0.8894292777642367, "learning_rate": 5.3131679825930725e-06, "loss": 0.1678, "step": 3676 }, { "epoch": 0.5, "grad_norm": 0.7592331570893116, "learning_rate": 5.310988455771249e-06, "loss": 0.1683, "step": 3677 }, { "epoch": 0.5, "grad_norm": 1.1359499394120067, "learning_rate": 5.308808869626319e-06, "loss": 0.218, "step": 3678 }, { "epoch": 0.5, "grad_norm": 0.6283760831144379, "learning_rate": 5.306629224574052e-06, "loss": 0.1129, "step": 3679 }, { "epoch": 0.5, "grad_norm": 0.8952604335278628, "learning_rate": 5.304449521030231e-06, "loss": 0.176, "step": 3680 }, { "epoch": 0.5, "grad_norm": 0.9356140174534662, "learning_rate": 5.3022697594106485e-06, "loss": 0.1718, "step": 3681 }, { "epoch": 0.5, "grad_norm": 0.674183184663138, "learning_rate": 5.300089940131109e-06, "loss": 0.1315, "step": 3682 }, { "epoch": 0.5, "grad_norm": 0.9472304814852959, "learning_rate": 5.2979100636074254e-06, "loss": 0.1637, "step": 3683 }, { "epoch": 0.5, "grad_norm": 0.7847372566783003, "learning_rate": 5.295730130255427e-06, "loss": 0.0962, "step": 3684 }, { "epoch": 0.5, "grad_norm": 1.1484451253030608, "learning_rate": 5.293550140490948e-06, "loss": 0.2179, "step": 3685 }, { "epoch": 0.5, "grad_norm": 0.9499879374695065, "learning_rate": 5.291370094729837e-06, "loss": 0.1607, "step": 3686 }, { "epoch": 0.5, "grad_norm": 1.0789223456496657, "learning_rate": 5.28918999338795e-06, "loss": 0.1813, "step": 3687 }, { "epoch": 0.5, "grad_norm": 1.0056431545858957, "learning_rate": 5.287009836881157e-06, "loss": 0.1965, "step": 3688 }, { "epoch": 0.5, "grad_norm": 0.882661956032725, "learning_rate": 5.28482962562534e-06, "loss": 0.1627, "step": 3689 }, { "epoch": 0.5, "grad_norm": 1.0031708833227877, "learning_rate": 5.282649360036385e-06, "loss": 0.1893, "step": 3690 }, { "epoch": 0.5, "grad_norm": 0.9451564291994551, "learning_rate": 5.2804690405301935e-06, "loss": 0.1414, "step": 3691 }, { "epoch": 0.5, "grad_norm": 0.9293478902113413, "learning_rate": 5.278288667522677e-06, "loss": 0.1423, "step": 3692 }, { "epoch": 0.5, "grad_norm": 0.7224619315206502, "learning_rate": 5.276108241429754e-06, "loss": 0.1589, "step": 3693 }, { "epoch": 0.5, "grad_norm": 0.7333638817543263, "learning_rate": 5.273927762667357e-06, "loss": 0.1626, "step": 3694 }, { "epoch": 0.5, "grad_norm": 0.7005870033118, "learning_rate": 5.271747231651425e-06, "loss": 0.1223, "step": 3695 }, { "epoch": 0.5, "grad_norm": 1.0564115746875755, "learning_rate": 5.269566648797911e-06, "loss": 0.1803, "step": 3696 }, { "epoch": 0.5, "grad_norm": 0.968857120763289, "learning_rate": 5.267386014522773e-06, "loss": 0.1863, "step": 3697 }, { "epoch": 0.5, "grad_norm": 0.9884706711598452, "learning_rate": 5.265205329241984e-06, "loss": 0.1822, "step": 3698 }, { "epoch": 0.5, "grad_norm": 0.7072500858540849, "learning_rate": 5.2630245933715205e-06, "loss": 0.1101, "step": 3699 }, { "epoch": 0.5, "grad_norm": 1.0749306004075125, "learning_rate": 5.260843807327377e-06, "loss": 0.1995, "step": 3700 }, { "epoch": 0.5, "grad_norm": 0.7996933110290201, "learning_rate": 5.25866297152555e-06, "loss": 0.1733, "step": 3701 }, { "epoch": 0.5, "grad_norm": 0.7803833501486902, "learning_rate": 5.256482086382048e-06, "loss": 0.1633, "step": 3702 }, { "epoch": 0.5, "grad_norm": 1.0708962356231315, "learning_rate": 5.254301152312892e-06, "loss": 0.2159, "step": 3703 }, { "epoch": 0.5, "grad_norm": 0.7492188855353958, "learning_rate": 5.252120169734106e-06, "loss": 0.1117, "step": 3704 }, { "epoch": 0.5, "grad_norm": 1.1496295171836661, "learning_rate": 5.249939139061729e-06, "loss": 0.2188, "step": 3705 }, { "epoch": 0.5, "grad_norm": 1.269230452951328, "learning_rate": 5.247758060711807e-06, "loss": 0.2236, "step": 3706 }, { "epoch": 0.5, "grad_norm": 1.0604161548717466, "learning_rate": 5.245576935100395e-06, "loss": 0.1618, "step": 3707 }, { "epoch": 0.5, "grad_norm": 1.0870712949226224, "learning_rate": 5.243395762643557e-06, "loss": 0.221, "step": 3708 }, { "epoch": 0.5, "grad_norm": 1.046995467391783, "learning_rate": 5.241214543757367e-06, "loss": 0.2187, "step": 3709 }, { "epoch": 0.5, "grad_norm": 0.8722785257107022, "learning_rate": 5.2390332788579046e-06, "loss": 0.2145, "step": 3710 }, { "epoch": 0.5, "grad_norm": 0.8330654294192307, "learning_rate": 5.236851968361262e-06, "loss": 0.1539, "step": 3711 }, { "epoch": 0.5, "grad_norm": 0.9803912558800459, "learning_rate": 5.234670612683539e-06, "loss": 0.1935, "step": 3712 }, { "epoch": 0.5, "grad_norm": 1.0134989963033771, "learning_rate": 5.232489212240843e-06, "loss": 0.2017, "step": 3713 }, { "epoch": 0.5, "grad_norm": 0.8288271377475809, "learning_rate": 5.2303077674492905e-06, "loss": 0.1533, "step": 3714 }, { "epoch": 0.5, "grad_norm": 1.0449657593864183, "learning_rate": 5.2281262787250075e-06, "loss": 0.1936, "step": 3715 }, { "epoch": 0.5, "grad_norm": 0.8936409430176847, "learning_rate": 5.2259447464841265e-06, "loss": 0.1768, "step": 3716 }, { "epoch": 0.5, "grad_norm": 0.925984654368913, "learning_rate": 5.22376317114279e-06, "loss": 0.1789, "step": 3717 }, { "epoch": 0.5, "grad_norm": 1.2102594811799199, "learning_rate": 5.221581553117145e-06, "loss": 0.2347, "step": 3718 }, { "epoch": 0.5, "grad_norm": 1.165983991328409, "learning_rate": 5.219399892823354e-06, "loss": 0.1878, "step": 3719 }, { "epoch": 0.5, "grad_norm": 1.0632121966358203, "learning_rate": 5.2172181906775805e-06, "loss": 0.1838, "step": 3720 }, { "epoch": 0.5, "grad_norm": 0.837140372269511, "learning_rate": 5.215036447096e-06, "loss": 0.1526, "step": 3721 }, { "epoch": 0.5, "grad_norm": 0.9096107734033061, "learning_rate": 5.2128546624947915e-06, "loss": 0.1714, "step": 3722 }, { "epoch": 0.5, "grad_norm": 0.9441005146533967, "learning_rate": 5.210672837290151e-06, "loss": 0.1812, "step": 3723 }, { "epoch": 0.5, "grad_norm": 0.7969846321285017, "learning_rate": 5.208490971898269e-06, "loss": 0.1464, "step": 3724 }, { "epoch": 0.5, "grad_norm": 1.0092825384628463, "learning_rate": 5.206309066735354e-06, "loss": 0.1783, "step": 3725 }, { "epoch": 0.5, "grad_norm": 1.0802591936825787, "learning_rate": 5.20412712221762e-06, "loss": 0.1995, "step": 3726 }, { "epoch": 0.5, "grad_norm": 1.1109621951403017, "learning_rate": 5.2019451387612854e-06, "loss": 0.1868, "step": 3727 }, { "epoch": 0.5, "grad_norm": 1.1196502780326685, "learning_rate": 5.199763116782579e-06, "loss": 0.1991, "step": 3728 }, { "epoch": 0.5, "grad_norm": 0.8885200865369395, "learning_rate": 5.197581056697736e-06, "loss": 0.1969, "step": 3729 }, { "epoch": 0.5, "grad_norm": 1.0949503668631662, "learning_rate": 5.195398958922999e-06, "loss": 0.2354, "step": 3730 }, { "epoch": 0.5, "grad_norm": 0.8010768803967515, "learning_rate": 5.193216823874616e-06, "loss": 0.1395, "step": 3731 }, { "epoch": 0.5, "grad_norm": 1.1284662349481616, "learning_rate": 5.191034651968847e-06, "loss": 0.1985, "step": 3732 }, { "epoch": 0.5, "grad_norm": 0.8432873269386263, "learning_rate": 5.188852443621951e-06, "loss": 0.1308, "step": 3733 }, { "epoch": 0.5, "grad_norm": 0.9923910375440489, "learning_rate": 5.186670199250202e-06, "loss": 0.1937, "step": 3734 }, { "epoch": 0.5, "grad_norm": 0.9210543274045925, "learning_rate": 5.184487919269877e-06, "loss": 0.2218, "step": 3735 }, { "epoch": 0.5, "grad_norm": 0.7174372479587027, "learning_rate": 5.18230560409726e-06, "loss": 0.1566, "step": 3736 }, { "epoch": 0.5, "grad_norm": 0.7996729318732365, "learning_rate": 5.180123254148642e-06, "loss": 0.1473, "step": 3737 }, { "epoch": 0.5, "grad_norm": 0.928917185638948, "learning_rate": 5.177940869840321e-06, "loss": 0.1987, "step": 3738 }, { "epoch": 0.5, "grad_norm": 1.1613328954202478, "learning_rate": 5.175758451588601e-06, "loss": 0.2023, "step": 3739 }, { "epoch": 0.5, "grad_norm": 0.9902834827358428, "learning_rate": 5.173575999809795e-06, "loss": 0.2217, "step": 3740 }, { "epoch": 0.5, "grad_norm": 0.9499255124929148, "learning_rate": 5.171393514920216e-06, "loss": 0.1738, "step": 3741 }, { "epoch": 0.5, "grad_norm": 0.9702897431425609, "learning_rate": 5.1692109973361895e-06, "loss": 0.2137, "step": 3742 }, { "epoch": 0.5, "grad_norm": 1.1608040448497112, "learning_rate": 5.167028447474045e-06, "loss": 0.1524, "step": 3743 }, { "epoch": 0.5, "grad_norm": 0.9125324933125507, "learning_rate": 5.164845865750118e-06, "loss": 0.1794, "step": 3744 }, { "epoch": 0.5, "grad_norm": 1.079290013514648, "learning_rate": 5.162663252580752e-06, "loss": 0.1954, "step": 3745 }, { "epoch": 0.51, "grad_norm": 0.8890295834206469, "learning_rate": 5.160480608382293e-06, "loss": 0.1377, "step": 3746 }, { "epoch": 0.51, "grad_norm": 0.9680142387011001, "learning_rate": 5.158297933571098e-06, "loss": 0.205, "step": 3747 }, { "epoch": 0.51, "grad_norm": 1.2407246404578403, "learning_rate": 5.156115228563522e-06, "loss": 0.2283, "step": 3748 }, { "epoch": 0.51, "grad_norm": 0.8526313160584734, "learning_rate": 5.153932493775934e-06, "loss": 0.1528, "step": 3749 }, { "epoch": 0.51, "grad_norm": 1.1975366958051747, "learning_rate": 5.151749729624701e-06, "loss": 0.1682, "step": 3750 }, { "epoch": 0.51, "grad_norm": 1.143725427951856, "learning_rate": 5.149566936526205e-06, "loss": 0.2234, "step": 3751 }, { "epoch": 0.51, "grad_norm": 0.7777380370529448, "learning_rate": 5.147384114896825e-06, "loss": 0.141, "step": 3752 }, { "epoch": 0.51, "grad_norm": 0.906392766769436, "learning_rate": 5.1452012651529506e-06, "loss": 0.1465, "step": 3753 }, { "epoch": 0.51, "grad_norm": 0.8872235815541994, "learning_rate": 5.143018387710972e-06, "loss": 0.1452, "step": 3754 }, { "epoch": 0.51, "grad_norm": 1.0779168977171776, "learning_rate": 5.140835482987291e-06, "loss": 0.2064, "step": 3755 }, { "epoch": 0.51, "grad_norm": 0.7556475922923319, "learning_rate": 5.138652551398308e-06, "loss": 0.1604, "step": 3756 }, { "epoch": 0.51, "grad_norm": 0.9220340326586427, "learning_rate": 5.136469593360434e-06, "loss": 0.1587, "step": 3757 }, { "epoch": 0.51, "grad_norm": 1.000216532678795, "learning_rate": 5.13428660929008e-06, "loss": 0.1489, "step": 3758 }, { "epoch": 0.51, "grad_norm": 0.9745263769473879, "learning_rate": 5.132103599603668e-06, "loss": 0.1742, "step": 3759 }, { "epoch": 0.51, "grad_norm": 0.9203353122412775, "learning_rate": 5.1299205647176184e-06, "loss": 0.1723, "step": 3760 }, { "epoch": 0.51, "grad_norm": 1.1459578451592884, "learning_rate": 5.127737505048361e-06, "loss": 0.2186, "step": 3761 }, { "epoch": 0.51, "grad_norm": 0.9788207694932425, "learning_rate": 5.125554421012332e-06, "loss": 0.1935, "step": 3762 }, { "epoch": 0.51, "grad_norm": 1.1293794339222851, "learning_rate": 5.123371313025964e-06, "loss": 0.2445, "step": 3763 }, { "epoch": 0.51, "grad_norm": 0.8468951992218279, "learning_rate": 5.121188181505701e-06, "loss": 0.1789, "step": 3764 }, { "epoch": 0.51, "grad_norm": 0.8511899100651229, "learning_rate": 5.11900502686799e-06, "loss": 0.1112, "step": 3765 }, { "epoch": 0.51, "grad_norm": 0.8150212487547686, "learning_rate": 5.116821849529283e-06, "loss": 0.1364, "step": 3766 }, { "epoch": 0.51, "grad_norm": 1.1384247348524583, "learning_rate": 5.114638649906034e-06, "loss": 0.1952, "step": 3767 }, { "epoch": 0.51, "grad_norm": 0.9921294357015834, "learning_rate": 5.112455428414704e-06, "loss": 0.1817, "step": 3768 }, { "epoch": 0.51, "grad_norm": 0.9069453896081027, "learning_rate": 5.110272185471758e-06, "loss": 0.1863, "step": 3769 }, { "epoch": 0.51, "grad_norm": 0.8783263837582249, "learning_rate": 5.108088921493661e-06, "loss": 0.1968, "step": 3770 }, { "epoch": 0.51, "grad_norm": 0.7735576089171402, "learning_rate": 5.105905636896889e-06, "loss": 0.1313, "step": 3771 }, { "epoch": 0.51, "grad_norm": 1.23797015787439, "learning_rate": 5.103722332097912e-06, "loss": 0.2512, "step": 3772 }, { "epoch": 0.51, "grad_norm": 0.7130477471189424, "learning_rate": 5.1015390075132155e-06, "loss": 0.1377, "step": 3773 }, { "epoch": 0.51, "grad_norm": 0.848521292031544, "learning_rate": 5.099355663559281e-06, "loss": 0.1353, "step": 3774 }, { "epoch": 0.51, "grad_norm": 0.6120815964746659, "learning_rate": 5.0971723006525965e-06, "loss": 0.1413, "step": 3775 }, { "epoch": 0.51, "grad_norm": 0.8920882927959981, "learning_rate": 5.094988919209652e-06, "loss": 0.157, "step": 3776 }, { "epoch": 0.51, "grad_norm": 0.7132816950138048, "learning_rate": 5.0928055196469426e-06, "loss": 0.1521, "step": 3777 }, { "epoch": 0.51, "grad_norm": 0.7651111317849348, "learning_rate": 5.090622102380966e-06, "loss": 0.1261, "step": 3778 }, { "epoch": 0.51, "grad_norm": 0.9878036755759267, "learning_rate": 5.088438667828223e-06, "loss": 0.2148, "step": 3779 }, { "epoch": 0.51, "grad_norm": 1.0562284268517312, "learning_rate": 5.086255216405219e-06, "loss": 0.2493, "step": 3780 }, { "epoch": 0.51, "grad_norm": 0.9240016249332471, "learning_rate": 5.084071748528462e-06, "loss": 0.169, "step": 3781 }, { "epoch": 0.51, "grad_norm": 1.155340908162839, "learning_rate": 5.081888264614462e-06, "loss": 0.2524, "step": 3782 }, { "epoch": 0.51, "grad_norm": 0.735232596917756, "learning_rate": 5.079704765079733e-06, "loss": 0.1198, "step": 3783 }, { "epoch": 0.51, "grad_norm": 0.9793150313883279, "learning_rate": 5.077521250340791e-06, "loss": 0.1999, "step": 3784 }, { "epoch": 0.51, "grad_norm": 0.9258968967574076, "learning_rate": 5.07533772081416e-06, "loss": 0.2022, "step": 3785 }, { "epoch": 0.51, "grad_norm": 1.1438794534701744, "learning_rate": 5.07315417691636e-06, "loss": 0.1987, "step": 3786 }, { "epoch": 0.51, "grad_norm": 0.952465190342942, "learning_rate": 5.070970619063915e-06, "loss": 0.1341, "step": 3787 }, { "epoch": 0.51, "grad_norm": 1.0929583721354423, "learning_rate": 5.068787047673356e-06, "loss": 0.2026, "step": 3788 }, { "epoch": 0.51, "grad_norm": 0.9194558924745534, "learning_rate": 5.06660346316121e-06, "loss": 0.1823, "step": 3789 }, { "epoch": 0.51, "grad_norm": 0.7250770611759731, "learning_rate": 5.0644198659440145e-06, "loss": 0.1112, "step": 3790 }, { "epoch": 0.51, "grad_norm": 0.561716501723813, "learning_rate": 5.062236256438303e-06, "loss": 0.0948, "step": 3791 }, { "epoch": 0.51, "grad_norm": 1.0591681716700405, "learning_rate": 5.060052635060613e-06, "loss": 0.2446, "step": 3792 }, { "epoch": 0.51, "grad_norm": 0.9239851682394968, "learning_rate": 5.057869002227485e-06, "loss": 0.2056, "step": 3793 }, { "epoch": 0.51, "grad_norm": 0.910095183679671, "learning_rate": 5.055685358355464e-06, "loss": 0.1916, "step": 3794 }, { "epoch": 0.51, "grad_norm": 0.9918258352961842, "learning_rate": 5.05350170386109e-06, "loss": 0.1829, "step": 3795 }, { "epoch": 0.51, "grad_norm": 0.9333594347896143, "learning_rate": 5.051318039160913e-06, "loss": 0.1986, "step": 3796 }, { "epoch": 0.51, "grad_norm": 1.0220792205052367, "learning_rate": 5.04913436467148e-06, "loss": 0.1743, "step": 3797 }, { "epoch": 0.51, "grad_norm": 0.9156249494242872, "learning_rate": 5.046950680809341e-06, "loss": 0.1502, "step": 3798 }, { "epoch": 0.51, "grad_norm": 1.1598782644666075, "learning_rate": 5.04476698799105e-06, "loss": 0.2143, "step": 3799 }, { "epoch": 0.51, "grad_norm": 1.0080687912066868, "learning_rate": 5.042583286633158e-06, "loss": 0.1428, "step": 3800 }, { "epoch": 0.51, "grad_norm": 1.0173780467143883, "learning_rate": 5.040399577152224e-06, "loss": 0.1394, "step": 3801 }, { "epoch": 0.51, "grad_norm": 0.7082613800838057, "learning_rate": 5.038215859964802e-06, "loss": 0.1444, "step": 3802 }, { "epoch": 0.51, "grad_norm": 0.8471817688986188, "learning_rate": 5.036032135487452e-06, "loss": 0.1885, "step": 3803 }, { "epoch": 0.51, "grad_norm": 0.9600291022842052, "learning_rate": 5.033848404136734e-06, "loss": 0.1616, "step": 3804 }, { "epoch": 0.51, "grad_norm": 0.8172052669151572, "learning_rate": 5.031664666329209e-06, "loss": 0.1664, "step": 3805 }, { "epoch": 0.51, "grad_norm": 1.1941085359999977, "learning_rate": 5.029480922481438e-06, "loss": 0.1946, "step": 3806 }, { "epoch": 0.51, "grad_norm": 1.056428766836335, "learning_rate": 5.0272971730099865e-06, "loss": 0.154, "step": 3807 }, { "epoch": 0.51, "grad_norm": 0.8342434733433041, "learning_rate": 5.02511341833142e-06, "loss": 0.1741, "step": 3808 }, { "epoch": 0.51, "grad_norm": 1.3862723687186698, "learning_rate": 5.022929658862302e-06, "loss": 0.2148, "step": 3809 }, { "epoch": 0.51, "grad_norm": 1.121466693619795, "learning_rate": 5.020745895019199e-06, "loss": 0.2209, "step": 3810 }, { "epoch": 0.51, "grad_norm": 0.9486743015538583, "learning_rate": 5.01856212721868e-06, "loss": 0.1749, "step": 3811 }, { "epoch": 0.51, "grad_norm": 1.0468454423294558, "learning_rate": 5.0163783558773104e-06, "loss": 0.2424, "step": 3812 }, { "epoch": 0.51, "grad_norm": 0.9140835784280135, "learning_rate": 5.014194581411663e-06, "loss": 0.1866, "step": 3813 }, { "epoch": 0.51, "grad_norm": 0.6697742984396392, "learning_rate": 5.0120108042383045e-06, "loss": 0.1678, "step": 3814 }, { "epoch": 0.51, "grad_norm": 0.9000300708090622, "learning_rate": 5.009827024773806e-06, "loss": 0.1411, "step": 3815 }, { "epoch": 0.51, "grad_norm": 0.9538338036268852, "learning_rate": 5.0076432434347375e-06, "loss": 0.1917, "step": 3816 }, { "epoch": 0.51, "grad_norm": 1.0396754732741675, "learning_rate": 5.00545946063767e-06, "loss": 0.1918, "step": 3817 }, { "epoch": 0.51, "grad_norm": 0.8175653621383379, "learning_rate": 5.003275676799173e-06, "loss": 0.1417, "step": 3818 }, { "epoch": 0.51, "grad_norm": 0.9727316629977372, "learning_rate": 5.00109189233582e-06, "loss": 0.1915, "step": 3819 }, { "epoch": 0.52, "grad_norm": 0.7638908161671571, "learning_rate": 4.99890810766418e-06, "loss": 0.1433, "step": 3820 }, { "epoch": 0.52, "grad_norm": 0.8335404455816515, "learning_rate": 4.996724323200827e-06, "loss": 0.1518, "step": 3821 }, { "epoch": 0.52, "grad_norm": 0.8354504910341995, "learning_rate": 4.994540539362331e-06, "loss": 0.1585, "step": 3822 }, { "epoch": 0.52, "grad_norm": 1.171101342695506, "learning_rate": 4.992356756565264e-06, "loss": 0.1981, "step": 3823 }, { "epoch": 0.52, "grad_norm": 0.49215697852619444, "learning_rate": 4.990172975226195e-06, "loss": 0.1228, "step": 3824 }, { "epoch": 0.52, "grad_norm": 1.2023057132169046, "learning_rate": 4.987989195761696e-06, "loss": 0.2238, "step": 3825 }, { "epoch": 0.52, "grad_norm": 0.5834259218510427, "learning_rate": 4.985805418588339e-06, "loss": 0.0965, "step": 3826 }, { "epoch": 0.52, "grad_norm": 0.8290979221449061, "learning_rate": 4.983621644122691e-06, "loss": 0.1589, "step": 3827 }, { "epoch": 0.52, "grad_norm": 0.9231875261876668, "learning_rate": 4.981437872781323e-06, "loss": 0.1895, "step": 3828 }, { "epoch": 0.52, "grad_norm": 0.7912589633855552, "learning_rate": 4.979254104980803e-06, "loss": 0.1633, "step": 3829 }, { "epoch": 0.52, "grad_norm": 0.664169241479277, "learning_rate": 4.9770703411377005e-06, "loss": 0.1274, "step": 3830 }, { "epoch": 0.52, "grad_norm": 0.8119496474317474, "learning_rate": 4.974886581668581e-06, "loss": 0.1582, "step": 3831 }, { "epoch": 0.52, "grad_norm": 0.9496594922505179, "learning_rate": 4.9727028269900135e-06, "loss": 0.1853, "step": 3832 }, { "epoch": 0.52, "grad_norm": 0.8643991226422142, "learning_rate": 4.970519077518563e-06, "loss": 0.2108, "step": 3833 }, { "epoch": 0.52, "grad_norm": 0.8888097753339053, "learning_rate": 4.968335333670792e-06, "loss": 0.161, "step": 3834 }, { "epoch": 0.52, "grad_norm": 0.6702436629217124, "learning_rate": 4.966151595863267e-06, "loss": 0.0704, "step": 3835 }, { "epoch": 0.52, "grad_norm": 0.8828507512999598, "learning_rate": 4.963967864512549e-06, "loss": 0.16, "step": 3836 }, { "epoch": 0.52, "grad_norm": 0.7685815868248181, "learning_rate": 4.9617841400351994e-06, "loss": 0.1684, "step": 3837 }, { "epoch": 0.52, "grad_norm": 1.3278373412241034, "learning_rate": 4.9596004228477775e-06, "loss": 0.2339, "step": 3838 }, { "epoch": 0.52, "grad_norm": 1.2252368507395326, "learning_rate": 4.9574167133668425e-06, "loss": 0.1669, "step": 3839 }, { "epoch": 0.52, "grad_norm": 0.819353301957595, "learning_rate": 4.9552330120089535e-06, "loss": 0.1684, "step": 3840 }, { "epoch": 0.52, "grad_norm": 0.918986272952228, "learning_rate": 4.953049319190662e-06, "loss": 0.1619, "step": 3841 }, { "epoch": 0.52, "grad_norm": 1.282288024319792, "learning_rate": 4.95086563532852e-06, "loss": 0.2363, "step": 3842 }, { "epoch": 0.52, "grad_norm": 1.1785767737122321, "learning_rate": 4.948681960839088e-06, "loss": 0.1969, "step": 3843 }, { "epoch": 0.52, "grad_norm": 0.8795750540825493, "learning_rate": 4.946498296138911e-06, "loss": 0.1285, "step": 3844 }, { "epoch": 0.52, "grad_norm": 0.9809214891555782, "learning_rate": 4.944314641644537e-06, "loss": 0.2023, "step": 3845 }, { "epoch": 0.52, "grad_norm": 0.8960981582942757, "learning_rate": 4.942130997772515e-06, "loss": 0.215, "step": 3846 }, { "epoch": 0.52, "grad_norm": 0.8239846397827746, "learning_rate": 4.939947364939388e-06, "loss": 0.1283, "step": 3847 }, { "epoch": 0.52, "grad_norm": 1.1177728299646266, "learning_rate": 4.937763743561699e-06, "loss": 0.2586, "step": 3848 }, { "epoch": 0.52, "grad_norm": 0.6966964141432771, "learning_rate": 4.935580134055986e-06, "loss": 0.1059, "step": 3849 }, { "epoch": 0.52, "grad_norm": 0.9881611671501764, "learning_rate": 4.933396536838791e-06, "loss": 0.2216, "step": 3850 }, { "epoch": 0.52, "grad_norm": 0.905856216067459, "learning_rate": 4.931212952326646e-06, "loss": 0.1816, "step": 3851 }, { "epoch": 0.52, "grad_norm": 1.2350319586946417, "learning_rate": 4.929029380936087e-06, "loss": 0.2421, "step": 3852 }, { "epoch": 0.52, "grad_norm": 0.8690823772037858, "learning_rate": 4.926845823083643e-06, "loss": 0.1481, "step": 3853 }, { "epoch": 0.52, "grad_norm": 0.8874150227745957, "learning_rate": 4.92466227918584e-06, "loss": 0.1573, "step": 3854 }, { "epoch": 0.52, "grad_norm": 0.8554332111186558, "learning_rate": 4.922478749659208e-06, "loss": 0.1696, "step": 3855 }, { "epoch": 0.52, "grad_norm": 0.7882162099885286, "learning_rate": 4.920295234920269e-06, "loss": 0.1242, "step": 3856 }, { "epoch": 0.52, "grad_norm": 0.9158407549519675, "learning_rate": 4.91811173538554e-06, "loss": 0.1771, "step": 3857 }, { "epoch": 0.52, "grad_norm": 0.8213150441271284, "learning_rate": 4.91592825147154e-06, "loss": 0.1489, "step": 3858 }, { "epoch": 0.52, "grad_norm": 0.9216459569130909, "learning_rate": 4.913744783594783e-06, "loss": 0.1803, "step": 3859 }, { "epoch": 0.52, "grad_norm": 0.8033096340138559, "learning_rate": 4.911561332171779e-06, "loss": 0.1902, "step": 3860 }, { "epoch": 0.52, "grad_norm": 0.8523357117268346, "learning_rate": 4.909377897619036e-06, "loss": 0.1615, "step": 3861 }, { "epoch": 0.52, "grad_norm": 0.9353447964802794, "learning_rate": 4.907194480353059e-06, "loss": 0.1701, "step": 3862 }, { "epoch": 0.52, "grad_norm": 0.9897257452273038, "learning_rate": 4.9050110807903495e-06, "loss": 0.2112, "step": 3863 }, { "epoch": 0.52, "grad_norm": 1.0565965899154968, "learning_rate": 4.902827699347406e-06, "loss": 0.2284, "step": 3864 }, { "epoch": 0.52, "grad_norm": 0.9842966954383243, "learning_rate": 4.900644336440719e-06, "loss": 0.1784, "step": 3865 }, { "epoch": 0.52, "grad_norm": 1.0288535978056468, "learning_rate": 4.8984609924867845e-06, "loss": 0.189, "step": 3866 }, { "epoch": 0.52, "grad_norm": 0.9740721617788877, "learning_rate": 4.896277667902089e-06, "loss": 0.2244, "step": 3867 }, { "epoch": 0.52, "grad_norm": 0.9279381827293477, "learning_rate": 4.894094363103114e-06, "loss": 0.138, "step": 3868 }, { "epoch": 0.52, "grad_norm": 1.1279175366977847, "learning_rate": 4.89191107850634e-06, "loss": 0.1645, "step": 3869 }, { "epoch": 0.52, "grad_norm": 1.0306371738153002, "learning_rate": 4.889727814528245e-06, "loss": 0.1963, "step": 3870 }, { "epoch": 0.52, "grad_norm": 0.845423663031792, "learning_rate": 4.8875445715852974e-06, "loss": 0.1431, "step": 3871 }, { "epoch": 0.52, "grad_norm": 1.06912939847565, "learning_rate": 4.885361350093968e-06, "loss": 0.159, "step": 3872 }, { "epoch": 0.52, "grad_norm": 0.7664634001361564, "learning_rate": 4.883178150470719e-06, "loss": 0.1188, "step": 3873 }, { "epoch": 0.52, "grad_norm": 1.0879843258024027, "learning_rate": 4.8809949731320124e-06, "loss": 0.2073, "step": 3874 }, { "epoch": 0.52, "grad_norm": 0.9997711372013901, "learning_rate": 4.878811818494301e-06, "loss": 0.2016, "step": 3875 }, { "epoch": 0.52, "grad_norm": 0.7308478655475236, "learning_rate": 4.876628686974038e-06, "loss": 0.1245, "step": 3876 }, { "epoch": 0.52, "grad_norm": 0.8351028527851251, "learning_rate": 4.87444557898767e-06, "loss": 0.1313, "step": 3877 }, { "epoch": 0.52, "grad_norm": 0.8774215195067292, "learning_rate": 4.872262494951639e-06, "loss": 0.1502, "step": 3878 }, { "epoch": 0.52, "grad_norm": 1.0109846043409234, "learning_rate": 4.870079435282382e-06, "loss": 0.1723, "step": 3879 }, { "epoch": 0.52, "grad_norm": 0.8719588432564775, "learning_rate": 4.8678964003963336e-06, "loss": 0.1955, "step": 3880 }, { "epoch": 0.52, "grad_norm": 0.7222409609414056, "learning_rate": 4.8657133907099215e-06, "loss": 0.1677, "step": 3881 }, { "epoch": 0.52, "grad_norm": 0.8908594117517278, "learning_rate": 4.863530406639568e-06, "loss": 0.2171, "step": 3882 }, { "epoch": 0.52, "grad_norm": 0.7760040396997208, "learning_rate": 4.861347448601694e-06, "loss": 0.1613, "step": 3883 }, { "epoch": 0.52, "grad_norm": 0.8974476138476687, "learning_rate": 4.859164517012711e-06, "loss": 0.1569, "step": 3884 }, { "epoch": 0.52, "grad_norm": 1.1518958204108947, "learning_rate": 4.856981612289029e-06, "loss": 0.2268, "step": 3885 }, { "epoch": 0.52, "grad_norm": 1.0619909983189082, "learning_rate": 4.854798734847052e-06, "loss": 0.1883, "step": 3886 }, { "epoch": 0.52, "grad_norm": 1.0001360228366571, "learning_rate": 4.852615885103175e-06, "loss": 0.2275, "step": 3887 }, { "epoch": 0.52, "grad_norm": 0.8872152488709741, "learning_rate": 4.850433063473795e-06, "loss": 0.1599, "step": 3888 }, { "epoch": 0.52, "grad_norm": 0.837091214744943, "learning_rate": 4.848250270375298e-06, "loss": 0.1044, "step": 3889 }, { "epoch": 0.52, "grad_norm": 0.7167698345522614, "learning_rate": 4.846067506224068e-06, "loss": 0.1589, "step": 3890 }, { "epoch": 0.52, "grad_norm": 0.8407250864937743, "learning_rate": 4.843884771436479e-06, "loss": 0.1412, "step": 3891 }, { "epoch": 0.52, "grad_norm": 0.8554436321969415, "learning_rate": 4.841702066428904e-06, "loss": 0.1816, "step": 3892 }, { "epoch": 0.52, "grad_norm": 0.6856523374574385, "learning_rate": 4.839519391617708e-06, "loss": 0.1211, "step": 3893 }, { "epoch": 0.53, "grad_norm": 0.9310956211449959, "learning_rate": 4.83733674741925e-06, "loss": 0.1769, "step": 3894 }, { "epoch": 0.53, "grad_norm": 0.9322646242543944, "learning_rate": 4.835154134249883e-06, "loss": 0.1735, "step": 3895 }, { "epoch": 0.53, "grad_norm": 0.7801369193934391, "learning_rate": 4.832971552525957e-06, "loss": 0.1356, "step": 3896 }, { "epoch": 0.53, "grad_norm": 0.9306880129704641, "learning_rate": 4.830789002663813e-06, "loss": 0.1523, "step": 3897 }, { "epoch": 0.53, "grad_norm": 0.7746034376360663, "learning_rate": 4.828606485079787e-06, "loss": 0.1273, "step": 3898 }, { "epoch": 0.53, "grad_norm": 0.7581867379849405, "learning_rate": 4.826424000190207e-06, "loss": 0.1308, "step": 3899 }, { "epoch": 0.53, "grad_norm": 1.0522613901580788, "learning_rate": 4.8242415484113995e-06, "loss": 0.1884, "step": 3900 }, { "epoch": 0.53, "grad_norm": 0.8064230197129383, "learning_rate": 4.82205913015968e-06, "loss": 0.1345, "step": 3901 }, { "epoch": 0.53, "grad_norm": 0.7779891322021222, "learning_rate": 4.819876745851359e-06, "loss": 0.1476, "step": 3902 }, { "epoch": 0.53, "grad_norm": 1.126807938196983, "learning_rate": 4.817694395902741e-06, "loss": 0.2002, "step": 3903 }, { "epoch": 0.53, "grad_norm": 1.0695085496808847, "learning_rate": 4.815512080730125e-06, "loss": 0.1893, "step": 3904 }, { "epoch": 0.53, "grad_norm": 1.0269167445390677, "learning_rate": 4.813329800749799e-06, "loss": 0.1441, "step": 3905 }, { "epoch": 0.53, "grad_norm": 0.7329744240657049, "learning_rate": 4.811147556378051e-06, "loss": 0.1391, "step": 3906 }, { "epoch": 0.53, "grad_norm": 0.9205876881580598, "learning_rate": 4.808965348031156e-06, "loss": 0.1909, "step": 3907 }, { "epoch": 0.53, "grad_norm": 1.0097374705866675, "learning_rate": 4.806783176125385e-06, "loss": 0.2278, "step": 3908 }, { "epoch": 0.53, "grad_norm": 1.195305329629581, "learning_rate": 4.8046010410770025e-06, "loss": 0.1945, "step": 3909 }, { "epoch": 0.53, "grad_norm": 0.9222578623671897, "learning_rate": 4.8024189433022635e-06, "loss": 0.2043, "step": 3910 }, { "epoch": 0.53, "grad_norm": 1.0489907398001412, "learning_rate": 4.800236883217421e-06, "loss": 0.1838, "step": 3911 }, { "epoch": 0.53, "grad_norm": 1.0039734674458207, "learning_rate": 4.798054861238715e-06, "loss": 0.1916, "step": 3912 }, { "epoch": 0.53, "grad_norm": 0.9018080623348476, "learning_rate": 4.795872877782381e-06, "loss": 0.1778, "step": 3913 }, { "epoch": 0.53, "grad_norm": 0.690714923498507, "learning_rate": 4.7936909332646465e-06, "loss": 0.1359, "step": 3914 }, { "epoch": 0.53, "grad_norm": 0.9832511893317845, "learning_rate": 4.791509028101732e-06, "loss": 0.1679, "step": 3915 }, { "epoch": 0.53, "grad_norm": 0.8707693437230548, "learning_rate": 4.789327162709852e-06, "loss": 0.1451, "step": 3916 }, { "epoch": 0.53, "grad_norm": 0.978804276263669, "learning_rate": 4.787145337505209e-06, "loss": 0.1633, "step": 3917 }, { "epoch": 0.53, "grad_norm": 1.1533126938669376, "learning_rate": 4.7849635529040025e-06, "loss": 0.2131, "step": 3918 }, { "epoch": 0.53, "grad_norm": 0.8609182321384063, "learning_rate": 4.782781809322421e-06, "loss": 0.1321, "step": 3919 }, { "epoch": 0.53, "grad_norm": 0.5563422856951902, "learning_rate": 4.780600107176648e-06, "loss": 0.0982, "step": 3920 }, { "epoch": 0.53, "grad_norm": 1.0122362703245025, "learning_rate": 4.778418446882855e-06, "loss": 0.1718, "step": 3921 }, { "epoch": 0.53, "grad_norm": 0.8132614666926253, "learning_rate": 4.776236828857211e-06, "loss": 0.1428, "step": 3922 }, { "epoch": 0.53, "grad_norm": 0.5526081679855167, "learning_rate": 4.774055253515875e-06, "loss": 0.127, "step": 3923 }, { "epoch": 0.53, "grad_norm": 0.8701672349767257, "learning_rate": 4.771873721274994e-06, "loss": 0.1609, "step": 3924 }, { "epoch": 0.53, "grad_norm": 0.9486011271070752, "learning_rate": 4.769692232550711e-06, "loss": 0.2094, "step": 3925 }, { "epoch": 0.53, "grad_norm": 0.8940973243291737, "learning_rate": 4.767510787759158e-06, "loss": 0.1758, "step": 3926 }, { "epoch": 0.53, "grad_norm": 0.9997571213025298, "learning_rate": 4.765329387316463e-06, "loss": 0.1769, "step": 3927 }, { "epoch": 0.53, "grad_norm": 0.9610928325188381, "learning_rate": 4.7631480316387395e-06, "loss": 0.17, "step": 3928 }, { "epoch": 0.53, "grad_norm": 0.8105875399896512, "learning_rate": 4.760966721142097e-06, "loss": 0.1702, "step": 3929 }, { "epoch": 0.53, "grad_norm": 0.7184382961834598, "learning_rate": 4.758785456242636e-06, "loss": 0.1073, "step": 3930 }, { "epoch": 0.53, "grad_norm": 0.8388050098996329, "learning_rate": 4.7566042373564445e-06, "loss": 0.1256, "step": 3931 }, { "epoch": 0.53, "grad_norm": 0.778159926874574, "learning_rate": 4.754423064899605e-06, "loss": 0.1607, "step": 3932 }, { "epoch": 0.53, "grad_norm": 0.9129985058065117, "learning_rate": 4.752241939288193e-06, "loss": 0.2019, "step": 3933 }, { "epoch": 0.53, "grad_norm": 1.2180707685643726, "learning_rate": 4.7500608609382715e-06, "loss": 0.2343, "step": 3934 }, { "epoch": 0.53, "grad_norm": 0.8363574385680551, "learning_rate": 4.7478798302658944e-06, "loss": 0.175, "step": 3935 }, { "epoch": 0.53, "grad_norm": 0.7747051367198476, "learning_rate": 4.74569884768711e-06, "loss": 0.1227, "step": 3936 }, { "epoch": 0.53, "grad_norm": 0.825484052809516, "learning_rate": 4.7435179136179525e-06, "loss": 0.2104, "step": 3937 }, { "epoch": 0.53, "grad_norm": 1.141624365481997, "learning_rate": 4.741337028474452e-06, "loss": 0.193, "step": 3938 }, { "epoch": 0.53, "grad_norm": 0.8373712843838721, "learning_rate": 4.739156192672624e-06, "loss": 0.0907, "step": 3939 }, { "epoch": 0.53, "grad_norm": 0.8158017141345678, "learning_rate": 4.736975406628481e-06, "loss": 0.1521, "step": 3940 }, { "epoch": 0.53, "grad_norm": 0.9228094092804496, "learning_rate": 4.734794670758019e-06, "loss": 0.1609, "step": 3941 }, { "epoch": 0.53, "grad_norm": 0.8285708789042918, "learning_rate": 4.73261398547723e-06, "loss": 0.1526, "step": 3942 }, { "epoch": 0.53, "grad_norm": 1.1709764035782442, "learning_rate": 4.730433351202089e-06, "loss": 0.2283, "step": 3943 }, { "epoch": 0.53, "grad_norm": 0.8002350601611758, "learning_rate": 4.728252768348574e-06, "loss": 0.1928, "step": 3944 }, { "epoch": 0.53, "grad_norm": 1.085061167796444, "learning_rate": 4.726072237332644e-06, "loss": 0.17, "step": 3945 }, { "epoch": 0.53, "grad_norm": 1.08487666280776, "learning_rate": 4.723891758570247e-06, "loss": 0.1542, "step": 3946 }, { "epoch": 0.53, "grad_norm": 0.9051892534954081, "learning_rate": 4.721711332477324e-06, "loss": 0.1914, "step": 3947 }, { "epoch": 0.53, "grad_norm": 0.8930246522716515, "learning_rate": 4.719530959469807e-06, "loss": 0.1775, "step": 3948 }, { "epoch": 0.53, "grad_norm": 1.0204484269020921, "learning_rate": 4.717350639963616e-06, "loss": 0.2059, "step": 3949 }, { "epoch": 0.53, "grad_norm": 0.9597416738162747, "learning_rate": 4.715170374374662e-06, "loss": 0.1909, "step": 3950 }, { "epoch": 0.53, "grad_norm": 1.034549496385055, "learning_rate": 4.712990163118844e-06, "loss": 0.2302, "step": 3951 }, { "epoch": 0.53, "grad_norm": 0.9671694330706364, "learning_rate": 4.710810006612052e-06, "loss": 0.1915, "step": 3952 }, { "epoch": 0.53, "grad_norm": 0.686875115775582, "learning_rate": 4.708629905270166e-06, "loss": 0.1535, "step": 3953 }, { "epoch": 0.53, "grad_norm": 0.91762685678245, "learning_rate": 4.706449859509055e-06, "loss": 0.1828, "step": 3954 }, { "epoch": 0.53, "grad_norm": 0.8524657677565112, "learning_rate": 4.704269869744574e-06, "loss": 0.1838, "step": 3955 }, { "epoch": 0.53, "grad_norm": 0.8948072380999594, "learning_rate": 4.7020899363925745e-06, "loss": 0.1549, "step": 3956 }, { "epoch": 0.53, "grad_norm": 0.6935852370605616, "learning_rate": 4.699910059868892e-06, "loss": 0.1484, "step": 3957 }, { "epoch": 0.53, "grad_norm": 0.9164110305337375, "learning_rate": 4.697730240589352e-06, "loss": 0.1571, "step": 3958 }, { "epoch": 0.53, "grad_norm": 0.8564251540772106, "learning_rate": 4.695550478969769e-06, "loss": 0.1377, "step": 3959 }, { "epoch": 0.53, "grad_norm": 0.8496147896821714, "learning_rate": 4.6933707754259485e-06, "loss": 0.1766, "step": 3960 }, { "epoch": 0.53, "grad_norm": 0.9251392386733661, "learning_rate": 4.691191130373682e-06, "loss": 0.1313, "step": 3961 }, { "epoch": 0.53, "grad_norm": 0.656558428729138, "learning_rate": 4.689011544228752e-06, "loss": 0.1364, "step": 3962 }, { "epoch": 0.53, "grad_norm": 0.9569428186165508, "learning_rate": 4.686832017406929e-06, "loss": 0.154, "step": 3963 }, { "epoch": 0.53, "grad_norm": 1.151582101459163, "learning_rate": 4.684652550323972e-06, "loss": 0.2111, "step": 3964 }, { "epoch": 0.53, "grad_norm": 0.6208502537432142, "learning_rate": 4.682473143395626e-06, "loss": 0.0891, "step": 3965 }, { "epoch": 0.53, "grad_norm": 1.1200321627292125, "learning_rate": 4.68029379703763e-06, "loss": 0.1954, "step": 3966 }, { "epoch": 0.53, "grad_norm": 0.9096408166360085, "learning_rate": 4.678114511665709e-06, "loss": 0.1318, "step": 3967 }, { "epoch": 0.54, "grad_norm": 0.8433072798144762, "learning_rate": 4.675935287695577e-06, "loss": 0.1486, "step": 3968 }, { "epoch": 0.54, "grad_norm": 0.7623203418848972, "learning_rate": 4.673756125542934e-06, "loss": 0.1302, "step": 3969 }, { "epoch": 0.54, "grad_norm": 0.9144559463285483, "learning_rate": 4.67157702562347e-06, "loss": 0.1443, "step": 3970 }, { "epoch": 0.54, "grad_norm": 0.8519460747815207, "learning_rate": 4.669397988352862e-06, "loss": 0.1497, "step": 3971 }, { "epoch": 0.54, "grad_norm": 0.8002424926756003, "learning_rate": 4.667219014146775e-06, "loss": 0.1662, "step": 3972 }, { "epoch": 0.54, "grad_norm": 0.84366648117451, "learning_rate": 4.665040103420865e-06, "loss": 0.2015, "step": 3973 }, { "epoch": 0.54, "grad_norm": 1.0750896246863462, "learning_rate": 4.662861256590772e-06, "loss": 0.2256, "step": 3974 }, { "epoch": 0.54, "grad_norm": 1.0145208822731813, "learning_rate": 4.660682474072127e-06, "loss": 0.1532, "step": 3975 }, { "epoch": 0.54, "grad_norm": 0.9007004739412044, "learning_rate": 4.658503756280546e-06, "loss": 0.2014, "step": 3976 }, { "epoch": 0.54, "grad_norm": 0.7756406183127332, "learning_rate": 4.656325103631632e-06, "loss": 0.1412, "step": 3977 }, { "epoch": 0.54, "grad_norm": 1.1894541493237256, "learning_rate": 4.654146516540982e-06, "loss": 0.2161, "step": 3978 }, { "epoch": 0.54, "grad_norm": 0.848021161112219, "learning_rate": 4.651967995424173e-06, "loss": 0.1476, "step": 3979 }, { "epoch": 0.54, "grad_norm": 0.7630845014691678, "learning_rate": 4.649789540696772e-06, "loss": 0.1432, "step": 3980 }, { "epoch": 0.54, "grad_norm": 1.0862205465161958, "learning_rate": 4.647611152774335e-06, "loss": 0.1571, "step": 3981 }, { "epoch": 0.54, "grad_norm": 1.036127420357128, "learning_rate": 4.645432832072404e-06, "loss": 0.2073, "step": 3982 }, { "epoch": 0.54, "grad_norm": 0.9749022739896953, "learning_rate": 4.643254579006506e-06, "loss": 0.2198, "step": 3983 }, { "epoch": 0.54, "grad_norm": 0.8560423676473358, "learning_rate": 4.641076393992159e-06, "loss": 0.1314, "step": 3984 }, { "epoch": 0.54, "grad_norm": 1.0194804981069274, "learning_rate": 4.638898277444866e-06, "loss": 0.1955, "step": 3985 }, { "epoch": 0.54, "grad_norm": 1.0405580915572261, "learning_rate": 4.636720229780117e-06, "loss": 0.2142, "step": 3986 }, { "epoch": 0.54, "grad_norm": 0.8658518720618914, "learning_rate": 4.63454225141339e-06, "loss": 0.1471, "step": 3987 }, { "epoch": 0.54, "grad_norm": 0.5857244969691943, "learning_rate": 4.632364342760145e-06, "loss": 0.1045, "step": 3988 }, { "epoch": 0.54, "grad_norm": 0.9876477633595183, "learning_rate": 4.630186504235836e-06, "loss": 0.1654, "step": 3989 }, { "epoch": 0.54, "grad_norm": 0.9704478777980777, "learning_rate": 4.6280087362559004e-06, "loss": 0.2277, "step": 3990 }, { "epoch": 0.54, "grad_norm": 0.9200394975248318, "learning_rate": 4.625831039235761e-06, "loss": 0.1647, "step": 3991 }, { "epoch": 0.54, "grad_norm": 0.789408619787891, "learning_rate": 4.623653413590827e-06, "loss": 0.1259, "step": 3992 }, { "epoch": 0.54, "grad_norm": 1.4421378371323545, "learning_rate": 4.621475859736498e-06, "loss": 0.2243, "step": 3993 }, { "epoch": 0.54, "grad_norm": 0.8805704069820712, "learning_rate": 4.619298378088152e-06, "loss": 0.1556, "step": 3994 }, { "epoch": 0.54, "grad_norm": 0.9487497213141076, "learning_rate": 4.617120969061161e-06, "loss": 0.1979, "step": 3995 }, { "epoch": 0.54, "grad_norm": 0.8584474035430331, "learning_rate": 4.61494363307088e-06, "loss": 0.1663, "step": 3996 }, { "epoch": 0.54, "grad_norm": 0.9812679458656697, "learning_rate": 4.61276637053265e-06, "loss": 0.154, "step": 3997 }, { "epoch": 0.54, "grad_norm": 0.6311868090961761, "learning_rate": 4.6105891818617976e-06, "loss": 0.152, "step": 3998 }, { "epoch": 0.54, "grad_norm": 0.912480596398402, "learning_rate": 4.608412067473637e-06, "loss": 0.1546, "step": 3999 }, { "epoch": 0.54, "grad_norm": 0.9884926788821717, "learning_rate": 4.606235027783466e-06, "loss": 0.1826, "step": 4000 }, { "epoch": 0.54, "grad_norm": 1.2319768451719282, "learning_rate": 4.60405806320657e-06, "loss": 0.2191, "step": 4001 }, { "epoch": 0.54, "grad_norm": 0.8344184811739532, "learning_rate": 4.6018811741582194e-06, "loss": 0.153, "step": 4002 }, { "epoch": 0.54, "grad_norm": 1.4296759528869725, "learning_rate": 4.599704361053672e-06, "loss": 0.2204, "step": 4003 }, { "epoch": 0.54, "grad_norm": 1.00314379566071, "learning_rate": 4.597527624308166e-06, "loss": 0.188, "step": 4004 }, { "epoch": 0.54, "grad_norm": 1.0838807374529467, "learning_rate": 4.595350964336931e-06, "loss": 0.1729, "step": 4005 }, { "epoch": 0.54, "grad_norm": 0.7771085431627314, "learning_rate": 4.593174381555176e-06, "loss": 0.1488, "step": 4006 }, { "epoch": 0.54, "grad_norm": 1.2037287836327564, "learning_rate": 4.590997876378103e-06, "loss": 0.2149, "step": 4007 }, { "epoch": 0.54, "grad_norm": 0.8815845498533571, "learning_rate": 4.588821449220892e-06, "loss": 0.0974, "step": 4008 }, { "epoch": 0.54, "grad_norm": 0.9360401780098738, "learning_rate": 4.586645100498711e-06, "loss": 0.1919, "step": 4009 }, { "epoch": 0.54, "grad_norm": 1.0413401041512087, "learning_rate": 4.584468830626715e-06, "loss": 0.191, "step": 4010 }, { "epoch": 0.54, "grad_norm": 0.8521986721832869, "learning_rate": 4.582292640020037e-06, "loss": 0.1628, "step": 4011 }, { "epoch": 0.54, "grad_norm": 0.8698653303107923, "learning_rate": 4.580116529093805e-06, "loss": 0.148, "step": 4012 }, { "epoch": 0.54, "grad_norm": 0.6259978829698777, "learning_rate": 4.577940498263124e-06, "loss": 0.1131, "step": 4013 }, { "epoch": 0.54, "grad_norm": 0.9091311548318229, "learning_rate": 4.575764547943087e-06, "loss": 0.1763, "step": 4014 }, { "epoch": 0.54, "grad_norm": 0.8869144228550706, "learning_rate": 4.573588678548772e-06, "loss": 0.1565, "step": 4015 }, { "epoch": 0.54, "grad_norm": 1.2896149328545616, "learning_rate": 4.571412890495239e-06, "loss": 0.2172, "step": 4016 }, { "epoch": 0.54, "grad_norm": 1.2176540061192198, "learning_rate": 4.569237184197534e-06, "loss": 0.2123, "step": 4017 }, { "epoch": 0.54, "grad_norm": 0.710202093276334, "learning_rate": 4.567061560070687e-06, "loss": 0.1626, "step": 4018 }, { "epoch": 0.54, "grad_norm": 1.0601108740801823, "learning_rate": 4.564886018529714e-06, "loss": 0.2425, "step": 4019 }, { "epoch": 0.54, "grad_norm": 0.8956149030683478, "learning_rate": 4.5627105599896126e-06, "loss": 0.1672, "step": 4020 }, { "epoch": 0.54, "grad_norm": 1.0704603689730194, "learning_rate": 4.5605351848653665e-06, "loss": 0.2132, "step": 4021 }, { "epoch": 0.54, "grad_norm": 0.9411502925122591, "learning_rate": 4.5583598935719416e-06, "loss": 0.1501, "step": 4022 }, { "epoch": 0.54, "grad_norm": 1.2342280117433357, "learning_rate": 4.556184686524293e-06, "loss": 0.2331, "step": 4023 }, { "epoch": 0.54, "grad_norm": 1.1207839596553553, "learning_rate": 4.5540095641373535e-06, "loss": 0.2243, "step": 4024 }, { "epoch": 0.54, "grad_norm": 0.9410658471447971, "learning_rate": 4.55183452682604e-06, "loss": 0.1644, "step": 4025 }, { "epoch": 0.54, "grad_norm": 0.8680724627406653, "learning_rate": 4.5496595750052595e-06, "loss": 0.1657, "step": 4026 }, { "epoch": 0.54, "grad_norm": 0.9823308432112777, "learning_rate": 4.547484709089895e-06, "loss": 0.174, "step": 4027 }, { "epoch": 0.54, "grad_norm": 0.8499308210466723, "learning_rate": 4.545309929494817e-06, "loss": 0.1652, "step": 4028 }, { "epoch": 0.54, "grad_norm": 0.924271049703917, "learning_rate": 4.543135236634881e-06, "loss": 0.2032, "step": 4029 }, { "epoch": 0.54, "grad_norm": 0.743848513086163, "learning_rate": 4.540960630924923e-06, "loss": 0.1598, "step": 4030 }, { "epoch": 0.54, "grad_norm": 1.0212506762756084, "learning_rate": 4.5387861127797625e-06, "loss": 0.2116, "step": 4031 }, { "epoch": 0.54, "grad_norm": 0.6010012224961518, "learning_rate": 4.5366116826142045e-06, "loss": 0.1248, "step": 4032 }, { "epoch": 0.54, "grad_norm": 1.1908766260894346, "learning_rate": 4.534437340843035e-06, "loss": 0.2708, "step": 4033 }, { "epoch": 0.54, "grad_norm": 0.8290303909564103, "learning_rate": 4.532263087881025e-06, "loss": 0.1112, "step": 4034 }, { "epoch": 0.54, "grad_norm": 0.4611738477111491, "learning_rate": 4.530088924142927e-06, "loss": 0.0934, "step": 4035 }, { "epoch": 0.54, "grad_norm": 0.8513272386388259, "learning_rate": 4.527914850043478e-06, "loss": 0.1736, "step": 4036 }, { "epoch": 0.54, "grad_norm": 0.9838473054440287, "learning_rate": 4.525740865997396e-06, "loss": 0.1688, "step": 4037 }, { "epoch": 0.54, "grad_norm": 1.1382452705436044, "learning_rate": 4.523566972419383e-06, "loss": 0.2355, "step": 4038 }, { "epoch": 0.54, "grad_norm": 0.9535398031508988, "learning_rate": 4.521393169724124e-06, "loss": 0.1676, "step": 4039 }, { "epoch": 0.54, "grad_norm": 0.8754414034418576, "learning_rate": 4.519219458326288e-06, "loss": 0.1552, "step": 4040 }, { "epoch": 0.54, "grad_norm": 1.213750850031794, "learning_rate": 4.517045838640521e-06, "loss": 0.2415, "step": 4041 }, { "epoch": 0.55, "grad_norm": 1.0646903186835386, "learning_rate": 4.514872311081458e-06, "loss": 0.1736, "step": 4042 }, { "epoch": 0.55, "grad_norm": 1.1345127328588112, "learning_rate": 4.512698876063713e-06, "loss": 0.2213, "step": 4043 }, { "epoch": 0.55, "grad_norm": 0.8770933454780391, "learning_rate": 4.51052553400188e-06, "loss": 0.1874, "step": 4044 }, { "epoch": 0.55, "grad_norm": 0.9781070597347284, "learning_rate": 4.508352285310545e-06, "loss": 0.2123, "step": 4045 }, { "epoch": 0.55, "grad_norm": 1.1899006760999102, "learning_rate": 4.5061791304042664e-06, "loss": 0.231, "step": 4046 }, { "epoch": 0.55, "grad_norm": 0.6514754645011095, "learning_rate": 4.504006069697586e-06, "loss": 0.1015, "step": 4047 }, { "epoch": 0.55, "grad_norm": 0.7135806317198017, "learning_rate": 4.501833103605032e-06, "loss": 0.1401, "step": 4048 }, { "epoch": 0.55, "grad_norm": 0.8036282012724237, "learning_rate": 4.499660232541112e-06, "loss": 0.1306, "step": 4049 }, { "epoch": 0.55, "grad_norm": 0.7320451886726567, "learning_rate": 4.497487456920313e-06, "loss": 0.0987, "step": 4050 }, { "epoch": 0.55, "grad_norm": 1.1606528760063328, "learning_rate": 4.495314777157109e-06, "loss": 0.1983, "step": 4051 }, { "epoch": 0.55, "grad_norm": 0.8239643162407717, "learning_rate": 4.493142193665952e-06, "loss": 0.1767, "step": 4052 }, { "epoch": 0.55, "grad_norm": 1.0674063098011812, "learning_rate": 4.4909697068612765e-06, "loss": 0.2433, "step": 4053 }, { "epoch": 0.55, "grad_norm": 1.0942715865190606, "learning_rate": 4.4887973171574994e-06, "loss": 0.2112, "step": 4054 }, { "epoch": 0.55, "grad_norm": 0.9280973661799354, "learning_rate": 4.486625024969018e-06, "loss": 0.1865, "step": 4055 }, { "epoch": 0.55, "grad_norm": 0.7742259707084385, "learning_rate": 4.48445283071021e-06, "loss": 0.1711, "step": 4056 }, { "epoch": 0.55, "grad_norm": 0.8807524525074715, "learning_rate": 4.4822807347954385e-06, "loss": 0.1547, "step": 4057 }, { "epoch": 0.55, "grad_norm": 0.8585319803567698, "learning_rate": 4.480108737639044e-06, "loss": 0.1574, "step": 4058 }, { "epoch": 0.55, "grad_norm": 1.1707702966288704, "learning_rate": 4.4779368396553505e-06, "loss": 0.214, "step": 4059 }, { "epoch": 0.55, "grad_norm": 0.9488487322160449, "learning_rate": 4.4757650412586594e-06, "loss": 0.1605, "step": 4060 }, { "epoch": 0.55, "grad_norm": 0.7100787750552442, "learning_rate": 4.4735933428632575e-06, "loss": 0.1399, "step": 4061 }, { "epoch": 0.55, "grad_norm": 0.7656167150022092, "learning_rate": 4.47142174488341e-06, "loss": 0.1423, "step": 4062 }, { "epoch": 0.55, "grad_norm": 0.8259217139523918, "learning_rate": 4.469250247733365e-06, "loss": 0.1951, "step": 4063 }, { "epoch": 0.55, "grad_norm": 0.8915774171501172, "learning_rate": 4.467078851827346e-06, "loss": 0.1634, "step": 4064 }, { "epoch": 0.55, "grad_norm": 0.9790131138045057, "learning_rate": 4.464907557579565e-06, "loss": 0.1824, "step": 4065 }, { "epoch": 0.55, "grad_norm": 1.1060879137307627, "learning_rate": 4.46273636540421e-06, "loss": 0.2469, "step": 4066 }, { "epoch": 0.55, "grad_norm": 0.8190436448632614, "learning_rate": 4.460565275715447e-06, "loss": 0.144, "step": 4067 }, { "epoch": 0.55, "grad_norm": 0.8297031286720372, "learning_rate": 4.458394288927431e-06, "loss": 0.1658, "step": 4068 }, { "epoch": 0.55, "grad_norm": 0.8148795311610765, "learning_rate": 4.45622340545429e-06, "loss": 0.1274, "step": 4069 }, { "epoch": 0.55, "grad_norm": 0.8815679485791238, "learning_rate": 4.454052625710134e-06, "loss": 0.1771, "step": 4070 }, { "epoch": 0.55, "grad_norm": 0.8392646544445846, "learning_rate": 4.4518819501090534e-06, "loss": 0.1718, "step": 4071 }, { "epoch": 0.55, "grad_norm": 1.2386216182143632, "learning_rate": 4.449711379065119e-06, "loss": 0.196, "step": 4072 }, { "epoch": 0.55, "grad_norm": 0.8666623255465813, "learning_rate": 4.447540912992382e-06, "loss": 0.1935, "step": 4073 }, { "epoch": 0.55, "grad_norm": 0.9212923115593503, "learning_rate": 4.4453705523048714e-06, "loss": 0.1598, "step": 4074 }, { "epoch": 0.55, "grad_norm": 0.8066923934859511, "learning_rate": 4.443200297416601e-06, "loss": 0.1296, "step": 4075 }, { "epoch": 0.55, "grad_norm": 1.173032827648446, "learning_rate": 4.4410301487415596e-06, "loss": 0.2146, "step": 4076 }, { "epoch": 0.55, "grad_norm": 0.9098098046078548, "learning_rate": 4.438860106693717e-06, "loss": 0.1441, "step": 4077 }, { "epoch": 0.55, "grad_norm": 0.9043190334816884, "learning_rate": 4.436690171687024e-06, "loss": 0.1752, "step": 4078 }, { "epoch": 0.55, "grad_norm": 0.8307129158130062, "learning_rate": 4.434520344135409e-06, "loss": 0.1823, "step": 4079 }, { "epoch": 0.55, "grad_norm": 0.8929591439607125, "learning_rate": 4.432350624452783e-06, "loss": 0.1785, "step": 4080 }, { "epoch": 0.55, "grad_norm": 1.0408125219348519, "learning_rate": 4.430181013053032e-06, "loss": 0.1609, "step": 4081 }, { "epoch": 0.55, "grad_norm": 0.9297187360258055, "learning_rate": 4.428011510350025e-06, "loss": 0.154, "step": 4082 }, { "epoch": 0.55, "grad_norm": 0.7433803096602359, "learning_rate": 4.425842116757609e-06, "loss": 0.1529, "step": 4083 }, { "epoch": 0.55, "grad_norm": 0.9717912414507244, "learning_rate": 4.4236728326896096e-06, "loss": 0.1872, "step": 4084 }, { "epoch": 0.55, "grad_norm": 0.8394959094484026, "learning_rate": 4.421503658559832e-06, "loss": 0.1743, "step": 4085 }, { "epoch": 0.55, "grad_norm": 1.4534737426942952, "learning_rate": 4.419334594782062e-06, "loss": 0.2925, "step": 4086 }, { "epoch": 0.55, "grad_norm": 0.9105984452734304, "learning_rate": 4.417165641770063e-06, "loss": 0.1903, "step": 4087 }, { "epoch": 0.55, "grad_norm": 1.0238123248831859, "learning_rate": 4.414996799937575e-06, "loss": 0.1351, "step": 4088 }, { "epoch": 0.55, "grad_norm": 1.2862331839588483, "learning_rate": 4.412828069698317e-06, "loss": 0.2205, "step": 4089 }, { "epoch": 0.55, "grad_norm": 1.0727312412957997, "learning_rate": 4.410659451465995e-06, "loss": 0.2015, "step": 4090 }, { "epoch": 0.55, "grad_norm": 1.1601423032698672, "learning_rate": 4.408490945654282e-06, "loss": 0.1821, "step": 4091 }, { "epoch": 0.55, "grad_norm": 0.9093717169883975, "learning_rate": 4.406322552676838e-06, "loss": 0.1582, "step": 4092 }, { "epoch": 0.55, "grad_norm": 0.8728885079446868, "learning_rate": 4.404154272947298e-06, "loss": 0.1539, "step": 4093 }, { "epoch": 0.55, "grad_norm": 0.8511054975265999, "learning_rate": 4.401986106879273e-06, "loss": 0.1169, "step": 4094 }, { "epoch": 0.55, "grad_norm": 1.0234634134188116, "learning_rate": 4.3998180548863576e-06, "loss": 0.21, "step": 4095 }, { "epoch": 0.55, "grad_norm": 1.2232808696113255, "learning_rate": 4.397650117382122e-06, "loss": 0.184, "step": 4096 }, { "epoch": 0.55, "grad_norm": 0.9202050380194406, "learning_rate": 4.395482294780112e-06, "loss": 0.1423, "step": 4097 }, { "epoch": 0.55, "grad_norm": 0.9066138048309804, "learning_rate": 4.393314587493857e-06, "loss": 0.1723, "step": 4098 }, { "epoch": 0.55, "grad_norm": 0.7230675468656245, "learning_rate": 4.3911469959368605e-06, "loss": 0.0809, "step": 4099 }, { "epoch": 0.55, "grad_norm": 0.7561007991005488, "learning_rate": 4.388979520522605e-06, "loss": 0.1164, "step": 4100 }, { "epoch": 0.55, "grad_norm": 0.910139865294084, "learning_rate": 4.3868121616645495e-06, "loss": 0.1868, "step": 4101 }, { "epoch": 0.55, "grad_norm": 0.8391222067305387, "learning_rate": 4.384644919776134e-06, "loss": 0.1295, "step": 4102 }, { "epoch": 0.55, "grad_norm": 0.8018865821232832, "learning_rate": 4.3824777952707736e-06, "loss": 0.1527, "step": 4103 }, { "epoch": 0.55, "grad_norm": 0.7188985436334914, "learning_rate": 4.3803107885618615e-06, "loss": 0.0894, "step": 4104 }, { "epoch": 0.55, "grad_norm": 0.6060783364462642, "learning_rate": 4.378143900062769e-06, "loss": 0.1408, "step": 4105 }, { "epoch": 0.55, "grad_norm": 0.8690236803189219, "learning_rate": 4.375977130186844e-06, "loss": 0.1912, "step": 4106 }, { "epoch": 0.55, "grad_norm": 0.9334437571599191, "learning_rate": 4.373810479347412e-06, "loss": 0.1772, "step": 4107 }, { "epoch": 0.55, "grad_norm": 0.9566805439908534, "learning_rate": 4.371643947957777e-06, "loss": 0.1707, "step": 4108 }, { "epoch": 0.55, "grad_norm": 0.8567539723634645, "learning_rate": 4.369477536431218e-06, "loss": 0.1267, "step": 4109 }, { "epoch": 0.55, "grad_norm": 0.7966280572862711, "learning_rate": 4.367311245180993e-06, "loss": 0.1296, "step": 4110 }, { "epoch": 0.55, "grad_norm": 0.7822870647851352, "learning_rate": 4.365145074620336e-06, "loss": 0.1827, "step": 4111 }, { "epoch": 0.55, "grad_norm": 0.9100850345146164, "learning_rate": 4.362979025162456e-06, "loss": 0.1651, "step": 4112 }, { "epoch": 0.55, "grad_norm": 0.8190390069536524, "learning_rate": 4.360813097220546e-06, "loss": 0.1542, "step": 4113 }, { "epoch": 0.55, "grad_norm": 0.9233266994912741, "learning_rate": 4.358647291207769e-06, "loss": 0.1944, "step": 4114 }, { "epoch": 0.55, "grad_norm": 1.2784655416391657, "learning_rate": 4.3564816075372675e-06, "loss": 0.2786, "step": 4115 }, { "epoch": 0.56, "grad_norm": 0.9185110289972842, "learning_rate": 4.354316046622159e-06, "loss": 0.1596, "step": 4116 }, { "epoch": 0.56, "grad_norm": 0.8450131800236639, "learning_rate": 4.35215060887554e-06, "loss": 0.1685, "step": 4117 }, { "epoch": 0.56, "grad_norm": 0.8418572110329651, "learning_rate": 4.349985294710478e-06, "loss": 0.1565, "step": 4118 }, { "epoch": 0.56, "grad_norm": 0.7106367375044466, "learning_rate": 4.347820104540026e-06, "loss": 0.174, "step": 4119 }, { "epoch": 0.56, "grad_norm": 0.7994350939269989, "learning_rate": 4.345655038777206e-06, "loss": 0.1674, "step": 4120 }, { "epoch": 0.56, "grad_norm": 1.0649610569256458, "learning_rate": 4.343490097835018e-06, "loss": 0.1913, "step": 4121 }, { "epoch": 0.56, "grad_norm": 1.0890211867676458, "learning_rate": 4.34132528212644e-06, "loss": 0.2145, "step": 4122 }, { "epoch": 0.56, "grad_norm": 1.2334438403606458, "learning_rate": 4.339160592064424e-06, "loss": 0.2073, "step": 4123 }, { "epoch": 0.56, "grad_norm": 1.0511043255951023, "learning_rate": 4.336996028061901e-06, "loss": 0.1889, "step": 4124 }, { "epoch": 0.56, "grad_norm": 0.9951541062244113, "learning_rate": 4.334831590531773e-06, "loss": 0.185, "step": 4125 }, { "epoch": 0.56, "grad_norm": 1.1191612501267645, "learning_rate": 4.3326672798869236e-06, "loss": 0.2358, "step": 4126 }, { "epoch": 0.56, "grad_norm": 1.1551304217625893, "learning_rate": 4.330503096540208e-06, "loss": 0.2372, "step": 4127 }, { "epoch": 0.56, "grad_norm": 1.0458751992033648, "learning_rate": 4.328339040904458e-06, "loss": 0.1853, "step": 4128 }, { "epoch": 0.56, "grad_norm": 0.9467668000196772, "learning_rate": 4.326175113392483e-06, "loss": 0.1865, "step": 4129 }, { "epoch": 0.56, "grad_norm": 0.8415132581654822, "learning_rate": 4.324011314417064e-06, "loss": 0.1762, "step": 4130 }, { "epoch": 0.56, "grad_norm": 0.9918147203127362, "learning_rate": 4.321847644390964e-06, "loss": 0.2097, "step": 4131 }, { "epoch": 0.56, "grad_norm": 1.1165682946278441, "learning_rate": 4.3196841037269135e-06, "loss": 0.1775, "step": 4132 }, { "epoch": 0.56, "grad_norm": 1.220155310058599, "learning_rate": 4.317520692837625e-06, "loss": 0.2123, "step": 4133 }, { "epoch": 0.56, "grad_norm": 1.0481163972883267, "learning_rate": 4.315357412135779e-06, "loss": 0.1836, "step": 4134 }, { "epoch": 0.56, "grad_norm": 1.0451749854735635, "learning_rate": 4.313194262034041e-06, "loss": 0.1799, "step": 4135 }, { "epoch": 0.56, "grad_norm": 0.9921384299744181, "learning_rate": 4.3110312429450435e-06, "loss": 0.1638, "step": 4136 }, { "epoch": 0.56, "grad_norm": 1.060426636121653, "learning_rate": 4.308868355281397e-06, "loss": 0.2068, "step": 4137 }, { "epoch": 0.56, "grad_norm": 0.7714851251276741, "learning_rate": 4.306705599455687e-06, "loss": 0.1114, "step": 4138 }, { "epoch": 0.56, "grad_norm": 0.9875981941134427, "learning_rate": 4.304542975880474e-06, "loss": 0.174, "step": 4139 }, { "epoch": 0.56, "grad_norm": 0.9680101521124876, "learning_rate": 4.302380484968293e-06, "loss": 0.1451, "step": 4140 }, { "epoch": 0.56, "grad_norm": 0.9293515932174983, "learning_rate": 4.30021812713165e-06, "loss": 0.1947, "step": 4141 }, { "epoch": 0.56, "grad_norm": 0.8566131032725971, "learning_rate": 4.2980559027830336e-06, "loss": 0.1654, "step": 4142 }, { "epoch": 0.56, "grad_norm": 0.784295075098625, "learning_rate": 4.295893812334899e-06, "loss": 0.1212, "step": 4143 }, { "epoch": 0.56, "grad_norm": 0.9253288024538547, "learning_rate": 4.293731856199682e-06, "loss": 0.1388, "step": 4144 }, { "epoch": 0.56, "grad_norm": 1.294931748706023, "learning_rate": 4.2915700347897854e-06, "loss": 0.2606, "step": 4145 }, { "epoch": 0.56, "grad_norm": 0.9634026751431582, "learning_rate": 4.2894083485175974e-06, "loss": 0.1502, "step": 4146 }, { "epoch": 0.56, "grad_norm": 1.089398376493181, "learning_rate": 4.28724679779547e-06, "loss": 0.2056, "step": 4147 }, { "epoch": 0.56, "grad_norm": 1.1726861012422092, "learning_rate": 4.2850853830357345e-06, "loss": 0.2124, "step": 4148 }, { "epoch": 0.56, "grad_norm": 1.1134002541294197, "learning_rate": 4.282924104650694e-06, "loss": 0.216, "step": 4149 }, { "epoch": 0.56, "grad_norm": 0.7213529751742481, "learning_rate": 4.280762963052628e-06, "loss": 0.171, "step": 4150 }, { "epoch": 0.56, "grad_norm": 0.7949120331478577, "learning_rate": 4.278601958653788e-06, "loss": 0.1044, "step": 4151 }, { "epoch": 0.56, "grad_norm": 0.8616212886328642, "learning_rate": 4.2764410918664e-06, "loss": 0.1738, "step": 4152 }, { "epoch": 0.56, "grad_norm": 1.006976092714174, "learning_rate": 4.274280363102663e-06, "loss": 0.1873, "step": 4153 }, { "epoch": 0.56, "grad_norm": 0.8536662913922969, "learning_rate": 4.27211977277475e-06, "loss": 0.1153, "step": 4154 }, { "epoch": 0.56, "grad_norm": 0.9698801349946972, "learning_rate": 4.269959321294809e-06, "loss": 0.1958, "step": 4155 }, { "epoch": 0.56, "grad_norm": 0.9552656297363844, "learning_rate": 4.267799009074961e-06, "loss": 0.1577, "step": 4156 }, { "epoch": 0.56, "grad_norm": 0.8510012443720653, "learning_rate": 4.265638836527297e-06, "loss": 0.1724, "step": 4157 }, { "epoch": 0.56, "grad_norm": 0.6031416670002322, "learning_rate": 4.263478804063888e-06, "loss": 0.166, "step": 4158 }, { "epoch": 0.56, "grad_norm": 0.9978147630216729, "learning_rate": 4.261318912096772e-06, "loss": 0.1404, "step": 4159 }, { "epoch": 0.56, "grad_norm": 1.1972300468722703, "learning_rate": 4.259159161037964e-06, "loss": 0.2221, "step": 4160 }, { "epoch": 0.56, "grad_norm": 0.7393346487078943, "learning_rate": 4.25699955129945e-06, "loss": 0.1555, "step": 4161 }, { "epoch": 0.56, "grad_norm": 0.7291015108458768, "learning_rate": 4.25484008329319e-06, "loss": 0.1198, "step": 4162 }, { "epoch": 0.56, "grad_norm": 0.9894766357695457, "learning_rate": 4.252680757431117e-06, "loss": 0.1808, "step": 4163 }, { "epoch": 0.56, "grad_norm": 0.9833411935168395, "learning_rate": 4.250521574125136e-06, "loss": 0.2069, "step": 4164 }, { "epoch": 0.56, "grad_norm": 0.8182886320611911, "learning_rate": 4.248362533787127e-06, "loss": 0.108, "step": 4165 }, { "epoch": 0.56, "grad_norm": 0.8367755570677949, "learning_rate": 4.24620363682894e-06, "loss": 0.1883, "step": 4166 }, { "epoch": 0.56, "grad_norm": 1.2225577630086737, "learning_rate": 4.244044883662398e-06, "loss": 0.2509, "step": 4167 }, { "epoch": 0.56, "grad_norm": 1.1914350845987183, "learning_rate": 4.241886274699298e-06, "loss": 0.2176, "step": 4168 }, { "epoch": 0.56, "grad_norm": 1.0687047057034496, "learning_rate": 4.239727810351411e-06, "loss": 0.2176, "step": 4169 }, { "epoch": 0.56, "grad_norm": 1.0185437280927194, "learning_rate": 4.237569491030477e-06, "loss": 0.1927, "step": 4170 }, { "epoch": 0.56, "grad_norm": 0.8867003796993029, "learning_rate": 4.235411317148209e-06, "loss": 0.1859, "step": 4171 }, { "epoch": 0.56, "grad_norm": 1.0881688304173232, "learning_rate": 4.233253289116295e-06, "loss": 0.2072, "step": 4172 }, { "epoch": 0.56, "grad_norm": 0.7137406022400554, "learning_rate": 4.231095407346391e-06, "loss": 0.1609, "step": 4173 }, { "epoch": 0.56, "grad_norm": 1.0302464737756754, "learning_rate": 4.228937672250128e-06, "loss": 0.1957, "step": 4174 }, { "epoch": 0.56, "grad_norm": 1.1415431297964815, "learning_rate": 4.226780084239107e-06, "loss": 0.2056, "step": 4175 }, { "epoch": 0.56, "grad_norm": 1.2220959921835037, "learning_rate": 4.224622643724905e-06, "loss": 0.1913, "step": 4176 }, { "epoch": 0.56, "grad_norm": 0.8064137585119698, "learning_rate": 4.222465351119065e-06, "loss": 0.1374, "step": 4177 }, { "epoch": 0.56, "grad_norm": 0.7035770668925161, "learning_rate": 4.220308206833107e-06, "loss": 0.1215, "step": 4178 }, { "epoch": 0.56, "grad_norm": 0.7735099807850028, "learning_rate": 4.2181512112785195e-06, "loss": 0.1237, "step": 4179 }, { "epoch": 0.56, "grad_norm": 0.930293463997304, "learning_rate": 4.215994364866765e-06, "loss": 0.2046, "step": 4180 }, { "epoch": 0.56, "grad_norm": 1.0820761952927465, "learning_rate": 4.213837668009274e-06, "loss": 0.1648, "step": 4181 }, { "epoch": 0.56, "grad_norm": 0.8589396725033536, "learning_rate": 4.211681121117452e-06, "loss": 0.1571, "step": 4182 }, { "epoch": 0.56, "grad_norm": 1.0653636171353231, "learning_rate": 4.209524724602676e-06, "loss": 0.1782, "step": 4183 }, { "epoch": 0.56, "grad_norm": 0.9855245189070144, "learning_rate": 4.207368478876291e-06, "loss": 0.1641, "step": 4184 }, { "epoch": 0.56, "grad_norm": 0.7712460160152794, "learning_rate": 4.205212384349615e-06, "loss": 0.1068, "step": 4185 }, { "epoch": 0.56, "grad_norm": 0.890889697199211, "learning_rate": 4.2030564414339395e-06, "loss": 0.1658, "step": 4186 }, { "epoch": 0.56, "grad_norm": 0.9768495230978815, "learning_rate": 4.2009006505405225e-06, "loss": 0.2154, "step": 4187 }, { "epoch": 0.56, "grad_norm": 0.8946853066804379, "learning_rate": 4.1987450120805955e-06, "loss": 0.1705, "step": 4188 }, { "epoch": 0.56, "grad_norm": 0.946277786606894, "learning_rate": 4.1965895264653625e-06, "loss": 0.1868, "step": 4189 }, { "epoch": 0.56, "grad_norm": 0.6289851755347592, "learning_rate": 4.194434194105993e-06, "loss": 0.1054, "step": 4190 }, { "epoch": 0.57, "grad_norm": 0.6337960196603084, "learning_rate": 4.192279015413635e-06, "loss": 0.1027, "step": 4191 }, { "epoch": 0.57, "grad_norm": 0.8567601266210736, "learning_rate": 4.1901239907994025e-06, "loss": 0.0982, "step": 4192 }, { "epoch": 0.57, "grad_norm": 1.2904871783516103, "learning_rate": 4.187969120674381e-06, "loss": 0.2679, "step": 4193 }, { "epoch": 0.57, "grad_norm": 1.125705117216332, "learning_rate": 4.185814405449623e-06, "loss": 0.2176, "step": 4194 }, { "epoch": 0.57, "grad_norm": 1.0064307380227573, "learning_rate": 4.183659845536158e-06, "loss": 0.195, "step": 4195 }, { "epoch": 0.57, "grad_norm": 0.9269246737410933, "learning_rate": 4.181505441344981e-06, "loss": 0.1431, "step": 4196 }, { "epoch": 0.57, "grad_norm": 1.1061247978287434, "learning_rate": 4.179351193287059e-06, "loss": 0.1856, "step": 4197 }, { "epoch": 0.57, "grad_norm": 0.9177950508044852, "learning_rate": 4.177197101773329e-06, "loss": 0.1415, "step": 4198 }, { "epoch": 0.57, "grad_norm": 0.887271568986184, "learning_rate": 4.175043167214699e-06, "loss": 0.2028, "step": 4199 }, { "epoch": 0.57, "grad_norm": 0.7078550037916349, "learning_rate": 4.172889390022046e-06, "loss": 0.1578, "step": 4200 }, { "epoch": 0.57, "grad_norm": 1.0805452163821907, "learning_rate": 4.170735770606217e-06, "loss": 0.1733, "step": 4201 }, { "epoch": 0.57, "grad_norm": 0.8943839649941758, "learning_rate": 4.168582309378028e-06, "loss": 0.1841, "step": 4202 }, { "epoch": 0.57, "grad_norm": 0.9278391334849861, "learning_rate": 4.166429006748268e-06, "loss": 0.1422, "step": 4203 }, { "epoch": 0.57, "grad_norm": 0.9249586476838173, "learning_rate": 4.164275863127694e-06, "loss": 0.1649, "step": 4204 }, { "epoch": 0.57, "grad_norm": 1.2126928488028028, "learning_rate": 4.16212287892703e-06, "loss": 0.2375, "step": 4205 }, { "epoch": 0.57, "grad_norm": 0.9612736433143728, "learning_rate": 4.159970054556974e-06, "loss": 0.1819, "step": 4206 }, { "epoch": 0.57, "grad_norm": 1.1255270205590455, "learning_rate": 4.157817390428191e-06, "loss": 0.2403, "step": 4207 }, { "epoch": 0.57, "grad_norm": 0.9902206430364914, "learning_rate": 4.155664886951315e-06, "loss": 0.1964, "step": 4208 }, { "epoch": 0.57, "grad_norm": 0.9251439992722739, "learning_rate": 4.153512544536952e-06, "loss": 0.1751, "step": 4209 }, { "epoch": 0.57, "grad_norm": 0.872256771175488, "learning_rate": 4.151360363595675e-06, "loss": 0.1658, "step": 4210 }, { "epoch": 0.57, "grad_norm": 1.208241801949881, "learning_rate": 4.149208344538026e-06, "loss": 0.2129, "step": 4211 }, { "epoch": 0.57, "grad_norm": 1.04595434535565, "learning_rate": 4.147056487774517e-06, "loss": 0.2094, "step": 4212 }, { "epoch": 0.57, "grad_norm": 1.0631927236337988, "learning_rate": 4.144904793715626e-06, "loss": 0.2147, "step": 4213 }, { "epoch": 0.57, "grad_norm": 0.9614890754833446, "learning_rate": 4.142753262771809e-06, "loss": 0.1677, "step": 4214 }, { "epoch": 0.57, "grad_norm": 1.1309573344753328, "learning_rate": 4.1406018953534825e-06, "loss": 0.2012, "step": 4215 }, { "epoch": 0.57, "grad_norm": 0.8550574802755017, "learning_rate": 4.138450691871033e-06, "loss": 0.1539, "step": 4216 }, { "epoch": 0.57, "grad_norm": 0.8526788694383064, "learning_rate": 4.136299652734818e-06, "loss": 0.132, "step": 4217 }, { "epoch": 0.57, "grad_norm": 1.333834725826035, "learning_rate": 4.13414877835516e-06, "loss": 0.2352, "step": 4218 }, { "epoch": 0.57, "grad_norm": 1.1685036675148126, "learning_rate": 4.131998069142354e-06, "loss": 0.2247, "step": 4219 }, { "epoch": 0.57, "grad_norm": 0.743967777113806, "learning_rate": 4.129847525506664e-06, "loss": 0.1392, "step": 4220 }, { "epoch": 0.57, "grad_norm": 0.7827536233701168, "learning_rate": 4.127697147858317e-06, "loss": 0.1421, "step": 4221 }, { "epoch": 0.57, "grad_norm": 0.9864448819699293, "learning_rate": 4.125546936607515e-06, "loss": 0.1973, "step": 4222 }, { "epoch": 0.57, "grad_norm": 0.9179879398121278, "learning_rate": 4.123396892164422e-06, "loss": 0.154, "step": 4223 }, { "epoch": 0.57, "grad_norm": 0.9886951239492435, "learning_rate": 4.121247014939174e-06, "loss": 0.2097, "step": 4224 }, { "epoch": 0.57, "grad_norm": 1.1182820209439202, "learning_rate": 4.119097305341877e-06, "loss": 0.2009, "step": 4225 }, { "epoch": 0.57, "grad_norm": 1.0301983387990012, "learning_rate": 4.116947763782599e-06, "loss": 0.1358, "step": 4226 }, { "epoch": 0.57, "grad_norm": 0.9993318764926071, "learning_rate": 4.114798390671381e-06, "loss": 0.1792, "step": 4227 }, { "epoch": 0.57, "grad_norm": 1.0719993577673506, "learning_rate": 4.112649186418229e-06, "loss": 0.2223, "step": 4228 }, { "epoch": 0.57, "grad_norm": 0.8498983007756447, "learning_rate": 4.11050015143312e-06, "loss": 0.1407, "step": 4229 }, { "epoch": 0.57, "grad_norm": 0.7083319149853353, "learning_rate": 4.1083512861259945e-06, "loss": 0.1351, "step": 4230 }, { "epoch": 0.57, "grad_norm": 0.9045948426990083, "learning_rate": 4.106202590906764e-06, "loss": 0.1922, "step": 4231 }, { "epoch": 0.57, "grad_norm": 1.1608746129639385, "learning_rate": 4.104054066185306e-06, "loss": 0.2145, "step": 4232 }, { "epoch": 0.57, "grad_norm": 1.0378795019151072, "learning_rate": 4.101905712371466e-06, "loss": 0.1576, "step": 4233 }, { "epoch": 0.57, "grad_norm": 1.1643210733695855, "learning_rate": 4.099757529875057e-06, "loss": 0.2302, "step": 4234 }, { "epoch": 0.57, "grad_norm": 1.1164009343307735, "learning_rate": 4.097609519105855e-06, "loss": 0.2376, "step": 4235 }, { "epoch": 0.57, "grad_norm": 1.3408624791048491, "learning_rate": 4.095461680473613e-06, "loss": 0.2544, "step": 4236 }, { "epoch": 0.57, "grad_norm": 0.737710779797675, "learning_rate": 4.093314014388043e-06, "loss": 0.1169, "step": 4237 }, { "epoch": 0.57, "grad_norm": 1.0733480546170082, "learning_rate": 4.091166521258828e-06, "loss": 0.1688, "step": 4238 }, { "epoch": 0.57, "grad_norm": 0.8538967475213206, "learning_rate": 4.089019201495616e-06, "loss": 0.1537, "step": 4239 }, { "epoch": 0.57, "grad_norm": 1.2106769958762, "learning_rate": 4.08687205550802e-06, "loss": 0.2297, "step": 4240 }, { "epoch": 0.57, "grad_norm": 0.8679750105358881, "learning_rate": 4.084725083705624e-06, "loss": 0.1458, "step": 4241 }, { "epoch": 0.57, "grad_norm": 1.084293791677736, "learning_rate": 4.0825782864979765e-06, "loss": 0.2162, "step": 4242 }, { "epoch": 0.57, "grad_norm": 1.040634754710091, "learning_rate": 4.080431664294595e-06, "loss": 0.2003, "step": 4243 }, { "epoch": 0.57, "grad_norm": 1.0580864806194143, "learning_rate": 4.078285217504961e-06, "loss": 0.1916, "step": 4244 }, { "epoch": 0.57, "grad_norm": 0.7261735961732284, "learning_rate": 4.076138946538523e-06, "loss": 0.1546, "step": 4245 }, { "epoch": 0.57, "grad_norm": 1.0607420150003017, "learning_rate": 4.073992851804695e-06, "loss": 0.2091, "step": 4246 }, { "epoch": 0.57, "grad_norm": 0.7748094919206441, "learning_rate": 4.071846933712862e-06, "loss": 0.1459, "step": 4247 }, { "epoch": 0.57, "grad_norm": 1.0898261167286223, "learning_rate": 4.069701192672369e-06, "loss": 0.2319, "step": 4248 }, { "epoch": 0.57, "grad_norm": 1.0885720090618063, "learning_rate": 4.067555629092534e-06, "loss": 0.2113, "step": 4249 }, { "epoch": 0.57, "grad_norm": 0.9823298484011916, "learning_rate": 4.0654102433826345e-06, "loss": 0.1714, "step": 4250 }, { "epoch": 0.57, "grad_norm": 0.9278810086666247, "learning_rate": 4.063265035951917e-06, "loss": 0.1535, "step": 4251 }, { "epoch": 0.57, "grad_norm": 0.9060001509791987, "learning_rate": 4.061120007209595e-06, "loss": 0.1691, "step": 4252 }, { "epoch": 0.57, "grad_norm": 0.9269308387371156, "learning_rate": 4.058975157564847e-06, "loss": 0.1613, "step": 4253 }, { "epoch": 0.57, "grad_norm": 0.8292464864496741, "learning_rate": 4.056830487426817e-06, "loss": 0.1755, "step": 4254 }, { "epoch": 0.57, "grad_norm": 0.7819177023027984, "learning_rate": 4.0546859972046155e-06, "loss": 0.1482, "step": 4255 }, { "epoch": 0.57, "grad_norm": 1.2537309577212188, "learning_rate": 4.052541687307318e-06, "loss": 0.2233, "step": 4256 }, { "epoch": 0.57, "grad_norm": 0.9456157896626027, "learning_rate": 4.050397558143965e-06, "loss": 0.166, "step": 4257 }, { "epoch": 0.57, "grad_norm": 0.874768215762556, "learning_rate": 4.048253610123562e-06, "loss": 0.1594, "step": 4258 }, { "epoch": 0.57, "grad_norm": 0.8639673860975546, "learning_rate": 4.046109843655084e-06, "loss": 0.1747, "step": 4259 }, { "epoch": 0.57, "grad_norm": 1.0378116066865217, "learning_rate": 4.043966259147468e-06, "loss": 0.1818, "step": 4260 }, { "epoch": 0.57, "grad_norm": 0.8696019583047706, "learning_rate": 4.0418228570096174e-06, "loss": 0.1853, "step": 4261 }, { "epoch": 0.57, "grad_norm": 0.7017695856377557, "learning_rate": 4.0396796376503995e-06, "loss": 0.13, "step": 4262 }, { "epoch": 0.57, "grad_norm": 0.9339363795184776, "learning_rate": 4.037536601478649e-06, "loss": 0.1746, "step": 4263 }, { "epoch": 0.57, "grad_norm": 1.0205860592333624, "learning_rate": 4.035393748903162e-06, "loss": 0.1942, "step": 4264 }, { "epoch": 0.58, "grad_norm": 0.9187630681679945, "learning_rate": 4.033251080332702e-06, "loss": 0.1447, "step": 4265 }, { "epoch": 0.58, "grad_norm": 1.0094850464406286, "learning_rate": 4.031108596175998e-06, "loss": 0.1801, "step": 4266 }, { "epoch": 0.58, "grad_norm": 0.9219462685693404, "learning_rate": 4.028966296841744e-06, "loss": 0.1927, "step": 4267 }, { "epoch": 0.58, "grad_norm": 1.1687809878401463, "learning_rate": 4.026824182738597e-06, "loss": 0.1717, "step": 4268 }, { "epoch": 0.58, "grad_norm": 0.8896347974989808, "learning_rate": 4.024682254275176e-06, "loss": 0.1802, "step": 4269 }, { "epoch": 0.58, "grad_norm": 0.7377228799190884, "learning_rate": 4.022540511860075e-06, "loss": 0.1506, "step": 4270 }, { "epoch": 0.58, "grad_norm": 1.2753933069929435, "learning_rate": 4.0203989559018396e-06, "loss": 0.204, "step": 4271 }, { "epoch": 0.58, "grad_norm": 0.8838197768004928, "learning_rate": 4.0182575868089895e-06, "loss": 0.1661, "step": 4272 }, { "epoch": 0.58, "grad_norm": 0.9541817011824509, "learning_rate": 4.016116404990003e-06, "loss": 0.1707, "step": 4273 }, { "epoch": 0.58, "grad_norm": 1.1560388261911985, "learning_rate": 4.013975410853325e-06, "loss": 0.226, "step": 4274 }, { "epoch": 0.58, "grad_norm": 1.0264929107021035, "learning_rate": 4.0118346048073656e-06, "loss": 0.1576, "step": 4275 }, { "epoch": 0.58, "grad_norm": 0.6864703715611559, "learning_rate": 4.009693987260496e-06, "loss": 0.1281, "step": 4276 }, { "epoch": 0.58, "grad_norm": 1.045446454584618, "learning_rate": 4.007553558621054e-06, "loss": 0.1782, "step": 4277 }, { "epoch": 0.58, "grad_norm": 0.884685069169878, "learning_rate": 4.00541331929734e-06, "loss": 0.1954, "step": 4278 }, { "epoch": 0.58, "grad_norm": 1.0342211055316315, "learning_rate": 4.003273269697619e-06, "loss": 0.1802, "step": 4279 }, { "epoch": 0.58, "grad_norm": 0.602777805988345, "learning_rate": 4.001133410230119e-06, "loss": 0.1093, "step": 4280 }, { "epoch": 0.58, "grad_norm": 1.309743549341959, "learning_rate": 3.998993741303035e-06, "loss": 0.2056, "step": 4281 }, { "epoch": 0.58, "grad_norm": 1.0215130020801706, "learning_rate": 3.99685426332452e-06, "loss": 0.1738, "step": 4282 }, { "epoch": 0.58, "grad_norm": 0.8717393824833066, "learning_rate": 3.994714976702695e-06, "loss": 0.1498, "step": 4283 }, { "epoch": 0.58, "grad_norm": 0.6082546377672297, "learning_rate": 3.9925758818456436e-06, "loss": 0.1258, "step": 4284 }, { "epoch": 0.58, "grad_norm": 0.7654086571678702, "learning_rate": 3.990436979161411e-06, "loss": 0.1044, "step": 4285 }, { "epoch": 0.58, "grad_norm": 0.9158785328091376, "learning_rate": 3.988298269058009e-06, "loss": 0.1598, "step": 4286 }, { "epoch": 0.58, "grad_norm": 0.987609245276838, "learning_rate": 3.986159751943408e-06, "loss": 0.1803, "step": 4287 }, { "epoch": 0.58, "grad_norm": 0.9462328741268768, "learning_rate": 3.984021428225546e-06, "loss": 0.1343, "step": 4288 }, { "epoch": 0.58, "grad_norm": 0.9998912109767663, "learning_rate": 3.9818832983123215e-06, "loss": 0.1858, "step": 4289 }, { "epoch": 0.58, "grad_norm": 1.0325526380204775, "learning_rate": 3.979745362611598e-06, "loss": 0.2002, "step": 4290 }, { "epoch": 0.58, "grad_norm": 0.7783522462477583, "learning_rate": 3.9776076215311975e-06, "loss": 0.1083, "step": 4291 }, { "epoch": 0.58, "grad_norm": 1.029685819483452, "learning_rate": 3.975470075478915e-06, "loss": 0.1796, "step": 4292 }, { "epoch": 0.58, "grad_norm": 1.0450909182613894, "learning_rate": 3.973332724862497e-06, "loss": 0.1605, "step": 4293 }, { "epoch": 0.58, "grad_norm": 0.7466294458932318, "learning_rate": 3.971195570089657e-06, "loss": 0.1426, "step": 4294 }, { "epoch": 0.58, "grad_norm": 1.0204528926741319, "learning_rate": 3.9690586115680725e-06, "loss": 0.2043, "step": 4295 }, { "epoch": 0.58, "grad_norm": 1.0932758574710444, "learning_rate": 3.966921849705383e-06, "loss": 0.2045, "step": 4296 }, { "epoch": 0.58, "grad_norm": 0.9760282748949191, "learning_rate": 3.964785284909188e-06, "loss": 0.174, "step": 4297 }, { "epoch": 0.58, "grad_norm": 1.1197683567651049, "learning_rate": 3.962648917587053e-06, "loss": 0.2089, "step": 4298 }, { "epoch": 0.58, "grad_norm": 1.0153846857327227, "learning_rate": 3.960512748146505e-06, "loss": 0.2231, "step": 4299 }, { "epoch": 0.58, "grad_norm": 1.1300732952473194, "learning_rate": 3.958376776995031e-06, "loss": 0.1981, "step": 4300 }, { "epoch": 0.58, "grad_norm": 0.8421186219261809, "learning_rate": 3.956241004540081e-06, "loss": 0.1596, "step": 4301 }, { "epoch": 0.58, "grad_norm": 0.9562758227105759, "learning_rate": 3.95410543118907e-06, "loss": 0.1649, "step": 4302 }, { "epoch": 0.58, "grad_norm": 1.0277270805932626, "learning_rate": 3.95197005734937e-06, "loss": 0.2239, "step": 4303 }, { "epoch": 0.58, "grad_norm": 1.0423714290241082, "learning_rate": 3.94983488342832e-06, "loss": 0.166, "step": 4304 }, { "epoch": 0.58, "grad_norm": 1.049547593067017, "learning_rate": 3.947699909833218e-06, "loss": 0.1694, "step": 4305 }, { "epoch": 0.58, "grad_norm": 0.7664576564245964, "learning_rate": 3.9455651369713246e-06, "loss": 0.1468, "step": 4306 }, { "epoch": 0.58, "grad_norm": 0.8572318796250151, "learning_rate": 3.94343056524986e-06, "loss": 0.1593, "step": 4307 }, { "epoch": 0.58, "grad_norm": 0.8844059486163716, "learning_rate": 3.941296195076011e-06, "loss": 0.156, "step": 4308 }, { "epoch": 0.58, "grad_norm": 0.9805527451499938, "learning_rate": 3.939162026856921e-06, "loss": 0.1652, "step": 4309 }, { "epoch": 0.58, "grad_norm": 0.8609219995939573, "learning_rate": 3.937028060999696e-06, "loss": 0.1429, "step": 4310 }, { "epoch": 0.58, "grad_norm": 1.0427528041432963, "learning_rate": 3.934894297911404e-06, "loss": 0.227, "step": 4311 }, { "epoch": 0.58, "grad_norm": 0.8119318405515041, "learning_rate": 3.932760737999077e-06, "loss": 0.1303, "step": 4312 }, { "epoch": 0.58, "grad_norm": 0.6693403507997321, "learning_rate": 3.930627381669703e-06, "loss": 0.1331, "step": 4313 }, { "epoch": 0.58, "grad_norm": 1.362416561045485, "learning_rate": 3.928494229330233e-06, "loss": 0.2397, "step": 4314 }, { "epoch": 0.58, "grad_norm": 1.2590251441993785, "learning_rate": 3.9263612813875835e-06, "loss": 0.2213, "step": 4315 }, { "epoch": 0.58, "grad_norm": 1.2539309767364777, "learning_rate": 3.9242285382486275e-06, "loss": 0.2161, "step": 4316 }, { "epoch": 0.58, "grad_norm": 0.9358855406920246, "learning_rate": 3.922096000320199e-06, "loss": 0.1475, "step": 4317 }, { "epoch": 0.58, "grad_norm": 1.1556718858877635, "learning_rate": 3.919963668009093e-06, "loss": 0.2528, "step": 4318 }, { "epoch": 0.58, "grad_norm": 0.9046784105484051, "learning_rate": 3.9178315417220684e-06, "loss": 0.153, "step": 4319 }, { "epoch": 0.58, "grad_norm": 0.7610395338627288, "learning_rate": 3.915699621865839e-06, "loss": 0.1612, "step": 4320 }, { "epoch": 0.58, "grad_norm": 1.0275703229240063, "learning_rate": 3.913567908847085e-06, "loss": 0.214, "step": 4321 }, { "epoch": 0.58, "grad_norm": 0.7897235010122751, "learning_rate": 3.9114364030724446e-06, "loss": 0.1496, "step": 4322 }, { "epoch": 0.58, "grad_norm": 0.996369238849826, "learning_rate": 3.909305104948516e-06, "loss": 0.1899, "step": 4323 }, { "epoch": 0.58, "grad_norm": 0.7536907347742022, "learning_rate": 3.907174014881858e-06, "loss": 0.1143, "step": 4324 }, { "epoch": 0.58, "grad_norm": 1.2467441571725923, "learning_rate": 3.905043133278991e-06, "loss": 0.2089, "step": 4325 }, { "epoch": 0.58, "grad_norm": 0.6771452182861245, "learning_rate": 3.902912460546395e-06, "loss": 0.1399, "step": 4326 }, { "epoch": 0.58, "grad_norm": 0.9681263652489857, "learning_rate": 3.9007819970905095e-06, "loss": 0.2063, "step": 4327 }, { "epoch": 0.58, "grad_norm": 1.0129340410876682, "learning_rate": 3.898651743317735e-06, "loss": 0.2166, "step": 4328 }, { "epoch": 0.58, "grad_norm": 0.9320811197834922, "learning_rate": 3.896521699634431e-06, "loss": 0.165, "step": 4329 }, { "epoch": 0.58, "grad_norm": 1.0777103487242672, "learning_rate": 3.894391866446917e-06, "loss": 0.2084, "step": 4330 }, { "epoch": 0.58, "grad_norm": 0.7457118381794521, "learning_rate": 3.892262244161473e-06, "loss": 0.1563, "step": 4331 }, { "epoch": 0.58, "grad_norm": 0.9813617376673082, "learning_rate": 3.89013283318434e-06, "loss": 0.1526, "step": 4332 }, { "epoch": 0.58, "grad_norm": 0.9566108207079296, "learning_rate": 3.888003633921716e-06, "loss": 0.1847, "step": 4333 }, { "epoch": 0.58, "grad_norm": 0.7629111886823083, "learning_rate": 3.885874646779759e-06, "loss": 0.1124, "step": 4334 }, { "epoch": 0.58, "grad_norm": 0.7237974208251365, "learning_rate": 3.883745872164588e-06, "loss": 0.1214, "step": 4335 }, { "epoch": 0.58, "grad_norm": 0.8195315978424395, "learning_rate": 3.88161731048228e-06, "loss": 0.1854, "step": 4336 }, { "epoch": 0.58, "grad_norm": 1.091040009199511, "learning_rate": 3.879488962138875e-06, "loss": 0.2121, "step": 4337 }, { "epoch": 0.58, "grad_norm": 0.8312332072644686, "learning_rate": 3.877360827540367e-06, "loss": 0.1655, "step": 4338 }, { "epoch": 0.59, "grad_norm": 0.9145977387692709, "learning_rate": 3.875232907092715e-06, "loss": 0.1379, "step": 4339 }, { "epoch": 0.59, "grad_norm": 0.930743684841806, "learning_rate": 3.87310520120183e-06, "loss": 0.19, "step": 4340 }, { "epoch": 0.59, "grad_norm": 1.019461174775143, "learning_rate": 3.870977710273588e-06, "loss": 0.1885, "step": 4341 }, { "epoch": 0.59, "grad_norm": 0.9570020211337245, "learning_rate": 3.868850434713821e-06, "loss": 0.1857, "step": 4342 }, { "epoch": 0.59, "grad_norm": 0.8763609182615707, "learning_rate": 3.866723374928322e-06, "loss": 0.1589, "step": 4343 }, { "epoch": 0.59, "grad_norm": 0.7951626454630002, "learning_rate": 3.86459653132284e-06, "loss": 0.1441, "step": 4344 }, { "epoch": 0.59, "grad_norm": 1.1047139952974843, "learning_rate": 3.862469904303087e-06, "loss": 0.2462, "step": 4345 }, { "epoch": 0.59, "grad_norm": 1.0648137395163366, "learning_rate": 3.860343494274729e-06, "loss": 0.179, "step": 4346 }, { "epoch": 0.59, "grad_norm": 0.8008961703247325, "learning_rate": 3.858217301643393e-06, "loss": 0.1647, "step": 4347 }, { "epoch": 0.59, "grad_norm": 0.896981207075646, "learning_rate": 3.8560913268146664e-06, "loss": 0.1627, "step": 4348 }, { "epoch": 0.59, "grad_norm": 1.110358597877956, "learning_rate": 3.85396557019409e-06, "loss": 0.2025, "step": 4349 }, { "epoch": 0.59, "grad_norm": 1.086913851395784, "learning_rate": 3.851840032187169e-06, "loss": 0.2342, "step": 4350 }, { "epoch": 0.59, "grad_norm": 1.0678556076209735, "learning_rate": 3.849714713199362e-06, "loss": 0.157, "step": 4351 }, { "epoch": 0.59, "grad_norm": 1.0083109904489493, "learning_rate": 3.8475896136360876e-06, "loss": 0.1947, "step": 4352 }, { "epoch": 0.59, "grad_norm": 0.9013780974241382, "learning_rate": 3.8454647339027225e-06, "loss": 0.1276, "step": 4353 }, { "epoch": 0.59, "grad_norm": 1.241728486750486, "learning_rate": 3.8433400744046035e-06, "loss": 0.229, "step": 4354 }, { "epoch": 0.59, "grad_norm": 0.7984764583538554, "learning_rate": 3.841215635547021e-06, "loss": 0.1771, "step": 4355 }, { "epoch": 0.59, "grad_norm": 1.0424173464922903, "learning_rate": 3.839091417735228e-06, "loss": 0.2025, "step": 4356 }, { "epoch": 0.59, "grad_norm": 0.6104221074752221, "learning_rate": 3.836967421374431e-06, "loss": 0.1179, "step": 4357 }, { "epoch": 0.59, "grad_norm": 1.1333987416793896, "learning_rate": 3.834843646869797e-06, "loss": 0.2042, "step": 4358 }, { "epoch": 0.59, "grad_norm": 0.9379317587905673, "learning_rate": 3.832720094626449e-06, "loss": 0.2058, "step": 4359 }, { "epoch": 0.59, "grad_norm": 0.6089141225165466, "learning_rate": 3.830596765049471e-06, "loss": 0.1058, "step": 4360 }, { "epoch": 0.59, "grad_norm": 0.9612654948431042, "learning_rate": 3.828473658543902e-06, "loss": 0.1711, "step": 4361 }, { "epoch": 0.59, "grad_norm": 0.6895698018751334, "learning_rate": 3.826350775514739e-06, "loss": 0.1502, "step": 4362 }, { "epoch": 0.59, "grad_norm": 0.8025653941494961, "learning_rate": 3.824228116366933e-06, "loss": 0.1333, "step": 4363 }, { "epoch": 0.59, "grad_norm": 0.6485388846390948, "learning_rate": 3.8221056815053966e-06, "loss": 0.1259, "step": 4364 }, { "epoch": 0.59, "grad_norm": 0.8448730437153577, "learning_rate": 3.8199834713349995e-06, "loss": 0.1274, "step": 4365 }, { "epoch": 0.59, "grad_norm": 0.8629960959858458, "learning_rate": 3.8178614862605666e-06, "loss": 0.1658, "step": 4366 }, { "epoch": 0.59, "grad_norm": 0.9787737186563928, "learning_rate": 3.81573972668688e-06, "loss": 0.142, "step": 4367 }, { "epoch": 0.59, "grad_norm": 0.7838068830547757, "learning_rate": 3.813618193018681e-06, "loss": 0.1265, "step": 4368 }, { "epoch": 0.59, "grad_norm": 1.1265947429829317, "learning_rate": 3.8114968856606644e-06, "loss": 0.1786, "step": 4369 }, { "epoch": 0.59, "grad_norm": 0.6884550410794877, "learning_rate": 3.8093758050174833e-06, "loss": 0.1595, "step": 4370 }, { "epoch": 0.59, "grad_norm": 0.9506723351868047, "learning_rate": 3.80725495149375e-06, "loss": 0.1481, "step": 4371 }, { "epoch": 0.59, "grad_norm": 1.1858330603745277, "learning_rate": 3.805134325494031e-06, "loss": 0.2093, "step": 4372 }, { "epoch": 0.59, "grad_norm": 0.8121047944787255, "learning_rate": 3.8030139274228482e-06, "loss": 0.1602, "step": 4373 }, { "epoch": 0.59, "grad_norm": 0.6616328853151616, "learning_rate": 3.8008937576846826e-06, "loss": 0.1117, "step": 4374 }, { "epoch": 0.59, "grad_norm": 0.7762314122844813, "learning_rate": 3.7987738166839705e-06, "loss": 0.1075, "step": 4375 }, { "epoch": 0.59, "grad_norm": 0.7978710968400071, "learning_rate": 3.796654104825105e-06, "loss": 0.1426, "step": 4376 }, { "epoch": 0.59, "grad_norm": 1.0249015597028943, "learning_rate": 3.7945346225124345e-06, "loss": 0.1792, "step": 4377 }, { "epoch": 0.59, "grad_norm": 1.0114177233775752, "learning_rate": 3.792415370150264e-06, "loss": 0.1687, "step": 4378 }, { "epoch": 0.59, "grad_norm": 0.9128591590129569, "learning_rate": 3.7902963481428566e-06, "loss": 0.2018, "step": 4379 }, { "epoch": 0.59, "grad_norm": 0.8165488894415546, "learning_rate": 3.788177556894429e-06, "loss": 0.1305, "step": 4380 }, { "epoch": 0.59, "grad_norm": 0.9883363558048808, "learning_rate": 3.786058996809151e-06, "loss": 0.1908, "step": 4381 }, { "epoch": 0.59, "grad_norm": 1.1365464212106637, "learning_rate": 3.783940668291158e-06, "loss": 0.1901, "step": 4382 }, { "epoch": 0.59, "grad_norm": 1.1404514480563013, "learning_rate": 3.7818225717445316e-06, "loss": 0.2058, "step": 4383 }, { "epoch": 0.59, "grad_norm": 1.1129470563321358, "learning_rate": 3.779704707573314e-06, "loss": 0.1812, "step": 4384 }, { "epoch": 0.59, "grad_norm": 0.9550327046781285, "learning_rate": 3.7775870761815016e-06, "loss": 0.154, "step": 4385 }, { "epoch": 0.59, "grad_norm": 0.852461900926676, "learning_rate": 3.775469677973047e-06, "loss": 0.1198, "step": 4386 }, { "epoch": 0.59, "grad_norm": 0.7979021743045372, "learning_rate": 3.773352513351857e-06, "loss": 0.1411, "step": 4387 }, { "epoch": 0.59, "grad_norm": 0.8966331418071114, "learning_rate": 3.7712355827217946e-06, "loss": 0.1673, "step": 4388 }, { "epoch": 0.59, "grad_norm": 1.1429355612818262, "learning_rate": 3.769118886486679e-06, "loss": 0.1648, "step": 4389 }, { "epoch": 0.59, "grad_norm": 0.9853186194581216, "learning_rate": 3.7670024250502836e-06, "loss": 0.1318, "step": 4390 }, { "epoch": 0.59, "grad_norm": 1.0504913357284047, "learning_rate": 3.7648861988163376e-06, "loss": 0.1763, "step": 4391 }, { "epoch": 0.59, "grad_norm": 0.7817560271003773, "learning_rate": 3.7627702081885243e-06, "loss": 0.1708, "step": 4392 }, { "epoch": 0.59, "grad_norm": 0.9054835621938473, "learning_rate": 3.760654453570485e-06, "loss": 0.1504, "step": 4393 }, { "epoch": 0.59, "grad_norm": 0.9988285676439191, "learning_rate": 3.758538935365813e-06, "loss": 0.2048, "step": 4394 }, { "epoch": 0.59, "grad_norm": 0.861377987307909, "learning_rate": 3.756423653978057e-06, "loss": 0.132, "step": 4395 }, { "epoch": 0.59, "grad_norm": 0.9454423015623813, "learning_rate": 3.7543086098107206e-06, "loss": 0.1708, "step": 4396 }, { "epoch": 0.59, "grad_norm": 1.0590714517549784, "learning_rate": 3.7521938032672634e-06, "loss": 0.209, "step": 4397 }, { "epoch": 0.59, "grad_norm": 0.7323216905502147, "learning_rate": 3.750079234751098e-06, "loss": 0.1412, "step": 4398 }, { "epoch": 0.59, "grad_norm": 0.8939032014540864, "learning_rate": 3.747964904665593e-06, "loss": 0.1476, "step": 4399 }, { "epoch": 0.59, "grad_norm": 1.0430439294381324, "learning_rate": 3.74585081341407e-06, "loss": 0.1742, "step": 4400 }, { "epoch": 0.59, "grad_norm": 0.8705340264317661, "learning_rate": 3.7437369613998076e-06, "loss": 0.1507, "step": 4401 }, { "epoch": 0.59, "grad_norm": 1.1462817525901825, "learning_rate": 3.741623349026035e-06, "loss": 0.2135, "step": 4402 }, { "epoch": 0.59, "grad_norm": 1.1465349334619217, "learning_rate": 3.7395099766959398e-06, "loss": 0.2087, "step": 4403 }, { "epoch": 0.59, "grad_norm": 0.9618171696341987, "learning_rate": 3.7373968448126574e-06, "loss": 0.1679, "step": 4404 }, { "epoch": 0.59, "grad_norm": 0.9799370337088611, "learning_rate": 3.7352839537792874e-06, "loss": 0.1962, "step": 4405 }, { "epoch": 0.59, "grad_norm": 0.8457284413632584, "learning_rate": 3.733171303998875e-06, "loss": 0.1329, "step": 4406 }, { "epoch": 0.59, "grad_norm": 1.0581805302951763, "learning_rate": 3.731058895874422e-06, "loss": 0.2168, "step": 4407 }, { "epoch": 0.59, "grad_norm": 0.8844634071053589, "learning_rate": 3.7289467298088843e-06, "loss": 0.14, "step": 4408 }, { "epoch": 0.59, "grad_norm": 0.7998596245250369, "learning_rate": 3.726834806205174e-06, "loss": 0.1543, "step": 4409 }, { "epoch": 0.59, "grad_norm": 0.9295592868120984, "learning_rate": 3.72472312546615e-06, "loss": 0.2057, "step": 4410 }, { "epoch": 0.59, "grad_norm": 0.9839983325812759, "learning_rate": 3.722611687994633e-06, "loss": 0.1686, "step": 4411 }, { "epoch": 0.59, "grad_norm": 1.0969758523090805, "learning_rate": 3.720500494193392e-06, "loss": 0.2022, "step": 4412 }, { "epoch": 0.6, "grad_norm": 0.8652971790423482, "learning_rate": 3.718389544465151e-06, "loss": 0.107, "step": 4413 }, { "epoch": 0.6, "grad_norm": 1.1246244523623548, "learning_rate": 3.716278839212589e-06, "loss": 0.2148, "step": 4414 }, { "epoch": 0.6, "grad_norm": 1.2935225858308905, "learning_rate": 3.7141683788383354e-06, "loss": 0.2345, "step": 4415 }, { "epoch": 0.6, "grad_norm": 0.6921995408167112, "learning_rate": 3.712058163744976e-06, "loss": 0.1357, "step": 4416 }, { "epoch": 0.6, "grad_norm": 1.0247927553044356, "learning_rate": 3.7099481943350472e-06, "loss": 0.1604, "step": 4417 }, { "epoch": 0.6, "grad_norm": 0.9782041461346989, "learning_rate": 3.7078384710110404e-06, "loss": 0.1972, "step": 4418 }, { "epoch": 0.6, "grad_norm": 0.728102977436545, "learning_rate": 3.7057289941753992e-06, "loss": 0.1187, "step": 4419 }, { "epoch": 0.6, "grad_norm": 1.2266249848607977, "learning_rate": 3.7036197642305193e-06, "loss": 0.1864, "step": 4420 }, { "epoch": 0.6, "grad_norm": 0.8673612894284716, "learning_rate": 3.7015107815787523e-06, "loss": 0.1644, "step": 4421 }, { "epoch": 0.6, "grad_norm": 1.004667994532613, "learning_rate": 3.699402046622398e-06, "loss": 0.1553, "step": 4422 }, { "epoch": 0.6, "grad_norm": 0.8500833747112413, "learning_rate": 3.697293559763714e-06, "loss": 0.177, "step": 4423 }, { "epoch": 0.6, "grad_norm": 1.0191450765466155, "learning_rate": 3.695185321404906e-06, "loss": 0.196, "step": 4424 }, { "epoch": 0.6, "grad_norm": 0.7963899272908277, "learning_rate": 3.693077331948136e-06, "loss": 0.1649, "step": 4425 }, { "epoch": 0.6, "grad_norm": 1.082095325474534, "learning_rate": 3.6909695917955145e-06, "loss": 0.1664, "step": 4426 }, { "epoch": 0.6, "grad_norm": 1.0368060975357833, "learning_rate": 3.688862101349111e-06, "loss": 0.1745, "step": 4427 }, { "epoch": 0.6, "grad_norm": 0.7942456868032501, "learning_rate": 3.68675486101094e-06, "loss": 0.1321, "step": 4428 }, { "epoch": 0.6, "grad_norm": 0.994254114113499, "learning_rate": 3.684647871182973e-06, "loss": 0.1691, "step": 4429 }, { "epoch": 0.6, "grad_norm": 0.9163162103059245, "learning_rate": 3.6825411322671318e-06, "loss": 0.1903, "step": 4430 }, { "epoch": 0.6, "grad_norm": 0.8793965199358442, "learning_rate": 3.6804346446652904e-06, "loss": 0.1441, "step": 4431 }, { "epoch": 0.6, "grad_norm": 1.0549612049608847, "learning_rate": 3.678328408779276e-06, "loss": 0.1824, "step": 4432 }, { "epoch": 0.6, "grad_norm": 0.8764979411811156, "learning_rate": 3.6762224250108674e-06, "loss": 0.1671, "step": 4433 }, { "epoch": 0.6, "grad_norm": 0.7982902111753772, "learning_rate": 3.6741166937617935e-06, "loss": 0.1375, "step": 4434 }, { "epoch": 0.6, "grad_norm": 1.028769820396496, "learning_rate": 3.6720112154337375e-06, "loss": 0.1945, "step": 4435 }, { "epoch": 0.6, "grad_norm": 0.856837345886647, "learning_rate": 3.669905990428332e-06, "loss": 0.1277, "step": 4436 }, { "epoch": 0.6, "grad_norm": 0.9271705832557484, "learning_rate": 3.6678010191471625e-06, "loss": 0.1316, "step": 4437 }, { "epoch": 0.6, "grad_norm": 1.0004998055770955, "learning_rate": 3.665696301991769e-06, "loss": 0.1731, "step": 4438 }, { "epoch": 0.6, "grad_norm": 0.7973974758750759, "learning_rate": 3.6635918393636394e-06, "loss": 0.1507, "step": 4439 }, { "epoch": 0.6, "grad_norm": 1.0560277695024831, "learning_rate": 3.6614876316642125e-06, "loss": 0.2458, "step": 4440 }, { "epoch": 0.6, "grad_norm": 1.3483038254270716, "learning_rate": 3.65938367929488e-06, "loss": 0.231, "step": 4441 }, { "epoch": 0.6, "grad_norm": 0.7392127727252089, "learning_rate": 3.6572799826569847e-06, "loss": 0.1225, "step": 4442 }, { "epoch": 0.6, "grad_norm": 0.7292942232325496, "learning_rate": 3.655176542151822e-06, "loss": 0.1204, "step": 4443 }, { "epoch": 0.6, "grad_norm": 0.8412470707434573, "learning_rate": 3.6530733581806356e-06, "loss": 0.1616, "step": 4444 }, { "epoch": 0.6, "grad_norm": 0.6786400787908524, "learning_rate": 3.6509704311446227e-06, "loss": 0.1558, "step": 4445 }, { "epoch": 0.6, "grad_norm": 0.7886541910143329, "learning_rate": 3.64886776144493e-06, "loss": 0.1322, "step": 4446 }, { "epoch": 0.6, "grad_norm": 1.161657849575285, "learning_rate": 3.646765349482656e-06, "loss": 0.2134, "step": 4447 }, { "epoch": 0.6, "grad_norm": 1.0255639568542627, "learning_rate": 3.6446631956588485e-06, "loss": 0.2234, "step": 4448 }, { "epoch": 0.6, "grad_norm": 0.7376456278593372, "learning_rate": 3.6425613003745096e-06, "loss": 0.1445, "step": 4449 }, { "epoch": 0.6, "grad_norm": 1.0738157601614973, "learning_rate": 3.640459664030588e-06, "loss": 0.1831, "step": 4450 }, { "epoch": 0.6, "grad_norm": 0.9038256235729651, "learning_rate": 3.6383582870279853e-06, "loss": 0.1975, "step": 4451 }, { "epoch": 0.6, "grad_norm": 0.7245501511684216, "learning_rate": 3.6362571697675535e-06, "loss": 0.1655, "step": 4452 }, { "epoch": 0.6, "grad_norm": 1.2588851372719416, "learning_rate": 3.6341563126500933e-06, "loss": 0.2166, "step": 4453 }, { "epoch": 0.6, "grad_norm": 1.0911587710991606, "learning_rate": 3.6320557160763594e-06, "loss": 0.1878, "step": 4454 }, { "epoch": 0.6, "grad_norm": 1.0700135443224643, "learning_rate": 3.629955380447051e-06, "loss": 0.1784, "step": 4455 }, { "epoch": 0.6, "grad_norm": 1.4604125703276245, "learning_rate": 3.627855306162825e-06, "loss": 0.2599, "step": 4456 }, { "epoch": 0.6, "grad_norm": 0.8425089229012506, "learning_rate": 3.6257554936242822e-06, "loss": 0.1464, "step": 4457 }, { "epoch": 0.6, "grad_norm": 0.8552934993045179, "learning_rate": 3.6236559432319753e-06, "loss": 0.16, "step": 4458 }, { "epoch": 0.6, "grad_norm": 0.820730037841644, "learning_rate": 3.6215566553864077e-06, "loss": 0.1234, "step": 4459 }, { "epoch": 0.6, "grad_norm": 1.0237349616095686, "learning_rate": 3.619457630488032e-06, "loss": 0.2224, "step": 4460 }, { "epoch": 0.6, "grad_norm": 0.8494805584474145, "learning_rate": 3.6173588689372536e-06, "loss": 0.1692, "step": 4461 }, { "epoch": 0.6, "grad_norm": 0.9086261420033765, "learning_rate": 3.615260371134425e-06, "loss": 0.1693, "step": 4462 }, { "epoch": 0.6, "grad_norm": 1.206986443224008, "learning_rate": 3.613162137479845e-06, "loss": 0.1826, "step": 4463 }, { "epoch": 0.6, "grad_norm": 1.2127496180595394, "learning_rate": 3.6110641683737686e-06, "loss": 0.1852, "step": 4464 }, { "epoch": 0.6, "grad_norm": 1.0257720771634098, "learning_rate": 3.608966464216396e-06, "loss": 0.1796, "step": 4465 }, { "epoch": 0.6, "grad_norm": 1.042241899651314, "learning_rate": 3.6068690254078785e-06, "loss": 0.1704, "step": 4466 }, { "epoch": 0.6, "grad_norm": 1.0995776951519298, "learning_rate": 3.604771852348317e-06, "loss": 0.2251, "step": 4467 }, { "epoch": 0.6, "grad_norm": 0.8083039986789788, "learning_rate": 3.602674945437761e-06, "loss": 0.1358, "step": 4468 }, { "epoch": 0.6, "grad_norm": 1.0746366339828586, "learning_rate": 3.600578305076208e-06, "loss": 0.1987, "step": 4469 }, { "epoch": 0.6, "grad_norm": 0.8939585565035044, "learning_rate": 3.5984819316636077e-06, "loss": 0.1698, "step": 4470 }, { "epoch": 0.6, "grad_norm": 0.9722621146085114, "learning_rate": 3.5963858255998553e-06, "loss": 0.1966, "step": 4471 }, { "epoch": 0.6, "grad_norm": 0.8979090042184962, "learning_rate": 3.5942899872847986e-06, "loss": 0.1537, "step": 4472 }, { "epoch": 0.6, "grad_norm": 0.7697040462872451, "learning_rate": 3.5921944171182327e-06, "loss": 0.12, "step": 4473 }, { "epoch": 0.6, "grad_norm": 1.269833207718989, "learning_rate": 3.590099115499901e-06, "loss": 0.233, "step": 4474 }, { "epoch": 0.6, "grad_norm": 1.1525058373074206, "learning_rate": 3.588004082829496e-06, "loss": 0.1853, "step": 4475 }, { "epoch": 0.6, "grad_norm": 0.9301251145749455, "learning_rate": 3.58590931950666e-06, "loss": 0.195, "step": 4476 }, { "epoch": 0.6, "grad_norm": 0.9575574665792909, "learning_rate": 3.583814825930981e-06, "loss": 0.1604, "step": 4477 }, { "epoch": 0.6, "grad_norm": 0.881857599469321, "learning_rate": 3.581720602501999e-06, "loss": 0.1814, "step": 4478 }, { "epoch": 0.6, "grad_norm": 0.9072136445887775, "learning_rate": 3.579626649619201e-06, "loss": 0.1921, "step": 4479 }, { "epoch": 0.6, "grad_norm": 0.8079343654231306, "learning_rate": 3.5775329676820223e-06, "loss": 0.1228, "step": 4480 }, { "epoch": 0.6, "grad_norm": 1.241310297370198, "learning_rate": 3.5754395570898456e-06, "loss": 0.2303, "step": 4481 }, { "epoch": 0.6, "grad_norm": 1.0105450487480667, "learning_rate": 3.5733464182420013e-06, "loss": 0.1735, "step": 4482 }, { "epoch": 0.6, "grad_norm": 0.9724084122382136, "learning_rate": 3.5712535515377746e-06, "loss": 0.1295, "step": 4483 }, { "epoch": 0.6, "grad_norm": 0.8289514753138669, "learning_rate": 3.56916095737639e-06, "loss": 0.1744, "step": 4484 }, { "epoch": 0.6, "grad_norm": 0.8006659192608335, "learning_rate": 3.567068636157024e-06, "loss": 0.1458, "step": 4485 }, { "epoch": 0.6, "grad_norm": 0.9385725153149054, "learning_rate": 3.5649765882788034e-06, "loss": 0.1558, "step": 4486 }, { "epoch": 0.61, "grad_norm": 0.662360582911537, "learning_rate": 3.5628848141407963e-06, "loss": 0.1396, "step": 4487 }, { "epoch": 0.61, "grad_norm": 0.8531207981230648, "learning_rate": 3.560793314142024e-06, "loss": 0.1766, "step": 4488 }, { "epoch": 0.61, "grad_norm": 0.7520623718204582, "learning_rate": 3.558702088681455e-06, "loss": 0.119, "step": 4489 }, { "epoch": 0.61, "grad_norm": 0.8711978222351363, "learning_rate": 3.5566111381580025e-06, "loss": 0.1681, "step": 4490 }, { "epoch": 0.61, "grad_norm": 0.9966460992887938, "learning_rate": 3.554520462970531e-06, "loss": 0.1918, "step": 4491 }, { "epoch": 0.61, "grad_norm": 0.8132305722985139, "learning_rate": 3.5524300635178492e-06, "loss": 0.1591, "step": 4492 }, { "epoch": 0.61, "grad_norm": 0.7919837502478271, "learning_rate": 3.550339940198715e-06, "loss": 0.1415, "step": 4493 }, { "epoch": 0.61, "grad_norm": 0.820027185819046, "learning_rate": 3.5482500934118334e-06, "loss": 0.145, "step": 4494 }, { "epoch": 0.61, "grad_norm": 0.9588757554065278, "learning_rate": 3.546160523555857e-06, "loss": 0.1863, "step": 4495 }, { "epoch": 0.61, "grad_norm": 0.8829445516554887, "learning_rate": 3.5440712310293853e-06, "loss": 0.1496, "step": 4496 }, { "epoch": 0.61, "grad_norm": 0.9396350272701753, "learning_rate": 3.5419822162309634e-06, "loss": 0.1946, "step": 4497 }, { "epoch": 0.61, "grad_norm": 0.9814323983525045, "learning_rate": 3.5398934795590857e-06, "loss": 0.1447, "step": 4498 }, { "epoch": 0.61, "grad_norm": 0.8729369274175995, "learning_rate": 3.5378050214121918e-06, "loss": 0.1383, "step": 4499 }, { "epoch": 0.61, "grad_norm": 0.6982352181583841, "learning_rate": 3.53571684218867e-06, "loss": 0.1713, "step": 4500 }, { "epoch": 0.61, "grad_norm": 0.9207443472087757, "learning_rate": 3.533628942286853e-06, "loss": 0.169, "step": 4501 }, { "epoch": 0.61, "grad_norm": 1.0371153833318443, "learning_rate": 3.5315413221050225e-06, "loss": 0.1639, "step": 4502 }, { "epoch": 0.61, "grad_norm": 1.005996970925269, "learning_rate": 3.5294539820414063e-06, "loss": 0.2086, "step": 4503 }, { "epoch": 0.61, "grad_norm": 1.1957081947452903, "learning_rate": 3.5273669224941776e-06, "loss": 0.2428, "step": 4504 }, { "epoch": 0.61, "grad_norm": 0.9451956512271618, "learning_rate": 3.5252801438614537e-06, "loss": 0.1532, "step": 4505 }, { "epoch": 0.61, "grad_norm": 1.0666362952943391, "learning_rate": 3.523193646541306e-06, "loss": 0.2061, "step": 4506 }, { "epoch": 0.61, "grad_norm": 0.9201230523817502, "learning_rate": 3.5211074309317478e-06, "loss": 0.1627, "step": 4507 }, { "epoch": 0.61, "grad_norm": 0.995297712777418, "learning_rate": 3.5190214974307358e-06, "loss": 0.2034, "step": 4508 }, { "epoch": 0.61, "grad_norm": 1.0091940045185608, "learning_rate": 3.5169358464361787e-06, "loss": 0.1652, "step": 4509 }, { "epoch": 0.61, "grad_norm": 0.9876780416796546, "learning_rate": 3.5148504783459246e-06, "loss": 0.1871, "step": 4510 }, { "epoch": 0.61, "grad_norm": 0.7211483884967476, "learning_rate": 3.512765393557773e-06, "loss": 0.1393, "step": 4511 }, { "epoch": 0.61, "grad_norm": 0.8080020771147695, "learning_rate": 3.5106805924694676e-06, "loss": 0.175, "step": 4512 }, { "epoch": 0.61, "grad_norm": 0.8634681218944426, "learning_rate": 3.5085960754786986e-06, "loss": 0.128, "step": 4513 }, { "epoch": 0.61, "grad_norm": 0.8166753444743415, "learning_rate": 3.5065118429831e-06, "loss": 0.1427, "step": 4514 }, { "epoch": 0.61, "grad_norm": 0.9292296834440131, "learning_rate": 3.5044278953802546e-06, "loss": 0.1625, "step": 4515 }, { "epoch": 0.61, "grad_norm": 0.9252885925102377, "learning_rate": 3.5023442330676866e-06, "loss": 0.1723, "step": 4516 }, { "epoch": 0.61, "grad_norm": 0.9599160241164099, "learning_rate": 3.5002608564428712e-06, "loss": 0.193, "step": 4517 }, { "epoch": 0.61, "grad_norm": 1.164857903713516, "learning_rate": 3.4981777659032255e-06, "loss": 0.1634, "step": 4518 }, { "epoch": 0.61, "grad_norm": 0.9828351949089699, "learning_rate": 3.4960949618461128e-06, "loss": 0.1867, "step": 4519 }, { "epoch": 0.61, "grad_norm": 0.7787881539685579, "learning_rate": 3.494012444668841e-06, "loss": 0.1432, "step": 4520 }, { "epoch": 0.61, "grad_norm": 0.9507791000133687, "learning_rate": 3.491930214768665e-06, "loss": 0.167, "step": 4521 }, { "epoch": 0.61, "grad_norm": 0.8686341149585823, "learning_rate": 3.4898482725427827e-06, "loss": 0.1733, "step": 4522 }, { "epoch": 0.61, "grad_norm": 1.0499711529718079, "learning_rate": 3.48776661838834e-06, "loss": 0.1875, "step": 4523 }, { "epoch": 0.61, "grad_norm": 1.0175050016758014, "learning_rate": 3.4856852527024256e-06, "loss": 0.188, "step": 4524 }, { "epoch": 0.61, "grad_norm": 1.119662288871847, "learning_rate": 3.4836041758820726e-06, "loss": 0.1836, "step": 4525 }, { "epoch": 0.61, "grad_norm": 0.8910567478160418, "learning_rate": 3.4815233883242626e-06, "loss": 0.1541, "step": 4526 }, { "epoch": 0.61, "grad_norm": 1.0949918194146493, "learning_rate": 3.479442890425915e-06, "loss": 0.1866, "step": 4527 }, { "epoch": 0.61, "grad_norm": 0.9548138338277052, "learning_rate": 3.4773626825839033e-06, "loss": 0.1445, "step": 4528 }, { "epoch": 0.61, "grad_norm": 1.0531968370779805, "learning_rate": 3.47528276519504e-06, "loss": 0.2022, "step": 4529 }, { "epoch": 0.61, "grad_norm": 1.0287437416129133, "learning_rate": 3.4732031386560814e-06, "loss": 0.1949, "step": 4530 }, { "epoch": 0.61, "grad_norm": 0.9648087608713005, "learning_rate": 3.4711238033637318e-06, "loss": 0.1989, "step": 4531 }, { "epoch": 0.61, "grad_norm": 0.9917322537094673, "learning_rate": 3.4690447597146387e-06, "loss": 0.1822, "step": 4532 }, { "epoch": 0.61, "grad_norm": 0.9200326667232288, "learning_rate": 3.4669660081053903e-06, "loss": 0.1361, "step": 4533 }, { "epoch": 0.61, "grad_norm": 1.0411144358844189, "learning_rate": 3.4648875489325246e-06, "loss": 0.1842, "step": 4534 }, { "epoch": 0.61, "grad_norm": 0.6892061958947691, "learning_rate": 3.4628093825925208e-06, "loss": 0.1603, "step": 4535 }, { "epoch": 0.61, "grad_norm": 0.9500314055168871, "learning_rate": 3.460731509481803e-06, "loss": 0.1688, "step": 4536 }, { "epoch": 0.61, "grad_norm": 0.9846723267618007, "learning_rate": 3.4586539299967393e-06, "loss": 0.1715, "step": 4537 }, { "epoch": 0.61, "grad_norm": 1.2292051708095493, "learning_rate": 3.456576644533639e-06, "loss": 0.2399, "step": 4538 }, { "epoch": 0.61, "grad_norm": 0.9382214013926128, "learning_rate": 3.4544996534887643e-06, "loss": 0.184, "step": 4539 }, { "epoch": 0.61, "grad_norm": 0.8764650658769491, "learning_rate": 3.4524229572583102e-06, "loss": 0.1739, "step": 4540 }, { "epoch": 0.61, "grad_norm": 0.7779110592598545, "learning_rate": 3.450346556238422e-06, "loss": 0.1513, "step": 4541 }, { "epoch": 0.61, "grad_norm": 0.9450908359300433, "learning_rate": 3.4482704508251857e-06, "loss": 0.1867, "step": 4542 }, { "epoch": 0.61, "grad_norm": 1.2246611598404553, "learning_rate": 3.446194641414633e-06, "loss": 0.2121, "step": 4543 }, { "epoch": 0.61, "grad_norm": 0.9956812924030509, "learning_rate": 3.4441191284027385e-06, "loss": 0.1973, "step": 4544 }, { "epoch": 0.61, "grad_norm": 0.9667226214217274, "learning_rate": 3.44204391218542e-06, "loss": 0.1687, "step": 4545 }, { "epoch": 0.61, "grad_norm": 0.6551926637563678, "learning_rate": 3.4399689931585376e-06, "loss": 0.0771, "step": 4546 }, { "epoch": 0.61, "grad_norm": 0.8225457309235048, "learning_rate": 3.437894371717897e-06, "loss": 0.1466, "step": 4547 }, { "epoch": 0.61, "grad_norm": 1.0658106521496067, "learning_rate": 3.435820048259246e-06, "loss": 0.1858, "step": 4548 }, { "epoch": 0.61, "grad_norm": 1.077362310236794, "learning_rate": 3.4337460231782738e-06, "loss": 0.1994, "step": 4549 }, { "epoch": 0.61, "grad_norm": 0.9876564536571913, "learning_rate": 3.431672296870616e-06, "loss": 0.1605, "step": 4550 }, { "epoch": 0.61, "grad_norm": 0.7742761402369993, "learning_rate": 3.4295988697318506e-06, "loss": 0.1721, "step": 4551 }, { "epoch": 0.61, "grad_norm": 1.014831628215709, "learning_rate": 3.427525742157496e-06, "loss": 0.2027, "step": 4552 }, { "epoch": 0.61, "grad_norm": 1.0157819267087174, "learning_rate": 3.4254529145430156e-06, "loss": 0.1856, "step": 4553 }, { "epoch": 0.61, "grad_norm": 1.0297482197884502, "learning_rate": 3.4233803872838143e-06, "loss": 0.1645, "step": 4554 }, { "epoch": 0.61, "grad_norm": 1.0618733392576813, "learning_rate": 3.4213081607752407e-06, "loss": 0.1935, "step": 4555 }, { "epoch": 0.61, "grad_norm": 0.7262816442135156, "learning_rate": 3.4192362354125873e-06, "loss": 0.1174, "step": 4556 }, { "epoch": 0.61, "grad_norm": 0.8476900065962328, "learning_rate": 3.4171646115910856e-06, "loss": 0.1763, "step": 4557 }, { "epoch": 0.61, "grad_norm": 1.2274938484002913, "learning_rate": 3.415093289705911e-06, "loss": 0.2513, "step": 4558 }, { "epoch": 0.61, "grad_norm": 1.0768721066197002, "learning_rate": 3.413022270152184e-06, "loss": 0.1806, "step": 4559 }, { "epoch": 0.61, "grad_norm": 0.662100076404571, "learning_rate": 3.4109515533249637e-06, "loss": 0.1723, "step": 4560 }, { "epoch": 0.62, "grad_norm": 0.6450220421246762, "learning_rate": 3.4088811396192518e-06, "loss": 0.1487, "step": 4561 }, { "epoch": 0.62, "grad_norm": 0.9498045730907304, "learning_rate": 3.406811029429998e-06, "loss": 0.1629, "step": 4562 }, { "epoch": 0.62, "grad_norm": 0.7974376511157453, "learning_rate": 3.404741223152086e-06, "loss": 0.1407, "step": 4563 }, { "epoch": 0.62, "grad_norm": 0.8986281443811109, "learning_rate": 3.4026717211803454e-06, "loss": 0.1853, "step": 4564 }, { "epoch": 0.62, "grad_norm": 0.8720219820289326, "learning_rate": 3.4006025239095487e-06, "loss": 0.1744, "step": 4565 }, { "epoch": 0.62, "grad_norm": 1.0550160862561246, "learning_rate": 3.398533631734407e-06, "loss": 0.1778, "step": 4566 }, { "epoch": 0.62, "grad_norm": 0.9759399822355166, "learning_rate": 3.3964650450495766e-06, "loss": 0.1536, "step": 4567 }, { "epoch": 0.62, "grad_norm": 0.9519809949132851, "learning_rate": 3.394396764249654e-06, "loss": 0.1829, "step": 4568 }, { "epoch": 0.62, "grad_norm": 0.9707480369754697, "learning_rate": 3.3923287897291766e-06, "loss": 0.1773, "step": 4569 }, { "epoch": 0.62, "grad_norm": 0.8987976439244042, "learning_rate": 3.3902611218826256e-06, "loss": 0.1328, "step": 4570 }, { "epoch": 0.62, "grad_norm": 0.9375632184735514, "learning_rate": 3.3881937611044215e-06, "loss": 0.1775, "step": 4571 }, { "epoch": 0.62, "grad_norm": 0.6894913615151235, "learning_rate": 3.386126707788926e-06, "loss": 0.1322, "step": 4572 }, { "epoch": 0.62, "grad_norm": 0.8394670154366565, "learning_rate": 3.384059962330445e-06, "loss": 0.1369, "step": 4573 }, { "epoch": 0.62, "grad_norm": 1.1641576063161887, "learning_rate": 3.3819935251232226e-06, "loss": 0.2489, "step": 4574 }, { "epoch": 0.62, "grad_norm": 0.8991987393924215, "learning_rate": 3.379927396561447e-06, "loss": 0.1576, "step": 4575 }, { "epoch": 0.62, "grad_norm": 0.9747255565656484, "learning_rate": 3.377861577039245e-06, "loss": 0.1773, "step": 4576 }, { "epoch": 0.62, "grad_norm": 1.1194564171051788, "learning_rate": 3.3757960669506846e-06, "loss": 0.2077, "step": 4577 }, { "epoch": 0.62, "grad_norm": 0.9790791056289458, "learning_rate": 3.373730866689776e-06, "loss": 0.2315, "step": 4578 }, { "epoch": 0.62, "grad_norm": 1.3183438816796527, "learning_rate": 3.371665976650472e-06, "loss": 0.2097, "step": 4579 }, { "epoch": 0.62, "grad_norm": 0.923497548372654, "learning_rate": 3.36960139722666e-06, "loss": 0.197, "step": 4580 }, { "epoch": 0.62, "grad_norm": 0.9145629467808072, "learning_rate": 3.367537128812175e-06, "loss": 0.1649, "step": 4581 }, { "epoch": 0.62, "grad_norm": 1.2195707084639895, "learning_rate": 3.3654731718007883e-06, "loss": 0.2311, "step": 4582 }, { "epoch": 0.62, "grad_norm": 1.1433895243376289, "learning_rate": 3.363409526586212e-06, "loss": 0.2306, "step": 4583 }, { "epoch": 0.62, "grad_norm": 1.2917766630805192, "learning_rate": 3.361346193562105e-06, "loss": 0.2152, "step": 4584 }, { "epoch": 0.62, "grad_norm": 0.6525528969927801, "learning_rate": 3.35928317312206e-06, "loss": 0.1056, "step": 4585 }, { "epoch": 0.62, "grad_norm": 1.180738708538722, "learning_rate": 3.357220465659609e-06, "loss": 0.2023, "step": 4586 }, { "epoch": 0.62, "grad_norm": 0.9127680294804665, "learning_rate": 3.3551580715682292e-06, "loss": 0.1642, "step": 4587 }, { "epoch": 0.62, "grad_norm": 1.0592064115722888, "learning_rate": 3.353095991241335e-06, "loss": 0.1659, "step": 4588 }, { "epoch": 0.62, "grad_norm": 0.7986429380523816, "learning_rate": 3.351034225072284e-06, "loss": 0.137, "step": 4589 }, { "epoch": 0.62, "grad_norm": 0.8278908961363475, "learning_rate": 3.3489727734543686e-06, "loss": 0.139, "step": 4590 }, { "epoch": 0.62, "grad_norm": 0.8320387730050671, "learning_rate": 3.3469116367808267e-06, "loss": 0.1951, "step": 4591 }, { "epoch": 0.62, "grad_norm": 1.0277599958162604, "learning_rate": 3.3448508154448333e-06, "loss": 0.1803, "step": 4592 }, { "epoch": 0.62, "grad_norm": 0.7807217366472202, "learning_rate": 3.342790309839503e-06, "loss": 0.1055, "step": 4593 }, { "epoch": 0.62, "grad_norm": 0.8110272910399833, "learning_rate": 3.34073012035789e-06, "loss": 0.1774, "step": 4594 }, { "epoch": 0.62, "grad_norm": 0.6661303118912044, "learning_rate": 3.338670247392992e-06, "loss": 0.13, "step": 4595 }, { "epoch": 0.62, "grad_norm": 1.1728342322457213, "learning_rate": 3.3366106913377416e-06, "loss": 0.2354, "step": 4596 }, { "epoch": 0.62, "grad_norm": 1.0326021574357824, "learning_rate": 3.3345514525850125e-06, "loss": 0.1928, "step": 4597 }, { "epoch": 0.62, "grad_norm": 0.9106358544487076, "learning_rate": 3.332492531527619e-06, "loss": 0.133, "step": 4598 }, { "epoch": 0.62, "grad_norm": 0.7321092094031858, "learning_rate": 3.330433928558313e-06, "loss": 0.1589, "step": 4599 }, { "epoch": 0.62, "grad_norm": 0.9946287814713226, "learning_rate": 3.328375644069788e-06, "loss": 0.1945, "step": 4600 }, { "epoch": 0.62, "grad_norm": 0.7877409897337574, "learning_rate": 3.3263176784546735e-06, "loss": 0.1603, "step": 4601 }, { "epoch": 0.62, "grad_norm": 0.9837890652917382, "learning_rate": 3.324260032105543e-06, "loss": 0.1881, "step": 4602 }, { "epoch": 0.62, "grad_norm": 0.803746496720738, "learning_rate": 3.322202705414903e-06, "loss": 0.1356, "step": 4603 }, { "epoch": 0.62, "grad_norm": 0.8749032181759611, "learning_rate": 3.3201456987752034e-06, "loss": 0.1812, "step": 4604 }, { "epoch": 0.62, "grad_norm": 0.6701665415306419, "learning_rate": 3.3180890125788314e-06, "loss": 0.1331, "step": 4605 }, { "epoch": 0.62, "grad_norm": 0.9865490871229168, "learning_rate": 3.3160326472181127e-06, "loss": 0.1997, "step": 4606 }, { "epoch": 0.62, "grad_norm": 1.3658752086063302, "learning_rate": 3.3139766030853147e-06, "loss": 0.2497, "step": 4607 }, { "epoch": 0.62, "grad_norm": 0.8195093719383653, "learning_rate": 3.3119208805726416e-06, "loss": 0.1477, "step": 4608 }, { "epoch": 0.62, "grad_norm": 0.9217987883637799, "learning_rate": 3.3098654800722362e-06, "loss": 0.1706, "step": 4609 }, { "epoch": 0.62, "grad_norm": 1.2129340293902025, "learning_rate": 3.307810401976177e-06, "loss": 0.2327, "step": 4610 }, { "epoch": 0.62, "grad_norm": 0.8750391608409409, "learning_rate": 3.305755646676486e-06, "loss": 0.1536, "step": 4611 }, { "epoch": 0.62, "grad_norm": 0.8983894602547882, "learning_rate": 3.3037012145651202e-06, "loss": 0.1776, "step": 4612 }, { "epoch": 0.62, "grad_norm": 0.9071893849070287, "learning_rate": 3.3016471060339773e-06, "loss": 0.1686, "step": 4613 }, { "epoch": 0.62, "grad_norm": 0.8391424752787728, "learning_rate": 3.299593321474891e-06, "loss": 0.1124, "step": 4614 }, { "epoch": 0.62, "grad_norm": 1.0876608009478774, "learning_rate": 3.2975398612796346e-06, "loss": 0.1662, "step": 4615 }, { "epoch": 0.62, "grad_norm": 1.0777583521502876, "learning_rate": 3.295486725839919e-06, "loss": 0.2103, "step": 4616 }, { "epoch": 0.62, "grad_norm": 1.2074986321544159, "learning_rate": 3.2934339155473938e-06, "loss": 0.2523, "step": 4617 }, { "epoch": 0.62, "grad_norm": 1.2069908600482675, "learning_rate": 3.291381430793647e-06, "loss": 0.2238, "step": 4618 }, { "epoch": 0.62, "grad_norm": 1.1741390332002202, "learning_rate": 3.2893292719702025e-06, "loss": 0.237, "step": 4619 }, { "epoch": 0.62, "grad_norm": 0.8934270592470233, "learning_rate": 3.2872774394685234e-06, "loss": 0.1759, "step": 4620 }, { "epoch": 0.62, "grad_norm": 0.512355432035999, "learning_rate": 3.2852259336800106e-06, "loss": 0.0997, "step": 4621 }, { "epoch": 0.62, "grad_norm": 0.8174303309055896, "learning_rate": 3.2831747549960026e-06, "loss": 0.1367, "step": 4622 }, { "epoch": 0.62, "grad_norm": 0.7316293061132229, "learning_rate": 3.281123903807775e-06, "loss": 0.1205, "step": 4623 }, { "epoch": 0.62, "grad_norm": 0.9171273038336089, "learning_rate": 3.27907338050654e-06, "loss": 0.1827, "step": 4624 }, { "epoch": 0.62, "grad_norm": 0.8944997861914638, "learning_rate": 3.27702318548345e-06, "loss": 0.1264, "step": 4625 }, { "epoch": 0.62, "grad_norm": 0.9488221518048677, "learning_rate": 3.2749733191295946e-06, "loss": 0.1765, "step": 4626 }, { "epoch": 0.62, "grad_norm": 0.8824570539684398, "learning_rate": 3.272923781835996e-06, "loss": 0.1527, "step": 4627 }, { "epoch": 0.62, "grad_norm": 0.9289769157745674, "learning_rate": 3.270874573993618e-06, "loss": 0.1457, "step": 4628 }, { "epoch": 0.62, "grad_norm": 0.8297932745124901, "learning_rate": 3.2688256959933618e-06, "loss": 0.1606, "step": 4629 }, { "epoch": 0.62, "grad_norm": 0.7616949266201526, "learning_rate": 3.2667771482260645e-06, "loss": 0.148, "step": 4630 }, { "epoch": 0.62, "grad_norm": 1.173681715311864, "learning_rate": 3.264728931082499e-06, "loss": 0.2068, "step": 4631 }, { "epoch": 0.62, "grad_norm": 0.8743689494479515, "learning_rate": 3.2626810449533787e-06, "loss": 0.1314, "step": 4632 }, { "epoch": 0.62, "grad_norm": 1.008504857984711, "learning_rate": 3.260633490229348e-06, "loss": 0.1437, "step": 4633 }, { "epoch": 0.62, "grad_norm": 1.141785425342125, "learning_rate": 3.258586267300994e-06, "loss": 0.1666, "step": 4634 }, { "epoch": 0.62, "grad_norm": 1.1677927229255276, "learning_rate": 3.256539376558836e-06, "loss": 0.2524, "step": 4635 }, { "epoch": 0.63, "grad_norm": 0.8161048497938125, "learning_rate": 3.2544928183933332e-06, "loss": 0.1212, "step": 4636 }, { "epoch": 0.63, "grad_norm": 1.1719568917740018, "learning_rate": 3.2524465931948806e-06, "loss": 0.2168, "step": 4637 }, { "epoch": 0.63, "grad_norm": 1.049685953760718, "learning_rate": 3.2504007013538088e-06, "loss": 0.1827, "step": 4638 }, { "epoch": 0.63, "grad_norm": 0.9352824492157631, "learning_rate": 3.248355143260383e-06, "loss": 0.156, "step": 4639 }, { "epoch": 0.63, "grad_norm": 0.8012241066958157, "learning_rate": 3.246309919304811e-06, "loss": 0.1284, "step": 4640 }, { "epoch": 0.63, "grad_norm": 1.0609089949376782, "learning_rate": 3.2442650298772298e-06, "loss": 0.1582, "step": 4641 }, { "epoch": 0.63, "grad_norm": 0.8579941623799037, "learning_rate": 3.2422204753677172e-06, "loss": 0.1672, "step": 4642 }, { "epoch": 0.63, "grad_norm": 1.0122259781526974, "learning_rate": 3.2401762561662846e-06, "loss": 0.1988, "step": 4643 }, { "epoch": 0.63, "grad_norm": 0.9781313079450916, "learning_rate": 3.238132372662881e-06, "loss": 0.147, "step": 4644 }, { "epoch": 0.63, "grad_norm": 0.9135127982577672, "learning_rate": 3.2360888252473898e-06, "loss": 0.1835, "step": 4645 }, { "epoch": 0.63, "grad_norm": 0.6737067851456214, "learning_rate": 3.234045614309632e-06, "loss": 0.1241, "step": 4646 }, { "epoch": 0.63, "grad_norm": 0.969767821902647, "learning_rate": 3.2320027402393634e-06, "loss": 0.1672, "step": 4647 }, { "epoch": 0.63, "grad_norm": 0.7416989508436309, "learning_rate": 3.2299602034262754e-06, "loss": 0.1557, "step": 4648 }, { "epoch": 0.63, "grad_norm": 0.9357358296453814, "learning_rate": 3.2279180042599965e-06, "loss": 0.183, "step": 4649 }, { "epoch": 0.63, "grad_norm": 0.9622169165867278, "learning_rate": 3.2258761431300868e-06, "loss": 0.179, "step": 4650 }, { "epoch": 0.63, "grad_norm": 1.1867241464361895, "learning_rate": 3.2238346204260484e-06, "loss": 0.1641, "step": 4651 }, { "epoch": 0.63, "grad_norm": 1.0254252902786325, "learning_rate": 3.2217934365373138e-06, "loss": 0.1717, "step": 4652 }, { "epoch": 0.63, "grad_norm": 0.9846442372903196, "learning_rate": 3.2197525918532524e-06, "loss": 0.206, "step": 4653 }, { "epoch": 0.63, "grad_norm": 1.1399545041059882, "learning_rate": 3.2177120867631683e-06, "loss": 0.2179, "step": 4654 }, { "epoch": 0.63, "grad_norm": 0.5758285043719985, "learning_rate": 3.2156719216563037e-06, "loss": 0.064, "step": 4655 }, { "epoch": 0.63, "grad_norm": 0.8716949354551982, "learning_rate": 3.2136320969218298e-06, "loss": 0.1738, "step": 4656 }, { "epoch": 0.63, "grad_norm": 0.8591966437610068, "learning_rate": 3.2115926129488596e-06, "loss": 0.1623, "step": 4657 }, { "epoch": 0.63, "grad_norm": 0.9320967968744298, "learning_rate": 3.2095534701264363e-06, "loss": 0.1701, "step": 4658 }, { "epoch": 0.63, "grad_norm": 1.3896971306776897, "learning_rate": 3.207514668843541e-06, "loss": 0.1887, "step": 4659 }, { "epoch": 0.63, "grad_norm": 0.9027084214937802, "learning_rate": 3.205476209489089e-06, "loss": 0.1455, "step": 4660 }, { "epoch": 0.63, "grad_norm": 0.9902470570050866, "learning_rate": 3.2034380924519283e-06, "loss": 0.1474, "step": 4661 }, { "epoch": 0.63, "grad_norm": 0.5751687605867268, "learning_rate": 3.201400318120843e-06, "loss": 0.0883, "step": 4662 }, { "epoch": 0.63, "grad_norm": 0.907805202910934, "learning_rate": 3.199362886884555e-06, "loss": 0.1559, "step": 4663 }, { "epoch": 0.63, "grad_norm": 1.20419968864583, "learning_rate": 3.1973257991317157e-06, "loss": 0.2188, "step": 4664 }, { "epoch": 0.63, "grad_norm": 1.2012780330053823, "learning_rate": 3.195289055250913e-06, "loss": 0.2206, "step": 4665 }, { "epoch": 0.63, "grad_norm": 1.1550470062555551, "learning_rate": 3.1932526556306697e-06, "loss": 0.1897, "step": 4666 }, { "epoch": 0.63, "grad_norm": 0.9821163239671993, "learning_rate": 3.1912166006594432e-06, "loss": 0.2205, "step": 4667 }, { "epoch": 0.63, "grad_norm": 0.7689602517285052, "learning_rate": 3.189180890725623e-06, "loss": 0.1044, "step": 4668 }, { "epoch": 0.63, "grad_norm": 0.8391607507124611, "learning_rate": 3.1871455262175356e-06, "loss": 0.1728, "step": 4669 }, { "epoch": 0.63, "grad_norm": 1.0054510064736555, "learning_rate": 3.185110507523439e-06, "loss": 0.1771, "step": 4670 }, { "epoch": 0.63, "grad_norm": 0.7423220948980407, "learning_rate": 3.183075835031527e-06, "loss": 0.1333, "step": 4671 }, { "epoch": 0.63, "grad_norm": 1.0073715944520527, "learning_rate": 3.1810415091299266e-06, "loss": 0.1763, "step": 4672 }, { "epoch": 0.63, "grad_norm": 1.128621795086374, "learning_rate": 3.1790075302066993e-06, "loss": 0.2009, "step": 4673 }, { "epoch": 0.63, "grad_norm": 0.7773586165632235, "learning_rate": 3.17697389864984e-06, "loss": 0.0982, "step": 4674 }, { "epoch": 0.63, "grad_norm": 0.8929705715401999, "learning_rate": 3.174940614847278e-06, "loss": 0.1724, "step": 4675 }, { "epoch": 0.63, "grad_norm": 0.839676269033263, "learning_rate": 3.172907679186875e-06, "loss": 0.1075, "step": 4676 }, { "epoch": 0.63, "grad_norm": 0.7182422297544032, "learning_rate": 3.1708750920564266e-06, "loss": 0.1476, "step": 4677 }, { "epoch": 0.63, "grad_norm": 0.9239410154652626, "learning_rate": 3.1688428538436634e-06, "loss": 0.174, "step": 4678 }, { "epoch": 0.63, "grad_norm": 0.932465751424384, "learning_rate": 3.166810964936248e-06, "loss": 0.1695, "step": 4679 }, { "epoch": 0.63, "grad_norm": 0.6765082632263673, "learning_rate": 3.1647794257217745e-06, "loss": 0.106, "step": 4680 }, { "epoch": 0.63, "grad_norm": 0.9958074602467104, "learning_rate": 3.1627482365877747e-06, "loss": 0.1484, "step": 4681 }, { "epoch": 0.63, "grad_norm": 0.9037742544887448, "learning_rate": 3.160717397921711e-06, "loss": 0.1603, "step": 4682 }, { "epoch": 0.63, "grad_norm": 1.0312237570634508, "learning_rate": 3.1586869101109796e-06, "loss": 0.1917, "step": 4683 }, { "epoch": 0.63, "grad_norm": 0.9883708825961035, "learning_rate": 3.156656773542906e-06, "loss": 0.1798, "step": 4684 }, { "epoch": 0.63, "grad_norm": 0.992635483852042, "learning_rate": 3.1546269886047586e-06, "loss": 0.1848, "step": 4685 }, { "epoch": 0.63, "grad_norm": 0.7654255115355624, "learning_rate": 3.1525975556837278e-06, "loss": 0.1048, "step": 4686 }, { "epoch": 0.63, "grad_norm": 0.7624053390828998, "learning_rate": 3.150568475166943e-06, "loss": 0.1288, "step": 4687 }, { "epoch": 0.63, "grad_norm": 0.8629100268672427, "learning_rate": 3.1485397474414637e-06, "loss": 0.1453, "step": 4688 }, { "epoch": 0.63, "grad_norm": 0.9120497335856028, "learning_rate": 3.146511372894283e-06, "loss": 0.1312, "step": 4689 }, { "epoch": 0.63, "grad_norm": 1.1637253539741486, "learning_rate": 3.1444833519123286e-06, "loss": 0.192, "step": 4690 }, { "epoch": 0.63, "grad_norm": 0.8792423793765689, "learning_rate": 3.1424556848824573e-06, "loss": 0.1752, "step": 4691 }, { "epoch": 0.63, "grad_norm": 0.7555037467913729, "learning_rate": 3.140428372191461e-06, "loss": 0.2004, "step": 4692 }, { "epoch": 0.63, "grad_norm": 1.2998532857818257, "learning_rate": 3.138401414226062e-06, "loss": 0.2404, "step": 4693 }, { "epoch": 0.63, "grad_norm": 0.8808909061276046, "learning_rate": 3.136374811372916e-06, "loss": 0.1516, "step": 4694 }, { "epoch": 0.63, "grad_norm": 0.9460629918439518, "learning_rate": 3.1343485640186104e-06, "loss": 0.1761, "step": 4695 }, { "epoch": 0.63, "grad_norm": 0.7018433602696402, "learning_rate": 3.132322672549668e-06, "loss": 0.1305, "step": 4696 }, { "epoch": 0.63, "grad_norm": 0.8919964621390223, "learning_rate": 3.1302971373525385e-06, "loss": 0.1554, "step": 4697 }, { "epoch": 0.63, "grad_norm": 0.825481481652663, "learning_rate": 3.1282719588136072e-06, "loss": 0.1432, "step": 4698 }, { "epoch": 0.63, "grad_norm": 1.3014503830079158, "learning_rate": 3.1262471373191904e-06, "loss": 0.2389, "step": 4699 }, { "epoch": 0.63, "grad_norm": 0.9844062586557191, "learning_rate": 3.1242226732555355e-06, "loss": 0.1952, "step": 4700 }, { "epoch": 0.63, "grad_norm": 0.8262983021723914, "learning_rate": 3.122198567008822e-06, "loss": 0.1346, "step": 4701 }, { "epoch": 0.63, "grad_norm": 0.8414212209562214, "learning_rate": 3.120174818965164e-06, "loss": 0.1076, "step": 4702 }, { "epoch": 0.63, "grad_norm": 0.7816012662712774, "learning_rate": 3.1181514295106024e-06, "loss": 0.1292, "step": 4703 }, { "epoch": 0.63, "grad_norm": 0.933007027669192, "learning_rate": 3.1161283990311127e-06, "loss": 0.1959, "step": 4704 }, { "epoch": 0.63, "grad_norm": 0.8142592024916373, "learning_rate": 3.1141057279126006e-06, "loss": 0.127, "step": 4705 }, { "epoch": 0.63, "grad_norm": 0.8977675565069196, "learning_rate": 3.1120834165409066e-06, "loss": 0.1747, "step": 4706 }, { "epoch": 0.63, "grad_norm": 1.0142617059707162, "learning_rate": 3.110061465301795e-06, "loss": 0.1856, "step": 4707 }, { "epoch": 0.63, "grad_norm": 0.5787933122528763, "learning_rate": 3.1080398745809733e-06, "loss": 0.1255, "step": 4708 }, { "epoch": 0.63, "grad_norm": 0.9028827655613233, "learning_rate": 3.1060186447640685e-06, "loss": 0.1919, "step": 4709 }, { "epoch": 0.64, "grad_norm": 1.2166440916401415, "learning_rate": 3.1039977762366447e-06, "loss": 0.2121, "step": 4710 }, { "epoch": 0.64, "grad_norm": 1.0305622417918696, "learning_rate": 3.1019772693841952e-06, "loss": 0.18, "step": 4711 }, { "epoch": 0.64, "grad_norm": 1.1785041015568778, "learning_rate": 3.0999571245921475e-06, "loss": 0.2293, "step": 4712 }, { "epoch": 0.64, "grad_norm": 0.9873133416148526, "learning_rate": 3.097937342245855e-06, "loss": 0.1838, "step": 4713 }, { "epoch": 0.64, "grad_norm": 0.8367546086231239, "learning_rate": 3.0959179227306056e-06, "loss": 0.141, "step": 4714 }, { "epoch": 0.64, "grad_norm": 0.9011294115088571, "learning_rate": 3.093898866431617e-06, "loss": 0.1524, "step": 4715 }, { "epoch": 0.64, "grad_norm": 0.8690168195115833, "learning_rate": 3.0918801737340374e-06, "loss": 0.1209, "step": 4716 }, { "epoch": 0.64, "grad_norm": 0.8852269284734271, "learning_rate": 3.0898618450229455e-06, "loss": 0.1623, "step": 4717 }, { "epoch": 0.64, "grad_norm": 0.9603474170706017, "learning_rate": 3.087843880683351e-06, "loss": 0.1798, "step": 4718 }, { "epoch": 0.64, "grad_norm": 0.8125298717035702, "learning_rate": 3.0858262811001944e-06, "loss": 0.1355, "step": 4719 }, { "epoch": 0.64, "grad_norm": 1.0724194260988933, "learning_rate": 3.0838090466583458e-06, "loss": 0.1462, "step": 4720 }, { "epoch": 0.64, "grad_norm": 0.9483391604822478, "learning_rate": 3.0817921777426063e-06, "loss": 0.1816, "step": 4721 }, { "epoch": 0.64, "grad_norm": 0.9912235115481471, "learning_rate": 3.079775674737707e-06, "loss": 0.1307, "step": 4722 }, { "epoch": 0.64, "grad_norm": 1.102864631891142, "learning_rate": 3.0777595380283095e-06, "loss": 0.2193, "step": 4723 }, { "epoch": 0.64, "grad_norm": 0.9221522872250497, "learning_rate": 3.0757437679990043e-06, "loss": 0.1676, "step": 4724 }, { "epoch": 0.64, "grad_norm": 1.059514108826089, "learning_rate": 3.073728365034313e-06, "loss": 0.1551, "step": 4725 }, { "epoch": 0.64, "grad_norm": 0.8056517009676307, "learning_rate": 3.0717133295186884e-06, "loss": 0.1267, "step": 4726 }, { "epoch": 0.64, "grad_norm": 0.9532885277281021, "learning_rate": 3.06969866183651e-06, "loss": 0.145, "step": 4727 }, { "epoch": 0.64, "grad_norm": 0.9391945319040846, "learning_rate": 3.06768436237209e-06, "loss": 0.1469, "step": 4728 }, { "epoch": 0.64, "grad_norm": 0.9591246376952401, "learning_rate": 3.0656704315096675e-06, "loss": 0.1921, "step": 4729 }, { "epoch": 0.64, "grad_norm": 0.8892942626510202, "learning_rate": 3.0636568696334164e-06, "loss": 0.126, "step": 4730 }, { "epoch": 0.64, "grad_norm": 0.6044399870731322, "learning_rate": 3.061643677127435e-06, "loss": 0.1473, "step": 4731 }, { "epoch": 0.64, "grad_norm": 0.9800494029939545, "learning_rate": 3.059630854375755e-06, "loss": 0.1708, "step": 4732 }, { "epoch": 0.64, "grad_norm": 1.3165675312630576, "learning_rate": 3.0576184017623334e-06, "loss": 0.2568, "step": 4733 }, { "epoch": 0.64, "grad_norm": 0.7308396428443287, "learning_rate": 3.0556063196710595e-06, "loss": 0.1618, "step": 4734 }, { "epoch": 0.64, "grad_norm": 0.7885930793030683, "learning_rate": 3.0535946084857503e-06, "loss": 0.1542, "step": 4735 }, { "epoch": 0.64, "grad_norm": 0.8603305423208258, "learning_rate": 3.051583268590156e-06, "loss": 0.1693, "step": 4736 }, { "epoch": 0.64, "grad_norm": 0.8821665873058916, "learning_rate": 3.0495723003679493e-06, "loss": 0.135, "step": 4737 }, { "epoch": 0.64, "grad_norm": 0.9575363023584477, "learning_rate": 3.0475617042027384e-06, "loss": 0.1669, "step": 4738 }, { "epoch": 0.64, "grad_norm": 0.9822071519773357, "learning_rate": 3.0455514804780572e-06, "loss": 0.1723, "step": 4739 }, { "epoch": 0.64, "grad_norm": 0.8440556137190863, "learning_rate": 3.043541629577368e-06, "loss": 0.1191, "step": 4740 }, { "epoch": 0.64, "grad_norm": 0.9619401740069229, "learning_rate": 3.0415321518840647e-06, "loss": 0.195, "step": 4741 }, { "epoch": 0.64, "grad_norm": 1.0030515299759246, "learning_rate": 3.0395230477814677e-06, "loss": 0.1434, "step": 4742 }, { "epoch": 0.64, "grad_norm": 1.0418609164577066, "learning_rate": 3.0375143176528264e-06, "loss": 0.1712, "step": 4743 }, { "epoch": 0.64, "grad_norm": 0.9622703821668259, "learning_rate": 3.0355059618813216e-06, "loss": 0.1338, "step": 4744 }, { "epoch": 0.64, "grad_norm": 0.8763670983899442, "learning_rate": 3.0334979808500583e-06, "loss": 0.1438, "step": 4745 }, { "epoch": 0.64, "grad_norm": 1.0173447446403108, "learning_rate": 3.0314903749420723e-06, "loss": 0.1774, "step": 4746 }, { "epoch": 0.64, "grad_norm": 1.1220916482726853, "learning_rate": 3.0294831445403285e-06, "loss": 0.1647, "step": 4747 }, { "epoch": 0.64, "grad_norm": 0.8496556456535859, "learning_rate": 3.0274762900277196e-06, "loss": 0.1409, "step": 4748 }, { "epoch": 0.64, "grad_norm": 1.0435682996361686, "learning_rate": 3.0254698117870664e-06, "loss": 0.1889, "step": 4749 }, { "epoch": 0.64, "grad_norm": 0.956638168714782, "learning_rate": 3.023463710201116e-06, "loss": 0.1186, "step": 4750 }, { "epoch": 0.64, "grad_norm": 0.8938649234206243, "learning_rate": 3.021457985652545e-06, "loss": 0.1616, "step": 4751 }, { "epoch": 0.64, "grad_norm": 0.9319665123173214, "learning_rate": 3.019452638523963e-06, "loss": 0.1686, "step": 4752 }, { "epoch": 0.64, "grad_norm": 1.0465032874120548, "learning_rate": 3.0174476691979005e-06, "loss": 0.1822, "step": 4753 }, { "epoch": 0.64, "grad_norm": 0.6969689309043711, "learning_rate": 3.0154430780568196e-06, "loss": 0.1448, "step": 4754 }, { "epoch": 0.64, "grad_norm": 0.9003995500944247, "learning_rate": 3.013438865483109e-06, "loss": 0.1646, "step": 4755 }, { "epoch": 0.64, "grad_norm": 0.8606296099847768, "learning_rate": 3.0114350318590847e-06, "loss": 0.1126, "step": 4756 }, { "epoch": 0.64, "grad_norm": 1.082992204586381, "learning_rate": 3.009431577566991e-06, "loss": 0.1784, "step": 4757 }, { "epoch": 0.64, "grad_norm": 0.6970237592584401, "learning_rate": 3.0074285029890016e-06, "loss": 0.1404, "step": 4758 }, { "epoch": 0.64, "grad_norm": 0.8866239586696696, "learning_rate": 3.0054258085072153e-06, "loss": 0.1685, "step": 4759 }, { "epoch": 0.64, "grad_norm": 0.8464996950373507, "learning_rate": 3.0034234945036587e-06, "loss": 0.167, "step": 4760 }, { "epoch": 0.64, "grad_norm": 1.228618416155985, "learning_rate": 3.0014215613602884e-06, "loss": 0.2002, "step": 4761 }, { "epoch": 0.64, "grad_norm": 0.8704290531227764, "learning_rate": 2.9994200094589844e-06, "loss": 0.1633, "step": 4762 }, { "epoch": 0.64, "grad_norm": 1.2394523241248638, "learning_rate": 2.997418839181556e-06, "loss": 0.1973, "step": 4763 }, { "epoch": 0.64, "grad_norm": 0.8064928044798542, "learning_rate": 2.995418050909742e-06, "loss": 0.1798, "step": 4764 }, { "epoch": 0.64, "grad_norm": 1.062920202697664, "learning_rate": 2.993417645025204e-06, "loss": 0.1433, "step": 4765 }, { "epoch": 0.64, "grad_norm": 0.8385776645017993, "learning_rate": 2.9914176219095327e-06, "loss": 0.1085, "step": 4766 }, { "epoch": 0.64, "grad_norm": 1.0197112439293319, "learning_rate": 2.989417981944247e-06, "loss": 0.1732, "step": 4767 }, { "epoch": 0.64, "grad_norm": 0.804082981864474, "learning_rate": 2.98741872551079e-06, "loss": 0.1378, "step": 4768 }, { "epoch": 0.64, "grad_norm": 1.0056812833276678, "learning_rate": 2.985419852990534e-06, "loss": 0.1835, "step": 4769 }, { "epoch": 0.64, "grad_norm": 0.716864732055956, "learning_rate": 2.983421364764777e-06, "loss": 0.1202, "step": 4770 }, { "epoch": 0.64, "grad_norm": 1.084244550595785, "learning_rate": 2.9814232612147444e-06, "loss": 0.2024, "step": 4771 }, { "epoch": 0.64, "grad_norm": 0.8658803581171032, "learning_rate": 2.979425542721587e-06, "loss": 0.1246, "step": 4772 }, { "epoch": 0.64, "grad_norm": 0.916954278597525, "learning_rate": 2.9774282096663822e-06, "loss": 0.1451, "step": 4773 }, { "epoch": 0.64, "grad_norm": 1.0221929370304814, "learning_rate": 2.9754312624301335e-06, "loss": 0.2156, "step": 4774 }, { "epoch": 0.64, "grad_norm": 0.8027431827048073, "learning_rate": 2.9734347013937752e-06, "loss": 0.1658, "step": 4775 }, { "epoch": 0.64, "grad_norm": 0.8315427209831505, "learning_rate": 2.971438526938163e-06, "loss": 0.1248, "step": 4776 }, { "epoch": 0.64, "grad_norm": 0.9582127288730451, "learning_rate": 2.9694427394440808e-06, "loss": 0.1722, "step": 4777 }, { "epoch": 0.64, "grad_norm": 1.0903193866119865, "learning_rate": 2.9674473392922366e-06, "loss": 0.1886, "step": 4778 }, { "epoch": 0.64, "grad_norm": 0.8893955154132113, "learning_rate": 2.9654523268632683e-06, "loss": 0.1465, "step": 4779 }, { "epoch": 0.64, "grad_norm": 0.9549492086593641, "learning_rate": 2.9634577025377365e-06, "loss": 0.172, "step": 4780 }, { "epoch": 0.64, "grad_norm": 0.8807861039043311, "learning_rate": 2.961463466696129e-06, "loss": 0.1218, "step": 4781 }, { "epoch": 0.64, "grad_norm": 1.027094832454673, "learning_rate": 2.9594696197188597e-06, "loss": 0.1721, "step": 4782 }, { "epoch": 0.64, "grad_norm": 0.8206977142731728, "learning_rate": 2.957476161986268e-06, "loss": 0.1606, "step": 4783 }, { "epoch": 0.65, "grad_norm": 0.8951631196813647, "learning_rate": 2.9554830938786195e-06, "loss": 0.1533, "step": 4784 }, { "epoch": 0.65, "grad_norm": 1.1185414486313783, "learning_rate": 2.9534904157761035e-06, "loss": 0.2101, "step": 4785 }, { "epoch": 0.65, "grad_norm": 1.006288592247412, "learning_rate": 2.951498128058839e-06, "loss": 0.1806, "step": 4786 }, { "epoch": 0.65, "grad_norm": 1.1030353571482443, "learning_rate": 2.949506231106867e-06, "loss": 0.2169, "step": 4787 }, { "epoch": 0.65, "grad_norm": 0.9796649893215852, "learning_rate": 2.947514725300155e-06, "loss": 0.1763, "step": 4788 }, { "epoch": 0.65, "grad_norm": 0.813726248407595, "learning_rate": 2.945523611018596e-06, "loss": 0.1267, "step": 4789 }, { "epoch": 0.65, "grad_norm": 0.7249971524991313, "learning_rate": 2.9435328886420083e-06, "loss": 0.1802, "step": 4790 }, { "epoch": 0.65, "grad_norm": 0.7732774892790756, "learning_rate": 2.9415425585501358e-06, "loss": 0.1548, "step": 4791 }, { "epoch": 0.65, "grad_norm": 1.2141814842452416, "learning_rate": 2.9395526211226455e-06, "loss": 0.2432, "step": 4792 }, { "epoch": 0.65, "grad_norm": 1.028388431257084, "learning_rate": 2.9375630767391327e-06, "loss": 0.2034, "step": 4793 }, { "epoch": 0.65, "grad_norm": 1.0034995196157483, "learning_rate": 2.9355739257791162e-06, "loss": 0.1742, "step": 4794 }, { "epoch": 0.65, "grad_norm": 1.1598721453309726, "learning_rate": 2.9335851686220383e-06, "loss": 0.2087, "step": 4795 }, { "epoch": 0.65, "grad_norm": 0.8173224979188186, "learning_rate": 2.9315968056472676e-06, "loss": 0.1733, "step": 4796 }, { "epoch": 0.65, "grad_norm": 1.3188343627103112, "learning_rate": 2.9296088372341005e-06, "loss": 0.1905, "step": 4797 }, { "epoch": 0.65, "grad_norm": 0.9068180675228201, "learning_rate": 2.927621263761752e-06, "loss": 0.1576, "step": 4798 }, { "epoch": 0.65, "grad_norm": 0.665957550717257, "learning_rate": 2.9256340856093655e-06, "loss": 0.1345, "step": 4799 }, { "epoch": 0.65, "grad_norm": 0.6578799790089187, "learning_rate": 2.92364730315601e-06, "loss": 0.1321, "step": 4800 }, { "epoch": 0.65, "grad_norm": 0.8790663317083451, "learning_rate": 2.921660916780676e-06, "loss": 0.1026, "step": 4801 }, { "epoch": 0.65, "grad_norm": 0.6276982356140018, "learning_rate": 2.9196749268622805e-06, "loss": 0.0725, "step": 4802 }, { "epoch": 0.65, "grad_norm": 1.1303678960663681, "learning_rate": 2.917689333779662e-06, "loss": 0.2218, "step": 4803 }, { "epoch": 0.65, "grad_norm": 0.9570048230430263, "learning_rate": 2.9157041379115887e-06, "loss": 0.1226, "step": 4804 }, { "epoch": 0.65, "grad_norm": 0.6029698039622101, "learning_rate": 2.913719339636748e-06, "loss": 0.0908, "step": 4805 }, { "epoch": 0.65, "grad_norm": 0.8071461689604195, "learning_rate": 2.911734939333754e-06, "loss": 0.131, "step": 4806 }, { "epoch": 0.65, "grad_norm": 0.8697785503460298, "learning_rate": 2.909750937381144e-06, "loss": 0.1537, "step": 4807 }, { "epoch": 0.65, "grad_norm": 0.7987084658563163, "learning_rate": 2.9077673341573794e-06, "loss": 0.107, "step": 4808 }, { "epoch": 0.65, "grad_norm": 0.909522332604642, "learning_rate": 2.9057841300408456e-06, "loss": 0.1718, "step": 4809 }, { "epoch": 0.65, "grad_norm": 0.7867016939766268, "learning_rate": 2.9038013254098522e-06, "loss": 0.121, "step": 4810 }, { "epoch": 0.65, "grad_norm": 0.959135463381658, "learning_rate": 2.9018189206426324e-06, "loss": 0.1488, "step": 4811 }, { "epoch": 0.65, "grad_norm": 1.075103323579178, "learning_rate": 2.8998369161173423e-06, "loss": 0.1805, "step": 4812 }, { "epoch": 0.65, "grad_norm": 0.7768557722895444, "learning_rate": 2.8978553122120635e-06, "loss": 0.1033, "step": 4813 }, { "epoch": 0.65, "grad_norm": 0.8781246818661913, "learning_rate": 2.8958741093047985e-06, "loss": 0.1213, "step": 4814 }, { "epoch": 0.65, "grad_norm": 0.9727553220334444, "learning_rate": 2.8938933077734765e-06, "loss": 0.162, "step": 4815 }, { "epoch": 0.65, "grad_norm": 0.7935084646691422, "learning_rate": 2.8919129079959474e-06, "loss": 0.1669, "step": 4816 }, { "epoch": 0.65, "grad_norm": 1.0993067399361633, "learning_rate": 2.889932910349986e-06, "loss": 0.2098, "step": 4817 }, { "epoch": 0.65, "grad_norm": 1.2947943912878046, "learning_rate": 2.8879533152132904e-06, "loss": 0.2097, "step": 4818 }, { "epoch": 0.65, "grad_norm": 0.8688280512115046, "learning_rate": 2.8859741229634787e-06, "loss": 0.127, "step": 4819 }, { "epoch": 0.65, "grad_norm": 0.6402263096526185, "learning_rate": 2.8839953339781013e-06, "loss": 0.138, "step": 4820 }, { "epoch": 0.65, "grad_norm": 1.0807794460482705, "learning_rate": 2.8820169486346193e-06, "loss": 0.2171, "step": 4821 }, { "epoch": 0.65, "grad_norm": 0.9710305423306551, "learning_rate": 2.8800389673104244e-06, "loss": 0.2059, "step": 4822 }, { "epoch": 0.65, "grad_norm": 1.0157273312425896, "learning_rate": 2.8780613903828305e-06, "loss": 0.1486, "step": 4823 }, { "epoch": 0.65, "grad_norm": 1.0965099272987313, "learning_rate": 2.876084218229072e-06, "loss": 0.2052, "step": 4824 }, { "epoch": 0.65, "grad_norm": 0.836446702287726, "learning_rate": 2.874107451226309e-06, "loss": 0.1559, "step": 4825 }, { "epoch": 0.65, "grad_norm": 0.878137083378054, "learning_rate": 2.872131089751623e-06, "loss": 0.1671, "step": 4826 }, { "epoch": 0.65, "grad_norm": 1.027135409409414, "learning_rate": 2.8701551341820165e-06, "loss": 0.1768, "step": 4827 }, { "epoch": 0.65, "grad_norm": 1.0134466071145143, "learning_rate": 2.8681795848944185e-06, "loss": 0.1507, "step": 4828 }, { "epoch": 0.65, "grad_norm": 1.23006419872225, "learning_rate": 2.8662044422656753e-06, "loss": 0.212, "step": 4829 }, { "epoch": 0.65, "grad_norm": 0.827899827946285, "learning_rate": 2.8642297066725577e-06, "loss": 0.1363, "step": 4830 }, { "epoch": 0.65, "grad_norm": 0.9714034647235003, "learning_rate": 2.8622553784917647e-06, "loss": 0.2037, "step": 4831 }, { "epoch": 0.65, "grad_norm": 0.9299197230262347, "learning_rate": 2.860281458099908e-06, "loss": 0.1842, "step": 4832 }, { "epoch": 0.65, "grad_norm": 0.6460089547109259, "learning_rate": 2.8583079458735287e-06, "loss": 0.1189, "step": 4833 }, { "epoch": 0.65, "grad_norm": 1.172698705510818, "learning_rate": 2.8563348421890858e-06, "loss": 0.2359, "step": 4834 }, { "epoch": 0.65, "grad_norm": 1.0033515192590712, "learning_rate": 2.8543621474229623e-06, "loss": 0.1835, "step": 4835 }, { "epoch": 0.65, "grad_norm": 0.8584835187902697, "learning_rate": 2.8523898619514623e-06, "loss": 0.2032, "step": 4836 }, { "epoch": 0.65, "grad_norm": 0.8348646256394, "learning_rate": 2.8504179861508143e-06, "loss": 0.1586, "step": 4837 }, { "epoch": 0.65, "grad_norm": 0.9005735993715169, "learning_rate": 2.8484465203971635e-06, "loss": 0.1805, "step": 4838 }, { "epoch": 0.65, "grad_norm": 0.9019604243017688, "learning_rate": 2.8464754650665815e-06, "loss": 0.1814, "step": 4839 }, { "epoch": 0.65, "grad_norm": 0.7632945770478157, "learning_rate": 2.84450482053506e-06, "loss": 0.1161, "step": 4840 }, { "epoch": 0.65, "grad_norm": 0.8225135741914956, "learning_rate": 2.84253458717851e-06, "loss": 0.1311, "step": 4841 }, { "epoch": 0.65, "grad_norm": 0.6683193615935838, "learning_rate": 2.840564765372773e-06, "loss": 0.1092, "step": 4842 }, { "epoch": 0.65, "grad_norm": 0.8458115034340621, "learning_rate": 2.838595355493601e-06, "loss": 0.1536, "step": 4843 }, { "epoch": 0.65, "grad_norm": 1.1178934794828033, "learning_rate": 2.836626357916672e-06, "loss": 0.2272, "step": 4844 }, { "epoch": 0.65, "grad_norm": 0.8472580259674335, "learning_rate": 2.8346577730175867e-06, "loss": 0.1356, "step": 4845 }, { "epoch": 0.65, "grad_norm": 1.0105084846074728, "learning_rate": 2.8326896011718654e-06, "loss": 0.1956, "step": 4846 }, { "epoch": 0.65, "grad_norm": 0.7959298869524345, "learning_rate": 2.8307218427549487e-06, "loss": 0.1517, "step": 4847 }, { "epoch": 0.65, "grad_norm": 0.9511320940520632, "learning_rate": 2.828754498142201e-06, "loss": 0.2027, "step": 4848 }, { "epoch": 0.65, "grad_norm": 0.7518357207977259, "learning_rate": 2.8267875677089053e-06, "loss": 0.1668, "step": 4849 }, { "epoch": 0.65, "grad_norm": 0.9793635173183574, "learning_rate": 2.8248210518302675e-06, "loss": 0.1686, "step": 4850 }, { "epoch": 0.65, "grad_norm": 0.9146176217753562, "learning_rate": 2.822854950881413e-06, "loss": 0.1599, "step": 4851 }, { "epoch": 0.65, "grad_norm": 0.743975218666986, "learning_rate": 2.820889265237389e-06, "loss": 0.1562, "step": 4852 }, { "epoch": 0.65, "grad_norm": 1.1435401370818297, "learning_rate": 2.818923995273162e-06, "loss": 0.2269, "step": 4853 }, { "epoch": 0.65, "grad_norm": 0.8450150930946893, "learning_rate": 2.816959141363621e-06, "loss": 0.1425, "step": 4854 }, { "epoch": 0.65, "grad_norm": 0.9921950584478195, "learning_rate": 2.814994703883575e-06, "loss": 0.1814, "step": 4855 }, { "epoch": 0.65, "grad_norm": 0.7886715820051566, "learning_rate": 2.8130306832077535e-06, "loss": 0.1743, "step": 4856 }, { "epoch": 0.65, "grad_norm": 0.9107799483175354, "learning_rate": 2.8110670797108057e-06, "loss": 0.1381, "step": 4857 }, { "epoch": 0.66, "grad_norm": 0.9106638857420376, "learning_rate": 2.8091038937673034e-06, "loss": 0.1875, "step": 4858 }, { "epoch": 0.66, "grad_norm": 0.8197867054484369, "learning_rate": 2.8071411257517356e-06, "loss": 0.1699, "step": 4859 }, { "epoch": 0.66, "grad_norm": 1.1413741824611612, "learning_rate": 2.805178776038514e-06, "loss": 0.1477, "step": 4860 }, { "epoch": 0.66, "grad_norm": 1.0262320467470374, "learning_rate": 2.8032168450019703e-06, "loss": 0.1656, "step": 4861 }, { "epoch": 0.66, "grad_norm": 0.6711636723178491, "learning_rate": 2.801255333016355e-06, "loss": 0.1232, "step": 4862 }, { "epoch": 0.66, "grad_norm": 1.065653770566519, "learning_rate": 2.7992942404558404e-06, "loss": 0.1827, "step": 4863 }, { "epoch": 0.66, "grad_norm": 1.077839493639535, "learning_rate": 2.797333567694515e-06, "loss": 0.2016, "step": 4864 }, { "epoch": 0.66, "grad_norm": 1.0223102259983146, "learning_rate": 2.7953733151063948e-06, "loss": 0.1949, "step": 4865 }, { "epoch": 0.66, "grad_norm": 0.8268838621654786, "learning_rate": 2.7934134830654093e-06, "loss": 0.1819, "step": 4866 }, { "epoch": 0.66, "grad_norm": 0.5175993121355847, "learning_rate": 2.7914540719454097e-06, "loss": 0.0662, "step": 4867 }, { "epoch": 0.66, "grad_norm": 0.9211252321467628, "learning_rate": 2.7894950821201638e-06, "loss": 0.1396, "step": 4868 }, { "epoch": 0.66, "grad_norm": 0.8043077136518327, "learning_rate": 2.7875365139633647e-06, "loss": 0.1416, "step": 4869 }, { "epoch": 0.66, "grad_norm": 1.1046337176119685, "learning_rate": 2.7855783678486203e-06, "loss": 0.1707, "step": 4870 }, { "epoch": 0.66, "grad_norm": 1.092899806341238, "learning_rate": 2.7836206441494618e-06, "loss": 0.181, "step": 4871 }, { "epoch": 0.66, "grad_norm": 1.18064151717057, "learning_rate": 2.781663343239337e-06, "loss": 0.1735, "step": 4872 }, { "epoch": 0.66, "grad_norm": 0.9011248429938981, "learning_rate": 2.779706465491614e-06, "loss": 0.1834, "step": 4873 }, { "epoch": 0.66, "grad_norm": 0.9031445572440274, "learning_rate": 2.777750011279581e-06, "loss": 0.1511, "step": 4874 }, { "epoch": 0.66, "grad_norm": 0.9627888897745234, "learning_rate": 2.7757939809764413e-06, "loss": 0.1934, "step": 4875 }, { "epoch": 0.66, "grad_norm": 1.0951358067464123, "learning_rate": 2.7738383749553254e-06, "loss": 0.2187, "step": 4876 }, { "epoch": 0.66, "grad_norm": 1.0749176552623034, "learning_rate": 2.7718831935892753e-06, "loss": 0.1593, "step": 4877 }, { "epoch": 0.66, "grad_norm": 1.0028915598066523, "learning_rate": 2.7699284372512563e-06, "loss": 0.1685, "step": 4878 }, { "epoch": 0.66, "grad_norm": 0.9835473543079484, "learning_rate": 2.76797410631415e-06, "loss": 0.2003, "step": 4879 }, { "epoch": 0.66, "grad_norm": 1.3698175117798213, "learning_rate": 2.7660202011507585e-06, "loss": 0.2021, "step": 4880 }, { "epoch": 0.66, "grad_norm": 0.9747997004958198, "learning_rate": 2.7640667221338013e-06, "loss": 0.1783, "step": 4881 }, { "epoch": 0.66, "grad_norm": 0.8997395472062103, "learning_rate": 2.7621136696359186e-06, "loss": 0.1839, "step": 4882 }, { "epoch": 0.66, "grad_norm": 1.1065298608959115, "learning_rate": 2.7601610440296673e-06, "loss": 0.2226, "step": 4883 }, { "epoch": 0.66, "grad_norm": 0.7886372979369028, "learning_rate": 2.7582088456875255e-06, "loss": 0.1396, "step": 4884 }, { "epoch": 0.66, "grad_norm": 0.9154607089372749, "learning_rate": 2.756257074981885e-06, "loss": 0.1802, "step": 4885 }, { "epoch": 0.66, "grad_norm": 1.107841802877891, "learning_rate": 2.754305732285058e-06, "loss": 0.1832, "step": 4886 }, { "epoch": 0.66, "grad_norm": 0.929118655787956, "learning_rate": 2.7523548179692806e-06, "loss": 0.1594, "step": 4887 }, { "epoch": 0.66, "grad_norm": 0.8621829802577224, "learning_rate": 2.7504043324067e-06, "loss": 0.1534, "step": 4888 }, { "epoch": 0.66, "grad_norm": 0.8875096291707457, "learning_rate": 2.7484542759693846e-06, "loss": 0.143, "step": 4889 }, { "epoch": 0.66, "grad_norm": 0.7674300583388913, "learning_rate": 2.7465046490293206e-06, "loss": 0.1385, "step": 4890 }, { "epoch": 0.66, "grad_norm": 0.5662707365314065, "learning_rate": 2.744555451958413e-06, "loss": 0.11, "step": 4891 }, { "epoch": 0.66, "grad_norm": 0.930931617103319, "learning_rate": 2.7426066851284816e-06, "loss": 0.1605, "step": 4892 }, { "epoch": 0.66, "grad_norm": 0.9009581868697794, "learning_rate": 2.7406583489112694e-06, "loss": 0.1577, "step": 4893 }, { "epoch": 0.66, "grad_norm": 1.031723586198371, "learning_rate": 2.7387104436784336e-06, "loss": 0.2007, "step": 4894 }, { "epoch": 0.66, "grad_norm": 1.135196708512929, "learning_rate": 2.7367629698015486e-06, "loss": 0.2075, "step": 4895 }, { "epoch": 0.66, "grad_norm": 1.029401131757408, "learning_rate": 2.7348159276521103e-06, "loss": 0.1929, "step": 4896 }, { "epoch": 0.66, "grad_norm": 0.9617139931323638, "learning_rate": 2.7328693176015273e-06, "loss": 0.1691, "step": 4897 }, { "epoch": 0.66, "grad_norm": 1.0602676121517567, "learning_rate": 2.7309231400211306e-06, "loss": 0.1826, "step": 4898 }, { "epoch": 0.66, "grad_norm": 0.6933974124064597, "learning_rate": 2.728977395282165e-06, "loss": 0.1604, "step": 4899 }, { "epoch": 0.66, "grad_norm": 0.9394910491726669, "learning_rate": 2.727032083755795e-06, "loss": 0.1781, "step": 4900 }, { "epoch": 0.66, "grad_norm": 1.0431110133861634, "learning_rate": 2.7250872058131013e-06, "loss": 0.177, "step": 4901 }, { "epoch": 0.66, "grad_norm": 0.9125153945728175, "learning_rate": 2.7231427618250823e-06, "loss": 0.2008, "step": 4902 }, { "epoch": 0.66, "grad_norm": 1.0722480778287622, "learning_rate": 2.7211987521626536e-06, "loss": 0.2114, "step": 4903 }, { "epoch": 0.66, "grad_norm": 0.8298620236612262, "learning_rate": 2.7192551771966475e-06, "loss": 0.1323, "step": 4904 }, { "epoch": 0.66, "grad_norm": 1.1552043956475029, "learning_rate": 2.7173120372978144e-06, "loss": 0.2187, "step": 4905 }, { "epoch": 0.66, "grad_norm": 0.7392139932655177, "learning_rate": 2.7153693328368207e-06, "loss": 0.1515, "step": 4906 }, { "epoch": 0.66, "grad_norm": 1.0431608091097093, "learning_rate": 2.713427064184251e-06, "loss": 0.202, "step": 4907 }, { "epoch": 0.66, "grad_norm": 0.9397554828447723, "learning_rate": 2.7114852317106023e-06, "loss": 0.1523, "step": 4908 }, { "epoch": 0.66, "grad_norm": 0.8085887811788575, "learning_rate": 2.7095438357862972e-06, "loss": 0.1637, "step": 4909 }, { "epoch": 0.66, "grad_norm": 0.7822490232194957, "learning_rate": 2.7076028767816674e-06, "loss": 0.1161, "step": 4910 }, { "epoch": 0.66, "grad_norm": 0.9753560693573874, "learning_rate": 2.705662355066964e-06, "loss": 0.2163, "step": 4911 }, { "epoch": 0.66, "grad_norm": 1.0290225393703578, "learning_rate": 2.7037222710123545e-06, "loss": 0.1877, "step": 4912 }, { "epoch": 0.66, "grad_norm": 0.9038007296892648, "learning_rate": 2.7017826249879244e-06, "loss": 0.166, "step": 4913 }, { "epoch": 0.66, "grad_norm": 1.1601220851744785, "learning_rate": 2.6998434173636687e-06, "loss": 0.1974, "step": 4914 }, { "epoch": 0.66, "grad_norm": 1.1724047181894401, "learning_rate": 2.697904648509509e-06, "loss": 0.191, "step": 4915 }, { "epoch": 0.66, "grad_norm": 0.8597624695773306, "learning_rate": 2.695966318795276e-06, "loss": 0.1394, "step": 4916 }, { "epoch": 0.66, "grad_norm": 0.9823322978982176, "learning_rate": 2.6940284285907205e-06, "loss": 0.1795, "step": 4917 }, { "epoch": 0.66, "grad_norm": 0.7545007544205657, "learning_rate": 2.692090978265506e-06, "loss": 0.146, "step": 4918 }, { "epoch": 0.66, "grad_norm": 0.9219104275125721, "learning_rate": 2.6901539681892153e-06, "loss": 0.1469, "step": 4919 }, { "epoch": 0.66, "grad_norm": 1.062550925235499, "learning_rate": 2.688217398731344e-06, "loss": 0.1834, "step": 4920 }, { "epoch": 0.66, "grad_norm": 0.9798978509939383, "learning_rate": 2.686281270261309e-06, "loss": 0.2156, "step": 4921 }, { "epoch": 0.66, "grad_norm": 1.016589851735975, "learning_rate": 2.6843455831484376e-06, "loss": 0.1714, "step": 4922 }, { "epoch": 0.66, "grad_norm": 1.1261135174922774, "learning_rate": 2.682410337761975e-06, "loss": 0.1907, "step": 4923 }, { "epoch": 0.66, "grad_norm": 0.8303924231503154, "learning_rate": 2.6804755344710824e-06, "loss": 0.1219, "step": 4924 }, { "epoch": 0.66, "grad_norm": 0.8432149557552374, "learning_rate": 2.6785411736448365e-06, "loss": 0.1402, "step": 4925 }, { "epoch": 0.66, "grad_norm": 0.8295936771079949, "learning_rate": 2.676607255652228e-06, "loss": 0.133, "step": 4926 }, { "epoch": 0.66, "grad_norm": 1.0205283687895117, "learning_rate": 2.674673780862167e-06, "loss": 0.1811, "step": 4927 }, { "epoch": 0.66, "grad_norm": 1.0886087559976536, "learning_rate": 2.672740749643474e-06, "loss": 0.2034, "step": 4928 }, { "epoch": 0.66, "grad_norm": 0.953343014542083, "learning_rate": 2.6708081623648896e-06, "loss": 0.1876, "step": 4929 }, { "epoch": 0.66, "grad_norm": 1.0740419365380711, "learning_rate": 2.668876019395066e-06, "loss": 0.1954, "step": 4930 }, { "epoch": 0.66, "grad_norm": 0.8844083826647653, "learning_rate": 2.6669443211025736e-06, "loss": 0.171, "step": 4931 }, { "epoch": 0.67, "grad_norm": 1.0259010949252416, "learning_rate": 2.6650130678558963e-06, "loss": 0.1595, "step": 4932 }, { "epoch": 0.67, "grad_norm": 0.7414898813220253, "learning_rate": 2.663082260023432e-06, "loss": 0.1286, "step": 4933 }, { "epoch": 0.67, "grad_norm": 0.8704693791231196, "learning_rate": 2.6611518979734974e-06, "loss": 0.1466, "step": 4934 }, { "epoch": 0.67, "grad_norm": 1.0221241635082021, "learning_rate": 2.6592219820743197e-06, "loss": 0.1724, "step": 4935 }, { "epoch": 0.67, "grad_norm": 0.9877515163929151, "learning_rate": 2.657292512694045e-06, "loss": 0.1456, "step": 4936 }, { "epoch": 0.67, "grad_norm": 0.955458565765673, "learning_rate": 2.6553634902007306e-06, "loss": 0.1673, "step": 4937 }, { "epoch": 0.67, "grad_norm": 0.5987937095392742, "learning_rate": 2.6534349149623523e-06, "loss": 0.1129, "step": 4938 }, { "epoch": 0.67, "grad_norm": 1.0849952744452915, "learning_rate": 2.651506787346797e-06, "loss": 0.1966, "step": 4939 }, { "epoch": 0.67, "grad_norm": 0.8270842317955355, "learning_rate": 2.649579107721868e-06, "loss": 0.1315, "step": 4940 }, { "epoch": 0.67, "grad_norm": 0.8372803192993726, "learning_rate": 2.647651876455284e-06, "loss": 0.1538, "step": 4941 }, { "epoch": 0.67, "grad_norm": 1.4000311811375346, "learning_rate": 2.6457250939146734e-06, "loss": 0.2651, "step": 4942 }, { "epoch": 0.67, "grad_norm": 1.018041156863994, "learning_rate": 2.6437987604675903e-06, "loss": 0.1981, "step": 4943 }, { "epoch": 0.67, "grad_norm": 0.9630017397462728, "learning_rate": 2.641872876481488e-06, "loss": 0.1594, "step": 4944 }, { "epoch": 0.67, "grad_norm": 0.9461052659333563, "learning_rate": 2.639947442323746e-06, "loss": 0.1635, "step": 4945 }, { "epoch": 0.67, "grad_norm": 0.9196950444466561, "learning_rate": 2.6380224583616517e-06, "loss": 0.1818, "step": 4946 }, { "epoch": 0.67, "grad_norm": 1.0595362492378826, "learning_rate": 2.6360979249624087e-06, "loss": 0.2316, "step": 4947 }, { "epoch": 0.67, "grad_norm": 0.9284966626606888, "learning_rate": 2.6341738424931353e-06, "loss": 0.1385, "step": 4948 }, { "epoch": 0.67, "grad_norm": 0.9616744996666852, "learning_rate": 2.6322502113208613e-06, "loss": 0.1703, "step": 4949 }, { "epoch": 0.67, "grad_norm": 1.0189651750590845, "learning_rate": 2.630327031812534e-06, "loss": 0.1561, "step": 4950 }, { "epoch": 0.67, "grad_norm": 0.8362343989100496, "learning_rate": 2.628404304335012e-06, "loss": 0.1565, "step": 4951 }, { "epoch": 0.67, "grad_norm": 0.9298926465590625, "learning_rate": 2.6264820292550684e-06, "loss": 0.1864, "step": 4952 }, { "epoch": 0.67, "grad_norm": 0.8428261741174568, "learning_rate": 2.624560206939387e-06, "loss": 0.1039, "step": 4953 }, { "epoch": 0.67, "grad_norm": 0.8629670862660379, "learning_rate": 2.6226388377545732e-06, "loss": 0.1936, "step": 4954 }, { "epoch": 0.67, "grad_norm": 0.9176221248905747, "learning_rate": 2.6207179220671385e-06, "loss": 0.1785, "step": 4955 }, { "epoch": 0.67, "grad_norm": 1.1263552613312258, "learning_rate": 2.61879746024351e-06, "loss": 0.205, "step": 4956 }, { "epoch": 0.67, "grad_norm": 0.7201854280511801, "learning_rate": 2.6168774526500294e-06, "loss": 0.1117, "step": 4957 }, { "epoch": 0.67, "grad_norm": 0.8978325583679906, "learning_rate": 2.6149578996529495e-06, "loss": 0.1642, "step": 4958 }, { "epoch": 0.67, "grad_norm": 1.107118698110606, "learning_rate": 2.613038801618439e-06, "loss": 0.179, "step": 4959 }, { "epoch": 0.67, "grad_norm": 0.7684805110304622, "learning_rate": 2.611120158912579e-06, "loss": 0.1603, "step": 4960 }, { "epoch": 0.67, "grad_norm": 1.0038605700813508, "learning_rate": 2.6092019719013616e-06, "loss": 0.1705, "step": 4961 }, { "epoch": 0.67, "grad_norm": 0.856933148338922, "learning_rate": 2.6072842409506937e-06, "loss": 0.1635, "step": 4962 }, { "epoch": 0.67, "grad_norm": 0.7265176829528964, "learning_rate": 2.605366966426396e-06, "loss": 0.1668, "step": 4963 }, { "epoch": 0.67, "grad_norm": 1.2653035807144446, "learning_rate": 2.603450148694201e-06, "loss": 0.1927, "step": 4964 }, { "epoch": 0.67, "grad_norm": 0.6966857026458907, "learning_rate": 2.601533788119753e-06, "loss": 0.1285, "step": 4965 }, { "epoch": 0.67, "grad_norm": 1.0300545173263491, "learning_rate": 2.599617885068614e-06, "loss": 0.1441, "step": 4966 }, { "epoch": 0.67, "grad_norm": 1.1903023341721095, "learning_rate": 2.597702439906252e-06, "loss": 0.2118, "step": 4967 }, { "epoch": 0.67, "grad_norm": 1.1070020978735333, "learning_rate": 2.595787452998053e-06, "loss": 0.2286, "step": 4968 }, { "epoch": 0.67, "grad_norm": 1.015053682614415, "learning_rate": 2.5938729247093125e-06, "loss": 0.2131, "step": 4969 }, { "epoch": 0.67, "grad_norm": 1.1977043978898612, "learning_rate": 2.5919588554052395e-06, "loss": 0.2415, "step": 4970 }, { "epoch": 0.67, "grad_norm": 1.005005658363005, "learning_rate": 2.590045245450955e-06, "loss": 0.1832, "step": 4971 }, { "epoch": 0.67, "grad_norm": 0.9304965662605854, "learning_rate": 2.588132095211493e-06, "loss": 0.1844, "step": 4972 }, { "epoch": 0.67, "grad_norm": 0.8391636061721983, "learning_rate": 2.5862194050517998e-06, "loss": 0.124, "step": 4973 }, { "epoch": 0.67, "grad_norm": 0.7993799134104876, "learning_rate": 2.584307175336734e-06, "loss": 0.1349, "step": 4974 }, { "epoch": 0.67, "grad_norm": 0.7694278462386184, "learning_rate": 2.5823954064310653e-06, "loss": 0.1108, "step": 4975 }, { "epoch": 0.67, "grad_norm": 0.883512382030539, "learning_rate": 2.5804840986994762e-06, "loss": 0.173, "step": 4976 }, { "epoch": 0.67, "grad_norm": 1.0466132217474229, "learning_rate": 2.5785732525065623e-06, "loss": 0.1983, "step": 4977 }, { "epoch": 0.67, "grad_norm": 0.9995683366197746, "learning_rate": 2.576662868216829e-06, "loss": 0.1913, "step": 4978 }, { "epoch": 0.67, "grad_norm": 0.942675044402386, "learning_rate": 2.5747529461946953e-06, "loss": 0.1361, "step": 4979 }, { "epoch": 0.67, "grad_norm": 1.0678490325454144, "learning_rate": 2.5728434868044917e-06, "loss": 0.2034, "step": 4980 }, { "epoch": 0.67, "grad_norm": 0.855711788036404, "learning_rate": 2.5709344904104605e-06, "loss": 0.1178, "step": 4981 }, { "epoch": 0.67, "grad_norm": 0.8802276266636793, "learning_rate": 2.569025957376754e-06, "loss": 0.1297, "step": 4982 }, { "epoch": 0.67, "grad_norm": 0.9665482842784512, "learning_rate": 2.567117888067439e-06, "loss": 0.1771, "step": 4983 }, { "epoch": 0.67, "grad_norm": 0.8320550189394075, "learning_rate": 2.565210282846492e-06, "loss": 0.1531, "step": 4984 }, { "epoch": 0.67, "grad_norm": 0.967760833103234, "learning_rate": 2.5633031420778002e-06, "loss": 0.1863, "step": 4985 }, { "epoch": 0.67, "grad_norm": 1.0895959805449746, "learning_rate": 2.5613964661251646e-06, "loss": 0.2192, "step": 4986 }, { "epoch": 0.67, "grad_norm": 1.0655185301352366, "learning_rate": 2.5594902553522943e-06, "loss": 0.165, "step": 4987 }, { "epoch": 0.67, "grad_norm": 0.7502761992193296, "learning_rate": 2.557584510122815e-06, "loss": 0.1461, "step": 4988 }, { "epoch": 0.67, "grad_norm": 1.0520375055582707, "learning_rate": 2.5556792308002583e-06, "loss": 0.2106, "step": 4989 }, { "epoch": 0.67, "grad_norm": 0.5947797109108266, "learning_rate": 2.5537744177480706e-06, "loss": 0.1071, "step": 4990 }, { "epoch": 0.67, "grad_norm": 0.9902984039287009, "learning_rate": 2.5518700713296043e-06, "loss": 0.1804, "step": 4991 }, { "epoch": 0.67, "grad_norm": 0.6977344637516543, "learning_rate": 2.5499661919081277e-06, "loss": 0.1192, "step": 4992 }, { "epoch": 0.67, "grad_norm": 0.8406358734908599, "learning_rate": 2.548062779846818e-06, "loss": 0.1273, "step": 4993 }, { "epoch": 0.67, "grad_norm": 1.0328463761284725, "learning_rate": 2.546159835508765e-06, "loss": 0.1938, "step": 4994 }, { "epoch": 0.67, "grad_norm": 1.0735827552012194, "learning_rate": 2.5442573592569664e-06, "loss": 0.2028, "step": 4995 }, { "epoch": 0.67, "grad_norm": 1.3058994325826796, "learning_rate": 2.542355351454333e-06, "loss": 0.24, "step": 4996 }, { "epoch": 0.67, "grad_norm": 0.8918427751451438, "learning_rate": 2.540453812463685e-06, "loss": 0.1391, "step": 4997 }, { "epoch": 0.67, "grad_norm": 0.9503791180877594, "learning_rate": 2.538552742647752e-06, "loss": 0.1535, "step": 4998 }, { "epoch": 0.67, "grad_norm": 1.0063472982630326, "learning_rate": 2.5366521423691793e-06, "loss": 0.1726, "step": 4999 }, { "epoch": 0.67, "grad_norm": 1.1351249585757697, "learning_rate": 2.534752011990517e-06, "loss": 0.1762, "step": 5000 }, { "epoch": 0.67, "grad_norm": 1.0802731287496359, "learning_rate": 2.532852351874227e-06, "loss": 0.1432, "step": 5001 }, { "epoch": 0.67, "grad_norm": 0.9439284234343366, "learning_rate": 2.5309531623826833e-06, "loss": 0.1404, "step": 5002 }, { "epoch": 0.67, "grad_norm": 0.8028148458528475, "learning_rate": 2.529054443878168e-06, "loss": 0.1589, "step": 5003 }, { "epoch": 0.67, "grad_norm": 0.5281968179123187, "learning_rate": 2.527156196722874e-06, "loss": 0.1244, "step": 5004 }, { "epoch": 0.67, "grad_norm": 1.169659722048614, "learning_rate": 2.5252584212789055e-06, "loss": 0.1949, "step": 5005 }, { "epoch": 0.68, "grad_norm": 1.0478902265960715, "learning_rate": 2.5233611179082753e-06, "loss": 0.1842, "step": 5006 }, { "epoch": 0.68, "grad_norm": 0.9589867573984824, "learning_rate": 2.5214642869729073e-06, "loss": 0.1416, "step": 5007 }, { "epoch": 0.68, "grad_norm": 1.0676009126138715, "learning_rate": 2.5195679288346325e-06, "loss": 0.1874, "step": 5008 }, { "epoch": 0.68, "grad_norm": 1.2304802480481578, "learning_rate": 2.517672043855193e-06, "loss": 0.1987, "step": 5009 }, { "epoch": 0.68, "grad_norm": 0.8380452816935902, "learning_rate": 2.515776632396245e-06, "loss": 0.1742, "step": 5010 }, { "epoch": 0.68, "grad_norm": 1.1408834865088873, "learning_rate": 2.5138816948193494e-06, "loss": 0.1813, "step": 5011 }, { "epoch": 0.68, "grad_norm": 0.7098328813037635, "learning_rate": 2.5119872314859777e-06, "loss": 0.1192, "step": 5012 }, { "epoch": 0.68, "grad_norm": 1.1036324943480669, "learning_rate": 2.510093242757512e-06, "loss": 0.1883, "step": 5013 }, { "epoch": 0.68, "grad_norm": 1.128462133429825, "learning_rate": 2.508199728995242e-06, "loss": 0.2176, "step": 5014 }, { "epoch": 0.68, "grad_norm": 1.1422569404373972, "learning_rate": 2.506306690560368e-06, "loss": 0.2189, "step": 5015 }, { "epoch": 0.68, "grad_norm": 1.0728098403700177, "learning_rate": 2.5044141278140014e-06, "loss": 0.1842, "step": 5016 }, { "epoch": 0.68, "grad_norm": 0.7970595932776317, "learning_rate": 2.50252204111716e-06, "loss": 0.1314, "step": 5017 }, { "epoch": 0.68, "grad_norm": 0.6157326849927425, "learning_rate": 2.500630430830771e-06, "loss": 0.1227, "step": 5018 }, { "epoch": 0.68, "grad_norm": 0.8677606247047926, "learning_rate": 2.4987392973156727e-06, "loss": 0.1126, "step": 5019 }, { "epoch": 0.68, "grad_norm": 0.944757254390419, "learning_rate": 2.496848640932611e-06, "loss": 0.1893, "step": 5020 }, { "epoch": 0.68, "grad_norm": 0.9382244652714963, "learning_rate": 2.494958462042241e-06, "loss": 0.1317, "step": 5021 }, { "epoch": 0.68, "grad_norm": 1.0756587692628046, "learning_rate": 2.4930687610051266e-06, "loss": 0.2128, "step": 5022 }, { "epoch": 0.68, "grad_norm": 0.8470497365216256, "learning_rate": 2.4911795381817416e-06, "loss": 0.1603, "step": 5023 }, { "epoch": 0.68, "grad_norm": 0.8475764923468746, "learning_rate": 2.4892907939324672e-06, "loss": 0.1472, "step": 5024 }, { "epoch": 0.68, "grad_norm": 0.7460335721487082, "learning_rate": 2.487402528617594e-06, "loss": 0.1236, "step": 5025 }, { "epoch": 0.68, "grad_norm": 1.0130219775169587, "learning_rate": 2.4855147425973214e-06, "loss": 0.1671, "step": 5026 }, { "epoch": 0.68, "grad_norm": 0.958345586285766, "learning_rate": 2.4836274362317565e-06, "loss": 0.1585, "step": 5027 }, { "epoch": 0.68, "grad_norm": 1.1400144573458444, "learning_rate": 2.4817406098809157e-06, "loss": 0.1961, "step": 5028 }, { "epoch": 0.68, "grad_norm": 0.9525985072456087, "learning_rate": 2.4798542639047235e-06, "loss": 0.1885, "step": 5029 }, { "epoch": 0.68, "grad_norm": 1.1542850095548496, "learning_rate": 2.4779683986630127e-06, "loss": 0.2116, "step": 5030 }, { "epoch": 0.68, "grad_norm": 0.7334920669392312, "learning_rate": 2.4760830145155257e-06, "loss": 0.1197, "step": 5031 }, { "epoch": 0.68, "grad_norm": 0.957624873994848, "learning_rate": 2.4741981118219087e-06, "loss": 0.1472, "step": 5032 }, { "epoch": 0.68, "grad_norm": 1.0872731762885373, "learning_rate": 2.472313690941724e-06, "loss": 0.1945, "step": 5033 }, { "epoch": 0.68, "grad_norm": 0.6648534687223439, "learning_rate": 2.470429752234435e-06, "loss": 0.0758, "step": 5034 }, { "epoch": 0.68, "grad_norm": 0.6854211110486883, "learning_rate": 2.468546296059416e-06, "loss": 0.0992, "step": 5035 }, { "epoch": 0.68, "grad_norm": 1.113776009658678, "learning_rate": 2.4666633227759505e-06, "loss": 0.2038, "step": 5036 }, { "epoch": 0.68, "grad_norm": 1.0338249463492504, "learning_rate": 2.4647808327432236e-06, "loss": 0.1857, "step": 5037 }, { "epoch": 0.68, "grad_norm": 1.0257189585816513, "learning_rate": 2.4628988263203355e-06, "loss": 0.2006, "step": 5038 }, { "epoch": 0.68, "grad_norm": 0.7666094338756401, "learning_rate": 2.4610173038662915e-06, "loss": 0.1049, "step": 5039 }, { "epoch": 0.68, "grad_norm": 0.9324259649161197, "learning_rate": 2.4591362657400036e-06, "loss": 0.1477, "step": 5040 }, { "epoch": 0.68, "grad_norm": 0.8034754333922486, "learning_rate": 2.4572557123002927e-06, "loss": 0.1381, "step": 5041 }, { "epoch": 0.68, "grad_norm": 0.5314159206350622, "learning_rate": 2.4553756439058874e-06, "loss": 0.0862, "step": 5042 }, { "epoch": 0.68, "grad_norm": 1.245609959179011, "learning_rate": 2.4534960609154203e-06, "loss": 0.1989, "step": 5043 }, { "epoch": 0.68, "grad_norm": 1.1235561243795296, "learning_rate": 2.451616963687438e-06, "loss": 0.1475, "step": 5044 }, { "epoch": 0.68, "grad_norm": 0.9337526265132309, "learning_rate": 2.44973835258039e-06, "loss": 0.1726, "step": 5045 }, { "epoch": 0.68, "grad_norm": 0.8488738986074333, "learning_rate": 2.447860227952633e-06, "loss": 0.1577, "step": 5046 }, { "epoch": 0.68, "grad_norm": 0.9864744172281046, "learning_rate": 2.4459825901624322e-06, "loss": 0.1684, "step": 5047 }, { "epoch": 0.68, "grad_norm": 0.8441573131182548, "learning_rate": 2.444105439567959e-06, "loss": 0.1527, "step": 5048 }, { "epoch": 0.68, "grad_norm": 0.9585103022467782, "learning_rate": 2.442228776527292e-06, "loss": 0.1503, "step": 5049 }, { "epoch": 0.68, "grad_norm": 0.7500707487113747, "learning_rate": 2.440352601398418e-06, "loss": 0.1569, "step": 5050 }, { "epoch": 0.68, "grad_norm": 1.0539816389128671, "learning_rate": 2.438476914539229e-06, "loss": 0.1937, "step": 5051 }, { "epoch": 0.68, "grad_norm": 0.7516059262346256, "learning_rate": 2.436601716307525e-06, "loss": 0.123, "step": 5052 }, { "epoch": 0.68, "grad_norm": 1.0459280020756099, "learning_rate": 2.434727007061012e-06, "loss": 0.1714, "step": 5053 }, { "epoch": 0.68, "grad_norm": 0.8251990321510023, "learning_rate": 2.432852787157305e-06, "loss": 0.1439, "step": 5054 }, { "epoch": 0.68, "grad_norm": 1.1241683765097508, "learning_rate": 2.430979056953922e-06, "loss": 0.1948, "step": 5055 }, { "epoch": 0.68, "grad_norm": 1.1054064016672984, "learning_rate": 2.4291058168082898e-06, "loss": 0.2003, "step": 5056 }, { "epoch": 0.68, "grad_norm": 1.02437708225197, "learning_rate": 2.4272330670777415e-06, "loss": 0.1817, "step": 5057 }, { "epoch": 0.68, "grad_norm": 0.7913649350936726, "learning_rate": 2.4253608081195166e-06, "loss": 0.1072, "step": 5058 }, { "epoch": 0.68, "grad_norm": 0.8043152485015671, "learning_rate": 2.4234890402907612e-06, "loss": 0.1307, "step": 5059 }, { "epoch": 0.68, "grad_norm": 1.1672378412474331, "learning_rate": 2.4216177639485265e-06, "loss": 0.2007, "step": 5060 }, { "epoch": 0.68, "grad_norm": 0.6863391410575154, "learning_rate": 2.4197469794497717e-06, "loss": 0.0844, "step": 5061 }, { "epoch": 0.68, "grad_norm": 1.2061577466716975, "learning_rate": 2.417876687151361e-06, "loss": 0.1776, "step": 5062 }, { "epoch": 0.68, "grad_norm": 0.9522119257465232, "learning_rate": 2.416006887410065e-06, "loss": 0.1797, "step": 5063 }, { "epoch": 0.68, "grad_norm": 1.0740371359176373, "learning_rate": 2.4141375805825607e-06, "loss": 0.1731, "step": 5064 }, { "epoch": 0.68, "grad_norm": 1.1729497481940145, "learning_rate": 2.41226876702543e-06, "loss": 0.1946, "step": 5065 }, { "epoch": 0.68, "grad_norm": 1.1390606680093514, "learning_rate": 2.4104004470951623e-06, "loss": 0.1637, "step": 5066 }, { "epoch": 0.68, "grad_norm": 1.0118070548094733, "learning_rate": 2.408532621148152e-06, "loss": 0.1548, "step": 5067 }, { "epoch": 0.68, "grad_norm": 0.8710333982898321, "learning_rate": 2.4066652895406983e-06, "loss": 0.1437, "step": 5068 }, { "epoch": 0.68, "grad_norm": 1.2274925460234825, "learning_rate": 2.404798452629008e-06, "loss": 0.2016, "step": 5069 }, { "epoch": 0.68, "grad_norm": 0.9341683472655709, "learning_rate": 2.402932110769192e-06, "loss": 0.1582, "step": 5070 }, { "epoch": 0.68, "grad_norm": 0.944447910789672, "learning_rate": 2.401066264317268e-06, "loss": 0.1704, "step": 5071 }, { "epoch": 0.68, "grad_norm": 1.0436935554043743, "learning_rate": 2.399200913629158e-06, "loss": 0.1886, "step": 5072 }, { "epoch": 0.68, "grad_norm": 1.0749504191875847, "learning_rate": 2.3973360590606903e-06, "loss": 0.1743, "step": 5073 }, { "epoch": 0.68, "grad_norm": 0.8572239529721192, "learning_rate": 2.3954717009675984e-06, "loss": 0.1775, "step": 5074 }, { "epoch": 0.68, "grad_norm": 1.0766307924036584, "learning_rate": 2.39360783970552e-06, "loss": 0.1828, "step": 5075 }, { "epoch": 0.68, "grad_norm": 1.0078245099656848, "learning_rate": 2.3917444756300002e-06, "loss": 0.1658, "step": 5076 }, { "epoch": 0.68, "grad_norm": 0.727297567207007, "learning_rate": 2.389881609096485e-06, "loss": 0.1136, "step": 5077 }, { "epoch": 0.68, "grad_norm": 0.8621710283224906, "learning_rate": 2.3880192404603338e-06, "loss": 0.1375, "step": 5078 }, { "epoch": 0.68, "grad_norm": 1.0328353484135506, "learning_rate": 2.386157370076802e-06, "loss": 0.1685, "step": 5079 }, { "epoch": 0.69, "grad_norm": 0.8750360978279925, "learning_rate": 2.3842959983010557e-06, "loss": 0.1646, "step": 5080 }, { "epoch": 0.69, "grad_norm": 1.0475545424155337, "learning_rate": 2.3824351254881617e-06, "loss": 0.172, "step": 5081 }, { "epoch": 0.69, "grad_norm": 1.0988304466283993, "learning_rate": 2.3805747519930954e-06, "loss": 0.2305, "step": 5082 }, { "epoch": 0.69, "grad_norm": 1.1981055221316899, "learning_rate": 2.3787148781707374e-06, "loss": 0.2172, "step": 5083 }, { "epoch": 0.69, "grad_norm": 1.1533842644206684, "learning_rate": 2.3768555043758662e-06, "loss": 0.2248, "step": 5084 }, { "epoch": 0.69, "grad_norm": 0.9494068398462501, "learning_rate": 2.3749966309631717e-06, "loss": 0.1383, "step": 5085 }, { "epoch": 0.69, "grad_norm": 0.7900901271151252, "learning_rate": 2.373138258287247e-06, "loss": 0.1725, "step": 5086 }, { "epoch": 0.69, "grad_norm": 0.9320834329748456, "learning_rate": 2.371280386702588e-06, "loss": 0.1947, "step": 5087 }, { "epoch": 0.69, "grad_norm": 0.9406886576628941, "learning_rate": 2.369423016563595e-06, "loss": 0.174, "step": 5088 }, { "epoch": 0.69, "grad_norm": 0.8778789621198979, "learning_rate": 2.3675661482245773e-06, "loss": 0.1933, "step": 5089 }, { "epoch": 0.69, "grad_norm": 0.7831283934332902, "learning_rate": 2.3657097820397417e-06, "loss": 0.1302, "step": 5090 }, { "epoch": 0.69, "grad_norm": 0.922131107060234, "learning_rate": 2.363853918363204e-06, "loss": 0.1378, "step": 5091 }, { "epoch": 0.69, "grad_norm": 0.7248989588391285, "learning_rate": 2.361998557548982e-06, "loss": 0.1274, "step": 5092 }, { "epoch": 0.69, "grad_norm": 0.7529272475357218, "learning_rate": 2.360143699950998e-06, "loss": 0.1358, "step": 5093 }, { "epoch": 0.69, "grad_norm": 1.0069107050970092, "learning_rate": 2.3582893459230776e-06, "loss": 0.1777, "step": 5094 }, { "epoch": 0.69, "grad_norm": 0.9823774865426554, "learning_rate": 2.3564354958189522e-06, "loss": 0.1479, "step": 5095 }, { "epoch": 0.69, "grad_norm": 0.7986351648939432, "learning_rate": 2.3545821499922557e-06, "loss": 0.1594, "step": 5096 }, { "epoch": 0.69, "grad_norm": 0.9582672122822573, "learning_rate": 2.352729308796526e-06, "loss": 0.1795, "step": 5097 }, { "epoch": 0.69, "grad_norm": 0.8787777407716979, "learning_rate": 2.3508769725852038e-06, "loss": 0.1469, "step": 5098 }, { "epoch": 0.69, "grad_norm": 0.9471124745118521, "learning_rate": 2.349025141711636e-06, "loss": 0.1533, "step": 5099 }, { "epoch": 0.69, "grad_norm": 0.5429932397042518, "learning_rate": 2.3471738165290712e-06, "loss": 0.1189, "step": 5100 }, { "epoch": 0.69, "grad_norm": 0.8266176305934444, "learning_rate": 2.3453229973906614e-06, "loss": 0.1669, "step": 5101 }, { "epoch": 0.69, "grad_norm": 0.9925605371962564, "learning_rate": 2.343472684649464e-06, "loss": 0.1771, "step": 5102 }, { "epoch": 0.69, "grad_norm": 1.066525959093927, "learning_rate": 2.341622878658436e-06, "loss": 0.1864, "step": 5103 }, { "epoch": 0.69, "grad_norm": 1.1210195352494572, "learning_rate": 2.3397735797704425e-06, "loss": 0.2111, "step": 5104 }, { "epoch": 0.69, "grad_norm": 0.7813281589620362, "learning_rate": 2.3379247883382476e-06, "loss": 0.1306, "step": 5105 }, { "epoch": 0.69, "grad_norm": 1.2977392793612965, "learning_rate": 2.336076504714522e-06, "loss": 0.238, "step": 5106 }, { "epoch": 0.69, "grad_norm": 1.116612436157339, "learning_rate": 2.3342287292518377e-06, "loss": 0.1934, "step": 5107 }, { "epoch": 0.69, "grad_norm": 1.0947873966125956, "learning_rate": 2.3323814623026696e-06, "loss": 0.1422, "step": 5108 }, { "epoch": 0.69, "grad_norm": 1.112795829058827, "learning_rate": 2.330534704219396e-06, "loss": 0.1965, "step": 5109 }, { "epoch": 0.69, "grad_norm": 0.9438473768329463, "learning_rate": 2.328688455354297e-06, "loss": 0.1632, "step": 5110 }, { "epoch": 0.69, "grad_norm": 1.016264107438513, "learning_rate": 2.3268427160595602e-06, "loss": 0.1713, "step": 5111 }, { "epoch": 0.69, "grad_norm": 0.960341462021817, "learning_rate": 2.324997486687271e-06, "loss": 0.2051, "step": 5112 }, { "epoch": 0.69, "grad_norm": 1.0240516053417597, "learning_rate": 2.3231527675894205e-06, "loss": 0.2039, "step": 5113 }, { "epoch": 0.69, "grad_norm": 0.8157893026883156, "learning_rate": 2.321308559117898e-06, "loss": 0.1236, "step": 5114 }, { "epoch": 0.69, "grad_norm": 1.1264107303909108, "learning_rate": 2.3194648616244995e-06, "loss": 0.2153, "step": 5115 }, { "epoch": 0.69, "grad_norm": 1.3270381281993306, "learning_rate": 2.317621675460923e-06, "loss": 0.1993, "step": 5116 }, { "epoch": 0.69, "grad_norm": 1.0550879244316786, "learning_rate": 2.3157790009787686e-06, "loss": 0.1809, "step": 5117 }, { "epoch": 0.69, "grad_norm": 0.8956920463995288, "learning_rate": 2.313936838529538e-06, "loss": 0.1281, "step": 5118 }, { "epoch": 0.69, "grad_norm": 1.0533202113164644, "learning_rate": 2.312095188464637e-06, "loss": 0.161, "step": 5119 }, { "epoch": 0.69, "grad_norm": 0.8181804232562605, "learning_rate": 2.3102540511353715e-06, "loss": 0.147, "step": 5120 }, { "epoch": 0.69, "grad_norm": 1.0293074893066025, "learning_rate": 2.308413426892951e-06, "loss": 0.2033, "step": 5121 }, { "epoch": 0.69, "grad_norm": 0.9117239520495174, "learning_rate": 2.3065733160884847e-06, "loss": 0.1414, "step": 5122 }, { "epoch": 0.69, "grad_norm": 0.8623257118781961, "learning_rate": 2.3047337190729903e-06, "loss": 0.1352, "step": 5123 }, { "epoch": 0.69, "grad_norm": 0.9716478673365669, "learning_rate": 2.3028946361973803e-06, "loss": 0.1646, "step": 5124 }, { "epoch": 0.69, "grad_norm": 1.0648760026144501, "learning_rate": 2.3010560678124726e-06, "loss": 0.1888, "step": 5125 }, { "epoch": 0.69, "grad_norm": 1.1313076324853084, "learning_rate": 2.299218014268987e-06, "loss": 0.2101, "step": 5126 }, { "epoch": 0.69, "grad_norm": 0.9580658071158289, "learning_rate": 2.2973804759175423e-06, "loss": 0.1458, "step": 5127 }, { "epoch": 0.69, "grad_norm": 0.9532111303456124, "learning_rate": 2.2955434531086627e-06, "loss": 0.1652, "step": 5128 }, { "epoch": 0.69, "grad_norm": 0.8970308706461048, "learning_rate": 2.2937069461927726e-06, "loss": 0.1308, "step": 5129 }, { "epoch": 0.69, "grad_norm": 0.9917669902260314, "learning_rate": 2.2918709555201983e-06, "loss": 0.1975, "step": 5130 }, { "epoch": 0.69, "grad_norm": 0.988467556711384, "learning_rate": 2.2900354814411648e-06, "loss": 0.1574, "step": 5131 }, { "epoch": 0.69, "grad_norm": 1.1071305931698558, "learning_rate": 2.288200524305802e-06, "loss": 0.1762, "step": 5132 }, { "epoch": 0.69, "grad_norm": 0.9000778413542483, "learning_rate": 2.286366084464139e-06, "loss": 0.1156, "step": 5133 }, { "epoch": 0.69, "grad_norm": 1.1825181549487118, "learning_rate": 2.2845321622661105e-06, "loss": 0.152, "step": 5134 }, { "epoch": 0.69, "grad_norm": 1.002968732943295, "learning_rate": 2.2826987580615472e-06, "loss": 0.1534, "step": 5135 }, { "epoch": 0.69, "grad_norm": 0.9256462105237258, "learning_rate": 2.2808658722001843e-06, "loss": 0.1603, "step": 5136 }, { "epoch": 0.69, "grad_norm": 0.6438485621906281, "learning_rate": 2.2790335050316553e-06, "loss": 0.0969, "step": 5137 }, { "epoch": 0.69, "grad_norm": 1.080375375929712, "learning_rate": 2.2772016569054978e-06, "loss": 0.1707, "step": 5138 }, { "epoch": 0.69, "grad_norm": 0.8881551425631201, "learning_rate": 2.2753703281711475e-06, "loss": 0.1649, "step": 5139 }, { "epoch": 0.69, "grad_norm": 0.900662474429067, "learning_rate": 2.273539519177944e-06, "loss": 0.1746, "step": 5140 }, { "epoch": 0.69, "grad_norm": 0.7530601353410219, "learning_rate": 2.2717092302751252e-06, "loss": 0.1304, "step": 5141 }, { "epoch": 0.69, "grad_norm": 1.1665486209602873, "learning_rate": 2.2698794618118315e-06, "loss": 0.1836, "step": 5142 }, { "epoch": 0.69, "grad_norm": 0.5819622603185716, "learning_rate": 2.2680502141371024e-06, "loss": 0.1387, "step": 5143 }, { "epoch": 0.69, "grad_norm": 1.1375526963458324, "learning_rate": 2.26622148759988e-06, "loss": 0.1794, "step": 5144 }, { "epoch": 0.69, "grad_norm": 0.9564087941045062, "learning_rate": 2.2643932825490055e-06, "loss": 0.1612, "step": 5145 }, { "epoch": 0.69, "grad_norm": 1.0146145917495004, "learning_rate": 2.2625655993332215e-06, "loss": 0.1486, "step": 5146 }, { "epoch": 0.69, "grad_norm": 1.240640982924503, "learning_rate": 2.26073843830117e-06, "loss": 0.1951, "step": 5147 }, { "epoch": 0.69, "grad_norm": 0.8482580719019635, "learning_rate": 2.258911799801394e-06, "loss": 0.2029, "step": 5148 }, { "epoch": 0.69, "grad_norm": 0.8461575199178509, "learning_rate": 2.2570856841823384e-06, "loss": 0.1369, "step": 5149 }, { "epoch": 0.69, "grad_norm": 1.0962315198062242, "learning_rate": 2.255260091792345e-06, "loss": 0.2003, "step": 5150 }, { "epoch": 0.69, "grad_norm": 0.9390515766605418, "learning_rate": 2.2534350229796585e-06, "loss": 0.1154, "step": 5151 }, { "epoch": 0.69, "grad_norm": 1.0075079836268932, "learning_rate": 2.251610478092423e-06, "loss": 0.186, "step": 5152 }, { "epoch": 0.69, "grad_norm": 0.7114508789236266, "learning_rate": 2.249786457478682e-06, "loss": 0.102, "step": 5153 }, { "epoch": 0.69, "grad_norm": 0.7184812877238694, "learning_rate": 2.24796296148638e-06, "loss": 0.1522, "step": 5154 }, { "epoch": 0.7, "grad_norm": 1.0110855473720566, "learning_rate": 2.246139990463359e-06, "loss": 0.1712, "step": 5155 }, { "epoch": 0.7, "grad_norm": 0.7878846590671814, "learning_rate": 2.244317544757366e-06, "loss": 0.1295, "step": 5156 }, { "epoch": 0.7, "grad_norm": 0.9965085160043814, "learning_rate": 2.2424956247160435e-06, "loss": 0.1673, "step": 5157 }, { "epoch": 0.7, "grad_norm": 0.8605602746376789, "learning_rate": 2.240674230686934e-06, "loss": 0.1843, "step": 5158 }, { "epoch": 0.7, "grad_norm": 0.872362048465069, "learning_rate": 2.238853363017481e-06, "loss": 0.1384, "step": 5159 }, { "epoch": 0.7, "grad_norm": 0.8188834087233577, "learning_rate": 2.2370330220550284e-06, "loss": 0.1495, "step": 5160 }, { "epoch": 0.7, "grad_norm": 0.8002486175780793, "learning_rate": 2.235213208146816e-06, "loss": 0.1509, "step": 5161 }, { "epoch": 0.7, "grad_norm": 0.9021615217937132, "learning_rate": 2.2333939216399854e-06, "loss": 0.1504, "step": 5162 }, { "epoch": 0.7, "grad_norm": 0.9818770379348042, "learning_rate": 2.2315751628815778e-06, "loss": 0.2064, "step": 5163 }, { "epoch": 0.7, "grad_norm": 0.8759056678475724, "learning_rate": 2.2297569322185348e-06, "loss": 0.1468, "step": 5164 }, { "epoch": 0.7, "grad_norm": 0.8957949581590213, "learning_rate": 2.227939229997695e-06, "loss": 0.1453, "step": 5165 }, { "epoch": 0.7, "grad_norm": 1.6427456929822455, "learning_rate": 2.226122056565797e-06, "loss": 0.2783, "step": 5166 }, { "epoch": 0.7, "grad_norm": 1.0141908655576757, "learning_rate": 2.2243054122694774e-06, "loss": 0.1587, "step": 5167 }, { "epoch": 0.7, "grad_norm": 0.812522632106931, "learning_rate": 2.222489297455276e-06, "loss": 0.1398, "step": 5168 }, { "epoch": 0.7, "grad_norm": 1.0106492432016099, "learning_rate": 2.220673712469628e-06, "loss": 0.1785, "step": 5169 }, { "epoch": 0.7, "grad_norm": 1.1855843959977845, "learning_rate": 2.2188586576588666e-06, "loss": 0.2148, "step": 5170 }, { "epoch": 0.7, "grad_norm": 0.995596222855162, "learning_rate": 2.217044133369227e-06, "loss": 0.1746, "step": 5171 }, { "epoch": 0.7, "grad_norm": 0.933059021988597, "learning_rate": 2.215230139946842e-06, "loss": 0.1294, "step": 5172 }, { "epoch": 0.7, "grad_norm": 1.0483737508295283, "learning_rate": 2.213416677737741e-06, "loss": 0.1903, "step": 5173 }, { "epoch": 0.7, "grad_norm": 0.9237163894722823, "learning_rate": 2.2116037470878554e-06, "loss": 0.1625, "step": 5174 }, { "epoch": 0.7, "grad_norm": 0.49015472028336493, "learning_rate": 2.209791348343013e-06, "loss": 0.102, "step": 5175 }, { "epoch": 0.7, "grad_norm": 0.8674382980299368, "learning_rate": 2.207979481848942e-06, "loss": 0.1163, "step": 5176 }, { "epoch": 0.7, "grad_norm": 1.1727263321915589, "learning_rate": 2.2061681479512687e-06, "loss": 0.2241, "step": 5177 }, { "epoch": 0.7, "grad_norm": 0.8988021870393325, "learning_rate": 2.2043573469955116e-06, "loss": 0.1317, "step": 5178 }, { "epoch": 0.7, "grad_norm": 1.107406486485925, "learning_rate": 2.2025470793270986e-06, "loss": 0.1751, "step": 5179 }, { "epoch": 0.7, "grad_norm": 0.9003429385001971, "learning_rate": 2.200737345291349e-06, "loss": 0.1292, "step": 5180 }, { "epoch": 0.7, "grad_norm": 0.6569041920213644, "learning_rate": 2.1989281452334806e-06, "loss": 0.0803, "step": 5181 }, { "epoch": 0.7, "grad_norm": 0.9257221441552794, "learning_rate": 2.1971194794986107e-06, "loss": 0.1639, "step": 5182 }, { "epoch": 0.7, "grad_norm": 0.8386203024312245, "learning_rate": 2.1953113484317543e-06, "loss": 0.1477, "step": 5183 }, { "epoch": 0.7, "grad_norm": 0.9114815409037735, "learning_rate": 2.193503752377824e-06, "loss": 0.149, "step": 5184 }, { "epoch": 0.7, "grad_norm": 1.0213664731821395, "learning_rate": 2.1916966916816316e-06, "loss": 0.1621, "step": 5185 }, { "epoch": 0.7, "grad_norm": 1.1952351703344137, "learning_rate": 2.1898901666878853e-06, "loss": 0.2326, "step": 5186 }, { "epoch": 0.7, "grad_norm": 0.7851603745791829, "learning_rate": 2.1880841777411915e-06, "loss": 0.1124, "step": 5187 }, { "epoch": 0.7, "grad_norm": 0.892088859988036, "learning_rate": 2.186278725186055e-06, "loss": 0.1625, "step": 5188 }, { "epoch": 0.7, "grad_norm": 0.9854940778802016, "learning_rate": 2.1844738093668777e-06, "loss": 0.1988, "step": 5189 }, { "epoch": 0.7, "grad_norm": 0.8632707079473064, "learning_rate": 2.182669430627959e-06, "loss": 0.1357, "step": 5190 }, { "epoch": 0.7, "grad_norm": 1.0562654812654555, "learning_rate": 2.1808655893134955e-06, "loss": 0.1804, "step": 5191 }, { "epoch": 0.7, "grad_norm": 1.2198153096477928, "learning_rate": 2.1790622857675837e-06, "loss": 0.2426, "step": 5192 }, { "epoch": 0.7, "grad_norm": 1.0637791280134588, "learning_rate": 2.177259520334214e-06, "loss": 0.1929, "step": 5193 }, { "epoch": 0.7, "grad_norm": 0.60654151394383, "learning_rate": 2.175457293357277e-06, "loss": 0.1153, "step": 5194 }, { "epoch": 0.7, "grad_norm": 1.0569009990405513, "learning_rate": 2.1736556051805585e-06, "loss": 0.1672, "step": 5195 }, { "epoch": 0.7, "grad_norm": 1.0170889472585958, "learning_rate": 2.1718544561477427e-06, "loss": 0.1765, "step": 5196 }, { "epoch": 0.7, "grad_norm": 1.0131191553742562, "learning_rate": 2.1700538466024105e-06, "loss": 0.1838, "step": 5197 }, { "epoch": 0.7, "grad_norm": 1.1632332231193023, "learning_rate": 2.1682537768880413e-06, "loss": 0.209, "step": 5198 }, { "epoch": 0.7, "grad_norm": 1.0293797206695703, "learning_rate": 2.1664542473480086e-06, "loss": 0.1605, "step": 5199 }, { "epoch": 0.7, "grad_norm": 0.9099613619784896, "learning_rate": 2.164655258325584e-06, "loss": 0.1999, "step": 5200 }, { "epoch": 0.7, "grad_norm": 0.8976723182341697, "learning_rate": 2.16285681016394e-06, "loss": 0.1508, "step": 5201 }, { "epoch": 0.7, "grad_norm": 1.265683348626232, "learning_rate": 2.1610589032061403e-06, "loss": 0.2422, "step": 5202 }, { "epoch": 0.7, "grad_norm": 0.8295317581508024, "learning_rate": 2.1592615377951475e-06, "loss": 0.1504, "step": 5203 }, { "epoch": 0.7, "grad_norm": 0.7809470254550457, "learning_rate": 2.1574647142738214e-06, "loss": 0.1683, "step": 5204 }, { "epoch": 0.7, "grad_norm": 1.1309135896854745, "learning_rate": 2.1556684329849174e-06, "loss": 0.2059, "step": 5205 }, { "epoch": 0.7, "grad_norm": 1.0342725243436408, "learning_rate": 2.1538726942710904e-06, "loss": 0.1643, "step": 5206 }, { "epoch": 0.7, "grad_norm": 0.9081723049490497, "learning_rate": 2.1520774984748856e-06, "loss": 0.1534, "step": 5207 }, { "epoch": 0.7, "grad_norm": 0.9882693223839399, "learning_rate": 2.1502828459387504e-06, "loss": 0.1435, "step": 5208 }, { "epoch": 0.7, "grad_norm": 0.8511187749380588, "learning_rate": 2.1484887370050263e-06, "loss": 0.1419, "step": 5209 }, { "epoch": 0.7, "grad_norm": 0.9831264283688805, "learning_rate": 2.1466951720159517e-06, "loss": 0.1706, "step": 5210 }, { "epoch": 0.7, "grad_norm": 1.1530945443461806, "learning_rate": 2.1449021513136595e-06, "loss": 0.1926, "step": 5211 }, { "epoch": 0.7, "grad_norm": 0.610726711005002, "learning_rate": 2.1431096752401837e-06, "loss": 0.1059, "step": 5212 }, { "epoch": 0.7, "grad_norm": 1.0398826691506844, "learning_rate": 2.141317744137449e-06, "loss": 0.1815, "step": 5213 }, { "epoch": 0.7, "grad_norm": 0.8330419687361401, "learning_rate": 2.1395263583472777e-06, "loss": 0.1375, "step": 5214 }, { "epoch": 0.7, "grad_norm": 1.0010854260808537, "learning_rate": 2.1377355182113895e-06, "loss": 0.1867, "step": 5215 }, { "epoch": 0.7, "grad_norm": 0.510746136928644, "learning_rate": 2.135945224071398e-06, "loss": 0.0592, "step": 5216 }, { "epoch": 0.7, "grad_norm": 1.0883541881050738, "learning_rate": 2.1341554762688145e-06, "loss": 0.1675, "step": 5217 }, { "epoch": 0.7, "grad_norm": 0.6919849382440006, "learning_rate": 2.1323662751450445e-06, "loss": 0.092, "step": 5218 }, { "epoch": 0.7, "grad_norm": 0.9951065894943325, "learning_rate": 2.1305776210413907e-06, "loss": 0.1665, "step": 5219 }, { "epoch": 0.7, "grad_norm": 0.7701168540477913, "learning_rate": 2.1287895142990506e-06, "loss": 0.1315, "step": 5220 }, { "epoch": 0.7, "grad_norm": 1.101413132589804, "learning_rate": 2.127001955259117e-06, "loss": 0.2014, "step": 5221 }, { "epoch": 0.7, "grad_norm": 1.03601286170246, "learning_rate": 2.1252149442625785e-06, "loss": 0.1842, "step": 5222 }, { "epoch": 0.7, "grad_norm": 1.0530853431504639, "learning_rate": 2.12342848165032e-06, "loss": 0.1832, "step": 5223 }, { "epoch": 0.7, "grad_norm": 0.8977572010305682, "learning_rate": 2.1216425677631196e-06, "loss": 0.1189, "step": 5224 }, { "epoch": 0.7, "grad_norm": 0.9782610301869513, "learning_rate": 2.1198572029416544e-06, "loss": 0.1961, "step": 5225 }, { "epoch": 0.7, "grad_norm": 1.043529878859056, "learning_rate": 2.1180723875264932e-06, "loss": 0.1848, "step": 5226 }, { "epoch": 0.7, "grad_norm": 1.1013732971515666, "learning_rate": 2.1162881218581007e-06, "loss": 0.195, "step": 5227 }, { "epoch": 0.7, "grad_norm": 0.8941496504241335, "learning_rate": 2.1145044062768387e-06, "loss": 0.1863, "step": 5228 }, { "epoch": 0.71, "grad_norm": 0.7576476698147447, "learning_rate": 2.1127212411229625e-06, "loss": 0.1062, "step": 5229 }, { "epoch": 0.71, "grad_norm": 0.9466267988173712, "learning_rate": 2.1109386267366223e-06, "loss": 0.1403, "step": 5230 }, { "epoch": 0.71, "grad_norm": 1.0293238869550285, "learning_rate": 2.109156563457864e-06, "loss": 0.1663, "step": 5231 }, { "epoch": 0.71, "grad_norm": 1.1439182190285688, "learning_rate": 2.1073750516266267e-06, "loss": 0.2378, "step": 5232 }, { "epoch": 0.71, "grad_norm": 0.8217742111350279, "learning_rate": 2.1055940915827473e-06, "loss": 0.1356, "step": 5233 }, { "epoch": 0.71, "grad_norm": 0.9489767098395485, "learning_rate": 2.103813683665953e-06, "loss": 0.1629, "step": 5234 }, { "epoch": 0.71, "grad_norm": 0.9767184461498047, "learning_rate": 2.1020338282158725e-06, "loss": 0.1738, "step": 5235 }, { "epoch": 0.71, "grad_norm": 0.6310124445555436, "learning_rate": 2.100254525572024e-06, "loss": 0.1048, "step": 5236 }, { "epoch": 0.71, "grad_norm": 0.8655798330292693, "learning_rate": 2.098475776073819e-06, "loss": 0.1592, "step": 5237 }, { "epoch": 0.71, "grad_norm": 1.146051526110434, "learning_rate": 2.0966975800605665e-06, "loss": 0.1851, "step": 5238 }, { "epoch": 0.71, "grad_norm": 0.8428867258701347, "learning_rate": 2.0949199378714697e-06, "loss": 0.1515, "step": 5239 }, { "epoch": 0.71, "grad_norm": 0.7963409783993155, "learning_rate": 2.093142849845626e-06, "loss": 0.139, "step": 5240 }, { "epoch": 0.71, "grad_norm": 1.0955848527579124, "learning_rate": 2.0913663163220256e-06, "loss": 0.1734, "step": 5241 }, { "epoch": 0.71, "grad_norm": 0.8720216859568629, "learning_rate": 2.089590337639554e-06, "loss": 0.147, "step": 5242 }, { "epoch": 0.71, "grad_norm": 1.1346446940221524, "learning_rate": 2.0878149141369926e-06, "loss": 0.1515, "step": 5243 }, { "epoch": 0.71, "grad_norm": 0.8168763246855515, "learning_rate": 2.0860400461530133e-06, "loss": 0.1248, "step": 5244 }, { "epoch": 0.71, "grad_norm": 0.7763956036236361, "learning_rate": 2.0842657340261836e-06, "loss": 0.1509, "step": 5245 }, { "epoch": 0.71, "grad_norm": 0.9702166752976114, "learning_rate": 2.0824919780949675e-06, "loss": 0.1852, "step": 5246 }, { "epoch": 0.71, "grad_norm": 1.1739192904037745, "learning_rate": 2.0807187786977196e-06, "loss": 0.1976, "step": 5247 }, { "epoch": 0.71, "grad_norm": 1.0369331175779324, "learning_rate": 2.07894613617269e-06, "loss": 0.1765, "step": 5248 }, { "epoch": 0.71, "grad_norm": 1.086548562229614, "learning_rate": 2.0771740508580205e-06, "loss": 0.2202, "step": 5249 }, { "epoch": 0.71, "grad_norm": 1.0117202254394895, "learning_rate": 2.0754025230917498e-06, "loss": 0.1691, "step": 5250 }, { "epoch": 0.71, "grad_norm": 1.1970278177469342, "learning_rate": 2.0736315532118073e-06, "loss": 0.184, "step": 5251 }, { "epoch": 0.71, "grad_norm": 0.8920700214670607, "learning_rate": 2.071861141556018e-06, "loss": 0.1983, "step": 5252 }, { "epoch": 0.71, "grad_norm": 1.0736438891447917, "learning_rate": 2.0700912884621006e-06, "loss": 0.2071, "step": 5253 }, { "epoch": 0.71, "grad_norm": 0.8167570164894775, "learning_rate": 2.0683219942676634e-06, "loss": 0.1697, "step": 5254 }, { "epoch": 0.71, "grad_norm": 0.9740113411298168, "learning_rate": 2.066553259310213e-06, "loss": 0.1608, "step": 5255 }, { "epoch": 0.71, "grad_norm": 0.7849418991734501, "learning_rate": 2.0647850839271444e-06, "loss": 0.1136, "step": 5256 }, { "epoch": 0.71, "grad_norm": 1.0352898796803098, "learning_rate": 2.063017468455753e-06, "loss": 0.1747, "step": 5257 }, { "epoch": 0.71, "grad_norm": 0.9077111918012932, "learning_rate": 2.0612504132332217e-06, "loss": 0.1709, "step": 5258 }, { "epoch": 0.71, "grad_norm": 1.1638459548831896, "learning_rate": 2.0594839185966282e-06, "loss": 0.1935, "step": 5259 }, { "epoch": 0.71, "grad_norm": 0.7328953261542221, "learning_rate": 2.0577179848829415e-06, "loss": 0.12, "step": 5260 }, { "epoch": 0.71, "grad_norm": 0.7841789009596776, "learning_rate": 2.0559526124290268e-06, "loss": 0.1536, "step": 5261 }, { "epoch": 0.71, "grad_norm": 0.9815132744598671, "learning_rate": 2.05418780157164e-06, "loss": 0.1588, "step": 5262 }, { "epoch": 0.71, "grad_norm": 0.8221090142948434, "learning_rate": 2.05242355264743e-06, "loss": 0.1385, "step": 5263 }, { "epoch": 0.71, "grad_norm": 0.8618266127016659, "learning_rate": 2.0506598659929396e-06, "loss": 0.1764, "step": 5264 }, { "epoch": 0.71, "grad_norm": 1.2500024971100976, "learning_rate": 2.048896741944603e-06, "loss": 0.2384, "step": 5265 }, { "epoch": 0.71, "grad_norm": 1.0760427993074166, "learning_rate": 2.047134180838749e-06, "loss": 0.2252, "step": 5266 }, { "epoch": 0.71, "grad_norm": 0.9023281874688119, "learning_rate": 2.045372183011597e-06, "loss": 0.1738, "step": 5267 }, { "epoch": 0.71, "grad_norm": 0.8243776562930734, "learning_rate": 2.043610748799259e-06, "loss": 0.1476, "step": 5268 }, { "epoch": 0.71, "grad_norm": 0.8555697313986249, "learning_rate": 2.041849878537742e-06, "loss": 0.1263, "step": 5269 }, { "epoch": 0.71, "grad_norm": 0.9700938273876363, "learning_rate": 2.0400895725629415e-06, "loss": 0.1611, "step": 5270 }, { "epoch": 0.71, "grad_norm": 0.8002054841546978, "learning_rate": 2.03832983121065e-06, "loss": 0.1377, "step": 5271 }, { "epoch": 0.71, "grad_norm": 1.2803784775704683, "learning_rate": 2.0365706548165476e-06, "loss": 0.2131, "step": 5272 }, { "epoch": 0.71, "grad_norm": 0.8351147356682314, "learning_rate": 2.03481204371621e-06, "loss": 0.1628, "step": 5273 }, { "epoch": 0.71, "grad_norm": 0.9123391068617992, "learning_rate": 2.0330539982451037e-06, "loss": 0.1562, "step": 5274 }, { "epoch": 0.71, "grad_norm": 0.8190161243674605, "learning_rate": 2.0312965187385875e-06, "loss": 0.1471, "step": 5275 }, { "epoch": 0.71, "grad_norm": 0.8425671254140754, "learning_rate": 2.0295396055319123e-06, "loss": 0.1658, "step": 5276 }, { "epoch": 0.71, "grad_norm": 1.0595181298958565, "learning_rate": 2.027783258960221e-06, "loss": 0.203, "step": 5277 }, { "epoch": 0.71, "grad_norm": 1.1268614820310185, "learning_rate": 2.0260274793585478e-06, "loss": 0.1736, "step": 5278 }, { "epoch": 0.71, "grad_norm": 0.9203087905955695, "learning_rate": 2.0242722670618183e-06, "loss": 0.1452, "step": 5279 }, { "epoch": 0.71, "grad_norm": 1.0834300001714667, "learning_rate": 2.0225176224048537e-06, "loss": 0.1874, "step": 5280 }, { "epoch": 0.71, "grad_norm": 0.7559041610638716, "learning_rate": 2.0207635457223624e-06, "loss": 0.1226, "step": 5281 }, { "epoch": 0.71, "grad_norm": 1.0256709212544945, "learning_rate": 2.019010037348947e-06, "loss": 0.1626, "step": 5282 }, { "epoch": 0.71, "grad_norm": 1.0349278421051835, "learning_rate": 2.0172570976191013e-06, "loss": 0.2077, "step": 5283 }, { "epoch": 0.71, "grad_norm": 0.7202276765622286, "learning_rate": 2.015504726867208e-06, "loss": 0.1162, "step": 5284 }, { "epoch": 0.71, "grad_norm": 0.9846218411458137, "learning_rate": 2.0137529254275436e-06, "loss": 0.1654, "step": 5285 }, { "epoch": 0.71, "grad_norm": 0.9379416350026927, "learning_rate": 2.0120016936342774e-06, "loss": 0.1413, "step": 5286 }, { "epoch": 0.71, "grad_norm": 0.6919445113794854, "learning_rate": 2.0102510318214673e-06, "loss": 0.1255, "step": 5287 }, { "epoch": 0.71, "grad_norm": 1.048181465115288, "learning_rate": 2.0085009403230642e-06, "loss": 0.1581, "step": 5288 }, { "epoch": 0.71, "grad_norm": 1.0325553221663513, "learning_rate": 2.006751419472909e-06, "loss": 0.1733, "step": 5289 }, { "epoch": 0.71, "grad_norm": 1.113486861472338, "learning_rate": 2.0050024696047343e-06, "loss": 0.2009, "step": 5290 }, { "epoch": 0.71, "grad_norm": 0.8339606364351033, "learning_rate": 2.0032540910521653e-06, "loss": 0.147, "step": 5291 }, { "epoch": 0.71, "grad_norm": 0.9504036200905683, "learning_rate": 2.0015062841487167e-06, "loss": 0.1327, "step": 5292 }, { "epoch": 0.71, "grad_norm": 0.8208450201860303, "learning_rate": 1.999759049227793e-06, "loss": 0.1419, "step": 5293 }, { "epoch": 0.71, "grad_norm": 0.5489374468204958, "learning_rate": 1.998012386622691e-06, "loss": 0.0997, "step": 5294 }, { "epoch": 0.71, "grad_norm": 0.9579334410818509, "learning_rate": 1.9962662966665992e-06, "loss": 0.1483, "step": 5295 }, { "epoch": 0.71, "grad_norm": 0.9788628378500015, "learning_rate": 1.994520779692594e-06, "loss": 0.1504, "step": 5296 }, { "epoch": 0.71, "grad_norm": 0.9919918870701304, "learning_rate": 1.9927758360336465e-06, "loss": 0.2337, "step": 5297 }, { "epoch": 0.71, "grad_norm": 0.9919868064119409, "learning_rate": 1.991031466022615e-06, "loss": 0.2223, "step": 5298 }, { "epoch": 0.71, "grad_norm": 1.0236799393365767, "learning_rate": 1.9892876699922493e-06, "loss": 0.1608, "step": 5299 }, { "epoch": 0.71, "grad_norm": 0.733208717862599, "learning_rate": 1.987544448275192e-06, "loss": 0.1105, "step": 5300 }, { "epoch": 0.71, "grad_norm": 0.9835954782308561, "learning_rate": 1.98580180120397e-06, "loss": 0.1801, "step": 5301 }, { "epoch": 0.71, "grad_norm": 0.9339238707063232, "learning_rate": 1.9840597291110084e-06, "loss": 0.1957, "step": 5302 }, { "epoch": 0.72, "grad_norm": 0.9647266537162923, "learning_rate": 1.982318232328619e-06, "loss": 0.1481, "step": 5303 }, { "epoch": 0.72, "grad_norm": 0.8344959280350065, "learning_rate": 1.980577311189002e-06, "loss": 0.1425, "step": 5304 }, { "epoch": 0.72, "grad_norm": 1.1421081454938207, "learning_rate": 1.978836966024251e-06, "loss": 0.1501, "step": 5305 }, { "epoch": 0.72, "grad_norm": 0.8491257477772186, "learning_rate": 1.977097197166348e-06, "loss": 0.1649, "step": 5306 }, { "epoch": 0.72, "grad_norm": 0.8542633315063157, "learning_rate": 1.975358004947165e-06, "loss": 0.1804, "step": 5307 }, { "epoch": 0.72, "grad_norm": 1.1565498750848222, "learning_rate": 1.9736193896984646e-06, "loss": 0.18, "step": 5308 }, { "epoch": 0.72, "grad_norm": 0.9684978311827142, "learning_rate": 1.9718813517518996e-06, "loss": 0.1353, "step": 5309 }, { "epoch": 0.72, "grad_norm": 0.7253621200522771, "learning_rate": 1.970143891439012e-06, "loss": 0.0833, "step": 5310 }, { "epoch": 0.72, "grad_norm": 1.0721993558509795, "learning_rate": 1.9684070090912335e-06, "loss": 0.1692, "step": 5311 }, { "epoch": 0.72, "grad_norm": 1.010311528627763, "learning_rate": 1.966670705039886e-06, "loss": 0.1688, "step": 5312 }, { "epoch": 0.72, "grad_norm": 0.9648709116292027, "learning_rate": 1.9649349796161817e-06, "loss": 0.168, "step": 5313 }, { "epoch": 0.72, "grad_norm": 0.8838888316031701, "learning_rate": 1.963199833151221e-06, "loss": 0.129, "step": 5314 }, { "epoch": 0.72, "grad_norm": 0.6782709416999746, "learning_rate": 1.961465265975995e-06, "loss": 0.1176, "step": 5315 }, { "epoch": 0.72, "grad_norm": 0.9546968332019982, "learning_rate": 1.9597312784213834e-06, "loss": 0.17, "step": 5316 }, { "epoch": 0.72, "grad_norm": 0.8749465878586437, "learning_rate": 1.9579978708181557e-06, "loss": 0.1629, "step": 5317 }, { "epoch": 0.72, "grad_norm": 0.9598452259419247, "learning_rate": 1.956265043496972e-06, "loss": 0.1441, "step": 5318 }, { "epoch": 0.72, "grad_norm": 0.7127740613850134, "learning_rate": 1.9545327967883794e-06, "loss": 0.1269, "step": 5319 }, { "epoch": 0.72, "grad_norm": 0.8162826308357644, "learning_rate": 1.952801131022816e-06, "loss": 0.132, "step": 5320 }, { "epoch": 0.72, "grad_norm": 0.8509184968916198, "learning_rate": 1.9510700465306084e-06, "loss": 0.1191, "step": 5321 }, { "epoch": 0.72, "grad_norm": 1.0181565378631932, "learning_rate": 1.949339543641972e-06, "loss": 0.1278, "step": 5322 }, { "epoch": 0.72, "grad_norm": 0.8756280587819664, "learning_rate": 1.947609622687012e-06, "loss": 0.1603, "step": 5323 }, { "epoch": 0.72, "grad_norm": 1.1701573719205893, "learning_rate": 1.9458802839957207e-06, "loss": 0.1937, "step": 5324 }, { "epoch": 0.72, "grad_norm": 0.8362396992818782, "learning_rate": 1.944151527897985e-06, "loss": 0.1748, "step": 5325 }, { "epoch": 0.72, "grad_norm": 1.0354479131670675, "learning_rate": 1.9424233547235733e-06, "loss": 0.2147, "step": 5326 }, { "epoch": 0.72, "grad_norm": 1.0693667739764026, "learning_rate": 1.940695764802147e-06, "loss": 0.1715, "step": 5327 }, { "epoch": 0.72, "grad_norm": 1.129490777760813, "learning_rate": 1.9389687584632554e-06, "loss": 0.2406, "step": 5328 }, { "epoch": 0.72, "grad_norm": 0.7630987507169708, "learning_rate": 1.937242336036336e-06, "loss": 0.1314, "step": 5329 }, { "epoch": 0.72, "grad_norm": 1.0023161619976755, "learning_rate": 1.9355164978507175e-06, "loss": 0.1931, "step": 5330 }, { "epoch": 0.72, "grad_norm": 0.9671511970772629, "learning_rate": 1.933791244235611e-06, "loss": 0.1781, "step": 5331 }, { "epoch": 0.72, "grad_norm": 0.8151386349864533, "learning_rate": 1.932066575520122e-06, "loss": 0.1644, "step": 5332 }, { "epoch": 0.72, "grad_norm": 1.0198801931239905, "learning_rate": 1.9303424920332426e-06, "loss": 0.2088, "step": 5333 }, { "epoch": 0.72, "grad_norm": 0.9868815360822494, "learning_rate": 1.928618994103853e-06, "loss": 0.1844, "step": 5334 }, { "epoch": 0.72, "grad_norm": 0.8527657503713765, "learning_rate": 1.9268960820607197e-06, "loss": 0.154, "step": 5335 }, { "epoch": 0.72, "grad_norm": 0.9718021505644555, "learning_rate": 1.925173756232504e-06, "loss": 0.1699, "step": 5336 }, { "epoch": 0.72, "grad_norm": 0.9130805863250179, "learning_rate": 1.9234520169477484e-06, "loss": 0.141, "step": 5337 }, { "epoch": 0.72, "grad_norm": 1.0228060442827374, "learning_rate": 1.9217308645348864e-06, "loss": 0.1537, "step": 5338 }, { "epoch": 0.72, "grad_norm": 0.9254347278192775, "learning_rate": 1.9200102993222393e-06, "loss": 0.1711, "step": 5339 }, { "epoch": 0.72, "grad_norm": 0.949552748696963, "learning_rate": 1.9182903216380156e-06, "loss": 0.159, "step": 5340 }, { "epoch": 0.72, "grad_norm": 1.222887694800065, "learning_rate": 1.9165709318103133e-06, "loss": 0.2328, "step": 5341 }, { "epoch": 0.72, "grad_norm": 1.0068035866795517, "learning_rate": 1.9148521301671173e-06, "loss": 0.1351, "step": 5342 }, { "epoch": 0.72, "grad_norm": 0.9326670631790344, "learning_rate": 1.9131339170363e-06, "loss": 0.1749, "step": 5343 }, { "epoch": 0.72, "grad_norm": 1.1510465501893044, "learning_rate": 1.911416292745622e-06, "loss": 0.2035, "step": 5344 }, { "epoch": 0.72, "grad_norm": 0.7838839829117576, "learning_rate": 1.9096992576227317e-06, "loss": 0.1259, "step": 5345 }, { "epoch": 0.72, "grad_norm": 0.9565993591412771, "learning_rate": 1.907982811995165e-06, "loss": 0.1612, "step": 5346 }, { "epoch": 0.72, "grad_norm": 0.9384915829239686, "learning_rate": 1.906266956190345e-06, "loss": 0.1502, "step": 5347 }, { "epoch": 0.72, "grad_norm": 0.8726706958401146, "learning_rate": 1.9045516905355826e-06, "loss": 0.1195, "step": 5348 }, { "epoch": 0.72, "grad_norm": 0.8842047133941989, "learning_rate": 1.9028370153580761e-06, "loss": 0.1438, "step": 5349 }, { "epoch": 0.72, "grad_norm": 0.9495672725360129, "learning_rate": 1.9011229309849111e-06, "loss": 0.1401, "step": 5350 }, { "epoch": 0.72, "grad_norm": 0.9417264392585245, "learning_rate": 1.8994094377430605e-06, "loss": 0.13, "step": 5351 }, { "epoch": 0.72, "grad_norm": 0.8657780191659399, "learning_rate": 1.8976965359593852e-06, "loss": 0.1337, "step": 5352 }, { "epoch": 0.72, "grad_norm": 0.8961755915593453, "learning_rate": 1.8959842259606315e-06, "loss": 0.1127, "step": 5353 }, { "epoch": 0.72, "grad_norm": 0.8968673043411751, "learning_rate": 1.8942725080734342e-06, "loss": 0.1608, "step": 5354 }, { "epoch": 0.72, "grad_norm": 0.7618691511198207, "learning_rate": 1.892561382624315e-06, "loss": 0.124, "step": 5355 }, { "epoch": 0.72, "grad_norm": 0.8051426497114296, "learning_rate": 1.8908508499396821e-06, "loss": 0.1229, "step": 5356 }, { "epoch": 0.72, "grad_norm": 0.9765329372765934, "learning_rate": 1.8891409103458298e-06, "loss": 0.1746, "step": 5357 }, { "epoch": 0.72, "grad_norm": 1.1614209139481608, "learning_rate": 1.8874315641689433e-06, "loss": 0.1888, "step": 5358 }, { "epoch": 0.72, "grad_norm": 0.7888030207381139, "learning_rate": 1.8857228117350917e-06, "loss": 0.0969, "step": 5359 }, { "epoch": 0.72, "grad_norm": 1.263304422261045, "learning_rate": 1.8840146533702281e-06, "loss": 0.2088, "step": 5360 }, { "epoch": 0.72, "grad_norm": 1.1608047090302553, "learning_rate": 1.8823070894001955e-06, "loss": 0.1719, "step": 5361 }, { "epoch": 0.72, "grad_norm": 1.1367430240685457, "learning_rate": 1.8806001201507245e-06, "loss": 0.1574, "step": 5362 }, { "epoch": 0.72, "grad_norm": 0.9842733680059509, "learning_rate": 1.8788937459474294e-06, "loss": 0.2035, "step": 5363 }, { "epoch": 0.72, "grad_norm": 0.7915964174443448, "learning_rate": 1.8771879671158132e-06, "loss": 0.12, "step": 5364 }, { "epoch": 0.72, "grad_norm": 0.9341801845188683, "learning_rate": 1.8754827839812646e-06, "loss": 0.1578, "step": 5365 }, { "epoch": 0.72, "grad_norm": 0.7648744377262323, "learning_rate": 1.8737781968690578e-06, "loss": 0.1021, "step": 5366 }, { "epoch": 0.72, "grad_norm": 1.345672414325671, "learning_rate": 1.8720742061043545e-06, "loss": 0.2273, "step": 5367 }, { "epoch": 0.72, "grad_norm": 0.8347922674002691, "learning_rate": 1.8703708120122027e-06, "loss": 0.1412, "step": 5368 }, { "epoch": 0.72, "grad_norm": 1.1877888656630105, "learning_rate": 1.868668014917534e-06, "loss": 0.1853, "step": 5369 }, { "epoch": 0.72, "grad_norm": 0.9773247489617155, "learning_rate": 1.8669658151451713e-06, "loss": 0.1668, "step": 5370 }, { "epoch": 0.72, "grad_norm": 0.9132367948971447, "learning_rate": 1.865264213019819e-06, "loss": 0.1661, "step": 5371 }, { "epoch": 0.72, "grad_norm": 1.1125178856587739, "learning_rate": 1.8635632088660694e-06, "loss": 0.2488, "step": 5372 }, { "epoch": 0.72, "grad_norm": 1.092058812786988, "learning_rate": 1.8618628030083996e-06, "loss": 0.1853, "step": 5373 }, { "epoch": 0.72, "grad_norm": 0.861028066630506, "learning_rate": 1.8601629957711736e-06, "loss": 0.1976, "step": 5374 }, { "epoch": 0.72, "grad_norm": 0.7866770316065402, "learning_rate": 1.858463787478641e-06, "loss": 0.1454, "step": 5375 }, { "epoch": 0.72, "grad_norm": 1.0065448268292279, "learning_rate": 1.8567651784549385e-06, "loss": 0.1472, "step": 5376 }, { "epoch": 0.73, "grad_norm": 1.0082142925520245, "learning_rate": 1.8550671690240835e-06, "loss": 0.1686, "step": 5377 }, { "epoch": 0.73, "grad_norm": 0.8863809009780179, "learning_rate": 1.8533697595099848e-06, "loss": 0.1156, "step": 5378 }, { "epoch": 0.73, "grad_norm": 0.8751721879883956, "learning_rate": 1.8516729502364345e-06, "loss": 0.1598, "step": 5379 }, { "epoch": 0.73, "grad_norm": 0.9231061399215724, "learning_rate": 1.8499767415271074e-06, "loss": 0.1604, "step": 5380 }, { "epoch": 0.73, "grad_norm": 0.9323320970002122, "learning_rate": 1.8482811337055713e-06, "loss": 0.1328, "step": 5381 }, { "epoch": 0.73, "grad_norm": 0.9372305690183812, "learning_rate": 1.8465861270952724e-06, "loss": 0.1614, "step": 5382 }, { "epoch": 0.73, "grad_norm": 0.8735599435618272, "learning_rate": 1.844891722019544e-06, "loss": 0.1609, "step": 5383 }, { "epoch": 0.73, "grad_norm": 0.8629125894800985, "learning_rate": 1.8431979188016054e-06, "loss": 0.1436, "step": 5384 }, { "epoch": 0.73, "grad_norm": 0.9388482019274271, "learning_rate": 1.8415047177645607e-06, "loss": 0.1369, "step": 5385 }, { "epoch": 0.73, "grad_norm": 0.9553333220720569, "learning_rate": 1.8398121192314e-06, "loss": 0.1678, "step": 5386 }, { "epoch": 0.73, "grad_norm": 0.7228720751952686, "learning_rate": 1.8381201235249962e-06, "loss": 0.1487, "step": 5387 }, { "epoch": 0.73, "grad_norm": 0.8413931767789704, "learning_rate": 1.83642873096811e-06, "loss": 0.1304, "step": 5388 }, { "epoch": 0.73, "grad_norm": 1.1904565139417393, "learning_rate": 1.834737941883385e-06, "loss": 0.1981, "step": 5389 }, { "epoch": 0.73, "grad_norm": 1.280228348121079, "learning_rate": 1.8330477565933507e-06, "loss": 0.2036, "step": 5390 }, { "epoch": 0.73, "grad_norm": 1.2594449220718116, "learning_rate": 1.8313581754204214e-06, "loss": 0.2191, "step": 5391 }, { "epoch": 0.73, "grad_norm": 1.170869738458693, "learning_rate": 1.8296691986868947e-06, "loss": 0.1875, "step": 5392 }, { "epoch": 0.73, "grad_norm": 1.1119545650276978, "learning_rate": 1.8279808267149558e-06, "loss": 0.214, "step": 5393 }, { "epoch": 0.73, "grad_norm": 1.00294364287825, "learning_rate": 1.8262930598266714e-06, "loss": 0.1892, "step": 5394 }, { "epoch": 0.73, "grad_norm": 1.0639947492333646, "learning_rate": 1.8246058983439952e-06, "loss": 0.1498, "step": 5395 }, { "epoch": 0.73, "grad_norm": 0.6871366144639812, "learning_rate": 1.8229193425887637e-06, "loss": 0.1353, "step": 5396 }, { "epoch": 0.73, "grad_norm": 0.8269479362672745, "learning_rate": 1.8212333928826986e-06, "loss": 0.1224, "step": 5397 }, { "epoch": 0.73, "grad_norm": 0.6467686611127685, "learning_rate": 1.8195480495474065e-06, "loss": 0.1001, "step": 5398 }, { "epoch": 0.73, "grad_norm": 1.0007948539676346, "learning_rate": 1.8178633129043776e-06, "loss": 0.1759, "step": 5399 }, { "epoch": 0.73, "grad_norm": 0.8885275144394643, "learning_rate": 1.8161791832749863e-06, "loss": 0.1302, "step": 5400 }, { "epoch": 0.73, "grad_norm": 1.0401311793780996, "learning_rate": 1.814495660980492e-06, "loss": 0.194, "step": 5401 }, { "epoch": 0.73, "grad_norm": 0.8206311143511128, "learning_rate": 1.8128127463420358e-06, "loss": 0.1007, "step": 5402 }, { "epoch": 0.73, "grad_norm": 1.0963129894110348, "learning_rate": 1.8111304396806478e-06, "loss": 0.1564, "step": 5403 }, { "epoch": 0.73, "grad_norm": 0.7995167093211596, "learning_rate": 1.8094487413172379e-06, "loss": 0.1304, "step": 5404 }, { "epoch": 0.73, "grad_norm": 1.2468767398170977, "learning_rate": 1.8077676515726012e-06, "loss": 0.2007, "step": 5405 }, { "epoch": 0.73, "grad_norm": 1.134145907113633, "learning_rate": 1.8060871707674178e-06, "loss": 0.1999, "step": 5406 }, { "epoch": 0.73, "grad_norm": 0.9119617384773656, "learning_rate": 1.8044072992222478e-06, "loss": 0.1507, "step": 5407 }, { "epoch": 0.73, "grad_norm": 1.3348615056878388, "learning_rate": 1.802728037257539e-06, "loss": 0.2352, "step": 5408 }, { "epoch": 0.73, "grad_norm": 1.1781174410333242, "learning_rate": 1.8010493851936223e-06, "loss": 0.1871, "step": 5409 }, { "epoch": 0.73, "grad_norm": 0.8941143045490761, "learning_rate": 1.7993713433507115e-06, "loss": 0.1507, "step": 5410 }, { "epoch": 0.73, "grad_norm": 0.963012396037207, "learning_rate": 1.797693912048904e-06, "loss": 0.1893, "step": 5411 }, { "epoch": 0.73, "grad_norm": 1.1095288047957839, "learning_rate": 1.796017091608181e-06, "loss": 0.1797, "step": 5412 }, { "epoch": 0.73, "grad_norm": 0.8549753379826608, "learning_rate": 1.7943408823484054e-06, "loss": 0.122, "step": 5413 }, { "epoch": 0.73, "grad_norm": 1.047721072342754, "learning_rate": 1.792665284589329e-06, "loss": 0.1726, "step": 5414 }, { "epoch": 0.73, "grad_norm": 1.0945967833304888, "learning_rate": 1.7909902986505806e-06, "loss": 0.1817, "step": 5415 }, { "epoch": 0.73, "grad_norm": 1.0425265157849413, "learning_rate": 1.789315924851675e-06, "loss": 0.2065, "step": 5416 }, { "epoch": 0.73, "grad_norm": 0.8610060232200819, "learning_rate": 1.787642163512011e-06, "loss": 0.1269, "step": 5417 }, { "epoch": 0.73, "grad_norm": 0.9606647165541904, "learning_rate": 1.7859690149508696e-06, "loss": 0.2041, "step": 5418 }, { "epoch": 0.73, "grad_norm": 0.8281492129533026, "learning_rate": 1.7842964794874135e-06, "loss": 0.1358, "step": 5419 }, { "epoch": 0.73, "grad_norm": 0.6473910054004474, "learning_rate": 1.7826245574406915e-06, "loss": 0.1609, "step": 5420 }, { "epoch": 0.73, "grad_norm": 0.7961446192204192, "learning_rate": 1.7809532491296332e-06, "loss": 0.1545, "step": 5421 }, { "epoch": 0.73, "grad_norm": 1.007512520091417, "learning_rate": 1.7792825548730513e-06, "loss": 0.1336, "step": 5422 }, { "epoch": 0.73, "grad_norm": 0.8522686896225697, "learning_rate": 1.777612474989644e-06, "loss": 0.1493, "step": 5423 }, { "epoch": 0.73, "grad_norm": 1.011361838350534, "learning_rate": 1.7759430097979868e-06, "loss": 0.1451, "step": 5424 }, { "epoch": 0.73, "grad_norm": 1.0789222974700516, "learning_rate": 1.7742741596165407e-06, "loss": 0.1683, "step": 5425 }, { "epoch": 0.73, "grad_norm": 1.253729487296883, "learning_rate": 1.772605924763654e-06, "loss": 0.207, "step": 5426 }, { "epoch": 0.73, "grad_norm": 0.7606642516651748, "learning_rate": 1.7709383055575514e-06, "loss": 0.1235, "step": 5427 }, { "epoch": 0.73, "grad_norm": 0.7825772703010311, "learning_rate": 1.769271302316342e-06, "loss": 0.1479, "step": 5428 }, { "epoch": 0.73, "grad_norm": 1.1599054520153789, "learning_rate": 1.7676049153580188e-06, "loss": 0.2025, "step": 5429 }, { "epoch": 0.73, "grad_norm": 0.9852829728043756, "learning_rate": 1.7659391450004548e-06, "loss": 0.175, "step": 5430 }, { "epoch": 0.73, "grad_norm": 0.8094354614684406, "learning_rate": 1.7642739915614077e-06, "loss": 0.1547, "step": 5431 }, { "epoch": 0.73, "grad_norm": 0.7545588845526919, "learning_rate": 1.762609455358516e-06, "loss": 0.0985, "step": 5432 }, { "epoch": 0.73, "grad_norm": 1.0354635576010562, "learning_rate": 1.760945536709301e-06, "loss": 0.1559, "step": 5433 }, { "epoch": 0.73, "grad_norm": 1.1567417956550479, "learning_rate": 1.7592822359311667e-06, "loss": 0.1668, "step": 5434 }, { "epoch": 0.73, "grad_norm": 1.1672085174680804, "learning_rate": 1.7576195533413981e-06, "loss": 0.1766, "step": 5435 }, { "epoch": 0.73, "grad_norm": 0.7467470971973799, "learning_rate": 1.7559574892571635e-06, "loss": 0.131, "step": 5436 }, { "epoch": 0.73, "grad_norm": 0.9500435463888054, "learning_rate": 1.7542960439955126e-06, "loss": 0.1604, "step": 5437 }, { "epoch": 0.73, "grad_norm": 1.2391174108957301, "learning_rate": 1.7526352178733762e-06, "loss": 0.221, "step": 5438 }, { "epoch": 0.73, "grad_norm": 0.9182079286650079, "learning_rate": 1.7509750112075692e-06, "loss": 0.1754, "step": 5439 }, { "epoch": 0.73, "grad_norm": 1.0716663594325768, "learning_rate": 1.7493154243147858e-06, "loss": 0.1375, "step": 5440 }, { "epoch": 0.73, "grad_norm": 1.0765272277260522, "learning_rate": 1.747656457511604e-06, "loss": 0.1392, "step": 5441 }, { "epoch": 0.73, "grad_norm": 1.0319700167968473, "learning_rate": 1.7459981111144826e-06, "loss": 0.1814, "step": 5442 }, { "epoch": 0.73, "grad_norm": 0.7886637480103499, "learning_rate": 1.7443403854397618e-06, "loss": 0.1529, "step": 5443 }, { "epoch": 0.73, "grad_norm": 1.2076623677161185, "learning_rate": 1.7426832808036638e-06, "loss": 0.2207, "step": 5444 }, { "epoch": 0.73, "grad_norm": 0.9509386673433309, "learning_rate": 1.7410267975222922e-06, "loss": 0.1495, "step": 5445 }, { "epoch": 0.73, "grad_norm": 0.9602054767810572, "learning_rate": 1.7393709359116323e-06, "loss": 0.131, "step": 5446 }, { "epoch": 0.73, "grad_norm": 0.9033005096261524, "learning_rate": 1.7377156962875496e-06, "loss": 0.143, "step": 5447 }, { "epoch": 0.73, "grad_norm": 1.0625621679589154, "learning_rate": 1.7360610789657944e-06, "loss": 0.1829, "step": 5448 }, { "epoch": 0.73, "grad_norm": 0.6372684838955184, "learning_rate": 1.7344070842619948e-06, "loss": 0.1161, "step": 5449 }, { "epoch": 0.73, "grad_norm": 0.9634444178010282, "learning_rate": 1.732753712491661e-06, "loss": 0.1493, "step": 5450 }, { "epoch": 0.74, "grad_norm": 0.8615483070364983, "learning_rate": 1.7311009639701848e-06, "loss": 0.1315, "step": 5451 }, { "epoch": 0.74, "grad_norm": 1.0988195880025384, "learning_rate": 1.7294488390128384e-06, "loss": 0.1733, "step": 5452 }, { "epoch": 0.74, "grad_norm": 1.005520054751948, "learning_rate": 1.727797337934778e-06, "loss": 0.1825, "step": 5453 }, { "epoch": 0.74, "grad_norm": 0.7633987827384069, "learning_rate": 1.7261464610510347e-06, "loss": 0.1076, "step": 5454 }, { "epoch": 0.74, "grad_norm": 0.7387304410193812, "learning_rate": 1.7244962086765254e-06, "loss": 0.1214, "step": 5455 }, { "epoch": 0.74, "grad_norm": 0.8386256169952521, "learning_rate": 1.7228465811260475e-06, "loss": 0.1628, "step": 5456 }, { "epoch": 0.74, "grad_norm": 0.9566476741462138, "learning_rate": 1.7211975787142777e-06, "loss": 0.1576, "step": 5457 }, { "epoch": 0.74, "grad_norm": 0.8552766362973051, "learning_rate": 1.719549201755773e-06, "loss": 0.1553, "step": 5458 }, { "epoch": 0.74, "grad_norm": 0.9743817772253441, "learning_rate": 1.7179014505649755e-06, "loss": 0.1953, "step": 5459 }, { "epoch": 0.74, "grad_norm": 0.9444874071094768, "learning_rate": 1.716254325456202e-06, "loss": 0.1821, "step": 5460 }, { "epoch": 0.74, "grad_norm": 1.0923167002585408, "learning_rate": 1.714607826743654e-06, "loss": 0.1725, "step": 5461 }, { "epoch": 0.74, "grad_norm": 0.952472455505045, "learning_rate": 1.7129619547414105e-06, "loss": 0.1407, "step": 5462 }, { "epoch": 0.74, "grad_norm": 1.0963534398476074, "learning_rate": 1.7113167097634342e-06, "loss": 0.2155, "step": 5463 }, { "epoch": 0.74, "grad_norm": 0.8164158874705063, "learning_rate": 1.7096720921235648e-06, "loss": 0.1184, "step": 5464 }, { "epoch": 0.74, "grad_norm": 0.8054809766976814, "learning_rate": 1.7080281021355256e-06, "loss": 0.1561, "step": 5465 }, { "epoch": 0.74, "grad_norm": 0.9705310204865646, "learning_rate": 1.706384740112917e-06, "loss": 0.14, "step": 5466 }, { "epoch": 0.74, "grad_norm": 1.0724777459505972, "learning_rate": 1.7047420063692222e-06, "loss": 0.1975, "step": 5467 }, { "epoch": 0.74, "grad_norm": 0.9497698007344063, "learning_rate": 1.7030999012178029e-06, "loss": 0.1798, "step": 5468 }, { "epoch": 0.74, "grad_norm": 0.8342502419295791, "learning_rate": 1.7014584249719024e-06, "loss": 0.1636, "step": 5469 }, { "epoch": 0.74, "grad_norm": 0.9821182758757945, "learning_rate": 1.6998175779446424e-06, "loss": 0.1466, "step": 5470 }, { "epoch": 0.74, "grad_norm": 0.9019738525741529, "learning_rate": 1.6981773604490254e-06, "loss": 0.1423, "step": 5471 }, { "epoch": 0.74, "grad_norm": 0.9152563559663186, "learning_rate": 1.6965377727979342e-06, "loss": 0.145, "step": 5472 }, { "epoch": 0.74, "grad_norm": 1.0315619596169243, "learning_rate": 1.6948988153041307e-06, "loss": 0.1706, "step": 5473 }, { "epoch": 0.74, "grad_norm": 1.0198951300663672, "learning_rate": 1.6932604882802567e-06, "loss": 0.1791, "step": 5474 }, { "epoch": 0.74, "grad_norm": 0.9348215652138215, "learning_rate": 1.6916227920388345e-06, "loss": 0.1388, "step": 5475 }, { "epoch": 0.74, "grad_norm": 0.7431968548710054, "learning_rate": 1.6899857268922647e-06, "loss": 0.1135, "step": 5476 }, { "epoch": 0.74, "grad_norm": 0.9143702913586842, "learning_rate": 1.6883492931528289e-06, "loss": 0.1348, "step": 5477 }, { "epoch": 0.74, "grad_norm": 0.9970812233112853, "learning_rate": 1.6867134911326877e-06, "loss": 0.1891, "step": 5478 }, { "epoch": 0.74, "grad_norm": 0.9384317346354392, "learning_rate": 1.6850783211438804e-06, "loss": 0.1782, "step": 5479 }, { "epoch": 0.74, "grad_norm": 1.1001379968157268, "learning_rate": 1.6834437834983274e-06, "loss": 0.2264, "step": 5480 }, { "epoch": 0.74, "grad_norm": 0.7307114684172865, "learning_rate": 1.6818098785078257e-06, "loss": 0.1484, "step": 5481 }, { "epoch": 0.74, "grad_norm": 0.9344541317650762, "learning_rate": 1.6801766064840587e-06, "loss": 0.1645, "step": 5482 }, { "epoch": 0.74, "grad_norm": 1.1581846894826273, "learning_rate": 1.6785439677385778e-06, "loss": 0.1385, "step": 5483 }, { "epoch": 0.74, "grad_norm": 1.1489096849929483, "learning_rate": 1.6769119625828228e-06, "loss": 0.2031, "step": 5484 }, { "epoch": 0.74, "grad_norm": 0.982095821377364, "learning_rate": 1.6752805913281085e-06, "loss": 0.1803, "step": 5485 }, { "epoch": 0.74, "grad_norm": 1.2067290595279863, "learning_rate": 1.6736498542856294e-06, "loss": 0.1941, "step": 5486 }, { "epoch": 0.74, "grad_norm": 1.0047829592389081, "learning_rate": 1.6720197517664604e-06, "loss": 0.1398, "step": 5487 }, { "epoch": 0.74, "grad_norm": 1.15305928225377, "learning_rate": 1.6703902840815539e-06, "loss": 0.1752, "step": 5488 }, { "epoch": 0.74, "grad_norm": 0.9516659481619251, "learning_rate": 1.6687614515417416e-06, "loss": 0.2168, "step": 5489 }, { "epoch": 0.74, "grad_norm": 1.0483501218882803, "learning_rate": 1.6671332544577345e-06, "loss": 0.1639, "step": 5490 }, { "epoch": 0.74, "grad_norm": 1.0610141017427652, "learning_rate": 1.6655056931401215e-06, "loss": 0.1564, "step": 5491 }, { "epoch": 0.74, "grad_norm": 0.8270610909792203, "learning_rate": 1.663878767899369e-06, "loss": 0.1344, "step": 5492 }, { "epoch": 0.74, "grad_norm": 1.0045895550506574, "learning_rate": 1.6622524790458273e-06, "loss": 0.1499, "step": 5493 }, { "epoch": 0.74, "grad_norm": 0.551689764944302, "learning_rate": 1.6606268268897202e-06, "loss": 0.0801, "step": 5494 }, { "epoch": 0.74, "grad_norm": 0.844407426078191, "learning_rate": 1.659001811741151e-06, "loss": 0.1469, "step": 5495 }, { "epoch": 0.74, "grad_norm": 1.0956942505532217, "learning_rate": 1.6573774339101028e-06, "loss": 0.1872, "step": 5496 }, { "epoch": 0.74, "grad_norm": 0.9115158175073138, "learning_rate": 1.6557536937064366e-06, "loss": 0.1799, "step": 5497 }, { "epoch": 0.74, "grad_norm": 0.7484539065891064, "learning_rate": 1.6541305914398908e-06, "loss": 0.1178, "step": 5498 }, { "epoch": 0.74, "grad_norm": 1.0333671128011532, "learning_rate": 1.652508127420085e-06, "loss": 0.1862, "step": 5499 }, { "epoch": 0.74, "grad_norm": 1.1806714706709942, "learning_rate": 1.6508863019565114e-06, "loss": 0.2127, "step": 5500 }, { "epoch": 0.74, "grad_norm": 1.3139308982416964, "learning_rate": 1.6492651153585459e-06, "loss": 0.2415, "step": 5501 }, { "epoch": 0.74, "grad_norm": 0.682756636954985, "learning_rate": 1.6476445679354408e-06, "loss": 0.1327, "step": 5502 }, { "epoch": 0.74, "grad_norm": 1.043175784904232, "learning_rate": 1.6460246599963238e-06, "loss": 0.1846, "step": 5503 }, { "epoch": 0.74, "grad_norm": 1.102911454743628, "learning_rate": 1.6444053918502068e-06, "loss": 0.1843, "step": 5504 }, { "epoch": 0.74, "grad_norm": 1.0136337027989546, "learning_rate": 1.6427867638059746e-06, "loss": 0.1725, "step": 5505 }, { "epoch": 0.74, "grad_norm": 1.0436292667882043, "learning_rate": 1.6411687761723906e-06, "loss": 0.2029, "step": 5506 }, { "epoch": 0.74, "grad_norm": 0.9342564730889761, "learning_rate": 1.6395514292580971e-06, "loss": 0.1274, "step": 5507 }, { "epoch": 0.74, "grad_norm": 0.9673205167441755, "learning_rate": 1.6379347233716136e-06, "loss": 0.1741, "step": 5508 }, { "epoch": 0.74, "grad_norm": 1.1323444102190257, "learning_rate": 1.636318658821337e-06, "loss": 0.2095, "step": 5509 }, { "epoch": 0.74, "grad_norm": 0.8352982749066139, "learning_rate": 1.6347032359155435e-06, "loss": 0.1689, "step": 5510 }, { "epoch": 0.74, "grad_norm": 0.9289999106356058, "learning_rate": 1.6330884549623844e-06, "loss": 0.1234, "step": 5511 }, { "epoch": 0.74, "grad_norm": 0.8227342329762959, "learning_rate": 1.6314743162698903e-06, "loss": 0.1569, "step": 5512 }, { "epoch": 0.74, "grad_norm": 0.6304139715984225, "learning_rate": 1.6298608201459692e-06, "loss": 0.1017, "step": 5513 }, { "epoch": 0.74, "grad_norm": 1.284321275418402, "learning_rate": 1.6282479668984064e-06, "loss": 0.202, "step": 5514 }, { "epoch": 0.74, "grad_norm": 0.7373380090481375, "learning_rate": 1.626635756834863e-06, "loss": 0.1218, "step": 5515 }, { "epoch": 0.74, "grad_norm": 1.2768852473346564, "learning_rate": 1.62502419026288e-06, "loss": 0.2195, "step": 5516 }, { "epoch": 0.74, "grad_norm": 1.1362639833519115, "learning_rate": 1.623413267489874e-06, "loss": 0.1853, "step": 5517 }, { "epoch": 0.74, "grad_norm": 1.047154621570371, "learning_rate": 1.6218029888231391e-06, "loss": 0.1735, "step": 5518 }, { "epoch": 0.74, "grad_norm": 1.1270426496368198, "learning_rate": 1.6201933545698467e-06, "loss": 0.2275, "step": 5519 }, { "epoch": 0.74, "grad_norm": 0.5647129504044901, "learning_rate": 1.6185843650370448e-06, "loss": 0.0969, "step": 5520 }, { "epoch": 0.74, "grad_norm": 1.124113837138375, "learning_rate": 1.61697602053166e-06, "loss": 0.1682, "step": 5521 }, { "epoch": 0.74, "grad_norm": 0.8908526677721254, "learning_rate": 1.6153683213604932e-06, "loss": 0.1711, "step": 5522 }, { "epoch": 0.74, "grad_norm": 0.956939829983241, "learning_rate": 1.6137612678302245e-06, "loss": 0.1697, "step": 5523 }, { "epoch": 0.74, "grad_norm": 0.6998761363753127, "learning_rate": 1.6121548602474096e-06, "loss": 0.1066, "step": 5524 }, { "epoch": 0.75, "grad_norm": 0.6648704446287486, "learning_rate": 1.6105490989184813e-06, "loss": 0.119, "step": 5525 }, { "epoch": 0.75, "grad_norm": 0.9293492675173619, "learning_rate": 1.6089439841497485e-06, "loss": 0.1625, "step": 5526 }, { "epoch": 0.75, "grad_norm": 0.9876269026968426, "learning_rate": 1.6073395162473997e-06, "loss": 0.1674, "step": 5527 }, { "epoch": 0.75, "grad_norm": 0.6395535729600427, "learning_rate": 1.6057356955174964e-06, "loss": 0.0844, "step": 5528 }, { "epoch": 0.75, "grad_norm": 0.801493152608683, "learning_rate": 1.6041325222659805e-06, "loss": 0.1193, "step": 5529 }, { "epoch": 0.75, "grad_norm": 1.3240641542918758, "learning_rate": 1.602529996798663e-06, "loss": 0.2315, "step": 5530 }, { "epoch": 0.75, "grad_norm": 1.0454119522016874, "learning_rate": 1.6009281194212388e-06, "loss": 0.1783, "step": 5531 }, { "epoch": 0.75, "grad_norm": 1.077045539841088, "learning_rate": 1.5993268904392772e-06, "loss": 0.1535, "step": 5532 }, { "epoch": 0.75, "grad_norm": 0.9727301000594214, "learning_rate": 1.5977263101582225e-06, "loss": 0.1645, "step": 5533 }, { "epoch": 0.75, "grad_norm": 0.8747164168925882, "learning_rate": 1.5961263788833959e-06, "loss": 0.1809, "step": 5534 }, { "epoch": 0.75, "grad_norm": 1.1405230482328645, "learning_rate": 1.5945270969199955e-06, "loss": 0.1897, "step": 5535 }, { "epoch": 0.75, "grad_norm": 0.8783806163066304, "learning_rate": 1.5929284645730942e-06, "loss": 0.1493, "step": 5536 }, { "epoch": 0.75, "grad_norm": 1.0328312553917045, "learning_rate": 1.5913304821476405e-06, "loss": 0.165, "step": 5537 }, { "epoch": 0.75, "grad_norm": 1.0454692372869767, "learning_rate": 1.589733149948463e-06, "loss": 0.1671, "step": 5538 }, { "epoch": 0.75, "grad_norm": 0.7758327226389495, "learning_rate": 1.5881364682802618e-06, "loss": 0.1539, "step": 5539 }, { "epoch": 0.75, "grad_norm": 1.0224892774115582, "learning_rate": 1.5865404374476156e-06, "loss": 0.1757, "step": 5540 }, { "epoch": 0.75, "grad_norm": 1.0560452256710988, "learning_rate": 1.5849450577549763e-06, "loss": 0.1717, "step": 5541 }, { "epoch": 0.75, "grad_norm": 0.9024617933965221, "learning_rate": 1.5833503295066744e-06, "loss": 0.1555, "step": 5542 }, { "epoch": 0.75, "grad_norm": 0.880744951425145, "learning_rate": 1.5817562530069135e-06, "loss": 0.1382, "step": 5543 }, { "epoch": 0.75, "grad_norm": 0.7048328101355242, "learning_rate": 1.5801628285597759e-06, "loss": 0.1418, "step": 5544 }, { "epoch": 0.75, "grad_norm": 0.7987488367880792, "learning_rate": 1.5785700564692163e-06, "loss": 0.1376, "step": 5545 }, { "epoch": 0.75, "grad_norm": 0.8413783461022013, "learning_rate": 1.5769779370390692e-06, "loss": 0.1169, "step": 5546 }, { "epoch": 0.75, "grad_norm": 1.029651895164156, "learning_rate": 1.5753864705730382e-06, "loss": 0.172, "step": 5547 }, { "epoch": 0.75, "grad_norm": 1.1538576137785854, "learning_rate": 1.5737956573747059e-06, "loss": 0.1803, "step": 5548 }, { "epoch": 0.75, "grad_norm": 0.9432674627611783, "learning_rate": 1.5722054977475342e-06, "loss": 0.1637, "step": 5549 }, { "epoch": 0.75, "grad_norm": 0.8959409774455163, "learning_rate": 1.570615991994855e-06, "loss": 0.131, "step": 5550 }, { "epoch": 0.75, "grad_norm": 1.0463617303541264, "learning_rate": 1.5690271404198765e-06, "loss": 0.156, "step": 5551 }, { "epoch": 0.75, "grad_norm": 1.2061974851011135, "learning_rate": 1.5674389433256826e-06, "loss": 0.1749, "step": 5552 }, { "epoch": 0.75, "grad_norm": 0.9877365891002947, "learning_rate": 1.5658514010152332e-06, "loss": 0.1658, "step": 5553 }, { "epoch": 0.75, "grad_norm": 0.9293110973778718, "learning_rate": 1.564264513791362e-06, "loss": 0.2134, "step": 5554 }, { "epoch": 0.75, "grad_norm": 0.9173628612286365, "learning_rate": 1.5626782819567781e-06, "loss": 0.1585, "step": 5555 }, { "epoch": 0.75, "grad_norm": 0.962001324113383, "learning_rate": 1.5610927058140663e-06, "loss": 0.1486, "step": 5556 }, { "epoch": 0.75, "grad_norm": 1.1624011752326804, "learning_rate": 1.559507785665685e-06, "loss": 0.1696, "step": 5557 }, { "epoch": 0.75, "grad_norm": 0.8759865584940039, "learning_rate": 1.5579235218139682e-06, "loss": 0.1407, "step": 5558 }, { "epoch": 0.75, "grad_norm": 0.8271025241275688, "learning_rate": 1.5563399145611258e-06, "loss": 0.1251, "step": 5559 }, { "epoch": 0.75, "grad_norm": 0.9544529717132422, "learning_rate": 1.5547569642092397e-06, "loss": 0.1707, "step": 5560 }, { "epoch": 0.75, "grad_norm": 0.9888278611344647, "learning_rate": 1.5531746710602701e-06, "loss": 0.1614, "step": 5561 }, { "epoch": 0.75, "grad_norm": 1.0416680824334568, "learning_rate": 1.5515930354160485e-06, "loss": 0.1543, "step": 5562 }, { "epoch": 0.75, "grad_norm": 0.8725634249036154, "learning_rate": 1.5500120575782823e-06, "loss": 0.1329, "step": 5563 }, { "epoch": 0.75, "grad_norm": 1.1104195833926191, "learning_rate": 1.5484317378485548e-06, "loss": 0.1865, "step": 5564 }, { "epoch": 0.75, "grad_norm": 1.0939594770680836, "learning_rate": 1.5468520765283207e-06, "loss": 0.1488, "step": 5565 }, { "epoch": 0.75, "grad_norm": 0.8151971076384611, "learning_rate": 1.5452730739189125e-06, "loss": 0.1317, "step": 5566 }, { "epoch": 0.75, "grad_norm": 0.792759132210997, "learning_rate": 1.5436947303215344e-06, "loss": 0.1431, "step": 5567 }, { "epoch": 0.75, "grad_norm": 1.010319955220926, "learning_rate": 1.5421170460372659e-06, "loss": 0.1973, "step": 5568 }, { "epoch": 0.75, "grad_norm": 0.7824304138752047, "learning_rate": 1.5405400213670612e-06, "loss": 0.1234, "step": 5569 }, { "epoch": 0.75, "grad_norm": 1.118546803335767, "learning_rate": 1.5389636566117478e-06, "loss": 0.2131, "step": 5570 }, { "epoch": 0.75, "grad_norm": 1.0304472791435797, "learning_rate": 1.5373879520720264e-06, "loss": 0.1809, "step": 5571 }, { "epoch": 0.75, "grad_norm": 0.6428866752117786, "learning_rate": 1.5358129080484763e-06, "loss": 0.1435, "step": 5572 }, { "epoch": 0.75, "grad_norm": 0.8789017747528924, "learning_rate": 1.5342385248415453e-06, "loss": 0.1561, "step": 5573 }, { "epoch": 0.75, "grad_norm": 1.193529561889568, "learning_rate": 1.5326648027515583e-06, "loss": 0.2206, "step": 5574 }, { "epoch": 0.75, "grad_norm": 0.7778456083303918, "learning_rate": 1.531091742078713e-06, "loss": 0.1579, "step": 5575 }, { "epoch": 0.75, "grad_norm": 0.8606557804844505, "learning_rate": 1.529519343123082e-06, "loss": 0.1605, "step": 5576 }, { "epoch": 0.75, "grad_norm": 0.8069643537418828, "learning_rate": 1.5279476061846088e-06, "loss": 0.1396, "step": 5577 }, { "epoch": 0.75, "grad_norm": 0.9620302721686195, "learning_rate": 1.5263765315631134e-06, "loss": 0.1734, "step": 5578 }, { "epoch": 0.75, "grad_norm": 0.8893656261145784, "learning_rate": 1.5248061195582886e-06, "loss": 0.1464, "step": 5579 }, { "epoch": 0.75, "grad_norm": 0.9261914567791036, "learning_rate": 1.5232363704697017e-06, "loss": 0.1459, "step": 5580 }, { "epoch": 0.75, "grad_norm": 1.086310222780151, "learning_rate": 1.5216672845967918e-06, "loss": 0.1777, "step": 5581 }, { "epoch": 0.75, "grad_norm": 0.8644090970310923, "learning_rate": 1.5200988622388713e-06, "loss": 0.1631, "step": 5582 }, { "epoch": 0.75, "grad_norm": 1.235735675725106, "learning_rate": 1.51853110369513e-06, "loss": 0.2307, "step": 5583 }, { "epoch": 0.75, "grad_norm": 1.0952194450837458, "learning_rate": 1.5169640092646275e-06, "loss": 0.2012, "step": 5584 }, { "epoch": 0.75, "grad_norm": 0.7398299296211878, "learning_rate": 1.5153975792462966e-06, "loss": 0.1179, "step": 5585 }, { "epoch": 0.75, "grad_norm": 1.0347185627966597, "learning_rate": 1.513831813938944e-06, "loss": 0.1474, "step": 5586 }, { "epoch": 0.75, "grad_norm": 1.2622209013377907, "learning_rate": 1.5122667136412511e-06, "loss": 0.2183, "step": 5587 }, { "epoch": 0.75, "grad_norm": 0.9466966713189474, "learning_rate": 1.5107022786517694e-06, "loss": 0.2023, "step": 5588 }, { "epoch": 0.75, "grad_norm": 0.9791336878466935, "learning_rate": 1.5091385092689265e-06, "loss": 0.1559, "step": 5589 }, { "epoch": 0.75, "grad_norm": 1.06101603680242, "learning_rate": 1.5075754057910214e-06, "loss": 0.2369, "step": 5590 }, { "epoch": 0.75, "grad_norm": 1.1955610819353113, "learning_rate": 1.5060129685162262e-06, "loss": 0.2095, "step": 5591 }, { "epoch": 0.75, "grad_norm": 1.0451710084567567, "learning_rate": 1.5044511977425862e-06, "loss": 0.1903, "step": 5592 }, { "epoch": 0.75, "grad_norm": 1.1678192508754173, "learning_rate": 1.5028900937680202e-06, "loss": 0.2016, "step": 5593 }, { "epoch": 0.75, "grad_norm": 0.7890223590441017, "learning_rate": 1.501329656890318e-06, "loss": 0.1374, "step": 5594 }, { "epoch": 0.75, "grad_norm": 0.9170023407665784, "learning_rate": 1.4997698874071443e-06, "loss": 0.1734, "step": 5595 }, { "epoch": 0.75, "grad_norm": 1.0555325633452421, "learning_rate": 1.498210785616035e-06, "loss": 0.1823, "step": 5596 }, { "epoch": 0.75, "grad_norm": 0.9241047222843082, "learning_rate": 1.4966523518143987e-06, "loss": 0.1511, "step": 5597 }, { "epoch": 0.75, "grad_norm": 1.0332136986723277, "learning_rate": 1.4950945862995176e-06, "loss": 0.1918, "step": 5598 }, { "epoch": 0.75, "grad_norm": 0.7067934023814627, "learning_rate": 1.493537489368545e-06, "loss": 0.1073, "step": 5599 }, { "epoch": 0.76, "grad_norm": 1.3075666604691603, "learning_rate": 1.4919810613185088e-06, "loss": 0.2156, "step": 5600 }, { "epoch": 0.76, "grad_norm": 1.135673787936759, "learning_rate": 1.4904253024463066e-06, "loss": 0.1385, "step": 5601 }, { "epoch": 0.76, "grad_norm": 1.1021359164254065, "learning_rate": 1.4888702130487104e-06, "loss": 0.201, "step": 5602 }, { "epoch": 0.76, "grad_norm": 0.6399577586157662, "learning_rate": 1.4873157934223636e-06, "loss": 0.096, "step": 5603 }, { "epoch": 0.76, "grad_norm": 0.7872210984821215, "learning_rate": 1.4857620438637804e-06, "loss": 0.1551, "step": 5604 }, { "epoch": 0.76, "grad_norm": 0.957906227860744, "learning_rate": 1.4842089646693536e-06, "loss": 0.1831, "step": 5605 }, { "epoch": 0.76, "grad_norm": 0.9703273195668216, "learning_rate": 1.4826565561353389e-06, "loss": 0.144, "step": 5606 }, { "epoch": 0.76, "grad_norm": 0.7493042032582041, "learning_rate": 1.4811048185578697e-06, "loss": 0.1186, "step": 5607 }, { "epoch": 0.76, "grad_norm": 0.6747202643314286, "learning_rate": 1.47955375223295e-06, "loss": 0.1167, "step": 5608 }, { "epoch": 0.76, "grad_norm": 0.8721626873468166, "learning_rate": 1.4780033574564568e-06, "loss": 0.1253, "step": 5609 }, { "epoch": 0.76, "grad_norm": 0.9627178599231091, "learning_rate": 1.4764536345241375e-06, "loss": 0.1812, "step": 5610 }, { "epoch": 0.76, "grad_norm": 0.8388705920721807, "learning_rate": 1.4749045837316123e-06, "loss": 0.1442, "step": 5611 }, { "epoch": 0.76, "grad_norm": 0.8618166817019361, "learning_rate": 1.4733562053743727e-06, "loss": 0.1614, "step": 5612 }, { "epoch": 0.76, "grad_norm": 1.0413587841562717, "learning_rate": 1.4718084997477821e-06, "loss": 0.2213, "step": 5613 }, { "epoch": 0.76, "grad_norm": 1.040488357778776, "learning_rate": 1.4702614671470754e-06, "loss": 0.2158, "step": 5614 }, { "epoch": 0.76, "grad_norm": 1.1399917066332172, "learning_rate": 1.4687151078673584e-06, "loss": 0.1972, "step": 5615 }, { "epoch": 0.76, "grad_norm": 1.4147503745655832, "learning_rate": 1.467169422203612e-06, "loss": 0.2306, "step": 5616 }, { "epoch": 0.76, "grad_norm": 1.1358502844034042, "learning_rate": 1.4656244104506844e-06, "loss": 0.1796, "step": 5617 }, { "epoch": 0.76, "grad_norm": 1.1103195022267982, "learning_rate": 1.4640800729032973e-06, "loss": 0.2047, "step": 5618 }, { "epoch": 0.76, "grad_norm": 0.9910118593414691, "learning_rate": 1.4625364098560429e-06, "loss": 0.1873, "step": 5619 }, { "epoch": 0.76, "grad_norm": 1.0121085147324915, "learning_rate": 1.460993421603385e-06, "loss": 0.1419, "step": 5620 }, { "epoch": 0.76, "grad_norm": 1.10786415179282, "learning_rate": 1.459451108439659e-06, "loss": 0.2308, "step": 5621 }, { "epoch": 0.76, "grad_norm": 0.7044331219777688, "learning_rate": 1.4579094706590718e-06, "loss": 0.1054, "step": 5622 }, { "epoch": 0.76, "grad_norm": 0.9493912619493424, "learning_rate": 1.4563685085557017e-06, "loss": 0.1646, "step": 5623 }, { "epoch": 0.76, "grad_norm": 0.9918273686576888, "learning_rate": 1.454828222423495e-06, "loss": 0.1548, "step": 5624 }, { "epoch": 0.76, "grad_norm": 1.0983804197984168, "learning_rate": 1.453288612556273e-06, "loss": 0.1864, "step": 5625 }, { "epoch": 0.76, "grad_norm": 1.0648045147064589, "learning_rate": 1.451749679247726e-06, "loss": 0.1971, "step": 5626 }, { "epoch": 0.76, "grad_norm": 0.8536868742808928, "learning_rate": 1.4502114227914138e-06, "loss": 0.1367, "step": 5627 }, { "epoch": 0.76, "grad_norm": 0.8308743792421408, "learning_rate": 1.4486738434807734e-06, "loss": 0.1102, "step": 5628 }, { "epoch": 0.76, "grad_norm": 1.2338921326083185, "learning_rate": 1.4471369416091064e-06, "loss": 0.2544, "step": 5629 }, { "epoch": 0.76, "grad_norm": 0.8969279313640397, "learning_rate": 1.4456007174695857e-06, "loss": 0.134, "step": 5630 }, { "epoch": 0.76, "grad_norm": 0.9770668390667788, "learning_rate": 1.4440651713552577e-06, "loss": 0.1108, "step": 5631 }, { "epoch": 0.76, "grad_norm": 0.7927420891065524, "learning_rate": 1.4425303035590372e-06, "loss": 0.1753, "step": 5632 }, { "epoch": 0.76, "grad_norm": 0.9489299400712636, "learning_rate": 1.440996114373711e-06, "loss": 0.1493, "step": 5633 }, { "epoch": 0.76, "grad_norm": 0.8917261578576979, "learning_rate": 1.439462604091935e-06, "loss": 0.1619, "step": 5634 }, { "epoch": 0.76, "grad_norm": 0.7609235212659502, "learning_rate": 1.4379297730062368e-06, "loss": 0.1462, "step": 5635 }, { "epoch": 0.76, "grad_norm": 0.8551429292482775, "learning_rate": 1.4363976214090147e-06, "loss": 0.157, "step": 5636 }, { "epoch": 0.76, "grad_norm": 0.9692526868830637, "learning_rate": 1.434866149592536e-06, "loss": 0.1881, "step": 5637 }, { "epoch": 0.76, "grad_norm": 1.0973316753119573, "learning_rate": 1.4333353578489396e-06, "loss": 0.1673, "step": 5638 }, { "epoch": 0.76, "grad_norm": 1.0825385513582186, "learning_rate": 1.4318052464702337e-06, "loss": 0.1694, "step": 5639 }, { "epoch": 0.76, "grad_norm": 0.8149984197755236, "learning_rate": 1.4302758157482972e-06, "loss": 0.162, "step": 5640 }, { "epoch": 0.76, "grad_norm": 0.9962131220599628, "learning_rate": 1.4287470659748792e-06, "loss": 0.1324, "step": 5641 }, { "epoch": 0.76, "grad_norm": 0.8915416187671057, "learning_rate": 1.427218997441599e-06, "loss": 0.1251, "step": 5642 }, { "epoch": 0.76, "grad_norm": 1.0812529006923497, "learning_rate": 1.425691610439946e-06, "loss": 0.1719, "step": 5643 }, { "epoch": 0.76, "grad_norm": 0.9362491981693407, "learning_rate": 1.4241649052612789e-06, "loss": 0.1259, "step": 5644 }, { "epoch": 0.76, "grad_norm": 0.8424636730629799, "learning_rate": 1.4226388821968267e-06, "loss": 0.1739, "step": 5645 }, { "epoch": 0.76, "grad_norm": 0.9562604652735005, "learning_rate": 1.4211135415376892e-06, "loss": 0.155, "step": 5646 }, { "epoch": 0.76, "grad_norm": 0.9031468710374682, "learning_rate": 1.4195888835748344e-06, "loss": 0.1534, "step": 5647 }, { "epoch": 0.76, "grad_norm": 1.2728602359404124, "learning_rate": 1.4180649085991015e-06, "loss": 0.2219, "step": 5648 }, { "epoch": 0.76, "grad_norm": 1.1111606718826983, "learning_rate": 1.416541616901197e-06, "loss": 0.2128, "step": 5649 }, { "epoch": 0.76, "grad_norm": 0.8797074975436308, "learning_rate": 1.415019008771702e-06, "loss": 0.1568, "step": 5650 }, { "epoch": 0.76, "grad_norm": 0.8814655184396062, "learning_rate": 1.4134970845010627e-06, "loss": 0.1534, "step": 5651 }, { "epoch": 0.76, "grad_norm": 0.959618854986842, "learning_rate": 1.4119758443795972e-06, "loss": 0.1919, "step": 5652 }, { "epoch": 0.76, "grad_norm": 0.7698253445927986, "learning_rate": 1.4104552886974892e-06, "loss": 0.1049, "step": 5653 }, { "epoch": 0.76, "grad_norm": 0.9110000825562816, "learning_rate": 1.4089354177447974e-06, "loss": 0.1461, "step": 5654 }, { "epoch": 0.76, "grad_norm": 0.7366159046721621, "learning_rate": 1.4074162318114459e-06, "loss": 0.119, "step": 5655 }, { "epoch": 0.76, "grad_norm": 0.7464123105917371, "learning_rate": 1.4058977311872295e-06, "loss": 0.1403, "step": 5656 }, { "epoch": 0.76, "grad_norm": 0.7013735715734631, "learning_rate": 1.4043799161618132e-06, "loss": 0.1246, "step": 5657 }, { "epoch": 0.76, "grad_norm": 0.7132618340693992, "learning_rate": 1.4028627870247297e-06, "loss": 0.0992, "step": 5658 }, { "epoch": 0.76, "grad_norm": 0.7200297265489327, "learning_rate": 1.4013463440653812e-06, "loss": 0.1242, "step": 5659 }, { "epoch": 0.76, "grad_norm": 1.0617393123796535, "learning_rate": 1.3998305875730383e-06, "loss": 0.1697, "step": 5660 }, { "epoch": 0.76, "grad_norm": 0.7250957027438037, "learning_rate": 1.3983155178368447e-06, "loss": 0.1363, "step": 5661 }, { "epoch": 0.76, "grad_norm": 0.9339822146924116, "learning_rate": 1.3968011351458077e-06, "loss": 0.2175, "step": 5662 }, { "epoch": 0.76, "grad_norm": 0.6988704840035852, "learning_rate": 1.395287439788806e-06, "loss": 0.1294, "step": 5663 }, { "epoch": 0.76, "grad_norm": 0.9800955001482545, "learning_rate": 1.3937744320545871e-06, "loss": 0.1773, "step": 5664 }, { "epoch": 0.76, "grad_norm": 0.9105204826255374, "learning_rate": 1.392262112231768e-06, "loss": 0.1556, "step": 5665 }, { "epoch": 0.76, "grad_norm": 1.3835248990232372, "learning_rate": 1.3907504806088323e-06, "loss": 0.2526, "step": 5666 }, { "epoch": 0.76, "grad_norm": 1.0099776428486726, "learning_rate": 1.3892395374741353e-06, "loss": 0.1673, "step": 5667 }, { "epoch": 0.76, "grad_norm": 0.9665290850852246, "learning_rate": 1.3877292831158983e-06, "loss": 0.137, "step": 5668 }, { "epoch": 0.76, "grad_norm": 0.8712943672350778, "learning_rate": 1.3862197178222143e-06, "loss": 0.1593, "step": 5669 }, { "epoch": 0.76, "grad_norm": 0.8684475796888456, "learning_rate": 1.384710841881039e-06, "loss": 0.1384, "step": 5670 }, { "epoch": 0.76, "grad_norm": 0.7722624935808339, "learning_rate": 1.3832026555802031e-06, "loss": 0.1258, "step": 5671 }, { "epoch": 0.76, "grad_norm": 1.0462096718908946, "learning_rate": 1.3816951592074013e-06, "loss": 0.1473, "step": 5672 }, { "epoch": 0.76, "grad_norm": 1.0720667145425578, "learning_rate": 1.3801883530502015e-06, "loss": 0.1646, "step": 5673 }, { "epoch": 0.77, "grad_norm": 1.0972721963892826, "learning_rate": 1.3786822373960352e-06, "loss": 0.159, "step": 5674 }, { "epoch": 0.77, "grad_norm": 0.8854877266050669, "learning_rate": 1.3771768125322044e-06, "loss": 0.1694, "step": 5675 }, { "epoch": 0.77, "grad_norm": 1.1248657585674666, "learning_rate": 1.3756720787458783e-06, "loss": 0.1712, "step": 5676 }, { "epoch": 0.77, "grad_norm": 0.9691120486744056, "learning_rate": 1.374168036324095e-06, "loss": 0.1557, "step": 5677 }, { "epoch": 0.77, "grad_norm": 1.1130400492617574, "learning_rate": 1.3726646855537606e-06, "loss": 0.1846, "step": 5678 }, { "epoch": 0.77, "grad_norm": 0.6587941779904131, "learning_rate": 1.3711620267216492e-06, "loss": 0.0776, "step": 5679 }, { "epoch": 0.77, "grad_norm": 1.0998800752416784, "learning_rate": 1.3696600601144034e-06, "loss": 0.1406, "step": 5680 }, { "epoch": 0.77, "grad_norm": 1.0158183768688154, "learning_rate": 1.3681587860185325e-06, "loss": 0.1287, "step": 5681 }, { "epoch": 0.77, "grad_norm": 0.9351924039951767, "learning_rate": 1.3666582047204146e-06, "loss": 0.146, "step": 5682 }, { "epoch": 0.77, "grad_norm": 1.0956756287615625, "learning_rate": 1.365158316506296e-06, "loss": 0.2, "step": 5683 }, { "epoch": 0.77, "grad_norm": 0.7018238401737436, "learning_rate": 1.3636591216622902e-06, "loss": 0.0994, "step": 5684 }, { "epoch": 0.77, "grad_norm": 0.9765663848449894, "learning_rate": 1.3621606204743782e-06, "loss": 0.1549, "step": 5685 }, { "epoch": 0.77, "grad_norm": 0.8824223612408356, "learning_rate": 1.3606628132284088e-06, "loss": 0.1667, "step": 5686 }, { "epoch": 0.77, "grad_norm": 0.8451413505798369, "learning_rate": 1.3591657002100995e-06, "loss": 0.1255, "step": 5687 }, { "epoch": 0.77, "grad_norm": 0.8403027281084284, "learning_rate": 1.357669281705034e-06, "loss": 0.1643, "step": 5688 }, { "epoch": 0.77, "grad_norm": 0.9881361673906525, "learning_rate": 1.356173557998664e-06, "loss": 0.1777, "step": 5689 }, { "epoch": 0.77, "grad_norm": 1.060683946321515, "learning_rate": 1.3546785293763086e-06, "loss": 0.1892, "step": 5690 }, { "epoch": 0.77, "grad_norm": 1.1857659145387929, "learning_rate": 1.353184196123155e-06, "loss": 0.2204, "step": 5691 }, { "epoch": 0.77, "grad_norm": 0.977262425871865, "learning_rate": 1.3516905585242568e-06, "loss": 0.169, "step": 5692 }, { "epoch": 0.77, "grad_norm": 0.7982493384346914, "learning_rate": 1.3501976168645348e-06, "loss": 0.1239, "step": 5693 }, { "epoch": 0.77, "grad_norm": 1.0247354937286968, "learning_rate": 1.3487053714287768e-06, "loss": 0.1928, "step": 5694 }, { "epoch": 0.77, "grad_norm": 0.852812593073416, "learning_rate": 1.3472138225016406e-06, "loss": 0.1631, "step": 5695 }, { "epoch": 0.77, "grad_norm": 0.5204030194361894, "learning_rate": 1.3457229703676483e-06, "loss": 0.1027, "step": 5696 }, { "epoch": 0.77, "grad_norm": 0.8480415384857102, "learning_rate": 1.34423281531119e-06, "loss": 0.1673, "step": 5697 }, { "epoch": 0.77, "grad_norm": 1.0401528402016536, "learning_rate": 1.3427433576165217e-06, "loss": 0.1766, "step": 5698 }, { "epoch": 0.77, "grad_norm": 0.6362944400517881, "learning_rate": 1.3412545975677693e-06, "loss": 0.1068, "step": 5699 }, { "epoch": 0.77, "grad_norm": 1.0231105857822622, "learning_rate": 1.3397665354489208e-06, "loss": 0.1521, "step": 5700 }, { "epoch": 0.77, "grad_norm": 0.6071695400849098, "learning_rate": 1.3382791715438353e-06, "loss": 0.1018, "step": 5701 }, { "epoch": 0.77, "grad_norm": 1.2383091413430833, "learning_rate": 1.3367925061362374e-06, "loss": 0.2224, "step": 5702 }, { "epoch": 0.77, "grad_norm": 0.8546941545208087, "learning_rate": 1.335306539509718e-06, "loss": 0.133, "step": 5703 }, { "epoch": 0.77, "grad_norm": 1.0363729086290066, "learning_rate": 1.333821271947735e-06, "loss": 0.1592, "step": 5704 }, { "epoch": 0.77, "grad_norm": 1.217536924233559, "learning_rate": 1.332336703733612e-06, "loss": 0.1899, "step": 5705 }, { "epoch": 0.77, "grad_norm": 0.9229985913767291, "learning_rate": 1.3308528351505433e-06, "loss": 0.2144, "step": 5706 }, { "epoch": 0.77, "grad_norm": 1.0197117648307006, "learning_rate": 1.3293696664815852e-06, "loss": 0.2049, "step": 5707 }, { "epoch": 0.77, "grad_norm": 0.8816045680616584, "learning_rate": 1.3278871980096608e-06, "loss": 0.1447, "step": 5708 }, { "epoch": 0.77, "grad_norm": 1.2009849465577755, "learning_rate": 1.326405430017562e-06, "loss": 0.2162, "step": 5709 }, { "epoch": 0.77, "grad_norm": 0.7288186268013396, "learning_rate": 1.324924362787946e-06, "loss": 0.1185, "step": 5710 }, { "epoch": 0.77, "grad_norm": 0.8989670028601034, "learning_rate": 1.3234439966033346e-06, "loss": 0.1466, "step": 5711 }, { "epoch": 0.77, "grad_norm": 1.143571756799725, "learning_rate": 1.3219643317461195e-06, "loss": 0.2312, "step": 5712 }, { "epoch": 0.77, "grad_norm": 1.0362370638490492, "learning_rate": 1.3204853684985547e-06, "loss": 0.1605, "step": 5713 }, { "epoch": 0.77, "grad_norm": 0.9416623377296394, "learning_rate": 1.3190071071427634e-06, "loss": 0.1393, "step": 5714 }, { "epoch": 0.77, "grad_norm": 0.9690402946757679, "learning_rate": 1.3175295479607336e-06, "loss": 0.169, "step": 5715 }, { "epoch": 0.77, "grad_norm": 1.1828095383677444, "learning_rate": 1.3160526912343192e-06, "loss": 0.2237, "step": 5716 }, { "epoch": 0.77, "grad_norm": 0.895356957220248, "learning_rate": 1.3145765372452402e-06, "loss": 0.1588, "step": 5717 }, { "epoch": 0.77, "grad_norm": 0.6989923770012598, "learning_rate": 1.3131010862750838e-06, "loss": 0.1251, "step": 5718 }, { "epoch": 0.77, "grad_norm": 1.053154395071392, "learning_rate": 1.3116263386053002e-06, "loss": 0.1668, "step": 5719 }, { "epoch": 0.77, "grad_norm": 0.6655267388408878, "learning_rate": 1.310152294517209e-06, "loss": 0.1521, "step": 5720 }, { "epoch": 0.77, "grad_norm": 0.5724723569781257, "learning_rate": 1.3086789542919936e-06, "loss": 0.0961, "step": 5721 }, { "epoch": 0.77, "grad_norm": 0.6418876841328918, "learning_rate": 1.3072063182107026e-06, "loss": 0.1216, "step": 5722 }, { "epoch": 0.77, "grad_norm": 0.8260343972094495, "learning_rate": 1.3057343865542516e-06, "loss": 0.1107, "step": 5723 }, { "epoch": 0.77, "grad_norm": 0.7498775186017452, "learning_rate": 1.304263159603421e-06, "loss": 0.1279, "step": 5724 }, { "epoch": 0.77, "grad_norm": 0.9678020557367353, "learning_rate": 1.3027926376388578e-06, "loss": 0.2099, "step": 5725 }, { "epoch": 0.77, "grad_norm": 1.0039054490043784, "learning_rate": 1.3013228209410732e-06, "loss": 0.1576, "step": 5726 }, { "epoch": 0.77, "grad_norm": 0.8936079518505844, "learning_rate": 1.2998537097904445e-06, "loss": 0.1753, "step": 5727 }, { "epoch": 0.77, "grad_norm": 0.6870570955498208, "learning_rate": 1.298385304467213e-06, "loss": 0.0862, "step": 5728 }, { "epoch": 0.77, "grad_norm": 0.8477124578031644, "learning_rate": 1.296917605251491e-06, "loss": 0.168, "step": 5729 }, { "epoch": 0.77, "grad_norm": 1.2758245007415772, "learning_rate": 1.2954506124232475e-06, "loss": 0.1681, "step": 5730 }, { "epoch": 0.77, "grad_norm": 0.8308163229465512, "learning_rate": 1.2939843262623224e-06, "loss": 0.156, "step": 5731 }, { "epoch": 0.77, "grad_norm": 1.2840689816845992, "learning_rate": 1.2925187470484203e-06, "loss": 0.2161, "step": 5732 }, { "epoch": 0.77, "grad_norm": 0.9084468225293593, "learning_rate": 1.2910538750611095e-06, "loss": 0.1655, "step": 5733 }, { "epoch": 0.77, "grad_norm": 0.9473946240892267, "learning_rate": 1.2895897105798238e-06, "loss": 0.1835, "step": 5734 }, { "epoch": 0.77, "grad_norm": 0.9335530707941561, "learning_rate": 1.2881262538838624e-06, "loss": 0.1398, "step": 5735 }, { "epoch": 0.77, "grad_norm": 0.7521251725297335, "learning_rate": 1.2866635052523902e-06, "loss": 0.112, "step": 5736 }, { "epoch": 0.77, "grad_norm": 0.9405289823443415, "learning_rate": 1.2852014649644345e-06, "loss": 0.1883, "step": 5737 }, { "epoch": 0.77, "grad_norm": 0.8890110008495447, "learning_rate": 1.2837401332988907e-06, "loss": 0.1475, "step": 5738 }, { "epoch": 0.77, "grad_norm": 1.1765448998899475, "learning_rate": 1.2822795105345155e-06, "loss": 0.1714, "step": 5739 }, { "epoch": 0.77, "grad_norm": 0.6976119282258958, "learning_rate": 1.2808195969499354e-06, "loss": 0.1031, "step": 5740 }, { "epoch": 0.77, "grad_norm": 1.201068247001143, "learning_rate": 1.279360392823637e-06, "loss": 0.183, "step": 5741 }, { "epoch": 0.77, "grad_norm": 0.8488294843275107, "learning_rate": 1.2779018984339724e-06, "loss": 0.1735, "step": 5742 }, { "epoch": 0.77, "grad_norm": 0.7074763858489392, "learning_rate": 1.2764441140591605e-06, "loss": 0.1303, "step": 5743 }, { "epoch": 0.77, "grad_norm": 1.1448669523865138, "learning_rate": 1.2749870399772824e-06, "loss": 0.2024, "step": 5744 }, { "epoch": 0.77, "grad_norm": 1.096660117077752, "learning_rate": 1.2735306764662847e-06, "loss": 0.1899, "step": 5745 }, { "epoch": 0.77, "grad_norm": 1.1670308393461015, "learning_rate": 1.2720750238039804e-06, "loss": 0.2298, "step": 5746 }, { "epoch": 0.77, "grad_norm": 0.8724606208596126, "learning_rate": 1.2706200822680414e-06, "loss": 0.1237, "step": 5747 }, { "epoch": 0.78, "grad_norm": 1.0523918780296329, "learning_rate": 1.2691658521360084e-06, "loss": 0.2287, "step": 5748 }, { "epoch": 0.78, "grad_norm": 1.1798221604914925, "learning_rate": 1.267712333685287e-06, "loss": 0.2091, "step": 5749 }, { "epoch": 0.78, "grad_norm": 0.9486977847115328, "learning_rate": 1.2662595271931422e-06, "loss": 0.1566, "step": 5750 }, { "epoch": 0.78, "grad_norm": 0.9640932749986896, "learning_rate": 1.2648074329367105e-06, "loss": 0.1501, "step": 5751 }, { "epoch": 0.78, "grad_norm": 0.839914390047931, "learning_rate": 1.2633560511929871e-06, "loss": 0.1314, "step": 5752 }, { "epoch": 0.78, "grad_norm": 0.865525559295721, "learning_rate": 1.2619053822388321e-06, "loss": 0.1198, "step": 5753 }, { "epoch": 0.78, "grad_norm": 0.9623939121876963, "learning_rate": 1.2604554263509706e-06, "loss": 0.1574, "step": 5754 }, { "epoch": 0.78, "grad_norm": 1.1700873642642557, "learning_rate": 1.2590061838059908e-06, "loss": 0.2009, "step": 5755 }, { "epoch": 0.78, "grad_norm": 1.0647766185671295, "learning_rate": 1.2575576548803459e-06, "loss": 0.2032, "step": 5756 }, { "epoch": 0.78, "grad_norm": 0.9545967908547007, "learning_rate": 1.2561098398503518e-06, "loss": 0.1647, "step": 5757 }, { "epoch": 0.78, "grad_norm": 0.547569332732202, "learning_rate": 1.2546627389921896e-06, "loss": 0.1115, "step": 5758 }, { "epoch": 0.78, "grad_norm": 1.1559550322012027, "learning_rate": 1.2532163525819025e-06, "loss": 0.1878, "step": 5759 }, { "epoch": 0.78, "grad_norm": 0.8410140430879167, "learning_rate": 1.2517706808953983e-06, "loss": 0.1529, "step": 5760 }, { "epoch": 0.78, "grad_norm": 1.0615234311262072, "learning_rate": 1.2503257242084494e-06, "loss": 0.1553, "step": 5761 }, { "epoch": 0.78, "grad_norm": 0.9657827707122657, "learning_rate": 1.2488814827966906e-06, "loss": 0.1407, "step": 5762 }, { "epoch": 0.78, "grad_norm": 1.2209844953084565, "learning_rate": 1.2474379569356194e-06, "loss": 0.1959, "step": 5763 }, { "epoch": 0.78, "grad_norm": 1.112431483463938, "learning_rate": 1.2459951469005987e-06, "loss": 0.2041, "step": 5764 }, { "epoch": 0.78, "grad_norm": 0.7924979071202282, "learning_rate": 1.2445530529668547e-06, "loss": 0.1407, "step": 5765 }, { "epoch": 0.78, "grad_norm": 0.9393149628701397, "learning_rate": 1.2431116754094751e-06, "loss": 0.1382, "step": 5766 }, { "epoch": 0.78, "grad_norm": 0.8663802754703375, "learning_rate": 1.2416710145034133e-06, "loss": 0.1237, "step": 5767 }, { "epoch": 0.78, "grad_norm": 1.1349159709871812, "learning_rate": 1.2402310705234844e-06, "loss": 0.1847, "step": 5768 }, { "epoch": 0.78, "grad_norm": 0.4947135698562794, "learning_rate": 1.238791843744367e-06, "loss": 0.1074, "step": 5769 }, { "epoch": 0.78, "grad_norm": 0.9991133720430295, "learning_rate": 1.2373533344406036e-06, "loss": 0.1447, "step": 5770 }, { "epoch": 0.78, "grad_norm": 1.0036467647103433, "learning_rate": 1.2359155428865989e-06, "loss": 0.1592, "step": 5771 }, { "epoch": 0.78, "grad_norm": 0.8894971165455631, "learning_rate": 1.234478469356622e-06, "loss": 0.1748, "step": 5772 }, { "epoch": 0.78, "grad_norm": 0.8578524020533317, "learning_rate": 1.2330421141248016e-06, "loss": 0.1169, "step": 5773 }, { "epoch": 0.78, "grad_norm": 0.7990018760335045, "learning_rate": 1.2316064774651353e-06, "loss": 0.151, "step": 5774 }, { "epoch": 0.78, "grad_norm": 0.863817344342179, "learning_rate": 1.2301715596514814e-06, "loss": 0.1376, "step": 5775 }, { "epoch": 0.78, "grad_norm": 0.9777861093657693, "learning_rate": 1.2287373609575553e-06, "loss": 0.1511, "step": 5776 }, { "epoch": 0.78, "grad_norm": 0.8980639368679179, "learning_rate": 1.2273038816569421e-06, "loss": 0.1305, "step": 5777 }, { "epoch": 0.78, "grad_norm": 1.1526784947414448, "learning_rate": 1.2258711220230878e-06, "loss": 0.2194, "step": 5778 }, { "epoch": 0.78, "grad_norm": 0.9345581009230273, "learning_rate": 1.2244390823293006e-06, "loss": 0.1401, "step": 5779 }, { "epoch": 0.78, "grad_norm": 0.9845779663302904, "learning_rate": 1.223007762848752e-06, "loss": 0.1967, "step": 5780 }, { "epoch": 0.78, "grad_norm": 0.5192636589269721, "learning_rate": 1.2215771638544743e-06, "loss": 0.0733, "step": 5781 }, { "epoch": 0.78, "grad_norm": 1.1569218461734108, "learning_rate": 1.2201472856193648e-06, "loss": 0.2252, "step": 5782 }, { "epoch": 0.78, "grad_norm": 0.8333565426033752, "learning_rate": 1.218718128416182e-06, "loss": 0.1516, "step": 5783 }, { "epoch": 0.78, "grad_norm": 1.104261750652682, "learning_rate": 1.2172896925175455e-06, "loss": 0.1627, "step": 5784 }, { "epoch": 0.78, "grad_norm": 1.1045303133166224, "learning_rate": 1.2158619781959418e-06, "loss": 0.1784, "step": 5785 }, { "epoch": 0.78, "grad_norm": 0.9459964389801034, "learning_rate": 1.2144349857237159e-06, "loss": 0.1784, "step": 5786 }, { "epoch": 0.78, "grad_norm": 1.1235954821374865, "learning_rate": 1.2130087153730757e-06, "loss": 0.168, "step": 5787 }, { "epoch": 0.78, "grad_norm": 0.7743037116178275, "learning_rate": 1.2115831674160916e-06, "loss": 0.1677, "step": 5788 }, { "epoch": 0.78, "grad_norm": 0.96178501967681, "learning_rate": 1.2101583421246965e-06, "loss": 0.1554, "step": 5789 }, { "epoch": 0.78, "grad_norm": 0.7926006070067841, "learning_rate": 1.2087342397706852e-06, "loss": 0.1253, "step": 5790 }, { "epoch": 0.78, "grad_norm": 0.6722061974720122, "learning_rate": 1.207310860625715e-06, "loss": 0.0753, "step": 5791 }, { "epoch": 0.78, "grad_norm": 0.8025428243093825, "learning_rate": 1.2058882049613053e-06, "loss": 0.1571, "step": 5792 }, { "epoch": 0.78, "grad_norm": 0.7892043138004476, "learning_rate": 1.2044662730488355e-06, "loss": 0.1496, "step": 5793 }, { "epoch": 0.78, "grad_norm": 0.9343514716197824, "learning_rate": 1.2030450651595489e-06, "loss": 0.2097, "step": 5794 }, { "epoch": 0.78, "grad_norm": 1.0350889543393462, "learning_rate": 1.2016245815645493e-06, "loss": 0.1891, "step": 5795 }, { "epoch": 0.78, "grad_norm": 0.951850004369731, "learning_rate": 1.2002048225348067e-06, "loss": 0.1624, "step": 5796 }, { "epoch": 0.78, "grad_norm": 0.8938291847193152, "learning_rate": 1.1987857883411474e-06, "loss": 0.1349, "step": 5797 }, { "epoch": 0.78, "grad_norm": 0.9903595850030387, "learning_rate": 1.1973674792542618e-06, "loss": 0.1848, "step": 5798 }, { "epoch": 0.78, "grad_norm": 0.9057001580123292, "learning_rate": 1.1959498955447014e-06, "loss": 0.1712, "step": 5799 }, { "epoch": 0.78, "grad_norm": 1.0414981747201142, "learning_rate": 1.1945330374828806e-06, "loss": 0.2131, "step": 5800 }, { "epoch": 0.78, "grad_norm": 0.9049625459201293, "learning_rate": 1.1931169053390735e-06, "loss": 0.1696, "step": 5801 }, { "epoch": 0.78, "grad_norm": 0.9150623282329752, "learning_rate": 1.1917014993834175e-06, "loss": 0.1676, "step": 5802 }, { "epoch": 0.78, "grad_norm": 0.8723303822805556, "learning_rate": 1.1902868198859096e-06, "loss": 0.1567, "step": 5803 }, { "epoch": 0.78, "grad_norm": 1.1708962536388647, "learning_rate": 1.1888728671164102e-06, "loss": 0.2148, "step": 5804 }, { "epoch": 0.78, "grad_norm": 1.0068797622974566, "learning_rate": 1.1874596413446398e-06, "loss": 0.1781, "step": 5805 }, { "epoch": 0.78, "grad_norm": 1.0316478067069812, "learning_rate": 1.186047142840181e-06, "loss": 0.1881, "step": 5806 }, { "epoch": 0.78, "grad_norm": 0.7424675624771958, "learning_rate": 1.1846353718724767e-06, "loss": 0.121, "step": 5807 }, { "epoch": 0.78, "grad_norm": 0.9805534255704718, "learning_rate": 1.1832243287108314e-06, "loss": 0.1982, "step": 5808 }, { "epoch": 0.78, "grad_norm": 0.8768679564057156, "learning_rate": 1.1818140136244117e-06, "loss": 0.1815, "step": 5809 }, { "epoch": 0.78, "grad_norm": 1.3471218375449172, "learning_rate": 1.1804044268822434e-06, "loss": 0.238, "step": 5810 }, { "epoch": 0.78, "grad_norm": 1.061153458809499, "learning_rate": 1.1789955687532157e-06, "loss": 0.1922, "step": 5811 }, { "epoch": 0.78, "grad_norm": 0.8534359786571267, "learning_rate": 1.1775874395060772e-06, "loss": 0.1329, "step": 5812 }, { "epoch": 0.78, "grad_norm": 1.2165316930712184, "learning_rate": 1.1761800394094375e-06, "loss": 0.1948, "step": 5813 }, { "epoch": 0.78, "grad_norm": 0.847456718772052, "learning_rate": 1.174773368731768e-06, "loss": 0.1408, "step": 5814 }, { "epoch": 0.78, "grad_norm": 1.0843506442942221, "learning_rate": 1.1733674277413999e-06, "loss": 0.1418, "step": 5815 }, { "epoch": 0.78, "grad_norm": 0.9546108774897045, "learning_rate": 1.1719622167065264e-06, "loss": 0.1425, "step": 5816 }, { "epoch": 0.78, "grad_norm": 0.7782252589220415, "learning_rate": 1.1705577358951986e-06, "loss": 0.1224, "step": 5817 }, { "epoch": 0.78, "grad_norm": 0.8589205907010459, "learning_rate": 1.1691539855753342e-06, "loss": 0.1542, "step": 5818 }, { "epoch": 0.78, "grad_norm": 0.9668300706814611, "learning_rate": 1.1677509660147056e-06, "loss": 0.1473, "step": 5819 }, { "epoch": 0.78, "grad_norm": 0.9393512522575863, "learning_rate": 1.166348677480949e-06, "loss": 0.1563, "step": 5820 }, { "epoch": 0.78, "grad_norm": 0.936897749729241, "learning_rate": 1.1649471202415597e-06, "loss": 0.1199, "step": 5821 }, { "epoch": 0.79, "grad_norm": 1.0388947885023831, "learning_rate": 1.1635462945638954e-06, "loss": 0.1965, "step": 5822 }, { "epoch": 0.79, "grad_norm": 1.0392319371137915, "learning_rate": 1.1621462007151696e-06, "loss": 0.1484, "step": 5823 }, { "epoch": 0.79, "grad_norm": 0.8827763754402883, "learning_rate": 1.1607468389624622e-06, "loss": 0.1505, "step": 5824 }, { "epoch": 0.79, "grad_norm": 0.6994744090262519, "learning_rate": 1.1593482095727094e-06, "loss": 0.1209, "step": 5825 }, { "epoch": 0.79, "grad_norm": 0.8702765341675481, "learning_rate": 1.1579503128127096e-06, "loss": 0.1316, "step": 5826 }, { "epoch": 0.79, "grad_norm": 0.8645885320366815, "learning_rate": 1.1565531489491205e-06, "loss": 0.119, "step": 5827 }, { "epoch": 0.79, "grad_norm": 0.7333128554572194, "learning_rate": 1.1551567182484607e-06, "loss": 0.1146, "step": 5828 }, { "epoch": 0.79, "grad_norm": 1.0211739199650232, "learning_rate": 1.1537610209771077e-06, "loss": 0.1377, "step": 5829 }, { "epoch": 0.79, "grad_norm": 0.9688796291815543, "learning_rate": 1.1523660574013017e-06, "loss": 0.1637, "step": 5830 }, { "epoch": 0.79, "grad_norm": 1.0455254235878852, "learning_rate": 1.1509718277871406e-06, "loss": 0.1834, "step": 5831 }, { "epoch": 0.79, "grad_norm": 0.7890213682387811, "learning_rate": 1.1495783324005828e-06, "loss": 0.0996, "step": 5832 }, { "epoch": 0.79, "grad_norm": 0.797061703669105, "learning_rate": 1.1481855715074464e-06, "loss": 0.1254, "step": 5833 }, { "epoch": 0.79, "grad_norm": 0.9324160116756445, "learning_rate": 1.1467935453734102e-06, "loss": 0.165, "step": 5834 }, { "epoch": 0.79, "grad_norm": 0.9115642611306656, "learning_rate": 1.1454022542640125e-06, "loss": 0.1572, "step": 5835 }, { "epoch": 0.79, "grad_norm": 1.0983802011938328, "learning_rate": 1.1440116984446504e-06, "loss": 0.1941, "step": 5836 }, { "epoch": 0.79, "grad_norm": 0.5355451425350636, "learning_rate": 1.1426218781805826e-06, "loss": 0.0923, "step": 5837 }, { "epoch": 0.79, "grad_norm": 1.0606176120383233, "learning_rate": 1.141232793736926e-06, "loss": 0.1724, "step": 5838 }, { "epoch": 0.79, "grad_norm": 1.1166573879777189, "learning_rate": 1.1398444453786594e-06, "loss": 0.1935, "step": 5839 }, { "epoch": 0.79, "grad_norm": 1.1076242587094154, "learning_rate": 1.1384568333706152e-06, "loss": 0.1604, "step": 5840 }, { "epoch": 0.79, "grad_norm": 1.0888648718696605, "learning_rate": 1.1370699579774934e-06, "loss": 0.2105, "step": 5841 }, { "epoch": 0.79, "grad_norm": 1.0013005892479045, "learning_rate": 1.1356838194638486e-06, "loss": 0.1435, "step": 5842 }, { "epoch": 0.79, "grad_norm": 1.075606894742888, "learning_rate": 1.1342984180940953e-06, "loss": 0.1687, "step": 5843 }, { "epoch": 0.79, "grad_norm": 0.9718866403765271, "learning_rate": 1.132913754132509e-06, "loss": 0.1541, "step": 5844 }, { "epoch": 0.79, "grad_norm": 0.749724093568597, "learning_rate": 1.131529827843223e-06, "loss": 0.1417, "step": 5845 }, { "epoch": 0.79, "grad_norm": 0.6771991488492745, "learning_rate": 1.13014663949023e-06, "loss": 0.0922, "step": 5846 }, { "epoch": 0.79, "grad_norm": 1.0796817778996561, "learning_rate": 1.1287641893373829e-06, "loss": 0.1916, "step": 5847 }, { "epoch": 0.79, "grad_norm": 0.726173741966069, "learning_rate": 1.1273824776483932e-06, "loss": 0.1432, "step": 5848 }, { "epoch": 0.79, "grad_norm": 0.9388390568112637, "learning_rate": 1.1260015046868311e-06, "loss": 0.1456, "step": 5849 }, { "epoch": 0.79, "grad_norm": 0.9413919800169055, "learning_rate": 1.1246212707161268e-06, "loss": 0.1318, "step": 5850 }, { "epoch": 0.79, "grad_norm": 0.8817119779686249, "learning_rate": 1.1232417759995678e-06, "loss": 0.1349, "step": 5851 }, { "epoch": 0.79, "grad_norm": 1.1293180362280777, "learning_rate": 1.121863020800305e-06, "loss": 0.1896, "step": 5852 }, { "epoch": 0.79, "grad_norm": 1.262226720657168, "learning_rate": 1.120485005381342e-06, "loss": 0.1885, "step": 5853 }, { "epoch": 0.79, "grad_norm": 1.166752624859448, "learning_rate": 1.1191077300055458e-06, "loss": 0.2165, "step": 5854 }, { "epoch": 0.79, "grad_norm": 1.0232143332785448, "learning_rate": 1.11773119493564e-06, "loss": 0.1357, "step": 5855 }, { "epoch": 0.79, "grad_norm": 0.5703926415739226, "learning_rate": 1.1163554004342082e-06, "loss": 0.1258, "step": 5856 }, { "epoch": 0.79, "grad_norm": 1.0465270136038383, "learning_rate": 1.114980346763692e-06, "loss": 0.184, "step": 5857 }, { "epoch": 0.79, "grad_norm": 0.9808148627121593, "learning_rate": 1.1136060341863924e-06, "loss": 0.1575, "step": 5858 }, { "epoch": 0.79, "grad_norm": 1.1369571312820632, "learning_rate": 1.1122324629644687e-06, "loss": 0.1951, "step": 5859 }, { "epoch": 0.79, "grad_norm": 1.0042673649992575, "learning_rate": 1.1108596333599386e-06, "loss": 0.1629, "step": 5860 }, { "epoch": 0.79, "grad_norm": 0.8586640741690875, "learning_rate": 1.1094875456346781e-06, "loss": 0.139, "step": 5861 }, { "epoch": 0.79, "grad_norm": 0.8054610799747773, "learning_rate": 1.1081162000504208e-06, "loss": 0.1522, "step": 5862 }, { "epoch": 0.79, "grad_norm": 0.9372935816769451, "learning_rate": 1.1067455968687625e-06, "loss": 0.1893, "step": 5863 }, { "epoch": 0.79, "grad_norm": 0.9970656284576083, "learning_rate": 1.1053757363511541e-06, "loss": 0.1957, "step": 5864 }, { "epoch": 0.79, "grad_norm": 0.724638824178854, "learning_rate": 1.104006618758905e-06, "loss": 0.1234, "step": 5865 }, { "epoch": 0.79, "grad_norm": 0.7474735141634685, "learning_rate": 1.1026382443531836e-06, "loss": 0.1493, "step": 5866 }, { "epoch": 0.79, "grad_norm": 0.7718741997508572, "learning_rate": 1.1012706133950163e-06, "loss": 0.1325, "step": 5867 }, { "epoch": 0.79, "grad_norm": 1.093994082709672, "learning_rate": 1.0999037261452882e-06, "loss": 0.16, "step": 5868 }, { "epoch": 0.79, "grad_norm": 1.1901160280843768, "learning_rate": 1.0985375828647432e-06, "loss": 0.177, "step": 5869 }, { "epoch": 0.79, "grad_norm": 0.9579362085854579, "learning_rate": 1.0971721838139788e-06, "loss": 0.1598, "step": 5870 }, { "epoch": 0.79, "grad_norm": 1.051050825072103, "learning_rate": 1.0958075292534558e-06, "loss": 0.1979, "step": 5871 }, { "epoch": 0.79, "grad_norm": 0.9599675471844948, "learning_rate": 1.0944436194434916e-06, "loss": 0.1576, "step": 5872 }, { "epoch": 0.79, "grad_norm": 1.1791247441765522, "learning_rate": 1.0930804546442602e-06, "loss": 0.1826, "step": 5873 }, { "epoch": 0.79, "grad_norm": 0.7611070493746559, "learning_rate": 1.0917180351157936e-06, "loss": 0.0984, "step": 5874 }, { "epoch": 0.79, "grad_norm": 1.0403440106054265, "learning_rate": 1.0903563611179847e-06, "loss": 0.1859, "step": 5875 }, { "epoch": 0.79, "grad_norm": 1.2284576628613284, "learning_rate": 1.0889954329105801e-06, "loss": 0.2379, "step": 5876 }, { "epoch": 0.79, "grad_norm": 0.7365336118734844, "learning_rate": 1.0876352507531867e-06, "loss": 0.1547, "step": 5877 }, { "epoch": 0.79, "grad_norm": 0.689295928351416, "learning_rate": 1.0862758149052678e-06, "loss": 0.139, "step": 5878 }, { "epoch": 0.79, "grad_norm": 1.2735366520603069, "learning_rate": 1.084917125626145e-06, "loss": 0.1983, "step": 5879 }, { "epoch": 0.79, "grad_norm": 0.8096862405782672, "learning_rate": 1.0835591831749963e-06, "loss": 0.1055, "step": 5880 }, { "epoch": 0.79, "grad_norm": 0.8802382470497511, "learning_rate": 1.0822019878108597e-06, "loss": 0.1514, "step": 5881 }, { "epoch": 0.79, "grad_norm": 1.0512587878839719, "learning_rate": 1.0808455397926282e-06, "loss": 0.1651, "step": 5882 }, { "epoch": 0.79, "grad_norm": 0.9652256285157749, "learning_rate": 1.0794898393790537e-06, "loss": 0.1262, "step": 5883 }, { "epoch": 0.79, "grad_norm": 0.9948256762817864, "learning_rate": 1.078134886828745e-06, "loss": 0.1242, "step": 5884 }, { "epoch": 0.79, "grad_norm": 0.9571670659813182, "learning_rate": 1.076780682400168e-06, "loss": 0.1303, "step": 5885 }, { "epoch": 0.79, "grad_norm": 0.8631679913474917, "learning_rate": 1.0754272263516463e-06, "loss": 0.1434, "step": 5886 }, { "epoch": 0.79, "grad_norm": 1.0632765573184144, "learning_rate": 1.074074518941361e-06, "loss": 0.1716, "step": 5887 }, { "epoch": 0.79, "grad_norm": 0.8847574006688135, "learning_rate": 1.0727225604273489e-06, "loss": 0.1817, "step": 5888 }, { "epoch": 0.79, "grad_norm": 0.6823806825191966, "learning_rate": 1.0713713510675062e-06, "loss": 0.1168, "step": 5889 }, { "epoch": 0.79, "grad_norm": 0.8713736328655425, "learning_rate": 1.0700208911195841e-06, "loss": 0.1435, "step": 5890 }, { "epoch": 0.79, "grad_norm": 0.7808305836305928, "learning_rate": 1.0686711808411925e-06, "loss": 0.1265, "step": 5891 }, { "epoch": 0.79, "grad_norm": 0.9298925152638644, "learning_rate": 1.0673222204897977e-06, "loss": 0.1511, "step": 5892 }, { "epoch": 0.79, "grad_norm": 0.865939371099325, "learning_rate": 1.0659740103227217e-06, "loss": 0.1461, "step": 5893 }, { "epoch": 0.79, "grad_norm": 1.0952153052610827, "learning_rate": 1.0646265505971458e-06, "loss": 0.1847, "step": 5894 }, { "epoch": 0.79, "grad_norm": 0.8539051939543886, "learning_rate": 1.0632798415701062e-06, "loss": 0.1204, "step": 5895 }, { "epoch": 0.8, "grad_norm": 0.9393970675081375, "learning_rate": 1.061933883498495e-06, "loss": 0.2042, "step": 5896 }, { "epoch": 0.8, "grad_norm": 0.929663275644126, "learning_rate": 1.060588676639066e-06, "loss": 0.1495, "step": 5897 }, { "epoch": 0.8, "grad_norm": 0.8995945870739792, "learning_rate": 1.0592442212484256e-06, "loss": 0.1624, "step": 5898 }, { "epoch": 0.8, "grad_norm": 0.8930882696724365, "learning_rate": 1.0579005175830354e-06, "loss": 0.1377, "step": 5899 }, { "epoch": 0.8, "grad_norm": 1.44548806075305, "learning_rate": 1.0565575658992173e-06, "loss": 0.2541, "step": 5900 }, { "epoch": 0.8, "grad_norm": 0.9002046664489659, "learning_rate": 1.0552153664531473e-06, "loss": 0.1585, "step": 5901 }, { "epoch": 0.8, "grad_norm": 1.0509285517190072, "learning_rate": 1.0538739195008596e-06, "loss": 0.1534, "step": 5902 }, { "epoch": 0.8, "grad_norm": 0.48370839316025094, "learning_rate": 1.0525332252982435e-06, "loss": 0.098, "step": 5903 }, { "epoch": 0.8, "grad_norm": 0.8461935158029521, "learning_rate": 1.0511932841010457e-06, "loss": 0.172, "step": 5904 }, { "epoch": 0.8, "grad_norm": 0.7321494660075919, "learning_rate": 1.0498540961648695e-06, "loss": 0.0812, "step": 5905 }, { "epoch": 0.8, "grad_norm": 0.82259411662842, "learning_rate": 1.0485156617451725e-06, "loss": 0.146, "step": 5906 }, { "epoch": 0.8, "grad_norm": 1.1718636703013035, "learning_rate": 1.0471779810972694e-06, "loss": 0.1765, "step": 5907 }, { "epoch": 0.8, "grad_norm": 1.1565981992614995, "learning_rate": 1.0458410544763341e-06, "loss": 0.2188, "step": 5908 }, { "epoch": 0.8, "grad_norm": 0.8697678065476009, "learning_rate": 1.0445048821373932e-06, "loss": 0.1639, "step": 5909 }, { "epoch": 0.8, "grad_norm": 1.07137032071878, "learning_rate": 1.0431694643353302e-06, "loss": 0.1575, "step": 5910 }, { "epoch": 0.8, "grad_norm": 1.2525357745215289, "learning_rate": 1.0418348013248846e-06, "loss": 0.202, "step": 5911 }, { "epoch": 0.8, "grad_norm": 0.9749801744668483, "learning_rate": 1.0405008933606524e-06, "loss": 0.1858, "step": 5912 }, { "epoch": 0.8, "grad_norm": 1.3214456008020057, "learning_rate": 1.0391677406970857e-06, "loss": 0.1998, "step": 5913 }, { "epoch": 0.8, "grad_norm": 1.1761249173490917, "learning_rate": 1.0378353435884918e-06, "loss": 0.2163, "step": 5914 }, { "epoch": 0.8, "grad_norm": 0.8999369495324827, "learning_rate": 1.0365037022890346e-06, "loss": 0.1641, "step": 5915 }, { "epoch": 0.8, "grad_norm": 0.7734013467992044, "learning_rate": 1.0351728170527342e-06, "loss": 0.1408, "step": 5916 }, { "epoch": 0.8, "grad_norm": 1.095319514049018, "learning_rate": 1.0338426881334634e-06, "loss": 0.2171, "step": 5917 }, { "epoch": 0.8, "grad_norm": 1.0334668665429942, "learning_rate": 1.0325133157849537e-06, "loss": 0.1984, "step": 5918 }, { "epoch": 0.8, "grad_norm": 0.9123324824228264, "learning_rate": 1.0311847002607938e-06, "loss": 0.1533, "step": 5919 }, { "epoch": 0.8, "grad_norm": 0.871751999281278, "learning_rate": 1.0298568418144244e-06, "loss": 0.125, "step": 5920 }, { "epoch": 0.8, "grad_norm": 0.9763656885903459, "learning_rate": 1.0285297406991434e-06, "loss": 0.1817, "step": 5921 }, { "epoch": 0.8, "grad_norm": 1.118847523591927, "learning_rate": 1.0272033971681045e-06, "loss": 0.1997, "step": 5922 }, { "epoch": 0.8, "grad_norm": 0.9324405264882413, "learning_rate": 1.0258778114743163e-06, "loss": 0.1324, "step": 5923 }, { "epoch": 0.8, "grad_norm": 1.0506872172411919, "learning_rate": 1.024552983870643e-06, "loss": 0.193, "step": 5924 }, { "epoch": 0.8, "grad_norm": 1.0327103779472484, "learning_rate": 1.023228914609804e-06, "loss": 0.145, "step": 5925 }, { "epoch": 0.8, "grad_norm": 1.0348546131623266, "learning_rate": 1.0219056039443748e-06, "loss": 0.1331, "step": 5926 }, { "epoch": 0.8, "grad_norm": 0.9242147844393043, "learning_rate": 1.0205830521267851e-06, "loss": 0.179, "step": 5927 }, { "epoch": 0.8, "grad_norm": 0.7989863482994736, "learning_rate": 1.0192612594093205e-06, "loss": 0.1558, "step": 5928 }, { "epoch": 0.8, "grad_norm": 1.081107548604219, "learning_rate": 1.0179402260441224e-06, "loss": 0.1977, "step": 5929 }, { "epoch": 0.8, "grad_norm": 1.012388431450819, "learning_rate": 1.016619952283186e-06, "loss": 0.2156, "step": 5930 }, { "epoch": 0.8, "grad_norm": 0.8263263223600333, "learning_rate": 1.0153004383783621e-06, "loss": 0.1231, "step": 5931 }, { "epoch": 0.8, "grad_norm": 1.0464023091239543, "learning_rate": 1.0139816845813573e-06, "loss": 0.162, "step": 5932 }, { "epoch": 0.8, "grad_norm": 1.1072575890339782, "learning_rate": 1.0126636911437321e-06, "loss": 0.1661, "step": 5933 }, { "epoch": 0.8, "grad_norm": 1.1096521298101838, "learning_rate": 1.0113464583169031e-06, "loss": 0.1903, "step": 5934 }, { "epoch": 0.8, "grad_norm": 0.9530689673724995, "learning_rate": 1.0100299863521406e-06, "loss": 0.1749, "step": 5935 }, { "epoch": 0.8, "grad_norm": 0.7976603119298444, "learning_rate": 1.0087142755005708e-06, "loss": 0.1495, "step": 5936 }, { "epoch": 0.8, "grad_norm": 1.0052411157667935, "learning_rate": 1.0073993260131737e-06, "loss": 0.1513, "step": 5937 }, { "epoch": 0.8, "grad_norm": 0.6758637324252647, "learning_rate": 1.006085138140785e-06, "loss": 0.1373, "step": 5938 }, { "epoch": 0.8, "grad_norm": 1.1657958566595465, "learning_rate": 1.0047717121340944e-06, "loss": 0.1884, "step": 5939 }, { "epoch": 0.8, "grad_norm": 1.2319650836049016, "learning_rate": 1.0034590482436474e-06, "loss": 0.1968, "step": 5940 }, { "epoch": 0.8, "grad_norm": 0.9457062403581286, "learning_rate": 1.0021471467198406e-06, "loss": 0.1524, "step": 5941 }, { "epoch": 0.8, "grad_norm": 1.1401946958756688, "learning_rate": 1.000836007812932e-06, "loss": 0.1955, "step": 5942 }, { "epoch": 0.8, "grad_norm": 1.0155152845379596, "learning_rate": 9.995256317730284e-07, "loss": 0.1669, "step": 5943 }, { "epoch": 0.8, "grad_norm": 1.0280314754634117, "learning_rate": 9.982160188500923e-07, "loss": 0.1416, "step": 5944 }, { "epoch": 0.8, "grad_norm": 0.8772743244580399, "learning_rate": 9.96907169293942e-07, "loss": 0.1892, "step": 5945 }, { "epoch": 0.8, "grad_norm": 1.0430472486108335, "learning_rate": 9.955990833542472e-07, "loss": 0.2038, "step": 5946 }, { "epoch": 0.8, "grad_norm": 0.8851921589838498, "learning_rate": 9.942917612805352e-07, "loss": 0.15, "step": 5947 }, { "epoch": 0.8, "grad_norm": 1.2313959716891392, "learning_rate": 9.929852033221864e-07, "loss": 0.2458, "step": 5948 }, { "epoch": 0.8, "grad_norm": 0.9237341153444951, "learning_rate": 9.91679409728435e-07, "loss": 0.145, "step": 5949 }, { "epoch": 0.8, "grad_norm": 0.7844958534672382, "learning_rate": 9.9037438074837e-07, "loss": 0.1417, "step": 5950 }, { "epoch": 0.8, "grad_norm": 0.9033866992450642, "learning_rate": 9.890701166309347e-07, "loss": 0.1502, "step": 5951 }, { "epoch": 0.8, "grad_norm": 1.1708792775906616, "learning_rate": 9.87766617624924e-07, "loss": 0.1863, "step": 5952 }, { "epoch": 0.8, "grad_norm": 0.9218335127254954, "learning_rate": 9.86463883978992e-07, "loss": 0.1255, "step": 5953 }, { "epoch": 0.8, "grad_norm": 1.0717870128131457, "learning_rate": 9.851619159416426e-07, "loss": 0.1628, "step": 5954 }, { "epoch": 0.8, "grad_norm": 0.6117279217194304, "learning_rate": 9.83860713761235e-07, "loss": 0.098, "step": 5955 }, { "epoch": 0.8, "grad_norm": 1.0357502576409476, "learning_rate": 9.825602776859816e-07, "loss": 0.1508, "step": 5956 }, { "epoch": 0.8, "grad_norm": 0.7586910473991906, "learning_rate": 9.81260607963949e-07, "loss": 0.1167, "step": 5957 }, { "epoch": 0.8, "grad_norm": 0.9194997930359551, "learning_rate": 9.799617048430588e-07, "loss": 0.1416, "step": 5958 }, { "epoch": 0.8, "grad_norm": 0.6296320726318815, "learning_rate": 9.786635685710843e-07, "loss": 0.0858, "step": 5959 }, { "epoch": 0.8, "grad_norm": 0.8089870576458185, "learning_rate": 9.773661993956546e-07, "loss": 0.1304, "step": 5960 }, { "epoch": 0.8, "grad_norm": 1.0531465613110234, "learning_rate": 9.760695975642504e-07, "loss": 0.1872, "step": 5961 }, { "epoch": 0.8, "grad_norm": 0.9159895643701134, "learning_rate": 9.747737633242095e-07, "loss": 0.1443, "step": 5962 }, { "epoch": 0.8, "grad_norm": 1.1887358750326367, "learning_rate": 9.73478696922716e-07, "loss": 0.1531, "step": 5963 }, { "epoch": 0.8, "grad_norm": 0.9653575900760092, "learning_rate": 9.721843986068164e-07, "loss": 0.1159, "step": 5964 }, { "epoch": 0.8, "grad_norm": 0.7982212002593821, "learning_rate": 9.708908686234059e-07, "loss": 0.1218, "step": 5965 }, { "epoch": 0.8, "grad_norm": 1.2184388432110285, "learning_rate": 9.69598107219234e-07, "loss": 0.1196, "step": 5966 }, { "epoch": 0.8, "grad_norm": 1.0398715248434653, "learning_rate": 9.683061146409029e-07, "loss": 0.1882, "step": 5967 }, { "epoch": 0.8, "grad_norm": 0.9097435251157673, "learning_rate": 9.670148911348688e-07, "loss": 0.1464, "step": 5968 }, { "epoch": 0.8, "grad_norm": 1.1279791779618704, "learning_rate": 9.657244369474411e-07, "loss": 0.1893, "step": 5969 }, { "epoch": 0.81, "grad_norm": 1.0401966208505442, "learning_rate": 9.644347523247832e-07, "loss": 0.161, "step": 5970 }, { "epoch": 0.81, "grad_norm": 1.1380259764182514, "learning_rate": 9.631458375129099e-07, "loss": 0.1644, "step": 5971 }, { "epoch": 0.81, "grad_norm": 0.7906114423514449, "learning_rate": 9.618576927576912e-07, "loss": 0.1147, "step": 5972 }, { "epoch": 0.81, "grad_norm": 0.7975807098120996, "learning_rate": 9.605703183048487e-07, "loss": 0.1114, "step": 5973 }, { "epoch": 0.81, "grad_norm": 0.6168480363687389, "learning_rate": 9.592837143999578e-07, "loss": 0.1314, "step": 5974 }, { "epoch": 0.81, "grad_norm": 0.8406217354628224, "learning_rate": 9.579978812884467e-07, "loss": 0.125, "step": 5975 }, { "epoch": 0.81, "grad_norm": 1.088301057659492, "learning_rate": 9.56712819215596e-07, "loss": 0.1901, "step": 5976 }, { "epoch": 0.81, "grad_norm": 1.1149405278530742, "learning_rate": 9.554285284265407e-07, "loss": 0.1953, "step": 5977 }, { "epoch": 0.81, "grad_norm": 0.8372186232089885, "learning_rate": 9.54145009166268e-07, "loss": 0.1251, "step": 5978 }, { "epoch": 0.81, "grad_norm": 0.8508182005499986, "learning_rate": 9.528622616796163e-07, "loss": 0.1551, "step": 5979 }, { "epoch": 0.81, "grad_norm": 1.128323103747275, "learning_rate": 9.515802862112788e-07, "loss": 0.219, "step": 5980 }, { "epoch": 0.81, "grad_norm": 1.0899465035955778, "learning_rate": 9.502990830058017e-07, "loss": 0.1919, "step": 5981 }, { "epoch": 0.81, "grad_norm": 1.0512183090602762, "learning_rate": 9.490186523075817e-07, "loss": 0.1927, "step": 5982 }, { "epoch": 0.81, "grad_norm": 0.9028428693703211, "learning_rate": 9.477389943608701e-07, "loss": 0.1571, "step": 5983 }, { "epoch": 0.81, "grad_norm": 1.0108706862583792, "learning_rate": 9.464601094097703e-07, "loss": 0.1656, "step": 5984 }, { "epoch": 0.81, "grad_norm": 1.0056466644153061, "learning_rate": 9.451819976982374e-07, "loss": 0.1746, "step": 5985 }, { "epoch": 0.81, "grad_norm": 1.2031474929104626, "learning_rate": 9.439046594700791e-07, "loss": 0.2262, "step": 5986 }, { "epoch": 0.81, "grad_norm": 1.143835149813184, "learning_rate": 9.426280949689581e-07, "loss": 0.2137, "step": 5987 }, { "epoch": 0.81, "grad_norm": 1.0947492912729504, "learning_rate": 9.413523044383865e-07, "loss": 0.1771, "step": 5988 }, { "epoch": 0.81, "grad_norm": 0.5427639025354125, "learning_rate": 9.400772881217296e-07, "loss": 0.0708, "step": 5989 }, { "epoch": 0.81, "grad_norm": 0.9418220585329525, "learning_rate": 9.388030462622056e-07, "loss": 0.1392, "step": 5990 }, { "epoch": 0.81, "grad_norm": 0.9917718682158934, "learning_rate": 9.375295791028843e-07, "loss": 0.1629, "step": 5991 }, { "epoch": 0.81, "grad_norm": 0.9758608237657274, "learning_rate": 9.362568868866895e-07, "loss": 0.1607, "step": 5992 }, { "epoch": 0.81, "grad_norm": 1.0069979060852614, "learning_rate": 9.349849698563928e-07, "loss": 0.1509, "step": 5993 }, { "epoch": 0.81, "grad_norm": 1.1786753778286148, "learning_rate": 9.337138282546227e-07, "loss": 0.2221, "step": 5994 }, { "epoch": 0.81, "grad_norm": 0.8831466303549397, "learning_rate": 9.324434623238571e-07, "loss": 0.144, "step": 5995 }, { "epoch": 0.81, "grad_norm": 1.1357640233587116, "learning_rate": 9.311738723064267e-07, "loss": 0.1801, "step": 5996 }, { "epoch": 0.81, "grad_norm": 1.1780040419174371, "learning_rate": 9.299050584445135e-07, "loss": 0.1619, "step": 5997 }, { "epoch": 0.81, "grad_norm": 1.0401737869788859, "learning_rate": 9.286370209801543e-07, "loss": 0.1965, "step": 5998 }, { "epoch": 0.81, "grad_norm": 1.0747886858354856, "learning_rate": 9.273697601552345e-07, "loss": 0.1501, "step": 5999 }, { "epoch": 0.81, "grad_norm": 0.9145389321769791, "learning_rate": 9.261032762114924e-07, "loss": 0.1534, "step": 6000 }, { "epoch": 0.81, "grad_norm": 0.8117993626912174, "learning_rate": 9.248375693905181e-07, "loss": 0.1692, "step": 6001 }, { "epoch": 0.81, "grad_norm": 0.6575260591928157, "learning_rate": 9.235726399337541e-07, "loss": 0.0888, "step": 6002 }, { "epoch": 0.81, "grad_norm": 0.9831596207448686, "learning_rate": 9.223084880824934e-07, "loss": 0.1754, "step": 6003 }, { "epoch": 0.81, "grad_norm": 0.906032581853624, "learning_rate": 9.21045114077882e-07, "loss": 0.1452, "step": 6004 }, { "epoch": 0.81, "grad_norm": 1.1388197118748735, "learning_rate": 9.197825181609165e-07, "loss": 0.1988, "step": 6005 }, { "epoch": 0.81, "grad_norm": 1.0983003158921338, "learning_rate": 9.18520700572445e-07, "loss": 0.1975, "step": 6006 }, { "epoch": 0.81, "grad_norm": 1.1249351209216267, "learning_rate": 9.172596615531682e-07, "loss": 0.1552, "step": 6007 }, { "epoch": 0.81, "grad_norm": 1.072436030736748, "learning_rate": 9.159994013436374e-07, "loss": 0.2087, "step": 6008 }, { "epoch": 0.81, "grad_norm": 0.8208199257685567, "learning_rate": 9.147399201842555e-07, "loss": 0.1427, "step": 6009 }, { "epoch": 0.81, "grad_norm": 0.9136967844875316, "learning_rate": 9.13481218315277e-07, "loss": 0.1484, "step": 6010 }, { "epoch": 0.81, "grad_norm": 0.8049464742644323, "learning_rate": 9.122232959768073e-07, "loss": 0.1074, "step": 6011 }, { "epoch": 0.81, "grad_norm": 0.9644688184353029, "learning_rate": 9.109661534088043e-07, "loss": 0.168, "step": 6012 }, { "epoch": 0.81, "grad_norm": 0.8246833775525004, "learning_rate": 9.097097908510749e-07, "loss": 0.1481, "step": 6013 }, { "epoch": 0.81, "grad_norm": 0.7427663218923876, "learning_rate": 9.084542085432796e-07, "loss": 0.1755, "step": 6014 }, { "epoch": 0.81, "grad_norm": 0.837391528701616, "learning_rate": 9.071994067249291e-07, "loss": 0.1485, "step": 6015 }, { "epoch": 0.81, "grad_norm": 1.1615355968376897, "learning_rate": 9.059453856353845e-07, "loss": 0.1944, "step": 6016 }, { "epoch": 0.81, "grad_norm": 0.6068571132023078, "learning_rate": 9.046921455138591e-07, "loss": 0.108, "step": 6017 }, { "epoch": 0.81, "grad_norm": 0.9292985067367193, "learning_rate": 9.034396865994166e-07, "loss": 0.1445, "step": 6018 }, { "epoch": 0.81, "grad_norm": 1.1158968398003324, "learning_rate": 9.021880091309704e-07, "loss": 0.193, "step": 6019 }, { "epoch": 0.81, "grad_norm": 1.199284568299364, "learning_rate": 9.009371133472889e-07, "loss": 0.235, "step": 6020 }, { "epoch": 0.81, "grad_norm": 0.9661216131053245, "learning_rate": 8.996869994869878e-07, "loss": 0.1574, "step": 6021 }, { "epoch": 0.81, "grad_norm": 0.7761702333638768, "learning_rate": 8.984376677885354e-07, "loss": 0.1404, "step": 6022 }, { "epoch": 0.81, "grad_norm": 1.0388808096590618, "learning_rate": 8.971891184902476e-07, "loss": 0.1759, "step": 6023 }, { "epoch": 0.81, "grad_norm": 1.081094005800363, "learning_rate": 8.959413518302946e-07, "loss": 0.1831, "step": 6024 }, { "epoch": 0.81, "grad_norm": 0.9353341474122308, "learning_rate": 8.946943680466968e-07, "loss": 0.1453, "step": 6025 }, { "epoch": 0.81, "grad_norm": 1.0189121123353349, "learning_rate": 8.934481673773238e-07, "loss": 0.1448, "step": 6026 }, { "epoch": 0.81, "grad_norm": 0.9553710078736047, "learning_rate": 8.922027500598968e-07, "loss": 0.185, "step": 6027 }, { "epoch": 0.81, "grad_norm": 0.7937125913821367, "learning_rate": 8.909581163319875e-07, "loss": 0.1373, "step": 6028 }, { "epoch": 0.81, "grad_norm": 0.8890056266341019, "learning_rate": 8.897142664310182e-07, "loss": 0.1429, "step": 6029 }, { "epoch": 0.81, "grad_norm": 1.0783189686827472, "learning_rate": 8.88471200594262e-07, "loss": 0.196, "step": 6030 }, { "epoch": 0.81, "grad_norm": 1.2098771319006716, "learning_rate": 8.87228919058839e-07, "loss": 0.2018, "step": 6031 }, { "epoch": 0.81, "grad_norm": 0.8452362668315772, "learning_rate": 8.859874220617271e-07, "loss": 0.1412, "step": 6032 }, { "epoch": 0.81, "grad_norm": 1.0264610868869166, "learning_rate": 8.847467098397472e-07, "loss": 0.1855, "step": 6033 }, { "epoch": 0.81, "grad_norm": 0.6284670447837418, "learning_rate": 8.835067826295745e-07, "loss": 0.1559, "step": 6034 }, { "epoch": 0.81, "grad_norm": 1.367341644300006, "learning_rate": 8.822676406677327e-07, "loss": 0.2488, "step": 6035 }, { "epoch": 0.81, "grad_norm": 0.8636221745621318, "learning_rate": 8.810292841905965e-07, "loss": 0.173, "step": 6036 }, { "epoch": 0.81, "grad_norm": 0.928057520413017, "learning_rate": 8.797917134343908e-07, "loss": 0.1349, "step": 6037 }, { "epoch": 0.81, "grad_norm": 1.0362616214390674, "learning_rate": 8.785549286351902e-07, "loss": 0.2107, "step": 6038 }, { "epoch": 0.81, "grad_norm": 0.9818898815883779, "learning_rate": 8.773189300289209e-07, "loss": 0.1809, "step": 6039 }, { "epoch": 0.81, "grad_norm": 0.688627087503231, "learning_rate": 8.760837178513553e-07, "loss": 0.1118, "step": 6040 }, { "epoch": 0.81, "grad_norm": 0.8972340765532953, "learning_rate": 8.748492923381191e-07, "loss": 0.1707, "step": 6041 }, { "epoch": 0.81, "grad_norm": 0.986692203492362, "learning_rate": 8.73615653724687e-07, "loss": 0.1628, "step": 6042 }, { "epoch": 0.81, "grad_norm": 1.1985711830494263, "learning_rate": 8.723828022463848e-07, "loss": 0.2152, "step": 6043 }, { "epoch": 0.81, "grad_norm": 1.1709379139914136, "learning_rate": 8.711507381383871e-07, "loss": 0.1972, "step": 6044 }, { "epoch": 0.82, "grad_norm": 0.79424658206911, "learning_rate": 8.699194616357181e-07, "loss": 0.1562, "step": 6045 }, { "epoch": 0.82, "grad_norm": 1.1521830522061658, "learning_rate": 8.686889729732512e-07, "loss": 0.2314, "step": 6046 }, { "epoch": 0.82, "grad_norm": 0.971942651330073, "learning_rate": 8.674592723857106e-07, "loss": 0.2042, "step": 6047 }, { "epoch": 0.82, "grad_norm": 0.9735097871994244, "learning_rate": 8.662303601076699e-07, "loss": 0.1763, "step": 6048 }, { "epoch": 0.82, "grad_norm": 1.0252353300450494, "learning_rate": 8.650022363735522e-07, "loss": 0.1879, "step": 6049 }, { "epoch": 0.82, "grad_norm": 1.1929219458673261, "learning_rate": 8.637749014176305e-07, "loss": 0.1968, "step": 6050 }, { "epoch": 0.82, "grad_norm": 1.0087396437745628, "learning_rate": 8.625483554740271e-07, "loss": 0.2034, "step": 6051 }, { "epoch": 0.82, "grad_norm": 1.326024696037896, "learning_rate": 8.613225987767132e-07, "loss": 0.2394, "step": 6052 }, { "epoch": 0.82, "grad_norm": 1.0686379923772296, "learning_rate": 8.6009763155951e-07, "loss": 0.1788, "step": 6053 }, { "epoch": 0.82, "grad_norm": 1.1486655032023105, "learning_rate": 8.588734540560889e-07, "loss": 0.2155, "step": 6054 }, { "epoch": 0.82, "grad_norm": 0.847843453150639, "learning_rate": 8.576500664999693e-07, "loss": 0.0953, "step": 6055 }, { "epoch": 0.82, "grad_norm": 1.0656747963875648, "learning_rate": 8.564274691245206e-07, "loss": 0.1665, "step": 6056 }, { "epoch": 0.82, "grad_norm": 0.7739511703405263, "learning_rate": 8.552056621629612e-07, "loss": 0.1648, "step": 6057 }, { "epoch": 0.82, "grad_norm": 1.1450374250871949, "learning_rate": 8.539846458483586e-07, "loss": 0.1632, "step": 6058 }, { "epoch": 0.82, "grad_norm": 0.9374580410773327, "learning_rate": 8.52764420413631e-07, "loss": 0.1163, "step": 6059 }, { "epoch": 0.82, "grad_norm": 1.199420704994289, "learning_rate": 8.515449860915426e-07, "loss": 0.2235, "step": 6060 }, { "epoch": 0.82, "grad_norm": 0.8715506688445197, "learning_rate": 8.503263431147102e-07, "loss": 0.1076, "step": 6061 }, { "epoch": 0.82, "grad_norm": 0.6935852907538813, "learning_rate": 8.491084917155973e-07, "loss": 0.1312, "step": 6062 }, { "epoch": 0.82, "grad_norm": 0.7555407117982189, "learning_rate": 8.478914321265169e-07, "loss": 0.1007, "step": 6063 }, { "epoch": 0.82, "grad_norm": 0.7038446619208261, "learning_rate": 8.466751645796306e-07, "loss": 0.0927, "step": 6064 }, { "epoch": 0.82, "grad_norm": 0.9356359035909217, "learning_rate": 8.454596893069517e-07, "loss": 0.1583, "step": 6065 }, { "epoch": 0.82, "grad_norm": 0.8853117175657235, "learning_rate": 8.442450065403385e-07, "loss": 0.1359, "step": 6066 }, { "epoch": 0.82, "grad_norm": 0.8808071848302962, "learning_rate": 8.430311165115001e-07, "loss": 0.1414, "step": 6067 }, { "epoch": 0.82, "grad_norm": 1.0730419107662135, "learning_rate": 8.418180194519954e-07, "loss": 0.1918, "step": 6068 }, { "epoch": 0.82, "grad_norm": 0.8580860712398342, "learning_rate": 8.406057155932279e-07, "loss": 0.1333, "step": 6069 }, { "epoch": 0.82, "grad_norm": 0.9505086751310611, "learning_rate": 8.393942051664538e-07, "loss": 0.1317, "step": 6070 }, { "epoch": 0.82, "grad_norm": 1.0779845594121606, "learning_rate": 8.381834884027773e-07, "loss": 0.2159, "step": 6071 }, { "epoch": 0.82, "grad_norm": 0.8998786541701671, "learning_rate": 8.369735655331507e-07, "loss": 0.1472, "step": 6072 }, { "epoch": 0.82, "grad_norm": 0.8632486980869826, "learning_rate": 8.357644367883739e-07, "loss": 0.1189, "step": 6073 }, { "epoch": 0.82, "grad_norm": 0.7678804756735101, "learning_rate": 8.345561023990966e-07, "loss": 0.142, "step": 6074 }, { "epoch": 0.82, "grad_norm": 0.8473279421571975, "learning_rate": 8.333485625958171e-07, "loss": 0.1106, "step": 6075 }, { "epoch": 0.82, "grad_norm": 0.8652640139008787, "learning_rate": 8.321418176088797e-07, "loss": 0.1338, "step": 6076 }, { "epoch": 0.82, "grad_norm": 0.6656726551143234, "learning_rate": 8.309358676684814e-07, "loss": 0.1039, "step": 6077 }, { "epoch": 0.82, "grad_norm": 1.2189564362254839, "learning_rate": 8.297307130046645e-07, "loss": 0.1975, "step": 6078 }, { "epoch": 0.82, "grad_norm": 1.392876126457686, "learning_rate": 8.285263538473204e-07, "loss": 0.2822, "step": 6079 }, { "epoch": 0.82, "grad_norm": 1.1148474754891424, "learning_rate": 8.273227904261877e-07, "loss": 0.1798, "step": 6080 }, { "epoch": 0.82, "grad_norm": 1.0091922837294607, "learning_rate": 8.261200229708544e-07, "loss": 0.1655, "step": 6081 }, { "epoch": 0.82, "grad_norm": 0.9848769370174782, "learning_rate": 8.249180517107569e-07, "loss": 0.1746, "step": 6082 }, { "epoch": 0.82, "grad_norm": 1.0842552077356358, "learning_rate": 8.237168768751785e-07, "loss": 0.1919, "step": 6083 }, { "epoch": 0.82, "grad_norm": 1.024480916926148, "learning_rate": 8.225164986932516e-07, "loss": 0.1486, "step": 6084 }, { "epoch": 0.82, "grad_norm": 0.935971888305995, "learning_rate": 8.213169173939573e-07, "loss": 0.1847, "step": 6085 }, { "epoch": 0.82, "grad_norm": 0.9871876095384997, "learning_rate": 8.201181332061215e-07, "loss": 0.1432, "step": 6086 }, { "epoch": 0.82, "grad_norm": 1.166746313936605, "learning_rate": 8.189201463584196e-07, "loss": 0.1591, "step": 6087 }, { "epoch": 0.82, "grad_norm": 0.8784183782724323, "learning_rate": 8.177229570793788e-07, "loss": 0.1488, "step": 6088 }, { "epoch": 0.82, "grad_norm": 1.0144954018961603, "learning_rate": 8.165265655973697e-07, "loss": 0.1371, "step": 6089 }, { "epoch": 0.82, "grad_norm": 0.9996278697781205, "learning_rate": 8.15330972140611e-07, "loss": 0.1857, "step": 6090 }, { "epoch": 0.82, "grad_norm": 0.9981021075062493, "learning_rate": 8.141361769371703e-07, "loss": 0.1326, "step": 6091 }, { "epoch": 0.82, "grad_norm": 0.9930793619480366, "learning_rate": 8.129421802149634e-07, "loss": 0.1597, "step": 6092 }, { "epoch": 0.82, "grad_norm": 1.3024659325195187, "learning_rate": 8.117489822017527e-07, "loss": 0.1929, "step": 6093 }, { "epoch": 0.82, "grad_norm": 1.1452659930659137, "learning_rate": 8.105565831251482e-07, "loss": 0.2121, "step": 6094 }, { "epoch": 0.82, "grad_norm": 0.8207914517503294, "learning_rate": 8.093649832126083e-07, "loss": 0.1186, "step": 6095 }, { "epoch": 0.82, "grad_norm": 1.0471114140576592, "learning_rate": 8.081741826914386e-07, "loss": 0.1858, "step": 6096 }, { "epoch": 0.82, "grad_norm": 0.7785130892873576, "learning_rate": 8.069841817887919e-07, "loss": 0.1183, "step": 6097 }, { "epoch": 0.82, "grad_norm": 0.8631191930257687, "learning_rate": 8.057949807316695e-07, "loss": 0.1654, "step": 6098 }, { "epoch": 0.82, "grad_norm": 0.9744288796597252, "learning_rate": 8.046065797469182e-07, "loss": 0.1897, "step": 6099 }, { "epoch": 0.82, "grad_norm": 0.9666763669119235, "learning_rate": 8.034189790612346e-07, "loss": 0.1834, "step": 6100 }, { "epoch": 0.82, "grad_norm": 0.9151940053568564, "learning_rate": 8.022321789011605e-07, "loss": 0.1208, "step": 6101 }, { "epoch": 0.82, "grad_norm": 0.7334484948256751, "learning_rate": 8.010461794930863e-07, "loss": 0.1104, "step": 6102 }, { "epoch": 0.82, "grad_norm": 0.9941851564493654, "learning_rate": 7.998609810632485e-07, "loss": 0.2032, "step": 6103 }, { "epoch": 0.82, "grad_norm": 0.8842185988292872, "learning_rate": 7.986765838377331e-07, "loss": 0.1784, "step": 6104 }, { "epoch": 0.82, "grad_norm": 1.1003159940142904, "learning_rate": 7.974929880424703e-07, "loss": 0.1909, "step": 6105 }, { "epoch": 0.82, "grad_norm": 1.2771901956935943, "learning_rate": 7.963101939032391e-07, "loss": 0.2117, "step": 6106 }, { "epoch": 0.82, "grad_norm": 0.7663633519301061, "learning_rate": 7.951282016456657e-07, "loss": 0.1441, "step": 6107 }, { "epoch": 0.82, "grad_norm": 0.9939968667576631, "learning_rate": 7.939470114952225e-07, "loss": 0.1883, "step": 6108 }, { "epoch": 0.82, "grad_norm": 0.8517936616019448, "learning_rate": 7.927666236772286e-07, "loss": 0.1528, "step": 6109 }, { "epoch": 0.82, "grad_norm": 1.2041061831995503, "learning_rate": 7.915870384168534e-07, "loss": 0.2086, "step": 6110 }, { "epoch": 0.82, "grad_norm": 0.8813208809662278, "learning_rate": 7.904082559391086e-07, "loss": 0.1318, "step": 6111 }, { "epoch": 0.82, "grad_norm": 0.9515325698442083, "learning_rate": 7.892302764688548e-07, "loss": 0.1601, "step": 6112 }, { "epoch": 0.82, "grad_norm": 0.8020032944557562, "learning_rate": 7.880531002308001e-07, "loss": 0.1633, "step": 6113 }, { "epoch": 0.82, "grad_norm": 0.9781906321162119, "learning_rate": 7.868767274494982e-07, "loss": 0.19, "step": 6114 }, { "epoch": 0.82, "grad_norm": 0.8019726629303815, "learning_rate": 7.857011583493518e-07, "loss": 0.1132, "step": 6115 }, { "epoch": 0.82, "grad_norm": 0.97672221267092, "learning_rate": 7.845263931546049e-07, "loss": 0.1703, "step": 6116 }, { "epoch": 0.82, "grad_norm": 1.2286300500010932, "learning_rate": 7.833524320893537e-07, "loss": 0.192, "step": 6117 }, { "epoch": 0.82, "grad_norm": 0.524333545120802, "learning_rate": 7.821792753775392e-07, "loss": 0.0914, "step": 6118 }, { "epoch": 0.83, "grad_norm": 1.1153487806857514, "learning_rate": 7.810069232429485e-07, "loss": 0.1893, "step": 6119 }, { "epoch": 0.83, "grad_norm": 1.0866797283985257, "learning_rate": 7.798353759092142e-07, "loss": 0.1719, "step": 6120 }, { "epoch": 0.83, "grad_norm": 0.9661404638720513, "learning_rate": 7.786646335998194e-07, "loss": 0.1663, "step": 6121 }, { "epoch": 0.83, "grad_norm": 0.9541699813682232, "learning_rate": 7.774946965380897e-07, "loss": 0.2175, "step": 6122 }, { "epoch": 0.83, "grad_norm": 1.101808257978215, "learning_rate": 7.763255649471985e-07, "loss": 0.1846, "step": 6123 }, { "epoch": 0.83, "grad_norm": 1.0966982650458454, "learning_rate": 7.751572390501649e-07, "loss": 0.2146, "step": 6124 }, { "epoch": 0.83, "grad_norm": 1.0037108484619477, "learning_rate": 7.739897190698548e-07, "loss": 0.1446, "step": 6125 }, { "epoch": 0.83, "grad_norm": 0.8039800744948091, "learning_rate": 7.728230052289809e-07, "loss": 0.1198, "step": 6126 }, { "epoch": 0.83, "grad_norm": 0.9289607026068593, "learning_rate": 7.716570977501014e-07, "loss": 0.1515, "step": 6127 }, { "epoch": 0.83, "grad_norm": 0.875895492993034, "learning_rate": 7.704919968556207e-07, "loss": 0.136, "step": 6128 }, { "epoch": 0.83, "grad_norm": 0.7788580175570488, "learning_rate": 7.693277027677898e-07, "loss": 0.1069, "step": 6129 }, { "epoch": 0.83, "grad_norm": 0.8790970746297723, "learning_rate": 7.68164215708705e-07, "loss": 0.142, "step": 6130 }, { "epoch": 0.83, "grad_norm": 0.8027184160123236, "learning_rate": 7.670015359003097e-07, "loss": 0.1402, "step": 6131 }, { "epoch": 0.83, "grad_norm": 1.1434865960537655, "learning_rate": 7.658396635643928e-07, "loss": 0.1783, "step": 6132 }, { "epoch": 0.83, "grad_norm": 1.0951267339067878, "learning_rate": 7.646785989225885e-07, "loss": 0.1855, "step": 6133 }, { "epoch": 0.83, "grad_norm": 1.0436976133548708, "learning_rate": 7.635183421963776e-07, "loss": 0.1734, "step": 6134 }, { "epoch": 0.83, "grad_norm": 0.93164380967608, "learning_rate": 7.623588936070875e-07, "loss": 0.1382, "step": 6135 }, { "epoch": 0.83, "grad_norm": 0.977981682286933, "learning_rate": 7.612002533758905e-07, "loss": 0.1555, "step": 6136 }, { "epoch": 0.83, "grad_norm": 0.9108394844083066, "learning_rate": 7.600424217238044e-07, "loss": 0.1349, "step": 6137 }, { "epoch": 0.83, "grad_norm": 0.7923254354344753, "learning_rate": 7.588853988716938e-07, "loss": 0.1191, "step": 6138 }, { "epoch": 0.83, "grad_norm": 0.7607966254961421, "learning_rate": 7.577291850402679e-07, "loss": 0.1068, "step": 6139 }, { "epoch": 0.83, "grad_norm": 1.1317271882017008, "learning_rate": 7.565737804500822e-07, "loss": 0.2225, "step": 6140 }, { "epoch": 0.83, "grad_norm": 0.955854508839649, "learning_rate": 7.554191853215387e-07, "loss": 0.1721, "step": 6141 }, { "epoch": 0.83, "grad_norm": 0.9769691646902561, "learning_rate": 7.542653998748828e-07, "loss": 0.1724, "step": 6142 }, { "epoch": 0.83, "grad_norm": 1.012325505636287, "learning_rate": 7.531124243302063e-07, "loss": 0.192, "step": 6143 }, { "epoch": 0.83, "grad_norm": 0.8752783373646479, "learning_rate": 7.519602589074493e-07, "loss": 0.1547, "step": 6144 }, { "epoch": 0.83, "grad_norm": 0.7807791017930841, "learning_rate": 7.508089038263943e-07, "loss": 0.1563, "step": 6145 }, { "epoch": 0.83, "grad_norm": 0.8970727027800284, "learning_rate": 7.496583593066686e-07, "loss": 0.1246, "step": 6146 }, { "epoch": 0.83, "grad_norm": 1.0242372005485993, "learning_rate": 7.485086255677466e-07, "loss": 0.1616, "step": 6147 }, { "epoch": 0.83, "grad_norm": 1.18291011244177, "learning_rate": 7.473597028289475e-07, "loss": 0.2122, "step": 6148 }, { "epoch": 0.83, "grad_norm": 0.8516411687887598, "learning_rate": 7.462115913094364e-07, "loss": 0.1447, "step": 6149 }, { "epoch": 0.83, "grad_norm": 0.889852145240436, "learning_rate": 7.45064291228223e-07, "loss": 0.1174, "step": 6150 }, { "epoch": 0.83, "grad_norm": 1.3418472995115502, "learning_rate": 7.439178028041621e-07, "loss": 0.2159, "step": 6151 }, { "epoch": 0.83, "grad_norm": 0.9582136359151241, "learning_rate": 7.42772126255955e-07, "loss": 0.1483, "step": 6152 }, { "epoch": 0.83, "grad_norm": 1.0356926047108834, "learning_rate": 7.416272618021458e-07, "loss": 0.2031, "step": 6153 }, { "epoch": 0.83, "grad_norm": 0.8634446375967592, "learning_rate": 7.404832096611242e-07, "loss": 0.1093, "step": 6154 }, { "epoch": 0.83, "grad_norm": 0.9902102049797653, "learning_rate": 7.393399700511284e-07, "loss": 0.1756, "step": 6155 }, { "epoch": 0.83, "grad_norm": 0.9045392286385434, "learning_rate": 7.381975431902372e-07, "loss": 0.1621, "step": 6156 }, { "epoch": 0.83, "grad_norm": 0.7515384207449244, "learning_rate": 7.37055929296377e-07, "loss": 0.1225, "step": 6157 }, { "epoch": 0.83, "grad_norm": 0.9286710664969411, "learning_rate": 7.359151285873172e-07, "loss": 0.1488, "step": 6158 }, { "epoch": 0.83, "grad_norm": 1.2092210606301028, "learning_rate": 7.347751412806737e-07, "loss": 0.2002, "step": 6159 }, { "epoch": 0.83, "grad_norm": 1.2041092978753376, "learning_rate": 7.336359675939064e-07, "loss": 0.2409, "step": 6160 }, { "epoch": 0.83, "grad_norm": 0.9695548839892117, "learning_rate": 7.324976077443202e-07, "loss": 0.164, "step": 6161 }, { "epoch": 0.83, "grad_norm": 1.0962321836794104, "learning_rate": 7.313600619490658e-07, "loss": 0.1938, "step": 6162 }, { "epoch": 0.83, "grad_norm": 0.9039609356037683, "learning_rate": 7.302233304251355e-07, "loss": 0.1425, "step": 6163 }, { "epoch": 0.83, "grad_norm": 0.9186361094846158, "learning_rate": 7.290874133893699e-07, "loss": 0.1445, "step": 6164 }, { "epoch": 0.83, "grad_norm": 1.0289703198061928, "learning_rate": 7.279523110584507e-07, "loss": 0.1513, "step": 6165 }, { "epoch": 0.83, "grad_norm": 1.0503353176811245, "learning_rate": 7.268180236489092e-07, "loss": 0.1652, "step": 6166 }, { "epoch": 0.83, "grad_norm": 0.8481553731394315, "learning_rate": 7.256845513771171e-07, "loss": 0.1416, "step": 6167 }, { "epoch": 0.83, "grad_norm": 0.9663251773634434, "learning_rate": 7.245518944592911e-07, "loss": 0.1581, "step": 6168 }, { "epoch": 0.83, "grad_norm": 0.9967230226747626, "learning_rate": 7.234200531114932e-07, "loss": 0.1622, "step": 6169 }, { "epoch": 0.83, "grad_norm": 1.063421278436821, "learning_rate": 7.222890275496297e-07, "loss": 0.1656, "step": 6170 }, { "epoch": 0.83, "grad_norm": 0.6864006213515101, "learning_rate": 7.211588179894514e-07, "loss": 0.107, "step": 6171 }, { "epoch": 0.83, "grad_norm": 0.8462052004809886, "learning_rate": 7.200294246465534e-07, "loss": 0.1404, "step": 6172 }, { "epoch": 0.83, "grad_norm": 1.1924002487132512, "learning_rate": 7.189008477363746e-07, "loss": 0.1888, "step": 6173 }, { "epoch": 0.83, "grad_norm": 0.8906080676196526, "learning_rate": 7.177730874741984e-07, "loss": 0.1032, "step": 6174 }, { "epoch": 0.83, "grad_norm": 1.0183542846084526, "learning_rate": 7.166461440751526e-07, "loss": 0.1823, "step": 6175 }, { "epoch": 0.83, "grad_norm": 1.0056610724973303, "learning_rate": 7.155200177542098e-07, "loss": 0.1564, "step": 6176 }, { "epoch": 0.83, "grad_norm": 1.1340573619595236, "learning_rate": 7.143947087261854e-07, "loss": 0.1539, "step": 6177 }, { "epoch": 0.83, "grad_norm": 1.0043081449014173, "learning_rate": 7.132702172057393e-07, "loss": 0.2057, "step": 6178 }, { "epoch": 0.83, "grad_norm": 1.0782362356673272, "learning_rate": 7.121465434073766e-07, "loss": 0.1934, "step": 6179 }, { "epoch": 0.83, "grad_norm": 1.239703825508989, "learning_rate": 7.110236875454446e-07, "loss": 0.1769, "step": 6180 }, { "epoch": 0.83, "grad_norm": 1.1267393632533662, "learning_rate": 7.09901649834136e-07, "loss": 0.1811, "step": 6181 }, { "epoch": 0.83, "grad_norm": 1.1339422132580366, "learning_rate": 7.087804304874863e-07, "loss": 0.1823, "step": 6182 }, { "epoch": 0.83, "grad_norm": 0.8173736291983926, "learning_rate": 7.076600297193764e-07, "loss": 0.1114, "step": 6183 }, { "epoch": 0.83, "grad_norm": 0.9485509978685372, "learning_rate": 7.065404477435301e-07, "loss": 0.1629, "step": 6184 }, { "epoch": 0.83, "grad_norm": 1.0898208480065668, "learning_rate": 7.054216847735146e-07, "loss": 0.186, "step": 6185 }, { "epoch": 0.83, "grad_norm": 1.054525444735243, "learning_rate": 7.04303741022741e-07, "loss": 0.1767, "step": 6186 }, { "epoch": 0.83, "grad_norm": 0.9479647902711232, "learning_rate": 7.031866167044654e-07, "loss": 0.1395, "step": 6187 }, { "epoch": 0.83, "grad_norm": 0.8189701659694909, "learning_rate": 7.020703120317846e-07, "loss": 0.1154, "step": 6188 }, { "epoch": 0.83, "grad_norm": 1.0171741657423694, "learning_rate": 7.009548272176441e-07, "loss": 0.1377, "step": 6189 }, { "epoch": 0.83, "grad_norm": 0.8980623346236254, "learning_rate": 6.998401624748292e-07, "loss": 0.1325, "step": 6190 }, { "epoch": 0.83, "grad_norm": 0.9325814429803563, "learning_rate": 6.987263180159693e-07, "loss": 0.1106, "step": 6191 }, { "epoch": 0.83, "grad_norm": 1.0614430289371295, "learning_rate": 6.976132940535362e-07, "loss": 0.1902, "step": 6192 }, { "epoch": 0.84, "grad_norm": 0.9895215006617977, "learning_rate": 6.965010907998482e-07, "loss": 0.1477, "step": 6193 }, { "epoch": 0.84, "grad_norm": 0.9980111511448247, "learning_rate": 6.953897084670646e-07, "loss": 0.185, "step": 6194 }, { "epoch": 0.84, "grad_norm": 1.0103768602522145, "learning_rate": 6.942791472671895e-07, "loss": 0.1558, "step": 6195 }, { "epoch": 0.84, "grad_norm": 1.0260247126069824, "learning_rate": 6.931694074120699e-07, "loss": 0.1549, "step": 6196 }, { "epoch": 0.84, "grad_norm": 1.2750499296370776, "learning_rate": 6.920604891133948e-07, "loss": 0.2001, "step": 6197 }, { "epoch": 0.84, "grad_norm": 0.7716639438810377, "learning_rate": 6.909523925826994e-07, "loss": 0.1556, "step": 6198 }, { "epoch": 0.84, "grad_norm": 0.986984242927735, "learning_rate": 6.898451180313581e-07, "loss": 0.189, "step": 6199 }, { "epoch": 0.84, "grad_norm": 1.039981029488902, "learning_rate": 6.887386656705936e-07, "loss": 0.146, "step": 6200 }, { "epoch": 0.84, "grad_norm": 0.995503247000664, "learning_rate": 6.876330357114674e-07, "loss": 0.1714, "step": 6201 }, { "epoch": 0.84, "grad_norm": 1.0476302590492692, "learning_rate": 6.865282283648867e-07, "loss": 0.1804, "step": 6202 }, { "epoch": 0.84, "grad_norm": 0.9254521656831755, "learning_rate": 6.854242438416003e-07, "loss": 0.1224, "step": 6203 }, { "epoch": 0.84, "grad_norm": 1.117170914798823, "learning_rate": 6.843210823522001e-07, "loss": 0.1751, "step": 6204 }, { "epoch": 0.84, "grad_norm": 0.6020759506776878, "learning_rate": 6.832187441071226e-07, "loss": 0.1306, "step": 6205 }, { "epoch": 0.84, "grad_norm": 1.1204147055412714, "learning_rate": 6.821172293166445e-07, "loss": 0.1954, "step": 6206 }, { "epoch": 0.84, "grad_norm": 0.884279666031796, "learning_rate": 6.810165381908884e-07, "loss": 0.1309, "step": 6207 }, { "epoch": 0.84, "grad_norm": 0.9468897272548826, "learning_rate": 6.799166709398175e-07, "loss": 0.144, "step": 6208 }, { "epoch": 0.84, "grad_norm": 0.9232994126549895, "learning_rate": 6.788176277732411e-07, "loss": 0.1381, "step": 6209 }, { "epoch": 0.84, "grad_norm": 0.9256460077272873, "learning_rate": 6.777194089008044e-07, "loss": 0.169, "step": 6210 }, { "epoch": 0.84, "grad_norm": 0.6835932232695976, "learning_rate": 6.766220145320035e-07, "loss": 0.1276, "step": 6211 }, { "epoch": 0.84, "grad_norm": 0.6643463968263651, "learning_rate": 6.755254448761728e-07, "loss": 0.0987, "step": 6212 }, { "epoch": 0.84, "grad_norm": 1.0260826350023649, "learning_rate": 6.744297001424904e-07, "loss": 0.1758, "step": 6213 }, { "epoch": 0.84, "grad_norm": 1.1132736379805734, "learning_rate": 6.733347805399764e-07, "loss": 0.1673, "step": 6214 }, { "epoch": 0.84, "grad_norm": 0.846502914282829, "learning_rate": 6.722406862774944e-07, "loss": 0.117, "step": 6215 }, { "epoch": 0.84, "grad_norm": 0.7918961037359178, "learning_rate": 6.711474175637494e-07, "loss": 0.1103, "step": 6216 }, { "epoch": 0.84, "grad_norm": 0.8496341652328424, "learning_rate": 6.700549746072904e-07, "loss": 0.1581, "step": 6217 }, { "epoch": 0.84, "grad_norm": 1.0124500916911516, "learning_rate": 6.689633576165083e-07, "loss": 0.1751, "step": 6218 }, { "epoch": 0.84, "grad_norm": 1.1384272017395163, "learning_rate": 6.67872566799635e-07, "loss": 0.1819, "step": 6219 }, { "epoch": 0.84, "grad_norm": 1.00706125606111, "learning_rate": 6.667826023647472e-07, "loss": 0.1687, "step": 6220 }, { "epoch": 0.84, "grad_norm": 0.893069406294713, "learning_rate": 6.656934645197626e-07, "loss": 0.1356, "step": 6221 }, { "epoch": 0.84, "grad_norm": 0.9609383114905601, "learning_rate": 6.646051534724419e-07, "loss": 0.1847, "step": 6222 }, { "epoch": 0.84, "grad_norm": 0.8574887946637828, "learning_rate": 6.635176694303864e-07, "loss": 0.1283, "step": 6223 }, { "epoch": 0.84, "grad_norm": 0.8103146289707231, "learning_rate": 6.624310126010419e-07, "loss": 0.1464, "step": 6224 }, { "epoch": 0.84, "grad_norm": 0.9782819370412615, "learning_rate": 6.61345183191695e-07, "loss": 0.1663, "step": 6225 }, { "epoch": 0.84, "grad_norm": 0.9510302304816143, "learning_rate": 6.60260181409475e-07, "loss": 0.1836, "step": 6226 }, { "epoch": 0.84, "grad_norm": 0.9990670342413697, "learning_rate": 6.591760074613529e-07, "loss": 0.1793, "step": 6227 }, { "epoch": 0.84, "grad_norm": 0.9346110205167386, "learning_rate": 6.580926615541428e-07, "loss": 0.1393, "step": 6228 }, { "epoch": 0.84, "grad_norm": 1.0419359967959136, "learning_rate": 6.570101438944987e-07, "loss": 0.175, "step": 6229 }, { "epoch": 0.84, "grad_norm": 0.7935254320390988, "learning_rate": 6.559284546889195e-07, "loss": 0.137, "step": 6230 }, { "epoch": 0.84, "grad_norm": 1.042052888614404, "learning_rate": 6.548475941437437e-07, "loss": 0.1598, "step": 6231 }, { "epoch": 0.84, "grad_norm": 1.0181185555045325, "learning_rate": 6.53767562465153e-07, "loss": 0.1847, "step": 6232 }, { "epoch": 0.84, "grad_norm": 1.0246369637247872, "learning_rate": 6.526883598591694e-07, "loss": 0.1807, "step": 6233 }, { "epoch": 0.84, "grad_norm": 0.8926859957377062, "learning_rate": 6.5160998653166e-07, "loss": 0.1595, "step": 6234 }, { "epoch": 0.84, "grad_norm": 1.1756236492360923, "learning_rate": 6.505324426883303e-07, "loss": 0.1811, "step": 6235 }, { "epoch": 0.84, "grad_norm": 0.9200101837445687, "learning_rate": 6.494557285347297e-07, "loss": 0.1484, "step": 6236 }, { "epoch": 0.84, "grad_norm": 0.9308989064059969, "learning_rate": 6.483798442762479e-07, "loss": 0.1442, "step": 6237 }, { "epoch": 0.84, "grad_norm": 0.8566587174439826, "learning_rate": 6.473047901181185e-07, "loss": 0.1576, "step": 6238 }, { "epoch": 0.84, "grad_norm": 0.9387831528858295, "learning_rate": 6.462305662654122e-07, "loss": 0.1444, "step": 6239 }, { "epoch": 0.84, "grad_norm": 0.8118613276068842, "learning_rate": 6.451571729230466e-07, "loss": 0.1052, "step": 6240 }, { "epoch": 0.84, "grad_norm": 0.4421220022238005, "learning_rate": 6.440846102957776e-07, "loss": 0.1098, "step": 6241 }, { "epoch": 0.84, "grad_norm": 1.1267728512464357, "learning_rate": 6.43012878588204e-07, "loss": 0.1638, "step": 6242 }, { "epoch": 0.84, "grad_norm": 1.1094691275571125, "learning_rate": 6.419419780047659e-07, "loss": 0.1792, "step": 6243 }, { "epoch": 0.84, "grad_norm": 0.8874806944231751, "learning_rate": 6.408719087497428e-07, "loss": 0.1331, "step": 6244 }, { "epoch": 0.84, "grad_norm": 1.0030357743830205, "learning_rate": 6.398026710272609e-07, "loss": 0.1877, "step": 6245 }, { "epoch": 0.84, "grad_norm": 0.9597515717356001, "learning_rate": 6.387342650412826e-07, "loss": 0.1946, "step": 6246 }, { "epoch": 0.84, "grad_norm": 1.0583127357865145, "learning_rate": 6.37666690995613e-07, "loss": 0.1858, "step": 6247 }, { "epoch": 0.84, "grad_norm": 0.889642739754243, "learning_rate": 6.365999490938995e-07, "loss": 0.1508, "step": 6248 }, { "epoch": 0.84, "grad_norm": 1.1653478679267737, "learning_rate": 6.355340395396303e-07, "loss": 0.1813, "step": 6249 }, { "epoch": 0.84, "grad_norm": 0.895024954669452, "learning_rate": 6.344689625361339e-07, "loss": 0.1411, "step": 6250 }, { "epoch": 0.84, "grad_norm": 0.6960088724941282, "learning_rate": 6.334047182865815e-07, "loss": 0.1064, "step": 6251 }, { "epoch": 0.84, "grad_norm": 1.2241596519273352, "learning_rate": 6.323413069939849e-07, "loss": 0.2146, "step": 6252 }, { "epoch": 0.84, "grad_norm": 0.9023152636935614, "learning_rate": 6.312787288611965e-07, "loss": 0.1511, "step": 6253 }, { "epoch": 0.84, "grad_norm": 0.9221554338034569, "learning_rate": 6.3021698409091e-07, "loss": 0.1563, "step": 6254 }, { "epoch": 0.84, "grad_norm": 1.1332075820223064, "learning_rate": 6.291560728856599e-07, "loss": 0.1802, "step": 6255 }, { "epoch": 0.84, "grad_norm": 1.1005015940548575, "learning_rate": 6.280959954478233e-07, "loss": 0.1751, "step": 6256 }, { "epoch": 0.84, "grad_norm": 0.8862430180137111, "learning_rate": 6.270367519796155e-07, "loss": 0.1874, "step": 6257 }, { "epoch": 0.84, "grad_norm": 0.9403977681618588, "learning_rate": 6.259783426830957e-07, "loss": 0.1813, "step": 6258 }, { "epoch": 0.84, "grad_norm": 0.9010419636236114, "learning_rate": 6.24920767760161e-07, "loss": 0.1196, "step": 6259 }, { "epoch": 0.84, "grad_norm": 0.6743773773469434, "learning_rate": 6.238640274125518e-07, "loss": 0.109, "step": 6260 }, { "epoch": 0.84, "grad_norm": 0.7771613115728727, "learning_rate": 6.228081218418474e-07, "loss": 0.1655, "step": 6261 }, { "epoch": 0.84, "grad_norm": 0.857793526083911, "learning_rate": 6.217530512494701e-07, "loss": 0.1289, "step": 6262 }, { "epoch": 0.84, "grad_norm": 1.027098069717295, "learning_rate": 6.206988158366806e-07, "loss": 0.1521, "step": 6263 }, { "epoch": 0.84, "grad_norm": 0.8402333539093781, "learning_rate": 6.196454158045817e-07, "loss": 0.1219, "step": 6264 }, { "epoch": 0.84, "grad_norm": 0.9563946627141791, "learning_rate": 6.185928513541162e-07, "loss": 0.1928, "step": 6265 }, { "epoch": 0.84, "grad_norm": 0.7819378441519622, "learning_rate": 6.175411226860667e-07, "loss": 0.1372, "step": 6266 }, { "epoch": 0.85, "grad_norm": 1.037719288637647, "learning_rate": 6.164902300010595e-07, "loss": 0.159, "step": 6267 }, { "epoch": 0.85, "grad_norm": 1.100890765668324, "learning_rate": 6.154401734995596e-07, "loss": 0.2009, "step": 6268 }, { "epoch": 0.85, "grad_norm": 0.7844625497337301, "learning_rate": 6.143909533818704e-07, "loss": 0.1139, "step": 6269 }, { "epoch": 0.85, "grad_norm": 1.0762121409715362, "learning_rate": 6.133425698481377e-07, "loss": 0.1401, "step": 6270 }, { "epoch": 0.85, "grad_norm": 0.8591322116916041, "learning_rate": 6.122950230983476e-07, "loss": 0.1248, "step": 6271 }, { "epoch": 0.85, "grad_norm": 0.9848503867262209, "learning_rate": 6.112483133323277e-07, "loss": 0.1401, "step": 6272 }, { "epoch": 0.85, "grad_norm": 0.885548882361572, "learning_rate": 6.102024407497442e-07, "loss": 0.1706, "step": 6273 }, { "epoch": 0.85, "grad_norm": 0.8009080176893196, "learning_rate": 6.091574055501043e-07, "loss": 0.124, "step": 6274 }, { "epoch": 0.85, "grad_norm": 0.8432006434097415, "learning_rate": 6.081132079327545e-07, "loss": 0.1424, "step": 6275 }, { "epoch": 0.85, "grad_norm": 0.7468176024959775, "learning_rate": 6.070698480968839e-07, "loss": 0.1247, "step": 6276 }, { "epoch": 0.85, "grad_norm": 0.7423936013498135, "learning_rate": 6.060273262415195e-07, "loss": 0.1114, "step": 6277 }, { "epoch": 0.85, "grad_norm": 0.9555201924686546, "learning_rate": 6.049856425655282e-07, "loss": 0.1567, "step": 6278 }, { "epoch": 0.85, "grad_norm": 0.6642573448764376, "learning_rate": 6.039447972676204e-07, "loss": 0.0983, "step": 6279 }, { "epoch": 0.85, "grad_norm": 1.0109414599524835, "learning_rate": 6.029047905463426e-07, "loss": 0.1692, "step": 6280 }, { "epoch": 0.85, "grad_norm": 0.7414731598576992, "learning_rate": 6.018656226000835e-07, "loss": 0.1432, "step": 6281 }, { "epoch": 0.85, "grad_norm": 0.9406293911704788, "learning_rate": 6.008272936270714e-07, "loss": 0.1514, "step": 6282 }, { "epoch": 0.85, "grad_norm": 1.126949084429464, "learning_rate": 5.997898038253741e-07, "loss": 0.1992, "step": 6283 }, { "epoch": 0.85, "grad_norm": 1.077464208791795, "learning_rate": 5.987531533928997e-07, "loss": 0.1912, "step": 6284 }, { "epoch": 0.85, "grad_norm": 0.9739589348780749, "learning_rate": 5.977173425273968e-07, "loss": 0.1683, "step": 6285 }, { "epoch": 0.85, "grad_norm": 0.8078965265186561, "learning_rate": 5.966823714264519e-07, "loss": 0.1129, "step": 6286 }, { "epoch": 0.85, "grad_norm": 0.6336835117876843, "learning_rate": 5.95648240287493e-07, "loss": 0.0992, "step": 6287 }, { "epoch": 0.85, "grad_norm": 1.0488789425810507, "learning_rate": 5.94614949307788e-07, "loss": 0.1621, "step": 6288 }, { "epoch": 0.85, "grad_norm": 0.9120279060567074, "learning_rate": 5.935824986844424e-07, "loss": 0.1528, "step": 6289 }, { "epoch": 0.85, "grad_norm": 0.9894668443124296, "learning_rate": 5.925508886144055e-07, "loss": 0.1321, "step": 6290 }, { "epoch": 0.85, "grad_norm": 0.6866931277850614, "learning_rate": 5.915201192944625e-07, "loss": 0.0822, "step": 6291 }, { "epoch": 0.85, "grad_norm": 0.8737681642287556, "learning_rate": 5.904901909212391e-07, "loss": 0.1625, "step": 6292 }, { "epoch": 0.85, "grad_norm": 1.2833847716477613, "learning_rate": 5.894611036912018e-07, "loss": 0.1826, "step": 6293 }, { "epoch": 0.85, "grad_norm": 0.9981923487265315, "learning_rate": 5.884328578006548e-07, "loss": 0.2155, "step": 6294 }, { "epoch": 0.85, "grad_norm": 0.813583842154565, "learning_rate": 5.874054534457441e-07, "loss": 0.1288, "step": 6295 }, { "epoch": 0.85, "grad_norm": 1.3894503847509132, "learning_rate": 5.863788908224527e-07, "loss": 0.2117, "step": 6296 }, { "epoch": 0.85, "grad_norm": 0.9979169006250002, "learning_rate": 5.853531701266046e-07, "loss": 0.1903, "step": 6297 }, { "epoch": 0.85, "grad_norm": 1.0461993037947879, "learning_rate": 5.843282915538629e-07, "loss": 0.1592, "step": 6298 }, { "epoch": 0.85, "grad_norm": 0.7802901500769098, "learning_rate": 5.833042552997303e-07, "loss": 0.1313, "step": 6299 }, { "epoch": 0.85, "grad_norm": 0.8901947957229105, "learning_rate": 5.822810615595476e-07, "loss": 0.1246, "step": 6300 }, { "epoch": 0.85, "grad_norm": 1.0575206208146186, "learning_rate": 5.812587105284967e-07, "loss": 0.1592, "step": 6301 }, { "epoch": 0.85, "grad_norm": 0.9566914318955411, "learning_rate": 5.802372024015973e-07, "loss": 0.1546, "step": 6302 }, { "epoch": 0.85, "grad_norm": 0.9669475196316842, "learning_rate": 5.792165373737086e-07, "loss": 0.1846, "step": 6303 }, { "epoch": 0.85, "grad_norm": 0.9913844770937538, "learning_rate": 5.781967156395302e-07, "loss": 0.1473, "step": 6304 }, { "epoch": 0.85, "grad_norm": 0.9824181097664149, "learning_rate": 5.771777373935988e-07, "loss": 0.1675, "step": 6305 }, { "epoch": 0.85, "grad_norm": 1.2896950107835483, "learning_rate": 5.761596028302918e-07, "loss": 0.2143, "step": 6306 }, { "epoch": 0.85, "grad_norm": 0.9155980904289378, "learning_rate": 5.751423121438249e-07, "loss": 0.1303, "step": 6307 }, { "epoch": 0.85, "grad_norm": 1.2513061020020022, "learning_rate": 5.741258655282533e-07, "loss": 0.2045, "step": 6308 }, { "epoch": 0.85, "grad_norm": 0.9362728419103497, "learning_rate": 5.731102631774705e-07, "loss": 0.1472, "step": 6309 }, { "epoch": 0.85, "grad_norm": 1.159023914631112, "learning_rate": 5.720955052852101e-07, "loss": 0.2032, "step": 6310 }, { "epoch": 0.85, "grad_norm": 0.98608855880572, "learning_rate": 5.710815920450419e-07, "loss": 0.1263, "step": 6311 }, { "epoch": 0.85, "grad_norm": 0.9847878222889241, "learning_rate": 5.700685236503789e-07, "loss": 0.19, "step": 6312 }, { "epoch": 0.85, "grad_norm": 1.1085847897300618, "learning_rate": 5.690563002944704e-07, "loss": 0.1733, "step": 6313 }, { "epoch": 0.85, "grad_norm": 0.8370565128396594, "learning_rate": 5.680449221704038e-07, "loss": 0.1161, "step": 6314 }, { "epoch": 0.85, "grad_norm": 1.0038939228980803, "learning_rate": 5.670343894711072e-07, "loss": 0.2123, "step": 6315 }, { "epoch": 0.85, "grad_norm": 1.0704874804326, "learning_rate": 5.660247023893445e-07, "loss": 0.1554, "step": 6316 }, { "epoch": 0.85, "grad_norm": 0.7794792342410576, "learning_rate": 5.65015861117722e-07, "loss": 0.1476, "step": 6317 }, { "epoch": 0.85, "grad_norm": 1.0223832716264127, "learning_rate": 5.64007865848682e-07, "loss": 0.1684, "step": 6318 }, { "epoch": 0.85, "grad_norm": 1.185889146564933, "learning_rate": 5.630007167745061e-07, "loss": 0.1957, "step": 6319 }, { "epoch": 0.85, "grad_norm": 0.8054054391485373, "learning_rate": 5.619944140873152e-07, "loss": 0.1384, "step": 6320 }, { "epoch": 0.85, "grad_norm": 1.2420299573179665, "learning_rate": 5.609889579790678e-07, "loss": 0.2054, "step": 6321 }, { "epoch": 0.85, "grad_norm": 0.7616327294359372, "learning_rate": 5.599843486415607e-07, "loss": 0.1395, "step": 6322 }, { "epoch": 0.85, "grad_norm": 0.9775296684308928, "learning_rate": 5.589805862664316e-07, "loss": 0.1542, "step": 6323 }, { "epoch": 0.85, "grad_norm": 0.9307535069843053, "learning_rate": 5.579776710451539e-07, "loss": 0.144, "step": 6324 }, { "epoch": 0.85, "grad_norm": 1.0050212838380548, "learning_rate": 5.569756031690399e-07, "loss": 0.1187, "step": 6325 }, { "epoch": 0.85, "grad_norm": 1.0623580751007327, "learning_rate": 5.559743828292413e-07, "loss": 0.1877, "step": 6326 }, { "epoch": 0.85, "grad_norm": 0.9902988796195, "learning_rate": 5.549740102167472e-07, "loss": 0.1637, "step": 6327 }, { "epoch": 0.85, "grad_norm": 0.8625682019438713, "learning_rate": 5.53974485522385e-07, "loss": 0.144, "step": 6328 }, { "epoch": 0.85, "grad_norm": 1.0647204828361037, "learning_rate": 5.529758089368215e-07, "loss": 0.1583, "step": 6329 }, { "epoch": 0.85, "grad_norm": 1.0281224170402394, "learning_rate": 5.519779806505599e-07, "loss": 0.1507, "step": 6330 }, { "epoch": 0.85, "grad_norm": 1.0346037656328568, "learning_rate": 5.509810008539435e-07, "loss": 0.1751, "step": 6331 }, { "epoch": 0.85, "grad_norm": 0.7424052746543686, "learning_rate": 5.49984869737153e-07, "loss": 0.139, "step": 6332 }, { "epoch": 0.85, "grad_norm": 1.1239726950026039, "learning_rate": 5.489895874902052e-07, "loss": 0.1971, "step": 6333 }, { "epoch": 0.85, "grad_norm": 1.1376911567211232, "learning_rate": 5.479951543029566e-07, "loss": 0.1964, "step": 6334 }, { "epoch": 0.85, "grad_norm": 0.8298414832175496, "learning_rate": 5.470015703651043e-07, "loss": 0.1663, "step": 6335 }, { "epoch": 0.85, "grad_norm": 0.675882331440319, "learning_rate": 5.460088358661802e-07, "loss": 0.1289, "step": 6336 }, { "epoch": 0.85, "grad_norm": 1.0596667866550549, "learning_rate": 5.450169509955549e-07, "loss": 0.1487, "step": 6337 }, { "epoch": 0.85, "grad_norm": 0.8114730849049155, "learning_rate": 5.440259159424361e-07, "loss": 0.1076, "step": 6338 }, { "epoch": 0.85, "grad_norm": 0.8683828630136692, "learning_rate": 5.430357308958711e-07, "loss": 0.1709, "step": 6339 }, { "epoch": 0.85, "grad_norm": 1.1938311533404755, "learning_rate": 5.420463960447447e-07, "loss": 0.235, "step": 6340 }, { "epoch": 0.86, "grad_norm": 1.0498113883517028, "learning_rate": 5.410579115777781e-07, "loss": 0.2129, "step": 6341 }, { "epoch": 0.86, "grad_norm": 1.0582907525923018, "learning_rate": 5.400702776835314e-07, "loss": 0.1677, "step": 6342 }, { "epoch": 0.86, "grad_norm": 0.7590402506058977, "learning_rate": 5.390834945504031e-07, "loss": 0.1371, "step": 6343 }, { "epoch": 0.86, "grad_norm": 1.0112085087330025, "learning_rate": 5.380975623666279e-07, "loss": 0.149, "step": 6344 }, { "epoch": 0.86, "grad_norm": 0.9727409789707463, "learning_rate": 5.371124813202788e-07, "loss": 0.1346, "step": 6345 }, { "epoch": 0.86, "grad_norm": 1.067419803079856, "learning_rate": 5.361282515992666e-07, "loss": 0.1959, "step": 6346 }, { "epoch": 0.86, "grad_norm": 1.006667192035056, "learning_rate": 5.351448733913406e-07, "loss": 0.1743, "step": 6347 }, { "epoch": 0.86, "grad_norm": 0.6961740334951217, "learning_rate": 5.341623468840851e-07, "loss": 0.1226, "step": 6348 }, { "epoch": 0.86, "grad_norm": 0.9451164920063745, "learning_rate": 5.331806722649252e-07, "loss": 0.174, "step": 6349 }, { "epoch": 0.86, "grad_norm": 0.7914320523516993, "learning_rate": 5.321998497211206e-07, "loss": 0.1489, "step": 6350 }, { "epoch": 0.86, "grad_norm": 1.1142690808389233, "learning_rate": 5.3121987943977e-07, "loss": 0.1958, "step": 6351 }, { "epoch": 0.86, "grad_norm": 0.5625972881745678, "learning_rate": 5.302407616078092e-07, "loss": 0.0982, "step": 6352 }, { "epoch": 0.86, "grad_norm": 0.9118595306615143, "learning_rate": 5.292624964120113e-07, "loss": 0.1343, "step": 6353 }, { "epoch": 0.86, "grad_norm": 0.6161848332769246, "learning_rate": 5.282850840389875e-07, "loss": 0.1054, "step": 6354 }, { "epoch": 0.86, "grad_norm": 0.9315953265089535, "learning_rate": 5.273085246751852e-07, "loss": 0.158, "step": 6355 }, { "epoch": 0.86, "grad_norm": 0.9503710514920243, "learning_rate": 5.263328185068888e-07, "loss": 0.1484, "step": 6356 }, { "epoch": 0.86, "grad_norm": 1.109034243759788, "learning_rate": 5.253579657202223e-07, "loss": 0.1828, "step": 6357 }, { "epoch": 0.86, "grad_norm": 0.9701613145284307, "learning_rate": 5.243839665011447e-07, "loss": 0.1689, "step": 6358 }, { "epoch": 0.86, "grad_norm": 1.0536977040942241, "learning_rate": 5.234108210354527e-07, "loss": 0.1615, "step": 6359 }, { "epoch": 0.86, "grad_norm": 0.9120923000888748, "learning_rate": 5.224385295087797e-07, "loss": 0.1329, "step": 6360 }, { "epoch": 0.86, "grad_norm": 0.665783942298152, "learning_rate": 5.21467092106599e-07, "loss": 0.0889, "step": 6361 }, { "epoch": 0.86, "grad_norm": 0.8129534877449156, "learning_rate": 5.20496509014215e-07, "loss": 0.13, "step": 6362 }, { "epoch": 0.86, "grad_norm": 1.1365028813980407, "learning_rate": 5.195267804167753e-07, "loss": 0.1864, "step": 6363 }, { "epoch": 0.86, "grad_norm": 1.0929886090388992, "learning_rate": 5.185579064992618e-07, "loss": 0.1765, "step": 6364 }, { "epoch": 0.86, "grad_norm": 0.8343596349339035, "learning_rate": 5.17589887446493e-07, "loss": 0.144, "step": 6365 }, { "epoch": 0.86, "grad_norm": 0.8475793667492336, "learning_rate": 5.166227234431254e-07, "loss": 0.1388, "step": 6366 }, { "epoch": 0.86, "grad_norm": 1.0558267491181283, "learning_rate": 5.156564146736509e-07, "loss": 0.1676, "step": 6367 }, { "epoch": 0.86, "grad_norm": 0.8672959552998791, "learning_rate": 5.146909613224015e-07, "loss": 0.1237, "step": 6368 }, { "epoch": 0.86, "grad_norm": 0.9537162216136689, "learning_rate": 5.137263635735423e-07, "loss": 0.1666, "step": 6369 }, { "epoch": 0.86, "grad_norm": 0.8869111204772371, "learning_rate": 5.12762621611077e-07, "loss": 0.1326, "step": 6370 }, { "epoch": 0.86, "grad_norm": 0.8064632266417862, "learning_rate": 5.117997356188454e-07, "loss": 0.1266, "step": 6371 }, { "epoch": 0.86, "grad_norm": 1.0553248897841854, "learning_rate": 5.108377057805253e-07, "loss": 0.162, "step": 6372 }, { "epoch": 0.86, "grad_norm": 0.8249803394870762, "learning_rate": 5.09876532279629e-07, "loss": 0.1297, "step": 6373 }, { "epoch": 0.86, "grad_norm": 1.0610393567362673, "learning_rate": 5.089162152995075e-07, "loss": 0.172, "step": 6374 }, { "epoch": 0.86, "grad_norm": 0.7131092699894326, "learning_rate": 5.079567550233477e-07, "loss": 0.1355, "step": 6375 }, { "epoch": 0.86, "grad_norm": 1.0927078274854929, "learning_rate": 5.069981516341727e-07, "loss": 0.1858, "step": 6376 }, { "epoch": 0.86, "grad_norm": 0.8319940937292754, "learning_rate": 5.060404053148427e-07, "loss": 0.1653, "step": 6377 }, { "epoch": 0.86, "grad_norm": 0.9747227831021323, "learning_rate": 5.050835162480549e-07, "loss": 0.1636, "step": 6378 }, { "epoch": 0.86, "grad_norm": 0.7170252897512815, "learning_rate": 5.041274846163391e-07, "loss": 0.1364, "step": 6379 }, { "epoch": 0.86, "grad_norm": 1.2000447845135325, "learning_rate": 5.031723106020681e-07, "loss": 0.1822, "step": 6380 }, { "epoch": 0.86, "grad_norm": 1.0338266947945878, "learning_rate": 5.022179943874461e-07, "loss": 0.166, "step": 6381 }, { "epoch": 0.86, "grad_norm": 1.2488993678443545, "learning_rate": 5.012645361545159e-07, "loss": 0.2245, "step": 6382 }, { "epoch": 0.86, "grad_norm": 0.9060299133681259, "learning_rate": 5.003119360851555e-07, "loss": 0.1402, "step": 6383 }, { "epoch": 0.86, "grad_norm": 0.992710997427097, "learning_rate": 4.993601943610798e-07, "loss": 0.1987, "step": 6384 }, { "epoch": 0.86, "grad_norm": 1.137046372986187, "learning_rate": 4.9840931116384e-07, "loss": 0.189, "step": 6385 }, { "epoch": 0.86, "grad_norm": 0.8676078998304928, "learning_rate": 4.974592866748229e-07, "loss": 0.1674, "step": 6386 }, { "epoch": 0.86, "grad_norm": 0.8447930920612355, "learning_rate": 4.965101210752526e-07, "loss": 0.137, "step": 6387 }, { "epoch": 0.86, "grad_norm": 0.772048849344421, "learning_rate": 4.955618145461883e-07, "loss": 0.1189, "step": 6388 }, { "epoch": 0.86, "grad_norm": 0.8303104590438161, "learning_rate": 4.946143672685256e-07, "loss": 0.1465, "step": 6389 }, { "epoch": 0.86, "grad_norm": 0.8641039053909849, "learning_rate": 4.936677794229955e-07, "loss": 0.1643, "step": 6390 }, { "epoch": 0.86, "grad_norm": 1.2135383284581949, "learning_rate": 4.927220511901693e-07, "loss": 0.1999, "step": 6391 }, { "epoch": 0.86, "grad_norm": 0.7884452403615588, "learning_rate": 4.917771827504475e-07, "loss": 0.1237, "step": 6392 }, { "epoch": 0.86, "grad_norm": 1.0230160452614838, "learning_rate": 4.908331742840711e-07, "loss": 0.154, "step": 6393 }, { "epoch": 0.86, "grad_norm": 0.9037995322282468, "learning_rate": 4.898900259711158e-07, "loss": 0.121, "step": 6394 }, { "epoch": 0.86, "grad_norm": 1.0407882777527573, "learning_rate": 4.889477379914936e-07, "loss": 0.1731, "step": 6395 }, { "epoch": 0.86, "grad_norm": 0.8675309108215828, "learning_rate": 4.880063105249522e-07, "loss": 0.1579, "step": 6396 }, { "epoch": 0.86, "grad_norm": 1.18203651302903, "learning_rate": 4.870657437510751e-07, "loss": 0.1881, "step": 6397 }, { "epoch": 0.86, "grad_norm": 0.8848415487536554, "learning_rate": 4.861260378492816e-07, "loss": 0.1382, "step": 6398 }, { "epoch": 0.86, "grad_norm": 0.7470832430550791, "learning_rate": 4.851871929988267e-07, "loss": 0.1485, "step": 6399 }, { "epoch": 0.86, "grad_norm": 0.8672620485408103, "learning_rate": 4.842492093788014e-07, "loss": 0.1005, "step": 6400 }, { "epoch": 0.86, "grad_norm": 1.0693028326845089, "learning_rate": 4.833120871681313e-07, "loss": 0.1952, "step": 6401 }, { "epoch": 0.86, "grad_norm": 0.89817158696219, "learning_rate": 4.823758265455803e-07, "loss": 0.1629, "step": 6402 }, { "epoch": 0.86, "grad_norm": 0.9389234499997499, "learning_rate": 4.814404276897461e-07, "loss": 0.1847, "step": 6403 }, { "epoch": 0.86, "grad_norm": 1.3805380072437428, "learning_rate": 4.805058907790611e-07, "loss": 0.2236, "step": 6404 }, { "epoch": 0.86, "grad_norm": 0.9185964155534543, "learning_rate": 4.795722159917959e-07, "loss": 0.1532, "step": 6405 }, { "epoch": 0.86, "grad_norm": 0.8995469898040998, "learning_rate": 4.786394035060538e-07, "loss": 0.1344, "step": 6406 }, { "epoch": 0.86, "grad_norm": 1.0231218896030658, "learning_rate": 4.777074534997755e-07, "loss": 0.1614, "step": 6407 }, { "epoch": 0.86, "grad_norm": 0.8868462163412947, "learning_rate": 4.767763661507374e-07, "loss": 0.1249, "step": 6408 }, { "epoch": 0.86, "grad_norm": 1.034421449688889, "learning_rate": 4.7584614163654896e-07, "loss": 0.1621, "step": 6409 }, { "epoch": 0.86, "grad_norm": 1.0452204289544866, "learning_rate": 4.749167801346577e-07, "loss": 0.1933, "step": 6410 }, { "epoch": 0.86, "grad_norm": 0.8128865763691816, "learning_rate": 4.7398828182234457e-07, "loss": 0.138, "step": 6411 }, { "epoch": 0.86, "grad_norm": 0.8791060882141107, "learning_rate": 4.7306064687672737e-07, "loss": 0.1312, "step": 6412 }, { "epoch": 0.86, "grad_norm": 0.7557405145665133, "learning_rate": 4.7213387547475896e-07, "loss": 0.1303, "step": 6413 }, { "epoch": 0.86, "grad_norm": 0.907209228617094, "learning_rate": 4.7120796779322744e-07, "loss": 0.1514, "step": 6414 }, { "epoch": 0.87, "grad_norm": 0.9135308649235993, "learning_rate": 4.7028292400875474e-07, "loss": 0.1052, "step": 6415 }, { "epoch": 0.87, "grad_norm": 1.1628995042620305, "learning_rate": 4.693587442977998e-07, "loss": 0.1855, "step": 6416 }, { "epoch": 0.87, "grad_norm": 1.1751885371104345, "learning_rate": 4.684354288366555e-07, "loss": 0.1895, "step": 6417 }, { "epoch": 0.87, "grad_norm": 0.9383083686692925, "learning_rate": 4.675129778014509e-07, "loss": 0.1673, "step": 6418 }, { "epoch": 0.87, "grad_norm": 1.0513667540498797, "learning_rate": 4.665913913681497e-07, "loss": 0.1596, "step": 6419 }, { "epoch": 0.87, "grad_norm": 1.1893424639207533, "learning_rate": 4.656706697125496e-07, "loss": 0.1868, "step": 6420 }, { "epoch": 0.87, "grad_norm": 1.0345221289420548, "learning_rate": 4.647508130102857e-07, "loss": 0.1765, "step": 6421 }, { "epoch": 0.87, "grad_norm": 1.119065406141127, "learning_rate": 4.6383182143682594e-07, "loss": 0.1715, "step": 6422 }, { "epoch": 0.87, "grad_norm": 1.0770133808447624, "learning_rate": 4.629136951674745e-07, "loss": 0.1876, "step": 6423 }, { "epoch": 0.87, "grad_norm": 0.9513060236042091, "learning_rate": 4.619964343773692e-07, "loss": 0.1753, "step": 6424 }, { "epoch": 0.87, "grad_norm": 0.8666653119867324, "learning_rate": 4.6108003924148494e-07, "loss": 0.1093, "step": 6425 }, { "epoch": 0.87, "grad_norm": 0.85487304570555, "learning_rate": 4.6016450993462855e-07, "loss": 0.146, "step": 6426 }, { "epoch": 0.87, "grad_norm": 1.1136577620509371, "learning_rate": 4.592498466314449e-07, "loss": 0.2144, "step": 6427 }, { "epoch": 0.87, "grad_norm": 0.7011679828914752, "learning_rate": 4.5833604950641096e-07, "loss": 0.1248, "step": 6428 }, { "epoch": 0.87, "grad_norm": 0.8470328694884893, "learning_rate": 4.5742311873384013e-07, "loss": 0.0955, "step": 6429 }, { "epoch": 0.87, "grad_norm": 0.6837451875300236, "learning_rate": 4.565110544878798e-07, "loss": 0.104, "step": 6430 }, { "epoch": 0.87, "grad_norm": 0.8954120052792939, "learning_rate": 4.5559985694251186e-07, "loss": 0.1405, "step": 6431 }, { "epoch": 0.87, "grad_norm": 1.1019082372884303, "learning_rate": 4.546895262715539e-07, "loss": 0.1863, "step": 6432 }, { "epoch": 0.87, "grad_norm": 1.0381878063235304, "learning_rate": 4.537800626486577e-07, "loss": 0.1591, "step": 6433 }, { "epoch": 0.87, "grad_norm": 1.1127247709874724, "learning_rate": 4.528714662473088e-07, "loss": 0.1533, "step": 6434 }, { "epoch": 0.87, "grad_norm": 0.646117499795589, "learning_rate": 4.5196373724082754e-07, "loss": 0.0855, "step": 6435 }, { "epoch": 0.87, "grad_norm": 1.0335400954746639, "learning_rate": 4.510568758023709e-07, "loss": 0.141, "step": 6436 }, { "epoch": 0.87, "grad_norm": 0.9958253356718576, "learning_rate": 4.5015088210492717e-07, "loss": 0.1774, "step": 6437 }, { "epoch": 0.87, "grad_norm": 0.8166083199526809, "learning_rate": 4.492457563213226e-07, "loss": 0.1083, "step": 6438 }, { "epoch": 0.87, "grad_norm": 0.9716060690372261, "learning_rate": 4.4834149862421403e-07, "loss": 0.1636, "step": 6439 }, { "epoch": 0.87, "grad_norm": 1.0405519442068278, "learning_rate": 4.4743810918609463e-07, "loss": 0.1545, "step": 6440 }, { "epoch": 0.87, "grad_norm": 0.8819017154034535, "learning_rate": 4.4653558817929264e-07, "loss": 0.1561, "step": 6441 }, { "epoch": 0.87, "grad_norm": 1.13205047983376, "learning_rate": 4.4563393577596925e-07, "loss": 0.1848, "step": 6442 }, { "epoch": 0.87, "grad_norm": 0.903405096997521, "learning_rate": 4.4473315214812127e-07, "loss": 0.1521, "step": 6443 }, { "epoch": 0.87, "grad_norm": 1.171827189825303, "learning_rate": 4.438332374675791e-07, "loss": 0.2009, "step": 6444 }, { "epoch": 0.87, "grad_norm": 0.869932608833549, "learning_rate": 4.4293419190600764e-07, "loss": 0.1351, "step": 6445 }, { "epoch": 0.87, "grad_norm": 0.7694561381004996, "learning_rate": 4.420360156349041e-07, "loss": 0.1486, "step": 6446 }, { "epoch": 0.87, "grad_norm": 1.0374554250753323, "learning_rate": 4.411387088256036e-07, "loss": 0.1521, "step": 6447 }, { "epoch": 0.87, "grad_norm": 1.074006989542153, "learning_rate": 4.402422716492727e-07, "loss": 0.1948, "step": 6448 }, { "epoch": 0.87, "grad_norm": 0.8598074327594086, "learning_rate": 4.3934670427691284e-07, "loss": 0.1209, "step": 6449 }, { "epoch": 0.87, "grad_norm": 1.0412107908330104, "learning_rate": 4.384520068793591e-07, "loss": 0.152, "step": 6450 }, { "epoch": 0.87, "grad_norm": 0.8705221625334096, "learning_rate": 4.3755817962728096e-07, "loss": 0.1351, "step": 6451 }, { "epoch": 0.87, "grad_norm": 0.9274617552430099, "learning_rate": 4.3666522269118215e-07, "loss": 0.1902, "step": 6452 }, { "epoch": 0.87, "grad_norm": 0.9976743129947836, "learning_rate": 4.357731362413997e-07, "loss": 0.1725, "step": 6453 }, { "epoch": 0.87, "grad_norm": 0.9340032688502382, "learning_rate": 4.3488192044810586e-07, "loss": 0.1457, "step": 6454 }, { "epoch": 0.87, "grad_norm": 1.0530111800883135, "learning_rate": 4.3399157548130565e-07, "loss": 0.1705, "step": 6455 }, { "epoch": 0.87, "grad_norm": 0.8170778766830011, "learning_rate": 4.331021015108372e-07, "loss": 0.1388, "step": 6456 }, { "epoch": 0.87, "grad_norm": 0.7546400708734039, "learning_rate": 4.322134987063731e-07, "loss": 0.1376, "step": 6457 }, { "epoch": 0.87, "grad_norm": 0.8107502280700065, "learning_rate": 4.313257672374227e-07, "loss": 0.1048, "step": 6458 }, { "epoch": 0.87, "grad_norm": 1.0726992280511949, "learning_rate": 4.30438907273325e-07, "loss": 0.1539, "step": 6459 }, { "epoch": 0.87, "grad_norm": 1.055569913458303, "learning_rate": 4.2955291898325536e-07, "loss": 0.1523, "step": 6460 }, { "epoch": 0.87, "grad_norm": 0.9616161707263087, "learning_rate": 4.286678025362212e-07, "loss": 0.1468, "step": 6461 }, { "epoch": 0.87, "grad_norm": 0.953575077812341, "learning_rate": 4.277835581010642e-07, "loss": 0.1271, "step": 6462 }, { "epoch": 0.87, "grad_norm": 1.0355726775644365, "learning_rate": 4.2690018584645996e-07, "loss": 0.1959, "step": 6463 }, { "epoch": 0.87, "grad_norm": 1.213549376469964, "learning_rate": 4.260176859409182e-07, "loss": 0.2098, "step": 6464 }, { "epoch": 0.87, "grad_norm": 1.0115768362932047, "learning_rate": 4.2513605855278085e-07, "loss": 0.172, "step": 6465 }, { "epoch": 0.87, "grad_norm": 0.8909129033414762, "learning_rate": 4.242553038502245e-07, "loss": 0.1407, "step": 6466 }, { "epoch": 0.87, "grad_norm": 0.743871526709058, "learning_rate": 4.2337542200125926e-07, "loss": 0.1299, "step": 6467 }, { "epoch": 0.87, "grad_norm": 0.6488685374319328, "learning_rate": 4.2249641317372804e-07, "loss": 0.1156, "step": 6468 }, { "epoch": 0.87, "grad_norm": 0.8448071152873876, "learning_rate": 4.216182775353073e-07, "loss": 0.1654, "step": 6469 }, { "epoch": 0.87, "grad_norm": 0.7087261492290345, "learning_rate": 4.20741015253508e-07, "loss": 0.1179, "step": 6470 }, { "epoch": 0.87, "grad_norm": 0.8868306110551291, "learning_rate": 4.1986462649567294e-07, "loss": 0.1435, "step": 6471 }, { "epoch": 0.87, "grad_norm": 0.8161106589166939, "learning_rate": 4.18989111428979e-07, "loss": 0.1262, "step": 6472 }, { "epoch": 0.87, "grad_norm": 0.917232189949663, "learning_rate": 4.181144702204376e-07, "loss": 0.1515, "step": 6473 }, { "epoch": 0.87, "grad_norm": 0.581779098374162, "learning_rate": 4.1724070303689136e-07, "loss": 0.1265, "step": 6474 }, { "epoch": 0.87, "grad_norm": 1.107703673919751, "learning_rate": 4.163678100450175e-07, "loss": 0.1833, "step": 6475 }, { "epoch": 0.87, "grad_norm": 0.896800873808154, "learning_rate": 4.1549579141132615e-07, "loss": 0.1635, "step": 6476 }, { "epoch": 0.87, "grad_norm": 0.9908288057270572, "learning_rate": 4.1462464730216033e-07, "loss": 0.1581, "step": 6477 }, { "epoch": 0.87, "grad_norm": 0.7440298303116876, "learning_rate": 4.1375437788369667e-07, "loss": 0.1335, "step": 6478 }, { "epoch": 0.87, "grad_norm": 1.282407051469742, "learning_rate": 4.128849833219456e-07, "loss": 0.1912, "step": 6479 }, { "epoch": 0.87, "grad_norm": 1.081437284401157, "learning_rate": 4.1201646378274785e-07, "loss": 0.197, "step": 6480 }, { "epoch": 0.87, "grad_norm": 1.1757527714252658, "learning_rate": 4.1114881943178143e-07, "loss": 0.2061, "step": 6481 }, { "epoch": 0.87, "grad_norm": 1.155212154795072, "learning_rate": 4.102820504345545e-07, "loss": 0.1693, "step": 6482 }, { "epoch": 0.87, "grad_norm": 0.9712598078953576, "learning_rate": 4.0941615695640867e-07, "loss": 0.1895, "step": 6483 }, { "epoch": 0.87, "grad_norm": 0.7471690672061314, "learning_rate": 4.085511391625208e-07, "loss": 0.1246, "step": 6484 }, { "epoch": 0.87, "grad_norm": 1.1826021928746024, "learning_rate": 4.076869972178954e-07, "loss": 0.208, "step": 6485 }, { "epoch": 0.87, "grad_norm": 0.9552185486071433, "learning_rate": 4.0682373128737517e-07, "loss": 0.1465, "step": 6486 }, { "epoch": 0.87, "grad_norm": 0.8788915218191641, "learning_rate": 4.0596134153563395e-07, "loss": 0.1418, "step": 6487 }, { "epoch": 0.87, "grad_norm": 0.9850177161247985, "learning_rate": 4.050998281271773e-07, "loss": 0.1878, "step": 6488 }, { "epoch": 0.88, "grad_norm": 1.0057863075292472, "learning_rate": 4.0423919122634547e-07, "loss": 0.1648, "step": 6489 }, { "epoch": 0.88, "grad_norm": 0.9703155084764146, "learning_rate": 4.0337943099731046e-07, "loss": 0.1614, "step": 6490 }, { "epoch": 0.88, "grad_norm": 1.2780864770273908, "learning_rate": 4.025205476040761e-07, "loss": 0.2051, "step": 6491 }, { "epoch": 0.88, "grad_norm": 0.8821344164709313, "learning_rate": 4.016625412104824e-07, "loss": 0.1357, "step": 6492 }, { "epoch": 0.88, "grad_norm": 0.9302304373117717, "learning_rate": 4.008054119801985e-07, "loss": 0.1476, "step": 6493 }, { "epoch": 0.88, "grad_norm": 0.8109677424672546, "learning_rate": 3.999491600767269e-07, "loss": 0.1444, "step": 6494 }, { "epoch": 0.88, "grad_norm": 1.2689648179918647, "learning_rate": 3.9909378566340417e-07, "loss": 0.192, "step": 6495 }, { "epoch": 0.88, "grad_norm": 1.317319543632297, "learning_rate": 3.982392889033987e-07, "loss": 0.1801, "step": 6496 }, { "epoch": 0.88, "grad_norm": 1.097641840689414, "learning_rate": 3.973856699597106e-07, "loss": 0.1926, "step": 6497 }, { "epoch": 0.88, "grad_norm": 0.865846921185871, "learning_rate": 3.9653292899517414e-07, "loss": 0.1461, "step": 6498 }, { "epoch": 0.88, "grad_norm": 0.989683764736022, "learning_rate": 3.956810661724547e-07, "loss": 0.1696, "step": 6499 }, { "epoch": 0.88, "grad_norm": 0.9756648782807744, "learning_rate": 3.9483008165405123e-07, "loss": 0.1817, "step": 6500 }, { "epoch": 0.88, "grad_norm": 0.5613915559447358, "learning_rate": 3.9397997560229437e-07, "loss": 0.1131, "step": 6501 }, { "epoch": 0.88, "grad_norm": 1.008566720028377, "learning_rate": 3.9313074817934783e-07, "loss": 0.1527, "step": 6502 }, { "epoch": 0.88, "grad_norm": 1.1722708014827223, "learning_rate": 3.9228239954720693e-07, "loss": 0.179, "step": 6503 }, { "epoch": 0.88, "grad_norm": 1.096787406809332, "learning_rate": 3.9143492986770007e-07, "loss": 0.1673, "step": 6504 }, { "epoch": 0.88, "grad_norm": 1.0682586899980069, "learning_rate": 3.905883393024873e-07, "loss": 0.2069, "step": 6505 }, { "epoch": 0.88, "grad_norm": 1.264826663646371, "learning_rate": 3.897426280130617e-07, "loss": 0.21, "step": 6506 }, { "epoch": 0.88, "grad_norm": 0.5669285699780408, "learning_rate": 3.888977961607482e-07, "loss": 0.1055, "step": 6507 }, { "epoch": 0.88, "grad_norm": 1.0185287585168588, "learning_rate": 3.880538439067039e-07, "loss": 0.1617, "step": 6508 }, { "epoch": 0.88, "grad_norm": 1.0171604367433305, "learning_rate": 3.872107714119189e-07, "loss": 0.1687, "step": 6509 }, { "epoch": 0.88, "grad_norm": 1.0556227379999779, "learning_rate": 3.863685788372146e-07, "loss": 0.1848, "step": 6510 }, { "epoch": 0.88, "grad_norm": 0.6028610893912756, "learning_rate": 3.8552726634324413e-07, "loss": 0.109, "step": 6511 }, { "epoch": 0.88, "grad_norm": 0.7670237576698306, "learning_rate": 3.8468683409049466e-07, "loss": 0.1694, "step": 6512 }, { "epoch": 0.88, "grad_norm": 0.9934111821423252, "learning_rate": 3.838472822392819e-07, "loss": 0.1359, "step": 6513 }, { "epoch": 0.88, "grad_norm": 1.003227345760076, "learning_rate": 3.8300861094975997e-07, "loss": 0.1791, "step": 6514 }, { "epoch": 0.88, "grad_norm": 1.064978338384462, "learning_rate": 3.821708203819069e-07, "loss": 0.1894, "step": 6515 }, { "epoch": 0.88, "grad_norm": 1.0130215871248245, "learning_rate": 3.813339106955388e-07, "loss": 0.1855, "step": 6516 }, { "epoch": 0.88, "grad_norm": 1.0222025365445833, "learning_rate": 3.8049788205030136e-07, "loss": 0.1604, "step": 6517 }, { "epoch": 0.88, "grad_norm": 0.9056031836229449, "learning_rate": 3.796627346056725e-07, "loss": 0.1416, "step": 6518 }, { "epoch": 0.88, "grad_norm": 1.151253380161323, "learning_rate": 3.78828468520962e-07, "loss": 0.1908, "step": 6519 }, { "epoch": 0.88, "grad_norm": 0.8883815463969833, "learning_rate": 3.7799508395531206e-07, "loss": 0.133, "step": 6520 }, { "epoch": 0.88, "grad_norm": 0.9875291166161327, "learning_rate": 3.77162581067696e-07, "loss": 0.1619, "step": 6521 }, { "epoch": 0.88, "grad_norm": 1.309152225975833, "learning_rate": 3.763309600169196e-07, "loss": 0.2586, "step": 6522 }, { "epoch": 0.88, "grad_norm": 0.9295171634152507, "learning_rate": 3.755002209616193e-07, "loss": 0.1317, "step": 6523 }, { "epoch": 0.88, "grad_norm": 0.9364247972245481, "learning_rate": 3.746703640602639e-07, "loss": 0.1477, "step": 6524 }, { "epoch": 0.88, "grad_norm": 1.0878118754137895, "learning_rate": 3.7384138947115565e-07, "loss": 0.1489, "step": 6525 }, { "epoch": 0.88, "grad_norm": 0.8301701944822767, "learning_rate": 3.730132973524264e-07, "loss": 0.1715, "step": 6526 }, { "epoch": 0.88, "grad_norm": 0.7107326026389474, "learning_rate": 3.721860878620398e-07, "loss": 0.1302, "step": 6527 }, { "epoch": 0.88, "grad_norm": 1.0219976931003387, "learning_rate": 3.7135976115779127e-07, "loss": 0.1654, "step": 6528 }, { "epoch": 0.88, "grad_norm": 0.8994930083728846, "learning_rate": 3.705343173973086e-07, "loss": 0.1677, "step": 6529 }, { "epoch": 0.88, "grad_norm": 0.7993534409902192, "learning_rate": 3.697097567380503e-07, "loss": 0.1211, "step": 6530 }, { "epoch": 0.88, "grad_norm": 0.8433669191744421, "learning_rate": 3.688860793373078e-07, "loss": 0.1475, "step": 6531 }, { "epoch": 0.88, "grad_norm": 0.642017097339313, "learning_rate": 3.6806328535220147e-07, "loss": 0.0871, "step": 6532 }, { "epoch": 0.88, "grad_norm": 0.8956533472298287, "learning_rate": 3.672413749396853e-07, "loss": 0.1557, "step": 6533 }, { "epoch": 0.88, "grad_norm": 0.8549453310518719, "learning_rate": 3.6642034825654427e-07, "loss": 0.124, "step": 6534 }, { "epoch": 0.88, "grad_norm": 1.0403827706534647, "learning_rate": 3.656002054593949e-07, "loss": 0.1674, "step": 6535 }, { "epoch": 0.88, "grad_norm": 0.8813547320067389, "learning_rate": 3.6478094670468367e-07, "loss": 0.1211, "step": 6536 }, { "epoch": 0.88, "grad_norm": 0.7598998566142101, "learning_rate": 3.639625721486911e-07, "loss": 0.104, "step": 6537 }, { "epoch": 0.88, "grad_norm": 0.9000704563939006, "learning_rate": 3.631450819475274e-07, "loss": 0.1417, "step": 6538 }, { "epoch": 0.88, "grad_norm": 1.0877764961658094, "learning_rate": 3.6232847625713386e-07, "loss": 0.1819, "step": 6539 }, { "epoch": 0.88, "grad_norm": 0.8388659602356552, "learning_rate": 3.615127552332831e-07, "loss": 0.1522, "step": 6540 }, { "epoch": 0.88, "grad_norm": 0.9662056558674348, "learning_rate": 3.606979190315796e-07, "loss": 0.1654, "step": 6541 }, { "epoch": 0.88, "grad_norm": 0.9706670629200765, "learning_rate": 3.5988396780745836e-07, "loss": 0.1646, "step": 6542 }, { "epoch": 0.88, "grad_norm": 1.0814522148929162, "learning_rate": 3.59070901716187e-07, "loss": 0.1963, "step": 6543 }, { "epoch": 0.88, "grad_norm": 0.7558270547264629, "learning_rate": 3.582587209128624e-07, "loss": 0.1101, "step": 6544 }, { "epoch": 0.88, "grad_norm": 0.8978317037987603, "learning_rate": 3.5744742555241354e-07, "loss": 0.1413, "step": 6545 }, { "epoch": 0.88, "grad_norm": 0.9096836905331253, "learning_rate": 3.5663701578960096e-07, "loss": 0.121, "step": 6546 }, { "epoch": 0.88, "grad_norm": 1.00582611429529, "learning_rate": 3.5582749177901497e-07, "loss": 0.1853, "step": 6547 }, { "epoch": 0.88, "grad_norm": 1.1640381679664376, "learning_rate": 3.550188536750776e-07, "loss": 0.1898, "step": 6548 }, { "epoch": 0.88, "grad_norm": 0.6902101338254265, "learning_rate": 3.542111016320421e-07, "loss": 0.1301, "step": 6549 }, { "epoch": 0.88, "grad_norm": 0.955197945022915, "learning_rate": 3.5340423580399296e-07, "loss": 0.1647, "step": 6550 }, { "epoch": 0.88, "grad_norm": 1.140311955598939, "learning_rate": 3.525982563448449e-07, "loss": 0.1986, "step": 6551 }, { "epoch": 0.88, "grad_norm": 0.95068368921538, "learning_rate": 3.5179316340834377e-07, "loss": 0.1726, "step": 6552 }, { "epoch": 0.88, "grad_norm": 1.0281036899925298, "learning_rate": 3.5098895714806625e-07, "loss": 0.1951, "step": 6553 }, { "epoch": 0.88, "grad_norm": 0.7488986129334169, "learning_rate": 3.501856377174201e-07, "loss": 0.1466, "step": 6554 }, { "epoch": 0.88, "grad_norm": 1.0974802154508676, "learning_rate": 3.493832052696439e-07, "loss": 0.1687, "step": 6555 }, { "epoch": 0.88, "grad_norm": 1.3699203070728392, "learning_rate": 3.485816599578068e-07, "loss": 0.2349, "step": 6556 }, { "epoch": 0.88, "grad_norm": 0.814473312491895, "learning_rate": 3.477810019348088e-07, "loss": 0.1212, "step": 6557 }, { "epoch": 0.88, "grad_norm": 0.8310675606196032, "learning_rate": 3.4698123135338047e-07, "loss": 0.1131, "step": 6558 }, { "epoch": 0.88, "grad_norm": 1.0879481998263407, "learning_rate": 3.4618234836608424e-07, "loss": 0.1687, "step": 6559 }, { "epoch": 0.88, "grad_norm": 1.1889208860488423, "learning_rate": 3.4538435312531207e-07, "loss": 0.2106, "step": 6560 }, { "epoch": 0.88, "grad_norm": 1.1244545905105425, "learning_rate": 3.4458724578328774e-07, "loss": 0.2091, "step": 6561 }, { "epoch": 0.88, "grad_norm": 1.1082101657036865, "learning_rate": 3.437910264920624e-07, "loss": 0.2006, "step": 6562 }, { "epoch": 0.88, "grad_norm": 1.0402475983565507, "learning_rate": 3.429956954035213e-07, "loss": 0.1414, "step": 6563 }, { "epoch": 0.89, "grad_norm": 1.2047701986373314, "learning_rate": 3.4220125266937967e-07, "loss": 0.2273, "step": 6564 }, { "epoch": 0.89, "grad_norm": 0.8768276410422384, "learning_rate": 3.414076984411818e-07, "loss": 0.154, "step": 6565 }, { "epoch": 0.89, "grad_norm": 1.0809589769565038, "learning_rate": 3.4061503287030393e-07, "loss": 0.164, "step": 6566 }, { "epoch": 0.89, "grad_norm": 1.136133764892567, "learning_rate": 3.398232561079523e-07, "loss": 0.1359, "step": 6567 }, { "epoch": 0.89, "grad_norm": 0.8034942994577804, "learning_rate": 3.3903236830516384e-07, "loss": 0.1336, "step": 6568 }, { "epoch": 0.89, "grad_norm": 0.9982189516096012, "learning_rate": 3.38242369612804e-07, "loss": 0.1985, "step": 6569 }, { "epoch": 0.89, "grad_norm": 1.02777525926754, "learning_rate": 3.374532601815722e-07, "loss": 0.1591, "step": 6570 }, { "epoch": 0.89, "grad_norm": 1.0826231575601886, "learning_rate": 3.3666504016199586e-07, "loss": 0.1693, "step": 6571 }, { "epoch": 0.89, "grad_norm": 0.9733790090647892, "learning_rate": 3.35877709704433e-07, "loss": 0.1225, "step": 6572 }, { "epoch": 0.89, "grad_norm": 0.8063944014097106, "learning_rate": 3.3509126895907186e-07, "loss": 0.1497, "step": 6573 }, { "epoch": 0.89, "grad_norm": 0.7984358656295524, "learning_rate": 3.343057180759313e-07, "loss": 0.1191, "step": 6574 }, { "epoch": 0.89, "grad_norm": 0.8268560964927839, "learning_rate": 3.3352105720486027e-07, "loss": 0.1332, "step": 6575 }, { "epoch": 0.89, "grad_norm": 1.015142484497892, "learning_rate": 3.3273728649553863e-07, "loss": 0.1403, "step": 6576 }, { "epoch": 0.89, "grad_norm": 0.8574767530874033, "learning_rate": 3.3195440609747496e-07, "loss": 0.1568, "step": 6577 }, { "epoch": 0.89, "grad_norm": 0.8976287344952174, "learning_rate": 3.311724161600105e-07, "loss": 0.1775, "step": 6578 }, { "epoch": 0.89, "grad_norm": 0.7736575301619936, "learning_rate": 3.303913168323131e-07, "loss": 0.1046, "step": 6579 }, { "epoch": 0.89, "grad_norm": 0.7322249549765311, "learning_rate": 3.296111082633824e-07, "loss": 0.1372, "step": 6580 }, { "epoch": 0.89, "grad_norm": 0.9280715034259762, "learning_rate": 3.2883179060204994e-07, "loss": 0.1449, "step": 6581 }, { "epoch": 0.89, "grad_norm": 1.1052150353396726, "learning_rate": 3.280533639969752e-07, "loss": 0.1721, "step": 6582 }, { "epoch": 0.89, "grad_norm": 0.9922173758037507, "learning_rate": 3.272758285966482e-07, "loss": 0.1826, "step": 6583 }, { "epoch": 0.89, "grad_norm": 0.7875461382178235, "learning_rate": 3.2649918454938866e-07, "loss": 0.1576, "step": 6584 }, { "epoch": 0.89, "grad_norm": 0.8779228830794216, "learning_rate": 3.2572343200334646e-07, "loss": 0.1762, "step": 6585 }, { "epoch": 0.89, "grad_norm": 0.7658826541318579, "learning_rate": 3.24948571106502e-07, "loss": 0.1635, "step": 6586 }, { "epoch": 0.89, "grad_norm": 0.8411251851074788, "learning_rate": 3.241746020066655e-07, "loss": 0.1447, "step": 6587 }, { "epoch": 0.89, "grad_norm": 1.1134114911987871, "learning_rate": 3.2340152485147544e-07, "loss": 0.1691, "step": 6588 }, { "epoch": 0.89, "grad_norm": 0.856687338466489, "learning_rate": 3.226293397884028e-07, "loss": 0.1446, "step": 6589 }, { "epoch": 0.89, "grad_norm": 0.9504562480092675, "learning_rate": 3.2185804696474586e-07, "loss": 0.15, "step": 6590 }, { "epoch": 0.89, "grad_norm": 1.2006452986744334, "learning_rate": 3.2108764652763467e-07, "loss": 0.183, "step": 6591 }, { "epoch": 0.89, "grad_norm": 1.1917619237533197, "learning_rate": 3.203181386240273e-07, "loss": 0.2188, "step": 6592 }, { "epoch": 0.89, "grad_norm": 0.9488561665167081, "learning_rate": 3.1954952340071344e-07, "loss": 0.1496, "step": 6593 }, { "epoch": 0.89, "grad_norm": 1.0120851279874687, "learning_rate": 3.187818010043109e-07, "loss": 0.1271, "step": 6594 }, { "epoch": 0.89, "grad_norm": 0.7736068664262942, "learning_rate": 3.1801497158126806e-07, "loss": 0.1423, "step": 6595 }, { "epoch": 0.89, "grad_norm": 0.8292479212760469, "learning_rate": 3.1724903527786233e-07, "loss": 0.1636, "step": 6596 }, { "epoch": 0.89, "grad_norm": 0.7028020536128271, "learning_rate": 3.164839922402019e-07, "loss": 0.0989, "step": 6597 }, { "epoch": 0.89, "grad_norm": 1.0252751921226475, "learning_rate": 3.1571984261422275e-07, "loss": 0.1485, "step": 6598 }, { "epoch": 0.89, "grad_norm": 1.0309274672242745, "learning_rate": 3.1495658654569273e-07, "loss": 0.1847, "step": 6599 }, { "epoch": 0.89, "grad_norm": 0.9886273596206779, "learning_rate": 3.1419422418020706e-07, "loss": 0.1439, "step": 6600 }, { "epoch": 0.89, "grad_norm": 1.1299984692462481, "learning_rate": 3.1343275566319166e-07, "loss": 0.1864, "step": 6601 }, { "epoch": 0.89, "grad_norm": 1.0895693647275881, "learning_rate": 3.126721811399019e-07, "loss": 0.1819, "step": 6602 }, { "epoch": 0.89, "grad_norm": 0.9584299114018205, "learning_rate": 3.1191250075542137e-07, "loss": 0.1597, "step": 6603 }, { "epoch": 0.89, "grad_norm": 1.013272709964075, "learning_rate": 3.111537146546656e-07, "loss": 0.1507, "step": 6604 }, { "epoch": 0.89, "grad_norm": 0.6960865398307213, "learning_rate": 3.1039582298237723e-07, "loss": 0.1158, "step": 6605 }, { "epoch": 0.89, "grad_norm": 0.9649240524826111, "learning_rate": 3.0963882588312956e-07, "loss": 0.1537, "step": 6606 }, { "epoch": 0.89, "grad_norm": 0.9512347960010288, "learning_rate": 3.0888272350132474e-07, "loss": 0.1695, "step": 6607 }, { "epoch": 0.89, "grad_norm": 0.8354475531704867, "learning_rate": 3.0812751598119474e-07, "loss": 0.1287, "step": 6608 }, { "epoch": 0.89, "grad_norm": 1.3614181477512404, "learning_rate": 3.073732034667992e-07, "loss": 0.1722, "step": 6609 }, { "epoch": 0.89, "grad_norm": 1.128437758210328, "learning_rate": 3.066197861020287e-07, "loss": 0.1817, "step": 6610 }, { "epoch": 0.89, "grad_norm": 1.278323403117601, "learning_rate": 3.058672640306021e-07, "loss": 0.2203, "step": 6611 }, { "epoch": 0.89, "grad_norm": 0.927759220749876, "learning_rate": 3.051156373960695e-07, "loss": 0.1461, "step": 6612 }, { "epoch": 0.89, "grad_norm": 0.7852338785709027, "learning_rate": 3.0436490634180693e-07, "loss": 0.1497, "step": 6613 }, { "epoch": 0.89, "grad_norm": 0.9150770539355506, "learning_rate": 3.0361507101102194e-07, "loss": 0.1629, "step": 6614 }, { "epoch": 0.89, "grad_norm": 1.0524752225839542, "learning_rate": 3.0286613154675125e-07, "loss": 0.1965, "step": 6615 }, { "epoch": 0.89, "grad_norm": 1.1172043250432258, "learning_rate": 3.021180880918595e-07, "loss": 0.1681, "step": 6616 }, { "epoch": 0.89, "grad_norm": 0.9713451782661265, "learning_rate": 3.013709407890408e-07, "loss": 0.1956, "step": 6617 }, { "epoch": 0.89, "grad_norm": 1.0129546656229351, "learning_rate": 3.006246897808185e-07, "loss": 0.1558, "step": 6618 }, { "epoch": 0.89, "grad_norm": 1.1158046260658976, "learning_rate": 2.998793352095453e-07, "loss": 0.2053, "step": 6619 }, { "epoch": 0.89, "grad_norm": 0.7012829817839722, "learning_rate": 2.991348772174024e-07, "loss": 0.1635, "step": 6620 }, { "epoch": 0.89, "grad_norm": 0.7621891692892512, "learning_rate": 2.9839131594639967e-07, "loss": 0.1286, "step": 6621 }, { "epoch": 0.89, "grad_norm": 0.7692795225306365, "learning_rate": 2.976486515383764e-07, "loss": 0.1412, "step": 6622 }, { "epoch": 0.89, "grad_norm": 0.8686282571969726, "learning_rate": 2.969068841350009e-07, "loss": 0.1287, "step": 6623 }, { "epoch": 0.89, "grad_norm": 0.7328120633589157, "learning_rate": 2.9616601387777053e-07, "loss": 0.1339, "step": 6624 }, { "epoch": 0.89, "grad_norm": 1.2295634185224427, "learning_rate": 2.954260409080106e-07, "loss": 0.2054, "step": 6625 }, { "epoch": 0.89, "grad_norm": 0.975385218802498, "learning_rate": 2.946869653668766e-07, "loss": 0.1724, "step": 6626 }, { "epoch": 0.89, "grad_norm": 0.9944115560919254, "learning_rate": 2.9394878739535115e-07, "loss": 0.1763, "step": 6627 }, { "epoch": 0.89, "grad_norm": 0.5561884290660416, "learning_rate": 2.932115071342473e-07, "loss": 0.0937, "step": 6628 }, { "epoch": 0.89, "grad_norm": 0.8211377836272143, "learning_rate": 2.9247512472420637e-07, "loss": 0.143, "step": 6629 }, { "epoch": 0.89, "grad_norm": 0.6816106400405629, "learning_rate": 2.917396403056971e-07, "loss": 0.1272, "step": 6630 }, { "epoch": 0.89, "grad_norm": 0.7874203318456808, "learning_rate": 2.9100505401901903e-07, "loss": 0.1197, "step": 6631 }, { "epoch": 0.89, "grad_norm": 0.9541211153683593, "learning_rate": 2.9027136600429827e-07, "loss": 0.1474, "step": 6632 }, { "epoch": 0.89, "grad_norm": 1.0419436612363782, "learning_rate": 2.895385764014919e-07, "loss": 0.1675, "step": 6633 }, { "epoch": 0.89, "grad_norm": 0.7313657012592987, "learning_rate": 2.88806685350384e-07, "loss": 0.0897, "step": 6634 }, { "epoch": 0.89, "grad_norm": 1.1751544951110067, "learning_rate": 2.8807569299058704e-07, "loss": 0.1832, "step": 6635 }, { "epoch": 0.89, "grad_norm": 0.87394548536581, "learning_rate": 2.873455994615437e-07, "loss": 0.1567, "step": 6636 }, { "epoch": 0.89, "grad_norm": 0.848540032767753, "learning_rate": 2.8661640490252275e-07, "loss": 0.1624, "step": 6637 }, { "epoch": 0.9, "grad_norm": 1.1583998791422128, "learning_rate": 2.8588810945262444e-07, "loss": 0.1626, "step": 6638 }, { "epoch": 0.9, "grad_norm": 1.0481943384045274, "learning_rate": 2.851607132507744e-07, "loss": 0.1344, "step": 6639 }, { "epoch": 0.9, "grad_norm": 0.7557330697740667, "learning_rate": 2.8443421643573e-07, "loss": 0.1443, "step": 6640 }, { "epoch": 0.9, "grad_norm": 1.079365388849619, "learning_rate": 2.837086191460736e-07, "loss": 0.1537, "step": 6641 }, { "epoch": 0.9, "grad_norm": 0.8645301192705651, "learning_rate": 2.8298392152021846e-07, "loss": 0.1257, "step": 6642 }, { "epoch": 0.9, "grad_norm": 0.9452217071725098, "learning_rate": 2.8226012369640567e-07, "loss": 0.1708, "step": 6643 }, { "epoch": 0.9, "grad_norm": 0.6575937846407753, "learning_rate": 2.8153722581270414e-07, "loss": 0.0842, "step": 6644 }, { "epoch": 0.9, "grad_norm": 0.85116681734894, "learning_rate": 2.80815228007012e-07, "loss": 0.1304, "step": 6645 }, { "epoch": 0.9, "grad_norm": 0.8196352757830315, "learning_rate": 2.80094130417054e-07, "loss": 0.14, "step": 6646 }, { "epoch": 0.9, "grad_norm": 1.0628025874867095, "learning_rate": 2.793739331803852e-07, "loss": 0.1947, "step": 6647 }, { "epoch": 0.9, "grad_norm": 1.1177429552312959, "learning_rate": 2.786546364343867e-07, "loss": 0.1757, "step": 6648 }, { "epoch": 0.9, "grad_norm": 0.7312960174397884, "learning_rate": 2.7793624031627107e-07, "loss": 0.127, "step": 6649 }, { "epoch": 0.9, "grad_norm": 0.7359838654132836, "learning_rate": 2.7721874496307645e-07, "loss": 0.1435, "step": 6650 }, { "epoch": 0.9, "grad_norm": 0.9826294479012873, "learning_rate": 2.765021505116694e-07, "loss": 0.1449, "step": 6651 }, { "epoch": 0.9, "grad_norm": 0.8377987551786372, "learning_rate": 2.7578645709874453e-07, "loss": 0.1547, "step": 6652 }, { "epoch": 0.9, "grad_norm": 1.004328620497427, "learning_rate": 2.750716648608265e-07, "loss": 0.1522, "step": 6653 }, { "epoch": 0.9, "grad_norm": 0.7000974968985629, "learning_rate": 2.743577739342662e-07, "loss": 0.1293, "step": 6654 }, { "epoch": 0.9, "grad_norm": 1.026127375034486, "learning_rate": 2.7364478445524257e-07, "loss": 0.2004, "step": 6655 }, { "epoch": 0.9, "grad_norm": 0.6239615986397823, "learning_rate": 2.7293269655976287e-07, "loss": 0.0863, "step": 6656 }, { "epoch": 0.9, "grad_norm": 0.993312021105059, "learning_rate": 2.722215103836634e-07, "loss": 0.1849, "step": 6657 }, { "epoch": 0.9, "grad_norm": 1.0139377405710521, "learning_rate": 2.7151122606260683e-07, "loss": 0.188, "step": 6658 }, { "epoch": 0.9, "grad_norm": 1.190791376272607, "learning_rate": 2.708018437320842e-07, "loss": 0.2356, "step": 6659 }, { "epoch": 0.9, "grad_norm": 0.9823940295867932, "learning_rate": 2.700933635274167e-07, "loss": 0.1693, "step": 6660 }, { "epoch": 0.9, "grad_norm": 0.9885278584573925, "learning_rate": 2.693857855837501e-07, "loss": 0.1654, "step": 6661 }, { "epoch": 0.9, "grad_norm": 1.157938190274593, "learning_rate": 2.6867911003606083e-07, "loss": 0.1793, "step": 6662 }, { "epoch": 0.9, "grad_norm": 0.8425218021527714, "learning_rate": 2.6797333701915064e-07, "loss": 0.1553, "step": 6663 }, { "epoch": 0.9, "grad_norm": 0.9835979923039619, "learning_rate": 2.6726846666765115e-07, "loss": 0.1561, "step": 6664 }, { "epoch": 0.9, "grad_norm": 1.0083558905833165, "learning_rate": 2.6656449911602047e-07, "loss": 0.1678, "step": 6665 }, { "epoch": 0.9, "grad_norm": 0.8986191501543311, "learning_rate": 2.6586143449854553e-07, "loss": 0.1278, "step": 6666 }, { "epoch": 0.9, "grad_norm": 0.8114877995375229, "learning_rate": 2.6515927294934074e-07, "loss": 0.1196, "step": 6667 }, { "epoch": 0.9, "grad_norm": 0.8275420902361819, "learning_rate": 2.6445801460234786e-07, "loss": 0.1251, "step": 6668 }, { "epoch": 0.9, "grad_norm": 0.8567256928654707, "learning_rate": 2.6375765959133605e-07, "loss": 0.1206, "step": 6669 }, { "epoch": 0.9, "grad_norm": 0.8239166944593588, "learning_rate": 2.6305820804990336e-07, "loss": 0.1132, "step": 6670 }, { "epoch": 0.9, "grad_norm": 0.8002047464781497, "learning_rate": 2.6235966011147474e-07, "loss": 0.113, "step": 6671 }, { "epoch": 0.9, "grad_norm": 0.7788489659890118, "learning_rate": 2.616620159093025e-07, "loss": 0.1105, "step": 6672 }, { "epoch": 0.9, "grad_norm": 0.8506461770175697, "learning_rate": 2.609652755764669e-07, "loss": 0.1237, "step": 6673 }, { "epoch": 0.9, "grad_norm": 0.8267672891438624, "learning_rate": 2.602694392458754e-07, "loss": 0.1292, "step": 6674 }, { "epoch": 0.9, "grad_norm": 0.8155835992315059, "learning_rate": 2.5957450705026477e-07, "loss": 0.1587, "step": 6675 }, { "epoch": 0.9, "grad_norm": 1.1255525061762994, "learning_rate": 2.588804791221966e-07, "loss": 0.1696, "step": 6676 }, { "epoch": 0.9, "grad_norm": 0.9200730104289826, "learning_rate": 2.5818735559406174e-07, "loss": 0.1438, "step": 6677 }, { "epoch": 0.9, "grad_norm": 1.1151723915832161, "learning_rate": 2.574951365980782e-07, "loss": 0.1631, "step": 6678 }, { "epoch": 0.9, "grad_norm": 0.9706705628537926, "learning_rate": 2.568038222662911e-07, "loss": 0.1611, "step": 6679 }, { "epoch": 0.9, "grad_norm": 0.8375354864081039, "learning_rate": 2.5611341273057367e-07, "loss": 0.1625, "step": 6680 }, { "epoch": 0.9, "grad_norm": 1.0352167944023354, "learning_rate": 2.554239081226245e-07, "loss": 0.1623, "step": 6681 }, { "epoch": 0.9, "grad_norm": 1.1577350788844432, "learning_rate": 2.5473530857397397e-07, "loss": 0.1872, "step": 6682 }, { "epoch": 0.9, "grad_norm": 1.0765132097359427, "learning_rate": 2.5404761421597477e-07, "loss": 0.1533, "step": 6683 }, { "epoch": 0.9, "grad_norm": 0.8086476054473513, "learning_rate": 2.5336082517981087e-07, "loss": 0.1275, "step": 6684 }, { "epoch": 0.9, "grad_norm": 1.0969198175037491, "learning_rate": 2.526749415964902e-07, "loss": 0.143, "step": 6685 }, { "epoch": 0.9, "grad_norm": 0.6274480009715614, "learning_rate": 2.519899635968498e-07, "loss": 0.1025, "step": 6686 }, { "epoch": 0.9, "grad_norm": 1.0289831294528313, "learning_rate": 2.5130589131155456e-07, "loss": 0.1627, "step": 6687 }, { "epoch": 0.9, "grad_norm": 1.141361479274327, "learning_rate": 2.506227248710952e-07, "loss": 0.2035, "step": 6688 }, { "epoch": 0.9, "grad_norm": 1.0732214347186955, "learning_rate": 2.499404644057907e-07, "loss": 0.1711, "step": 6689 }, { "epoch": 0.9, "grad_norm": 1.078564032124749, "learning_rate": 2.492591100457864e-07, "loss": 0.1805, "step": 6690 }, { "epoch": 0.9, "grad_norm": 1.0026996718355696, "learning_rate": 2.485786619210551e-07, "loss": 0.171, "step": 6691 }, { "epoch": 0.9, "grad_norm": 1.1276088585751578, "learning_rate": 2.478991201613973e-07, "loss": 0.1927, "step": 6692 }, { "epoch": 0.9, "grad_norm": 0.8259801642402856, "learning_rate": 2.472204848964388e-07, "loss": 0.1221, "step": 6693 }, { "epoch": 0.9, "grad_norm": 0.7321050038793474, "learning_rate": 2.4654275625563494e-07, "loss": 0.1165, "step": 6694 }, { "epoch": 0.9, "grad_norm": 0.9595477855416754, "learning_rate": 2.4586593436826736e-07, "loss": 0.155, "step": 6695 }, { "epoch": 0.9, "grad_norm": 1.012408104952246, "learning_rate": 2.451900193634432e-07, "loss": 0.1637, "step": 6696 }, { "epoch": 0.9, "grad_norm": 1.0478786614380016, "learning_rate": 2.4451501137009837e-07, "loss": 0.1868, "step": 6697 }, { "epoch": 0.9, "grad_norm": 0.9831590381526357, "learning_rate": 2.438409105169953e-07, "loss": 0.1471, "step": 6698 }, { "epoch": 0.9, "grad_norm": 0.9200254236748565, "learning_rate": 2.4316771693272235e-07, "loss": 0.1833, "step": 6699 }, { "epoch": 0.9, "grad_norm": 1.1306890587841099, "learning_rate": 2.4249543074569615e-07, "loss": 0.2137, "step": 6700 }, { "epoch": 0.9, "grad_norm": 1.0327272427140246, "learning_rate": 2.4182405208416084e-07, "loss": 0.1434, "step": 6701 }, { "epoch": 0.9, "grad_norm": 0.8563372905697705, "learning_rate": 2.411535810761839e-07, "loss": 0.1254, "step": 6702 }, { "epoch": 0.9, "grad_norm": 1.0486879698177756, "learning_rate": 2.404840178496637e-07, "loss": 0.1251, "step": 6703 }, { "epoch": 0.9, "grad_norm": 1.0175661968996863, "learning_rate": 2.3981536253232293e-07, "loss": 0.1708, "step": 6704 }, { "epoch": 0.9, "grad_norm": 1.0037161600615598, "learning_rate": 2.3914761525171357e-07, "loss": 0.1751, "step": 6705 }, { "epoch": 0.9, "grad_norm": 0.9068768901769396, "learning_rate": 2.3848077613521146e-07, "loss": 0.1313, "step": 6706 }, { "epoch": 0.9, "grad_norm": 1.08900783272371, "learning_rate": 2.3781484531002153e-07, "loss": 0.1756, "step": 6707 }, { "epoch": 0.9, "grad_norm": 0.9861045997301896, "learning_rate": 2.3714982290317378e-07, "loss": 0.1724, "step": 6708 }, { "epoch": 0.9, "grad_norm": 0.8600836028666228, "learning_rate": 2.3648570904152623e-07, "loss": 0.1395, "step": 6709 }, { "epoch": 0.9, "grad_norm": 1.1264065551759803, "learning_rate": 2.3582250385176196e-07, "loss": 0.1772, "step": 6710 }, { "epoch": 0.9, "grad_norm": 1.0170562906502958, "learning_rate": 2.351602074603926e-07, "loss": 0.1784, "step": 6711 }, { "epoch": 0.91, "grad_norm": 0.7077727724972602, "learning_rate": 2.344988199937559e-07, "loss": 0.1201, "step": 6712 }, { "epoch": 0.91, "grad_norm": 0.9078826414633829, "learning_rate": 2.3383834157801488e-07, "loss": 0.1194, "step": 6713 }, { "epoch": 0.91, "grad_norm": 0.6366206323870837, "learning_rate": 2.3317877233916099e-07, "loss": 0.1329, "step": 6714 }, { "epoch": 0.91, "grad_norm": 0.90466204577714, "learning_rate": 2.3252011240301076e-07, "loss": 0.1427, "step": 6715 }, { "epoch": 0.91, "grad_norm": 0.7319442519345528, "learning_rate": 2.3186236189520871e-07, "loss": 0.1112, "step": 6716 }, { "epoch": 0.91, "grad_norm": 1.1310513202235326, "learning_rate": 2.312055209412245e-07, "loss": 0.17, "step": 6717 }, { "epoch": 0.91, "grad_norm": 1.0884141335528246, "learning_rate": 2.305495896663551e-07, "loss": 0.2074, "step": 6718 }, { "epoch": 0.91, "grad_norm": 1.0318554833307692, "learning_rate": 2.2989456819572375e-07, "loss": 0.1963, "step": 6719 }, { "epoch": 0.91, "grad_norm": 1.0828734760347496, "learning_rate": 2.2924045665427995e-07, "loss": 0.1327, "step": 6720 }, { "epoch": 0.91, "grad_norm": 1.014443844098307, "learning_rate": 2.2858725516680004e-07, "loss": 0.146, "step": 6721 }, { "epoch": 0.91, "grad_norm": 0.824328367063193, "learning_rate": 2.279349638578865e-07, "loss": 0.1371, "step": 6722 }, { "epoch": 0.91, "grad_norm": 0.8035756610096375, "learning_rate": 2.2728358285196816e-07, "loss": 0.1649, "step": 6723 }, { "epoch": 0.91, "grad_norm": 1.0172173420235422, "learning_rate": 2.266331122733001e-07, "loss": 0.1786, "step": 6724 }, { "epoch": 0.91, "grad_norm": 0.9526066231190604, "learning_rate": 2.259835522459647e-07, "loss": 0.1467, "step": 6725 }, { "epoch": 0.91, "grad_norm": 0.8436062421839398, "learning_rate": 2.2533490289386785e-07, "loss": 0.1428, "step": 6726 }, { "epoch": 0.91, "grad_norm": 0.9434540414125806, "learning_rate": 2.246871643407461e-07, "loss": 0.1219, "step": 6727 }, { "epoch": 0.91, "grad_norm": 0.9563641041477071, "learning_rate": 2.2404033671015845e-07, "loss": 0.1558, "step": 6728 }, { "epoch": 0.91, "grad_norm": 0.795439573271076, "learning_rate": 2.2339442012549173e-07, "loss": 0.117, "step": 6729 }, { "epoch": 0.91, "grad_norm": 1.0729858226882558, "learning_rate": 2.227494147099596e-07, "loss": 0.1962, "step": 6730 }, { "epoch": 0.91, "grad_norm": 0.9940246777118744, "learning_rate": 2.2210532058660085e-07, "loss": 0.153, "step": 6731 }, { "epoch": 0.91, "grad_norm": 0.8956076044655401, "learning_rate": 2.2146213787827996e-07, "loss": 0.1771, "step": 6732 }, { "epoch": 0.91, "grad_norm": 0.8824056545303289, "learning_rate": 2.208198667076883e-07, "loss": 0.1076, "step": 6733 }, { "epoch": 0.91, "grad_norm": 1.012451149655774, "learning_rate": 2.201785071973439e-07, "loss": 0.1836, "step": 6734 }, { "epoch": 0.91, "grad_norm": 1.031169732380788, "learning_rate": 2.1953805946959006e-07, "loss": 0.1623, "step": 6735 }, { "epoch": 0.91, "grad_norm": 0.8322791958333444, "learning_rate": 2.1889852364659626e-07, "loss": 0.1054, "step": 6736 }, { "epoch": 0.91, "grad_norm": 1.5280690386015066, "learning_rate": 2.1825989985035877e-07, "loss": 0.2885, "step": 6737 }, { "epoch": 0.91, "grad_norm": 0.9772694647092872, "learning_rate": 2.1762218820269854e-07, "loss": 0.2014, "step": 6738 }, { "epoch": 0.91, "grad_norm": 0.9963411976620876, "learning_rate": 2.1698538882526432e-07, "loss": 0.1658, "step": 6739 }, { "epoch": 0.91, "grad_norm": 0.9543734217586894, "learning_rate": 2.1634950183952897e-07, "loss": 0.1877, "step": 6740 }, { "epoch": 0.91, "grad_norm": 1.133498945206632, "learning_rate": 2.1571452736679266e-07, "loss": 0.1777, "step": 6741 }, { "epoch": 0.91, "grad_norm": 1.252649209739906, "learning_rate": 2.150804655281802e-07, "loss": 0.218, "step": 6742 }, { "epoch": 0.91, "grad_norm": 0.9260883962658802, "learning_rate": 2.1444731644464422e-07, "loss": 0.1935, "step": 6743 }, { "epoch": 0.91, "grad_norm": 0.7756229770376946, "learning_rate": 2.1381508023696095e-07, "loss": 0.128, "step": 6744 }, { "epoch": 0.91, "grad_norm": 1.0744534020311167, "learning_rate": 2.1318375702573445e-07, "loss": 0.1506, "step": 6745 }, { "epoch": 0.91, "grad_norm": 0.9790213493799531, "learning_rate": 2.1255334693139396e-07, "loss": 0.1789, "step": 6746 }, { "epoch": 0.91, "grad_norm": 1.014456228385367, "learning_rate": 2.1192385007419325e-07, "loss": 0.1583, "step": 6747 }, { "epoch": 0.91, "grad_norm": 0.9726128222690806, "learning_rate": 2.1129526657421462e-07, "loss": 0.1395, "step": 6748 }, { "epoch": 0.91, "grad_norm": 1.0480702252575425, "learning_rate": 2.1066759655136216e-07, "loss": 0.1949, "step": 6749 }, { "epoch": 0.91, "grad_norm": 0.9251505464067075, "learning_rate": 2.1004084012537008e-07, "loss": 0.1476, "step": 6750 }, { "epoch": 0.91, "grad_norm": 1.0444927302495286, "learning_rate": 2.0941499741579607e-07, "loss": 0.1816, "step": 6751 }, { "epoch": 0.91, "grad_norm": 0.6937258058917575, "learning_rate": 2.08790068542023e-07, "loss": 0.116, "step": 6752 }, { "epoch": 0.91, "grad_norm": 0.665808402027934, "learning_rate": 2.0816605362326102e-07, "loss": 0.1114, "step": 6753 }, { "epoch": 0.91, "grad_norm": 1.0593196446942101, "learning_rate": 2.075429527785444e-07, "loss": 0.1633, "step": 6754 }, { "epoch": 0.91, "grad_norm": 1.0440140460702834, "learning_rate": 2.0692076612673474e-07, "loss": 0.1509, "step": 6755 }, { "epoch": 0.91, "grad_norm": 0.8143268402190774, "learning_rate": 2.0629949378651702e-07, "loss": 0.1219, "step": 6756 }, { "epoch": 0.91, "grad_norm": 0.956364049779853, "learning_rate": 2.0567913587640375e-07, "loss": 0.1409, "step": 6757 }, { "epoch": 0.91, "grad_norm": 1.1029134722279517, "learning_rate": 2.0505969251473245e-07, "loss": 0.2, "step": 6758 }, { "epoch": 0.91, "grad_norm": 1.093290526690002, "learning_rate": 2.0444116381966527e-07, "loss": 0.1643, "step": 6759 }, { "epoch": 0.91, "grad_norm": 1.0073043726104889, "learning_rate": 2.0382354990919174e-07, "loss": 0.1733, "step": 6760 }, { "epoch": 0.91, "grad_norm": 1.2034475784381755, "learning_rate": 2.032068509011248e-07, "loss": 0.2171, "step": 6761 }, { "epoch": 0.91, "grad_norm": 0.7845397303968557, "learning_rate": 2.0259106691310482e-07, "loss": 0.1339, "step": 6762 }, { "epoch": 0.91, "grad_norm": 1.0280729390011725, "learning_rate": 2.0197619806259561e-07, "loss": 0.1522, "step": 6763 }, { "epoch": 0.91, "grad_norm": 1.028247938623212, "learning_rate": 2.0136224446688836e-07, "loss": 0.1843, "step": 6764 }, { "epoch": 0.91, "grad_norm": 0.94611881732135, "learning_rate": 2.0074920624309824e-07, "loss": 0.1705, "step": 6765 }, { "epoch": 0.91, "grad_norm": 0.9174364743102539, "learning_rate": 2.0013708350816675e-07, "loss": 0.1408, "step": 6766 }, { "epoch": 0.91, "grad_norm": 0.9312328786789269, "learning_rate": 1.995258763788599e-07, "loss": 0.1387, "step": 6767 }, { "epoch": 0.91, "grad_norm": 1.103724422168606, "learning_rate": 1.9891558497177e-07, "loss": 0.143, "step": 6768 }, { "epoch": 0.91, "grad_norm": 1.0269077014687187, "learning_rate": 1.983062094033139e-07, "loss": 0.1709, "step": 6769 }, { "epoch": 0.91, "grad_norm": 1.3199742643808787, "learning_rate": 1.9769774978973423e-07, "loss": 0.2248, "step": 6770 }, { "epoch": 0.91, "grad_norm": 1.114703220683054, "learning_rate": 1.9709020624709752e-07, "loss": 0.1832, "step": 6771 }, { "epoch": 0.91, "grad_norm": 1.0106402372272287, "learning_rate": 1.964835788912983e-07, "loss": 0.1352, "step": 6772 }, { "epoch": 0.91, "grad_norm": 0.9436781271395638, "learning_rate": 1.9587786783805408e-07, "loss": 0.1606, "step": 6773 }, { "epoch": 0.91, "grad_norm": 0.9080278328034828, "learning_rate": 1.95273073202909e-07, "loss": 0.1301, "step": 6774 }, { "epoch": 0.91, "grad_norm": 0.9945442752322082, "learning_rate": 1.9466919510123028e-07, "loss": 0.1399, "step": 6775 }, { "epoch": 0.91, "grad_norm": 0.8769700505060033, "learning_rate": 1.940662336482124e-07, "loss": 0.1298, "step": 6776 }, { "epoch": 0.91, "grad_norm": 0.9746005730966486, "learning_rate": 1.9346418895887508e-07, "loss": 0.1611, "step": 6777 }, { "epoch": 0.91, "grad_norm": 0.9146394840734171, "learning_rate": 1.9286306114806086e-07, "loss": 0.1366, "step": 6778 }, { "epoch": 0.91, "grad_norm": 1.2354333036826854, "learning_rate": 1.9226285033043967e-07, "loss": 0.1961, "step": 6779 }, { "epoch": 0.91, "grad_norm": 0.979747714937446, "learning_rate": 1.9166355662050496e-07, "loss": 0.1445, "step": 6780 }, { "epoch": 0.91, "grad_norm": 0.8447773918798533, "learning_rate": 1.9106518013257692e-07, "loss": 0.1282, "step": 6781 }, { "epoch": 0.91, "grad_norm": 0.99086148726328, "learning_rate": 1.904677209807987e-07, "loss": 0.1282, "step": 6782 }, { "epoch": 0.91, "grad_norm": 1.0160127955853104, "learning_rate": 1.8987117927914077e-07, "loss": 0.1328, "step": 6783 }, { "epoch": 0.91, "grad_norm": 1.1023543365938702, "learning_rate": 1.8927555514139662e-07, "loss": 0.1727, "step": 6784 }, { "epoch": 0.91, "grad_norm": 1.1766364740573778, "learning_rate": 1.886808486811864e-07, "loss": 0.2475, "step": 6785 }, { "epoch": 0.92, "grad_norm": 0.9423938932289952, "learning_rate": 1.8808706001195332e-07, "loss": 0.1521, "step": 6786 }, { "epoch": 0.92, "grad_norm": 0.9729259324778075, "learning_rate": 1.8749418924696727e-07, "loss": 0.1305, "step": 6787 }, { "epoch": 0.92, "grad_norm": 0.9248349212095932, "learning_rate": 1.869022364993217e-07, "loss": 0.1707, "step": 6788 }, { "epoch": 0.92, "grad_norm": 1.1245511083907447, "learning_rate": 1.863112018819352e-07, "loss": 0.1721, "step": 6789 }, { "epoch": 0.92, "grad_norm": 0.852660704903649, "learning_rate": 1.8572108550755253e-07, "loss": 0.1072, "step": 6790 }, { "epoch": 0.92, "grad_norm": 0.7774192655014818, "learning_rate": 1.8513188748874201e-07, "loss": 0.0947, "step": 6791 }, { "epoch": 0.92, "grad_norm": 1.112694410758652, "learning_rate": 1.845436079378965e-07, "loss": 0.1878, "step": 6792 }, { "epoch": 0.92, "grad_norm": 1.354375617679019, "learning_rate": 1.8395624696723457e-07, "loss": 0.2098, "step": 6793 }, { "epoch": 0.92, "grad_norm": 1.1071921577629877, "learning_rate": 1.833698046887994e-07, "loss": 0.1794, "step": 6794 }, { "epoch": 0.92, "grad_norm": 1.1382392362398797, "learning_rate": 1.8278428121445812e-07, "loss": 0.2032, "step": 6795 }, { "epoch": 0.92, "grad_norm": 0.9712796762694941, "learning_rate": 1.8219967665590422e-07, "loss": 0.118, "step": 6796 }, { "epoch": 0.92, "grad_norm": 1.0066596934531054, "learning_rate": 1.81615991124654e-07, "loss": 0.1439, "step": 6797 }, { "epoch": 0.92, "grad_norm": 0.9870830836275584, "learning_rate": 1.8103322473204954e-07, "loss": 0.175, "step": 6798 }, { "epoch": 0.92, "grad_norm": 1.0096027253721307, "learning_rate": 1.80451377589258e-07, "loss": 0.1838, "step": 6799 }, { "epoch": 0.92, "grad_norm": 1.0142456734755338, "learning_rate": 1.7987044980726954e-07, "loss": 0.1848, "step": 6800 }, { "epoch": 0.92, "grad_norm": 0.8570320286690623, "learning_rate": 1.7929044149690044e-07, "loss": 0.1599, "step": 6801 }, { "epoch": 0.92, "grad_norm": 0.7547474577260419, "learning_rate": 1.787113527687917e-07, "loss": 0.1133, "step": 6802 }, { "epoch": 0.92, "grad_norm": 0.974620879857114, "learning_rate": 1.781331837334077e-07, "loss": 0.1524, "step": 6803 }, { "epoch": 0.92, "grad_norm": 1.1398265511814931, "learning_rate": 1.7755593450103802e-07, "loss": 0.1793, "step": 6804 }, { "epoch": 0.92, "grad_norm": 1.1207655921191881, "learning_rate": 1.7697960518179626e-07, "loss": 0.1974, "step": 6805 }, { "epoch": 0.92, "grad_norm": 0.8862655907905792, "learning_rate": 1.764041958856222e-07, "loss": 0.1619, "step": 6806 }, { "epoch": 0.92, "grad_norm": 1.1513751816395996, "learning_rate": 1.7582970672227918e-07, "loss": 0.178, "step": 6807 }, { "epoch": 0.92, "grad_norm": 0.9378278663179058, "learning_rate": 1.7525613780135343e-07, "loss": 0.1558, "step": 6808 }, { "epoch": 0.92, "grad_norm": 1.1056748457585173, "learning_rate": 1.7468348923225797e-07, "loss": 0.1528, "step": 6809 }, { "epoch": 0.92, "grad_norm": 1.0222818524322523, "learning_rate": 1.7411176112422823e-07, "loss": 0.136, "step": 6810 }, { "epoch": 0.92, "grad_norm": 0.8079899017361339, "learning_rate": 1.735409535863264e-07, "loss": 0.1207, "step": 6811 }, { "epoch": 0.92, "grad_norm": 0.7099181268290162, "learning_rate": 1.7297106672743758e-07, "loss": 0.1266, "step": 6812 }, { "epoch": 0.92, "grad_norm": 1.0435795623900979, "learning_rate": 1.7240210065627094e-07, "loss": 0.1374, "step": 6813 }, { "epoch": 0.92, "grad_norm": 0.9459127143358255, "learning_rate": 1.718340554813608e-07, "loss": 0.1861, "step": 6814 }, { "epoch": 0.92, "grad_norm": 0.9578330210788875, "learning_rate": 1.7126693131106542e-07, "loss": 0.1667, "step": 6815 }, { "epoch": 0.92, "grad_norm": 1.0156030790565675, "learning_rate": 1.707007282535672e-07, "loss": 0.1577, "step": 6816 }, { "epoch": 0.92, "grad_norm": 0.9079643661929773, "learning_rate": 1.7013544641687363e-07, "loss": 0.1417, "step": 6817 }, { "epoch": 0.92, "grad_norm": 0.7561887728279548, "learning_rate": 1.695710859088162e-07, "loss": 0.1533, "step": 6818 }, { "epoch": 0.92, "grad_norm": 1.3462879323708707, "learning_rate": 1.6900764683704996e-07, "loss": 0.2081, "step": 6819 }, { "epoch": 0.92, "grad_norm": 0.99541328036293, "learning_rate": 1.6844512930905444e-07, "loss": 0.1362, "step": 6820 }, { "epoch": 0.92, "grad_norm": 1.1495349320148396, "learning_rate": 1.6788353343213437e-07, "loss": 0.2165, "step": 6821 }, { "epoch": 0.92, "grad_norm": 1.017767985687681, "learning_rate": 1.673228593134174e-07, "loss": 0.166, "step": 6822 }, { "epoch": 0.92, "grad_norm": 0.9868431142243442, "learning_rate": 1.667631070598552e-07, "loss": 0.147, "step": 6823 }, { "epoch": 0.92, "grad_norm": 1.0126302945884185, "learning_rate": 1.662042767782257e-07, "loss": 0.1869, "step": 6824 }, { "epoch": 0.92, "grad_norm": 0.8684027348539511, "learning_rate": 1.6564636857512805e-07, "loss": 0.1427, "step": 6825 }, { "epoch": 0.92, "grad_norm": 1.095211437168177, "learning_rate": 1.650893825569877e-07, "loss": 0.1766, "step": 6826 }, { "epoch": 0.92, "grad_norm": 1.0844122036317763, "learning_rate": 1.6453331883005242e-07, "loss": 0.1798, "step": 6827 }, { "epoch": 0.92, "grad_norm": 0.7792485030088863, "learning_rate": 1.639781775003968e-07, "loss": 0.1458, "step": 6828 }, { "epoch": 0.92, "grad_norm": 0.8481350409316776, "learning_rate": 1.6342395867391614e-07, "loss": 0.1494, "step": 6829 }, { "epoch": 0.92, "grad_norm": 1.0624312129570461, "learning_rate": 1.6287066245633254e-07, "loss": 0.1648, "step": 6830 }, { "epoch": 0.92, "grad_norm": 1.154335516833946, "learning_rate": 1.6231828895318992e-07, "loss": 0.1424, "step": 6831 }, { "epoch": 0.92, "grad_norm": 1.0101539612005428, "learning_rate": 1.6176683826985783e-07, "loss": 0.1534, "step": 6832 }, { "epoch": 0.92, "grad_norm": 0.7825455891168713, "learning_rate": 1.612163105115283e-07, "loss": 0.1337, "step": 6833 }, { "epoch": 0.92, "grad_norm": 0.9814518092291986, "learning_rate": 1.6066670578321953e-07, "loss": 0.1546, "step": 6834 }, { "epoch": 0.92, "grad_norm": 0.9846516738820059, "learning_rate": 1.6011802418977097e-07, "loss": 0.1707, "step": 6835 }, { "epoch": 0.92, "grad_norm": 1.2561405259857576, "learning_rate": 1.5957026583584833e-07, "loss": 0.2176, "step": 6836 }, { "epoch": 0.92, "grad_norm": 1.0616547584092364, "learning_rate": 1.5902343082593919e-07, "loss": 0.1923, "step": 6837 }, { "epoch": 0.92, "grad_norm": 1.21433307855318, "learning_rate": 1.5847751926435618e-07, "loss": 0.2004, "step": 6838 }, { "epoch": 0.92, "grad_norm": 0.9732220053610612, "learning_rate": 1.57932531255236e-07, "loss": 0.1532, "step": 6839 }, { "epoch": 0.92, "grad_norm": 1.0721191161610808, "learning_rate": 1.5738846690253883e-07, "loss": 0.1844, "step": 6840 }, { "epoch": 0.92, "grad_norm": 0.7367615112818118, "learning_rate": 1.568453263100478e-07, "loss": 0.1192, "step": 6841 }, { "epoch": 0.92, "grad_norm": 1.0285972660886171, "learning_rate": 1.563031095813705e-07, "loss": 0.1714, "step": 6842 }, { "epoch": 0.92, "grad_norm": 1.1037997840668385, "learning_rate": 1.5576181681993928e-07, "loss": 0.159, "step": 6843 }, { "epoch": 0.92, "grad_norm": 0.9266977943529282, "learning_rate": 1.5522144812900875e-07, "loss": 0.1607, "step": 6844 }, { "epoch": 0.92, "grad_norm": 1.0291541873348247, "learning_rate": 1.5468200361165752e-07, "loss": 0.1627, "step": 6845 }, { "epoch": 0.92, "grad_norm": 0.9830241068235963, "learning_rate": 1.541434833707889e-07, "loss": 0.1986, "step": 6846 }, { "epoch": 0.92, "grad_norm": 0.7975243805304287, "learning_rate": 1.5360588750912897e-07, "loss": 0.135, "step": 6847 }, { "epoch": 0.92, "grad_norm": 0.9284062720392169, "learning_rate": 1.5306921612922742e-07, "loss": 0.1733, "step": 6848 }, { "epoch": 0.92, "grad_norm": 0.7675289180221971, "learning_rate": 1.5253346933345846e-07, "loss": 0.1192, "step": 6849 }, { "epoch": 0.92, "grad_norm": 1.0012495692163907, "learning_rate": 1.5199864722401758e-07, "loss": 0.1985, "step": 6850 }, { "epoch": 0.92, "grad_norm": 1.0767112626210948, "learning_rate": 1.5146474990292815e-07, "loss": 0.1599, "step": 6851 }, { "epoch": 0.92, "grad_norm": 0.8361832294703531, "learning_rate": 1.5093177747203315e-07, "loss": 0.114, "step": 6852 }, { "epoch": 0.92, "grad_norm": 1.1691105339445367, "learning_rate": 1.5039973003300125e-07, "loss": 0.1959, "step": 6853 }, { "epoch": 0.92, "grad_norm": 1.015301717942936, "learning_rate": 1.4986860768732402e-07, "loss": 0.1855, "step": 6854 }, { "epoch": 0.92, "grad_norm": 1.001258828550003, "learning_rate": 1.4933841053631547e-07, "loss": 0.1494, "step": 6855 }, { "epoch": 0.92, "grad_norm": 0.9314623884934662, "learning_rate": 1.4880913868111525e-07, "loss": 0.1304, "step": 6856 }, { "epoch": 0.92, "grad_norm": 1.0575387703299641, "learning_rate": 1.4828079222268477e-07, "loss": 0.1358, "step": 6857 }, { "epoch": 0.92, "grad_norm": 0.967803336688854, "learning_rate": 1.477533712618101e-07, "loss": 0.1647, "step": 6858 }, { "epoch": 0.92, "grad_norm": 0.7792933362088337, "learning_rate": 1.4722687589910024e-07, "loss": 0.1105, "step": 6859 }, { "epoch": 0.93, "grad_norm": 1.0322848378259848, "learning_rate": 1.4670130623498812e-07, "loss": 0.1695, "step": 6860 }, { "epoch": 0.93, "grad_norm": 0.7792738474141399, "learning_rate": 1.4617666236972805e-07, "loss": 0.1222, "step": 6861 }, { "epoch": 0.93, "grad_norm": 1.0177908163604303, "learning_rate": 1.4565294440340105e-07, "loss": 0.1663, "step": 6862 }, { "epoch": 0.93, "grad_norm": 0.8263887906331342, "learning_rate": 1.4513015243590944e-07, "loss": 0.1155, "step": 6863 }, { "epoch": 0.93, "grad_norm": 1.0075184391409626, "learning_rate": 1.4460828656697844e-07, "loss": 0.1679, "step": 6864 }, { "epoch": 0.93, "grad_norm": 1.1410329235693637, "learning_rate": 1.4408734689615845e-07, "loss": 0.1601, "step": 6865 }, { "epoch": 0.93, "grad_norm": 1.0434385779581652, "learning_rate": 1.4356733352282105e-07, "loss": 0.1471, "step": 6866 }, { "epoch": 0.93, "grad_norm": 1.212335567364262, "learning_rate": 1.4304824654616355e-07, "loss": 0.2087, "step": 6867 }, { "epoch": 0.93, "grad_norm": 0.8955785094588053, "learning_rate": 1.4253008606520403e-07, "loss": 0.128, "step": 6868 }, { "epoch": 0.93, "grad_norm": 1.1990913870746325, "learning_rate": 1.4201285217878558e-07, "loss": 0.1787, "step": 6869 }, { "epoch": 0.93, "grad_norm": 0.8501299172632159, "learning_rate": 1.4149654498557374e-07, "loss": 0.1411, "step": 6870 }, { "epoch": 0.93, "grad_norm": 1.0648243759354779, "learning_rate": 1.4098116458405865e-07, "loss": 0.1788, "step": 6871 }, { "epoch": 0.93, "grad_norm": 1.1117321612656634, "learning_rate": 1.404667110725505e-07, "loss": 0.1919, "step": 6872 }, { "epoch": 0.93, "grad_norm": 1.0051994081301492, "learning_rate": 1.3995318454918582e-07, "loss": 0.1727, "step": 6873 }, { "epoch": 0.93, "grad_norm": 0.8733244448772443, "learning_rate": 1.3944058511192348e-07, "loss": 0.1653, "step": 6874 }, { "epoch": 0.93, "grad_norm": 0.8248642055521022, "learning_rate": 1.3892891285854472e-07, "loss": 0.1678, "step": 6875 }, { "epoch": 0.93, "grad_norm": 1.040102610725063, "learning_rate": 1.3841816788665475e-07, "loss": 0.1674, "step": 6876 }, { "epoch": 0.93, "grad_norm": 1.0465346620653737, "learning_rate": 1.3790835029368177e-07, "loss": 0.2106, "step": 6877 }, { "epoch": 0.93, "grad_norm": 0.9718102152972311, "learning_rate": 1.3739946017687578e-07, "loss": 0.1658, "step": 6878 }, { "epoch": 0.93, "grad_norm": 0.9880900417321249, "learning_rate": 1.3689149763331188e-07, "loss": 0.1937, "step": 6879 }, { "epoch": 0.93, "grad_norm": 1.0688475872829633, "learning_rate": 1.3638446275988704e-07, "loss": 0.1845, "step": 6880 }, { "epoch": 0.93, "grad_norm": 0.7375971134015602, "learning_rate": 1.3587835565332163e-07, "loss": 0.0847, "step": 6881 }, { "epoch": 0.93, "grad_norm": 0.9916687036891483, "learning_rate": 1.35373176410159e-07, "loss": 0.1705, "step": 6882 }, { "epoch": 0.93, "grad_norm": 0.7535632217084743, "learning_rate": 1.3486892512676487e-07, "loss": 0.1397, "step": 6883 }, { "epoch": 0.93, "grad_norm": 0.8426302529136925, "learning_rate": 1.3436560189932946e-07, "loss": 0.1642, "step": 6884 }, { "epoch": 0.93, "grad_norm": 1.126881376383651, "learning_rate": 1.3386320682386378e-07, "loss": 0.2253, "step": 6885 }, { "epoch": 0.93, "grad_norm": 1.1001898073756695, "learning_rate": 1.3336173999620395e-07, "loss": 0.1747, "step": 6886 }, { "epoch": 0.93, "grad_norm": 0.8640578017306897, "learning_rate": 1.328612015120079e-07, "loss": 0.1401, "step": 6887 }, { "epoch": 0.93, "grad_norm": 0.8951414773510545, "learning_rate": 1.3236159146675642e-07, "loss": 0.135, "step": 6888 }, { "epoch": 0.93, "grad_norm": 1.220633420885509, "learning_rate": 1.318629099557539e-07, "loss": 0.2181, "step": 6889 }, { "epoch": 0.93, "grad_norm": 0.9883186681332496, "learning_rate": 1.3136515707412645e-07, "loss": 0.188, "step": 6890 }, { "epoch": 0.93, "grad_norm": 0.877711501819609, "learning_rate": 1.3086833291682422e-07, "loss": 0.1414, "step": 6891 }, { "epoch": 0.93, "grad_norm": 0.9343673676747426, "learning_rate": 1.3037243757861972e-07, "loss": 0.1216, "step": 6892 }, { "epoch": 0.93, "grad_norm": 0.807103721283885, "learning_rate": 1.2987747115410786e-07, "loss": 0.1272, "step": 6893 }, { "epoch": 0.93, "grad_norm": 1.2335430650237793, "learning_rate": 1.2938343373770757e-07, "loss": 0.1911, "step": 6894 }, { "epoch": 0.93, "grad_norm": 0.7659543403398328, "learning_rate": 1.2889032542365843e-07, "loss": 0.1131, "step": 6895 }, { "epoch": 0.93, "grad_norm": 0.7711357798791394, "learning_rate": 1.283981463060252e-07, "loss": 0.1138, "step": 6896 }, { "epoch": 0.93, "grad_norm": 1.062924210897339, "learning_rate": 1.2790689647869448e-07, "loss": 0.1852, "step": 6897 }, { "epoch": 0.93, "grad_norm": 0.7583324833634865, "learning_rate": 1.2741657603537462e-07, "loss": 0.1464, "step": 6898 }, { "epoch": 0.93, "grad_norm": 0.7306173758575644, "learning_rate": 1.26927185069598e-07, "loss": 0.123, "step": 6899 }, { "epoch": 0.93, "grad_norm": 0.9021564794402062, "learning_rate": 1.2643872367471888e-07, "loss": 0.1257, "step": 6900 }, { "epoch": 0.93, "grad_norm": 0.7719130946016068, "learning_rate": 1.2595119194391548e-07, "loss": 0.1401, "step": 6901 }, { "epoch": 0.93, "grad_norm": 1.039889120537121, "learning_rate": 1.2546458997018617e-07, "loss": 0.1796, "step": 6902 }, { "epoch": 0.93, "grad_norm": 0.8245906176885905, "learning_rate": 1.2497891784635396e-07, "loss": 0.1143, "step": 6903 }, { "epoch": 0.93, "grad_norm": 0.7596167999096254, "learning_rate": 1.2449417566506473e-07, "loss": 0.1621, "step": 6904 }, { "epoch": 0.93, "grad_norm": 0.7801814233950001, "learning_rate": 1.2401036351878558e-07, "loss": 0.1161, "step": 6905 }, { "epoch": 0.93, "grad_norm": 0.6441709767186422, "learning_rate": 1.2352748149980666e-07, "loss": 0.0864, "step": 6906 }, { "epoch": 0.93, "grad_norm": 1.0410202981008367, "learning_rate": 1.230455297002414e-07, "loss": 0.1772, "step": 6907 }, { "epoch": 0.93, "grad_norm": 1.2021837901248675, "learning_rate": 1.2256450821202582e-07, "loss": 0.2268, "step": 6908 }, { "epoch": 0.93, "grad_norm": 0.6626419927594319, "learning_rate": 1.2208441712691698e-07, "loss": 0.1172, "step": 6909 }, { "epoch": 0.93, "grad_norm": 0.9762276305307642, "learning_rate": 1.2160525653649612e-07, "loss": 0.1275, "step": 6910 }, { "epoch": 0.93, "grad_norm": 1.058085175121899, "learning_rate": 1.211270265321657e-07, "loss": 0.182, "step": 6911 }, { "epoch": 0.93, "grad_norm": 1.0497647619294852, "learning_rate": 1.2064972720515212e-07, "loss": 0.1759, "step": 6912 }, { "epoch": 0.93, "grad_norm": 1.0813809603471911, "learning_rate": 1.2017335864650203e-07, "loss": 0.1868, "step": 6913 }, { "epoch": 0.93, "grad_norm": 0.7989126268666471, "learning_rate": 1.196979209470872e-07, "loss": 0.1693, "step": 6914 }, { "epoch": 0.93, "grad_norm": 0.978452244598369, "learning_rate": 1.1922341419760008e-07, "loss": 0.1904, "step": 6915 }, { "epoch": 0.93, "grad_norm": 0.9177666824822925, "learning_rate": 1.1874983848855604e-07, "loss": 0.1292, "step": 6916 }, { "epoch": 0.93, "grad_norm": 1.0826419225178698, "learning_rate": 1.1827719391029225e-07, "loss": 0.1811, "step": 6917 }, { "epoch": 0.93, "grad_norm": 0.7690066591901719, "learning_rate": 1.1780548055296936e-07, "loss": 0.122, "step": 6918 }, { "epoch": 0.93, "grad_norm": 1.2246651204166494, "learning_rate": 1.1733469850656931e-07, "loss": 0.1905, "step": 6919 }, { "epoch": 0.93, "grad_norm": 1.1238667593114104, "learning_rate": 1.1686484786089747e-07, "loss": 0.207, "step": 6920 }, { "epoch": 0.93, "grad_norm": 0.9747297005547378, "learning_rate": 1.1639592870558048e-07, "loss": 0.1361, "step": 6921 }, { "epoch": 0.93, "grad_norm": 0.8187022322931965, "learning_rate": 1.1592794113006845e-07, "loss": 0.1537, "step": 6922 }, { "epoch": 0.93, "grad_norm": 1.1102674764004108, "learning_rate": 1.1546088522363219e-07, "loss": 0.1899, "step": 6923 }, { "epoch": 0.93, "grad_norm": 0.7075808873327298, "learning_rate": 1.1499476107536655e-07, "loss": 0.0996, "step": 6924 }, { "epoch": 0.93, "grad_norm": 1.0803615244105516, "learning_rate": 1.1452956877418764e-07, "loss": 0.1752, "step": 6925 }, { "epoch": 0.93, "grad_norm": 1.237496649926138, "learning_rate": 1.1406530840883334e-07, "loss": 0.2013, "step": 6926 }, { "epoch": 0.93, "grad_norm": 0.7954974255427564, "learning_rate": 1.136019800678645e-07, "loss": 0.1219, "step": 6927 }, { "epoch": 0.93, "grad_norm": 0.9967805143938459, "learning_rate": 1.1313958383966428e-07, "loss": 0.1649, "step": 6928 }, { "epoch": 0.93, "grad_norm": 0.7159837535860237, "learning_rate": 1.1267811981243882e-07, "loss": 0.1081, "step": 6929 }, { "epoch": 0.93, "grad_norm": 0.8359313315181557, "learning_rate": 1.1221758807421435e-07, "loss": 0.1367, "step": 6930 }, { "epoch": 0.93, "grad_norm": 0.8070837742125326, "learning_rate": 1.1175798871284006e-07, "loss": 0.1318, "step": 6931 }, { "epoch": 0.93, "grad_norm": 0.794275802656754, "learning_rate": 1.1129932181598856e-07, "loss": 0.1235, "step": 6932 }, { "epoch": 0.93, "grad_norm": 1.1966159876135878, "learning_rate": 1.108415874711527e-07, "loss": 0.1987, "step": 6933 }, { "epoch": 0.94, "grad_norm": 0.8261832940836285, "learning_rate": 1.103847857656487e-07, "loss": 0.1099, "step": 6934 }, { "epoch": 0.94, "grad_norm": 1.0266613638903914, "learning_rate": 1.0992891678661466e-07, "loss": 0.1513, "step": 6935 }, { "epoch": 0.94, "grad_norm": 0.9331578311293341, "learning_rate": 1.0947398062101045e-07, "loss": 0.162, "step": 6936 }, { "epoch": 0.94, "grad_norm": 0.9484773346406137, "learning_rate": 1.0901997735561776e-07, "loss": 0.1392, "step": 6937 }, { "epoch": 0.94, "grad_norm": 0.877577674585599, "learning_rate": 1.0856690707704176e-07, "loss": 0.1373, "step": 6938 }, { "epoch": 0.94, "grad_norm": 0.7359981688196937, "learning_rate": 1.0811476987170777e-07, "loss": 0.1162, "step": 6939 }, { "epoch": 0.94, "grad_norm": 0.8747269360441735, "learning_rate": 1.0766356582586346e-07, "loss": 0.1236, "step": 6940 }, { "epoch": 0.94, "grad_norm": 0.9785448245176511, "learning_rate": 1.0721329502557998e-07, "loss": 0.1566, "step": 6941 }, { "epoch": 0.94, "grad_norm": 1.1579438074836157, "learning_rate": 1.0676395755674973e-07, "loss": 0.1692, "step": 6942 }, { "epoch": 0.94, "grad_norm": 0.9503997292611295, "learning_rate": 1.0631555350508582e-07, "loss": 0.1471, "step": 6943 }, { "epoch": 0.94, "grad_norm": 1.0033255358415225, "learning_rate": 1.0586808295612539e-07, "loss": 0.1926, "step": 6944 }, { "epoch": 0.94, "grad_norm": 0.9649499364955014, "learning_rate": 1.0542154599522514e-07, "loss": 0.1539, "step": 6945 }, { "epoch": 0.94, "grad_norm": 0.8265417903652713, "learning_rate": 1.0497594270756528e-07, "loss": 0.1088, "step": 6946 }, { "epoch": 0.94, "grad_norm": 1.1065140627433878, "learning_rate": 1.0453127317814893e-07, "loss": 0.1692, "step": 6947 }, { "epoch": 0.94, "grad_norm": 1.0319409985190968, "learning_rate": 1.0408753749179823e-07, "loss": 0.1396, "step": 6948 }, { "epoch": 0.94, "grad_norm": 1.0404655209606903, "learning_rate": 1.0364473573315881e-07, "loss": 0.1673, "step": 6949 }, { "epoch": 0.94, "grad_norm": 1.1247742042901339, "learning_rate": 1.0320286798669866e-07, "loss": 0.1969, "step": 6950 }, { "epoch": 0.94, "grad_norm": 0.7124769309542707, "learning_rate": 1.027619343367059e-07, "loss": 0.0965, "step": 6951 }, { "epoch": 0.94, "grad_norm": 0.9031262664229643, "learning_rate": 1.023219348672927e-07, "loss": 0.1521, "step": 6952 }, { "epoch": 0.94, "grad_norm": 0.8381989219143028, "learning_rate": 1.0188286966239192e-07, "loss": 0.1454, "step": 6953 }, { "epoch": 0.94, "grad_norm": 0.9227675066641171, "learning_rate": 1.0144473880575768e-07, "loss": 0.1627, "step": 6954 }, { "epoch": 0.94, "grad_norm": 1.146477928778864, "learning_rate": 1.0100754238096589e-07, "loss": 0.1946, "step": 6955 }, { "epoch": 0.94, "grad_norm": 1.0154914022686132, "learning_rate": 1.0057128047141595e-07, "loss": 0.1613, "step": 6956 }, { "epoch": 0.94, "grad_norm": 1.0913733916168618, "learning_rate": 1.0013595316032631e-07, "loss": 0.1885, "step": 6957 }, { "epoch": 0.94, "grad_norm": 0.8640093656692655, "learning_rate": 9.970156053073943e-08, "loss": 0.1272, "step": 6958 }, { "epoch": 0.94, "grad_norm": 1.0680869611371815, "learning_rate": 9.926810266551846e-08, "loss": 0.181, "step": 6959 }, { "epoch": 0.94, "grad_norm": 1.2424681281862482, "learning_rate": 9.88355796473478e-08, "loss": 0.1763, "step": 6960 }, { "epoch": 0.94, "grad_norm": 0.8896922965322368, "learning_rate": 9.840399155873481e-08, "loss": 0.1501, "step": 6961 }, { "epoch": 0.94, "grad_norm": 0.9925847711156711, "learning_rate": 9.797333848200697e-08, "loss": 0.182, "step": 6962 }, { "epoch": 0.94, "grad_norm": 1.031894349965629, "learning_rate": 9.75436204993152e-08, "loss": 0.1521, "step": 6963 }, { "epoch": 0.94, "grad_norm": 0.9180696057251551, "learning_rate": 9.711483769263009e-08, "loss": 0.1808, "step": 6964 }, { "epoch": 0.94, "grad_norm": 1.1383570756316745, "learning_rate": 9.668699014374561e-08, "loss": 0.2309, "step": 6965 }, { "epoch": 0.94, "grad_norm": 1.0305743378514032, "learning_rate": 9.626007793427594e-08, "loss": 0.1606, "step": 6966 }, { "epoch": 0.94, "grad_norm": 0.8381107163402245, "learning_rate": 9.58341011456576e-08, "loss": 0.1691, "step": 6967 }, { "epoch": 0.94, "grad_norm": 0.6027096068304391, "learning_rate": 9.540905985914894e-08, "loss": 0.0824, "step": 6968 }, { "epoch": 0.94, "grad_norm": 0.9200510133713593, "learning_rate": 9.498495415582842e-08, "loss": 0.1458, "step": 6969 }, { "epoch": 0.94, "grad_norm": 1.055291907111611, "learning_rate": 9.456178411659744e-08, "loss": 0.1689, "step": 6970 }, { "epoch": 0.94, "grad_norm": 1.1751543791541668, "learning_rate": 9.413954982217921e-08, "loss": 0.2077, "step": 6971 }, { "epoch": 0.94, "grad_norm": 1.2126832459883061, "learning_rate": 9.371825135311652e-08, "loss": 0.2243, "step": 6972 }, { "epoch": 0.94, "grad_norm": 0.8385530446159899, "learning_rate": 9.329788878977508e-08, "loss": 0.1447, "step": 6973 }, { "epoch": 0.94, "grad_norm": 1.0532505459681425, "learning_rate": 9.287846221234298e-08, "loss": 0.1633, "step": 6974 }, { "epoch": 0.94, "grad_norm": 0.974995952349562, "learning_rate": 9.245997170082733e-08, "loss": 0.1498, "step": 6975 }, { "epoch": 0.94, "grad_norm": 1.100508714808763, "learning_rate": 9.20424173350587e-08, "loss": 0.2052, "step": 6976 }, { "epoch": 0.94, "grad_norm": 1.148564326871821, "learning_rate": 9.16257991946884e-08, "loss": 0.1967, "step": 6977 }, { "epoch": 0.94, "grad_norm": 1.161186775073153, "learning_rate": 9.121011735918895e-08, "loss": 0.1725, "step": 6978 }, { "epoch": 0.94, "grad_norm": 1.0719089407269116, "learning_rate": 9.079537190785359e-08, "loss": 0.1634, "step": 6979 }, { "epoch": 0.94, "grad_norm": 1.0515948872860046, "learning_rate": 9.03815629197985e-08, "loss": 0.1393, "step": 6980 }, { "epoch": 0.94, "grad_norm": 1.073805003163807, "learning_rate": 8.99686904739605e-08, "loss": 0.173, "step": 6981 }, { "epoch": 0.94, "grad_norm": 1.2063120131641136, "learning_rate": 8.955675464909774e-08, "loss": 0.1754, "step": 6982 }, { "epoch": 0.94, "grad_norm": 0.7927009938293855, "learning_rate": 8.914575552378901e-08, "loss": 0.1135, "step": 6983 }, { "epoch": 0.94, "grad_norm": 0.9169515131823162, "learning_rate": 8.873569317643604e-08, "loss": 0.127, "step": 6984 }, { "epoch": 0.94, "grad_norm": 0.8224274799941372, "learning_rate": 8.832656768526072e-08, "loss": 0.117, "step": 6985 }, { "epoch": 0.94, "grad_norm": 0.9200720761655052, "learning_rate": 8.791837912830615e-08, "loss": 0.13, "step": 6986 }, { "epoch": 0.94, "grad_norm": 1.0177390240974866, "learning_rate": 8.751112758343782e-08, "loss": 0.1513, "step": 6987 }, { "epoch": 0.94, "grad_norm": 0.8040040487009378, "learning_rate": 8.71048131283403e-08, "loss": 0.1644, "step": 6988 }, { "epoch": 0.94, "grad_norm": 0.8101141854816508, "learning_rate": 8.66994358405221e-08, "loss": 0.1416, "step": 6989 }, { "epoch": 0.94, "grad_norm": 0.917024736915328, "learning_rate": 8.629499579731026e-08, "loss": 0.1756, "step": 6990 }, { "epoch": 0.94, "grad_norm": 1.1623350934465588, "learning_rate": 8.589149307585586e-08, "loss": 0.2068, "step": 6991 }, { "epoch": 0.94, "grad_norm": 0.9852066247858529, "learning_rate": 8.548892775312844e-08, "loss": 0.1622, "step": 6992 }, { "epoch": 0.94, "grad_norm": 0.9540801564633401, "learning_rate": 8.508729990592102e-08, "loss": 0.1387, "step": 6993 }, { "epoch": 0.94, "grad_norm": 0.9861813244108999, "learning_rate": 8.468660961084674e-08, "loss": 0.1619, "step": 6994 }, { "epoch": 0.94, "grad_norm": 1.1166845453035672, "learning_rate": 8.428685694433891e-08, "loss": 0.2069, "step": 6995 }, { "epoch": 0.94, "grad_norm": 1.2924889724761786, "learning_rate": 8.388804198265321e-08, "loss": 0.1976, "step": 6996 }, { "epoch": 0.94, "grad_norm": 0.9526324579270022, "learning_rate": 8.349016480186656e-08, "loss": 0.138, "step": 6997 }, { "epoch": 0.94, "grad_norm": 1.0747713239233667, "learning_rate": 8.309322547787713e-08, "loss": 0.148, "step": 6998 }, { "epoch": 0.94, "grad_norm": 0.9660227252042999, "learning_rate": 8.269722408640324e-08, "loss": 0.1511, "step": 6999 }, { "epoch": 0.94, "grad_norm": 1.0087833132393098, "learning_rate": 8.230216070298502e-08, "loss": 0.1598, "step": 7000 }, { "epoch": 0.94, "grad_norm": 0.7152347525241377, "learning_rate": 8.190803540298331e-08, "loss": 0.11, "step": 7001 }, { "epoch": 0.94, "grad_norm": 1.054730053694937, "learning_rate": 8.151484826157963e-08, "loss": 0.1605, "step": 7002 }, { "epoch": 0.94, "grad_norm": 0.9315182783968157, "learning_rate": 8.112259935377786e-08, "loss": 0.189, "step": 7003 }, { "epoch": 0.94, "grad_norm": 1.1291506339491824, "learning_rate": 8.07312887544015e-08, "loss": 0.1903, "step": 7004 }, { "epoch": 0.94, "grad_norm": 0.8260030822275065, "learning_rate": 8.034091653809584e-08, "loss": 0.1344, "step": 7005 }, { "epoch": 0.94, "grad_norm": 1.212945458502857, "learning_rate": 7.995148277932685e-08, "loss": 0.1714, "step": 7006 }, { "epoch": 0.94, "grad_norm": 0.6702863924064277, "learning_rate": 7.956298755238179e-08, "loss": 0.0768, "step": 7007 }, { "epoch": 0.94, "grad_norm": 0.9002958350063365, "learning_rate": 7.91754309313686e-08, "loss": 0.1365, "step": 7008 }, { "epoch": 0.95, "grad_norm": 1.2152535770728965, "learning_rate": 7.878881299021645e-08, "loss": 0.1917, "step": 7009 }, { "epoch": 0.95, "grad_norm": 0.6844110970902733, "learning_rate": 7.840313380267473e-08, "loss": 0.1434, "step": 7010 }, { "epoch": 0.95, "grad_norm": 0.9978101672618794, "learning_rate": 7.801839344231454e-08, "loss": 0.1488, "step": 7011 }, { "epoch": 0.95, "grad_norm": 1.0447438043261512, "learning_rate": 7.763459198252831e-08, "loss": 0.1723, "step": 7012 }, { "epoch": 0.95, "grad_norm": 1.1202286791690361, "learning_rate": 7.725172949652749e-08, "loss": 0.1759, "step": 7013 }, { "epoch": 0.95, "grad_norm": 1.0791106556770236, "learning_rate": 7.686980605734695e-08, "loss": 0.2037, "step": 7014 }, { "epoch": 0.95, "grad_norm": 0.7887193245136266, "learning_rate": 7.648882173784011e-08, "loss": 0.1042, "step": 7015 }, { "epoch": 0.95, "grad_norm": 0.9201503694891774, "learning_rate": 7.610877661068273e-08, "loss": 0.1709, "step": 7016 }, { "epoch": 0.95, "grad_norm": 0.7511217234758081, "learning_rate": 7.572967074837068e-08, "loss": 0.1261, "step": 7017 }, { "epoch": 0.95, "grad_norm": 0.9753919936055008, "learning_rate": 7.535150422322058e-08, "loss": 0.1683, "step": 7018 }, { "epoch": 0.95, "grad_norm": 1.1904585194941226, "learning_rate": 7.497427710737137e-08, "loss": 0.2544, "step": 7019 }, { "epoch": 0.95, "grad_norm": 0.8916280664734003, "learning_rate": 7.459798947278052e-08, "loss": 0.121, "step": 7020 }, { "epoch": 0.95, "grad_norm": 1.0580553714593297, "learning_rate": 7.422264139122837e-08, "loss": 0.1646, "step": 7021 }, { "epoch": 0.95, "grad_norm": 1.1215091846467247, "learning_rate": 7.384823293431376e-08, "loss": 0.1692, "step": 7022 }, { "epoch": 0.95, "grad_norm": 1.0711031375615445, "learning_rate": 7.347476417345844e-08, "loss": 0.1473, "step": 7023 }, { "epoch": 0.95, "grad_norm": 0.9523342540473632, "learning_rate": 7.31022351799049e-08, "loss": 0.1668, "step": 7024 }, { "epoch": 0.95, "grad_norm": 1.106292044854448, "learning_rate": 7.273064602471347e-08, "loss": 0.1881, "step": 7025 }, { "epoch": 0.95, "grad_norm": 0.8545987396965311, "learning_rate": 7.23599967787686e-08, "loss": 0.1268, "step": 7026 }, { "epoch": 0.95, "grad_norm": 1.0527027312857609, "learning_rate": 7.19902875127737e-08, "loss": 0.1855, "step": 7027 }, { "epoch": 0.95, "grad_norm": 1.1455493082986261, "learning_rate": 7.162151829725294e-08, "loss": 0.1996, "step": 7028 }, { "epoch": 0.95, "grad_norm": 0.9139721571737562, "learning_rate": 7.125368920255171e-08, "loss": 0.1136, "step": 7029 }, { "epoch": 0.95, "grad_norm": 0.8426379912520281, "learning_rate": 7.088680029883666e-08, "loss": 0.1606, "step": 7030 }, { "epoch": 0.95, "grad_norm": 0.946100134389037, "learning_rate": 7.052085165609346e-08, "loss": 0.1707, "step": 7031 }, { "epoch": 0.95, "grad_norm": 0.8119875457454561, "learning_rate": 7.015584334412962e-08, "loss": 0.1203, "step": 7032 }, { "epoch": 0.95, "grad_norm": 0.7069691820667958, "learning_rate": 6.979177543257221e-08, "loss": 0.0931, "step": 7033 }, { "epoch": 0.95, "grad_norm": 1.04679186233318, "learning_rate": 6.942864799087012e-08, "loss": 0.19, "step": 7034 }, { "epoch": 0.95, "grad_norm": 1.1796330774852635, "learning_rate": 6.906646108829241e-08, "loss": 0.2154, "step": 7035 }, { "epoch": 0.95, "grad_norm": 1.0087850062045989, "learning_rate": 6.870521479392877e-08, "loss": 0.1623, "step": 7036 }, { "epoch": 0.95, "grad_norm": 1.1124501498450161, "learning_rate": 6.834490917668857e-08, "loss": 0.1719, "step": 7037 }, { "epoch": 0.95, "grad_norm": 1.2013818197090123, "learning_rate": 6.798554430530291e-08, "loss": 0.1917, "step": 7038 }, { "epoch": 0.95, "grad_norm": 1.1379517922246167, "learning_rate": 6.762712024832307e-08, "loss": 0.1775, "step": 7039 }, { "epoch": 0.95, "grad_norm": 0.8633570667462709, "learning_rate": 6.726963707412105e-08, "loss": 0.1271, "step": 7040 }, { "epoch": 0.95, "grad_norm": 0.7673023260235989, "learning_rate": 6.691309485088893e-08, "loss": 0.143, "step": 7041 }, { "epoch": 0.95, "grad_norm": 0.9301210642044782, "learning_rate": 6.655749364663899e-08, "loss": 0.1565, "step": 7042 }, { "epoch": 0.95, "grad_norm": 0.8745504846071491, "learning_rate": 6.620283352920532e-08, "loss": 0.1409, "step": 7043 }, { "epoch": 0.95, "grad_norm": 0.9953145554382922, "learning_rate": 6.584911456624155e-08, "loss": 0.1722, "step": 7044 }, { "epoch": 0.95, "grad_norm": 0.8967535253780745, "learning_rate": 6.54963368252215e-08, "loss": 0.1488, "step": 7045 }, { "epoch": 0.95, "grad_norm": 0.6057901333070779, "learning_rate": 6.514450037343967e-08, "loss": 0.1263, "step": 7046 }, { "epoch": 0.95, "grad_norm": 1.1072689922349281, "learning_rate": 6.479360527801182e-08, "loss": 0.1728, "step": 7047 }, { "epoch": 0.95, "grad_norm": 0.9055335826003975, "learning_rate": 6.444365160587329e-08, "loss": 0.1467, "step": 7048 }, { "epoch": 0.95, "grad_norm": 1.0323953404476154, "learning_rate": 6.409463942378013e-08, "loss": 0.1927, "step": 7049 }, { "epoch": 0.95, "grad_norm": 0.9973299311542495, "learning_rate": 6.374656879830854e-08, "loss": 0.1492, "step": 7050 }, { "epoch": 0.95, "grad_norm": 1.2156904163525593, "learning_rate": 6.339943979585539e-08, "loss": 0.2485, "step": 7051 }, { "epoch": 0.95, "grad_norm": 1.083884964488857, "learning_rate": 6.305325248263772e-08, "loss": 0.1563, "step": 7052 }, { "epoch": 0.95, "grad_norm": 0.9798685167991897, "learning_rate": 6.270800692469326e-08, "loss": 0.1885, "step": 7053 }, { "epoch": 0.95, "grad_norm": 1.0310496609265638, "learning_rate": 6.236370318787987e-08, "loss": 0.1717, "step": 7054 }, { "epoch": 0.95, "grad_norm": 1.0047648088706729, "learning_rate": 6.20203413378756e-08, "loss": 0.1657, "step": 7055 }, { "epoch": 0.95, "grad_norm": 0.9563596982256766, "learning_rate": 6.167792144017914e-08, "loss": 0.1797, "step": 7056 }, { "epoch": 0.95, "grad_norm": 1.0990766121585174, "learning_rate": 6.133644356010937e-08, "loss": 0.174, "step": 7057 }, { "epoch": 0.95, "grad_norm": 0.5602632717713026, "learning_rate": 6.099590776280528e-08, "loss": 0.0646, "step": 7058 }, { "epoch": 0.95, "grad_norm": 1.05487130623426, "learning_rate": 6.065631411322603e-08, "loss": 0.1769, "step": 7059 }, { "epoch": 0.95, "grad_norm": 0.9480356050453882, "learning_rate": 6.031766267615258e-08, "loss": 0.177, "step": 7060 }, { "epoch": 0.95, "grad_norm": 1.1392833493943963, "learning_rate": 5.997995351618324e-08, "loss": 0.2103, "step": 7061 }, { "epoch": 0.95, "grad_norm": 1.1229122868914487, "learning_rate": 5.964318669773983e-08, "loss": 0.2067, "step": 7062 }, { "epoch": 0.95, "grad_norm": 0.8403392117757154, "learning_rate": 5.9307362285061506e-08, "loss": 0.1282, "step": 7063 }, { "epoch": 0.95, "grad_norm": 0.9586021300076366, "learning_rate": 5.8972480342209815e-08, "loss": 0.1608, "step": 7064 }, { "epoch": 0.95, "grad_norm": 1.0175765619891144, "learning_rate": 5.863854093306587e-08, "loss": 0.1955, "step": 7065 }, { "epoch": 0.95, "grad_norm": 0.9411246167175679, "learning_rate": 5.830554412133038e-08, "loss": 0.15, "step": 7066 }, { "epoch": 0.95, "grad_norm": 1.138469488854617, "learning_rate": 5.7973489970524765e-08, "loss": 0.1751, "step": 7067 }, { "epoch": 0.95, "grad_norm": 0.9099681757254275, "learning_rate": 5.7642378543990574e-08, "loss": 0.151, "step": 7068 }, { "epoch": 0.95, "grad_norm": 0.9639249396763565, "learning_rate": 5.731220990488951e-08, "loss": 0.1414, "step": 7069 }, { "epoch": 0.95, "grad_norm": 1.1143911186013638, "learning_rate": 5.6982984116203957e-08, "loss": 0.1958, "step": 7070 }, { "epoch": 0.95, "grad_norm": 0.9863776859870137, "learning_rate": 5.66547012407348e-08, "loss": 0.1818, "step": 7071 }, { "epoch": 0.95, "grad_norm": 1.0352749073467844, "learning_rate": 5.632736134110528e-08, "loss": 0.1534, "step": 7072 }, { "epoch": 0.95, "grad_norm": 0.8887867656221394, "learning_rate": 5.600096447975656e-08, "loss": 0.1433, "step": 7073 }, { "epoch": 0.95, "grad_norm": 0.8658589670185685, "learning_rate": 5.567551071895161e-08, "loss": 0.1621, "step": 7074 }, { "epoch": 0.95, "grad_norm": 0.8960107262050628, "learning_rate": 5.535100012077299e-08, "loss": 0.1147, "step": 7075 }, { "epoch": 0.95, "grad_norm": 0.917790257757191, "learning_rate": 5.5027432747122856e-08, "loss": 0.1316, "step": 7076 }, { "epoch": 0.95, "grad_norm": 0.8314344666687191, "learning_rate": 5.4704808659724605e-08, "loss": 0.1594, "step": 7077 }, { "epoch": 0.95, "grad_norm": 1.0605990609014848, "learning_rate": 5.438312792012013e-08, "loss": 0.1715, "step": 7078 }, { "epoch": 0.95, "grad_norm": 1.0792548561402124, "learning_rate": 5.4062390589671995e-08, "loss": 0.1647, "step": 7079 }, { "epoch": 0.95, "grad_norm": 1.1977355579808646, "learning_rate": 5.374259672956405e-08, "loss": 0.1674, "step": 7080 }, { "epoch": 0.95, "grad_norm": 1.1260653423575646, "learning_rate": 5.342374640079806e-08, "loss": 0.1837, "step": 7081 }, { "epoch": 0.95, "grad_norm": 1.1443169958670973, "learning_rate": 5.310583966419758e-08, "loss": 0.1665, "step": 7082 }, { "epoch": 0.96, "grad_norm": 1.3508639457021028, "learning_rate": 5.2788876580404655e-08, "loss": 0.1945, "step": 7083 }, { "epoch": 0.96, "grad_norm": 0.8851511866204479, "learning_rate": 5.24728572098826e-08, "loss": 0.1132, "step": 7084 }, { "epoch": 0.96, "grad_norm": 0.6638674643897843, "learning_rate": 5.2157781612914294e-08, "loss": 0.1386, "step": 7085 }, { "epoch": 0.96, "grad_norm": 0.9774552805776522, "learning_rate": 5.184364984960222e-08, "loss": 0.2021, "step": 7086 }, { "epoch": 0.96, "grad_norm": 0.577528746012248, "learning_rate": 5.1530461979869e-08, "loss": 0.1001, "step": 7087 }, { "epoch": 0.96, "grad_norm": 1.0316230000814415, "learning_rate": 5.121821806345739e-08, "loss": 0.1916, "step": 7088 }, { "epoch": 0.96, "grad_norm": 1.0083787115060228, "learning_rate": 5.0906918159930317e-08, "loss": 0.1547, "step": 7089 }, { "epoch": 0.96, "grad_norm": 1.1536402655496538, "learning_rate": 5.0596562328670276e-08, "loss": 0.1826, "step": 7090 }, { "epoch": 0.96, "grad_norm": 1.230927145391705, "learning_rate": 5.028715062887879e-08, "loss": 0.2176, "step": 7091 }, { "epoch": 0.96, "grad_norm": 1.2918328757811322, "learning_rate": 4.997868311957921e-08, "loss": 0.2439, "step": 7092 }, { "epoch": 0.96, "grad_norm": 0.8290716527260996, "learning_rate": 4.967115985961335e-08, "loss": 0.1386, "step": 7093 }, { "epoch": 0.96, "grad_norm": 0.8768183180387156, "learning_rate": 4.936458090764373e-08, "loss": 0.1652, "step": 7094 }, { "epoch": 0.96, "grad_norm": 0.7566050134981219, "learning_rate": 4.905894632215136e-08, "loss": 0.126, "step": 7095 }, { "epoch": 0.96, "grad_norm": 0.9282169703255335, "learning_rate": 4.875425616143903e-08, "loss": 0.1553, "step": 7096 }, { "epoch": 0.96, "grad_norm": 0.8427655628596161, "learning_rate": 4.8450510483627475e-08, "loss": 0.1537, "step": 7097 }, { "epoch": 0.96, "grad_norm": 1.137470986325778, "learning_rate": 4.8147709346659244e-08, "loss": 0.1945, "step": 7098 }, { "epoch": 0.96, "grad_norm": 1.1352126481513605, "learning_rate": 4.784585280829479e-08, "loss": 0.1565, "step": 7099 }, { "epoch": 0.96, "grad_norm": 1.1504103441553881, "learning_rate": 4.754494092611639e-08, "loss": 0.2085, "step": 7100 }, { "epoch": 0.96, "grad_norm": 0.9968633346442629, "learning_rate": 4.7244973757523685e-08, "loss": 0.2032, "step": 7101 }, { "epoch": 0.96, "grad_norm": 1.0165172992525902, "learning_rate": 4.694595135973756e-08, "loss": 0.1724, "step": 7102 }, { "epoch": 0.96, "grad_norm": 1.0122556234174085, "learning_rate": 4.664787378979907e-08, "loss": 0.1634, "step": 7103 }, { "epoch": 0.96, "grad_norm": 0.9293130591260527, "learning_rate": 4.635074110456883e-08, "loss": 0.1819, "step": 7104 }, { "epoch": 0.96, "grad_norm": 0.5769741320825271, "learning_rate": 4.6054553360725396e-08, "loss": 0.1143, "step": 7105 }, { "epoch": 0.96, "grad_norm": 1.1041168200322484, "learning_rate": 4.575931061477024e-08, "loss": 0.1956, "step": 7106 }, { "epoch": 0.96, "grad_norm": 0.781405133150644, "learning_rate": 4.546501292302164e-08, "loss": 0.1242, "step": 7107 }, { "epoch": 0.96, "grad_norm": 0.9367363618602934, "learning_rate": 4.517166034161913e-08, "loss": 0.1819, "step": 7108 }, { "epoch": 0.96, "grad_norm": 0.9798406071983519, "learning_rate": 4.487925292652184e-08, "loss": 0.1696, "step": 7109 }, { "epoch": 0.96, "grad_norm": 1.22317732364511, "learning_rate": 4.458779073350905e-08, "loss": 0.2078, "step": 7110 }, { "epoch": 0.96, "grad_norm": 1.1818405245834898, "learning_rate": 4.429727381817794e-08, "loss": 0.1939, "step": 7111 }, { "epoch": 0.96, "grad_norm": 0.7829602075323107, "learning_rate": 4.400770223594697e-08, "loss": 0.14, "step": 7112 }, { "epoch": 0.96, "grad_norm": 1.0664728644433223, "learning_rate": 4.371907604205417e-08, "loss": 0.1581, "step": 7113 }, { "epoch": 0.96, "grad_norm": 0.9623471058950345, "learning_rate": 4.34313952915566e-08, "loss": 0.162, "step": 7114 }, { "epoch": 0.96, "grad_norm": 0.8707344576490329, "learning_rate": 4.3144660039330934e-08, "loss": 0.1626, "step": 7115 }, { "epoch": 0.96, "grad_norm": 0.6640611329853282, "learning_rate": 4.285887034007452e-08, "loss": 0.1411, "step": 7116 }, { "epoch": 0.96, "grad_norm": 0.9995482486343169, "learning_rate": 4.2574026248303754e-08, "loss": 0.1438, "step": 7117 }, { "epoch": 0.96, "grad_norm": 0.764746758357648, "learning_rate": 4.229012781835351e-08, "loss": 0.135, "step": 7118 }, { "epoch": 0.96, "grad_norm": 1.042184059637096, "learning_rate": 4.200717510437935e-08, "loss": 0.1845, "step": 7119 }, { "epoch": 0.96, "grad_norm": 0.8231663939172652, "learning_rate": 4.172516816035754e-08, "loss": 0.1189, "step": 7120 }, { "epoch": 0.96, "grad_norm": 1.1482020077152615, "learning_rate": 4.1444107040081726e-08, "loss": 0.2023, "step": 7121 }, { "epoch": 0.96, "grad_norm": 0.5814978856094835, "learning_rate": 4.116399179716679e-08, "loss": 0.0988, "step": 7122 }, { "epoch": 0.96, "grad_norm": 1.0806382615287595, "learning_rate": 4.088482248504666e-08, "loss": 0.1924, "step": 7123 }, { "epoch": 0.96, "grad_norm": 0.6798039409914879, "learning_rate": 4.0606599156973736e-08, "loss": 0.1137, "step": 7124 }, { "epoch": 0.96, "grad_norm": 1.071348682999224, "learning_rate": 4.032932186602223e-08, "loss": 0.1698, "step": 7125 }, { "epoch": 0.96, "grad_norm": 1.0809547600928047, "learning_rate": 4.0052990665083724e-08, "loss": 0.1758, "step": 7126 }, { "epoch": 0.96, "grad_norm": 0.8891227682595524, "learning_rate": 3.9777605606870497e-08, "loss": 0.1429, "step": 7127 }, { "epoch": 0.96, "grad_norm": 0.8231024386783966, "learning_rate": 3.9503166743914415e-08, "loss": 0.1662, "step": 7128 }, { "epoch": 0.96, "grad_norm": 0.9594891247269897, "learning_rate": 3.922967412856582e-08, "loss": 0.1647, "step": 7129 }, { "epoch": 0.96, "grad_norm": 0.9131795656641276, "learning_rate": 3.895712781299576e-08, "loss": 0.1652, "step": 7130 }, { "epoch": 0.96, "grad_norm": 0.7228939317846501, "learning_rate": 3.868552784919433e-08, "loss": 0.1246, "step": 7131 }, { "epoch": 0.96, "grad_norm": 1.0129629393894297, "learning_rate": 3.8414874288970613e-08, "loss": 0.1874, "step": 7132 }, { "epoch": 0.96, "grad_norm": 1.067232098632618, "learning_rate": 3.814516718395389e-08, "loss": 0.1718, "step": 7133 }, { "epoch": 0.96, "grad_norm": 0.816623799691802, "learning_rate": 3.787640658559244e-08, "loss": 0.1104, "step": 7134 }, { "epoch": 0.96, "grad_norm": 0.7774751482357395, "learning_rate": 3.7608592545153586e-08, "loss": 0.12, "step": 7135 }, { "epoch": 0.96, "grad_norm": 1.0292186791493732, "learning_rate": 3.734172511372591e-08, "loss": 0.1623, "step": 7136 }, { "epoch": 0.96, "grad_norm": 0.797518342786685, "learning_rate": 3.707580434221481e-08, "loss": 0.125, "step": 7137 }, { "epoch": 0.96, "grad_norm": 1.052172413316302, "learning_rate": 3.681083028134746e-08, "loss": 0.1918, "step": 7138 }, { "epoch": 0.96, "grad_norm": 0.9519166433316231, "learning_rate": 3.654680298166846e-08, "loss": 0.1474, "step": 7139 }, { "epoch": 0.96, "grad_norm": 0.8345894715268443, "learning_rate": 3.628372249354362e-08, "loss": 0.1558, "step": 7140 }, { "epoch": 0.96, "grad_norm": 0.8075613870443407, "learning_rate": 3.6021588867156674e-08, "loss": 0.1315, "step": 7141 }, { "epoch": 0.96, "grad_norm": 1.1038562707070174, "learning_rate": 3.576040215251209e-08, "loss": 0.2047, "step": 7142 }, { "epoch": 0.96, "grad_norm": 0.9076058316018562, "learning_rate": 3.5500162399432216e-08, "loss": 0.116, "step": 7143 }, { "epoch": 0.96, "grad_norm": 1.0130202700218056, "learning_rate": 3.524086965755957e-08, "loss": 0.1608, "step": 7144 }, { "epoch": 0.96, "grad_norm": 1.1517224146659804, "learning_rate": 3.498252397635626e-08, "loss": 0.1573, "step": 7145 }, { "epoch": 0.96, "grad_norm": 1.04800406541103, "learning_rate": 3.472512540510342e-08, "loss": 0.1801, "step": 7146 }, { "epoch": 0.96, "grad_norm": 1.1235299369201044, "learning_rate": 3.446867399290177e-08, "loss": 0.1915, "step": 7147 }, { "epoch": 0.96, "grad_norm": 0.82576423000679, "learning_rate": 3.421316978867106e-08, "loss": 0.1177, "step": 7148 }, { "epoch": 0.96, "grad_norm": 1.2336444681280472, "learning_rate": 3.395861284114954e-08, "loss": 0.178, "step": 7149 }, { "epoch": 0.96, "grad_norm": 1.0511690062893986, "learning_rate": 3.3705003198896135e-08, "loss": 0.1353, "step": 7150 }, { "epoch": 0.96, "grad_norm": 1.1623202608562717, "learning_rate": 3.3452340910288815e-08, "loss": 0.1662, "step": 7151 }, { "epoch": 0.96, "grad_norm": 0.8899130861015885, "learning_rate": 3.320062602352459e-08, "loss": 0.1623, "step": 7152 }, { "epoch": 0.96, "grad_norm": 0.880947129280693, "learning_rate": 3.294985858661948e-08, "loss": 0.1659, "step": 7153 }, { "epoch": 0.96, "grad_norm": 1.111357308523082, "learning_rate": 3.270003864740911e-08, "loss": 0.1985, "step": 7154 }, { "epoch": 0.96, "grad_norm": 1.2365417875639373, "learning_rate": 3.245116625354816e-08, "loss": 0.1787, "step": 7155 }, { "epoch": 0.96, "grad_norm": 0.9860269263970403, "learning_rate": 3.220324145251141e-08, "loss": 0.1624, "step": 7156 }, { "epoch": 0.97, "grad_norm": 1.1226807968652617, "learning_rate": 3.195626429159104e-08, "loss": 0.1942, "step": 7157 }, { "epoch": 0.97, "grad_norm": 0.9293532972832927, "learning_rate": 3.171023481790048e-08, "loss": 0.1474, "step": 7158 }, { "epoch": 0.97, "grad_norm": 1.13725811102622, "learning_rate": 3.146515307837106e-08, "loss": 0.1948, "step": 7159 }, { "epoch": 0.97, "grad_norm": 0.7634710690128641, "learning_rate": 3.1221019119753725e-08, "loss": 0.1375, "step": 7160 }, { "epoch": 0.97, "grad_norm": 0.7103459842648718, "learning_rate": 3.097783298861845e-08, "loss": 0.1235, "step": 7161 }, { "epoch": 0.97, "grad_norm": 0.8916320531948476, "learning_rate": 3.0735594731355346e-08, "loss": 0.133, "step": 7162 }, { "epoch": 0.97, "grad_norm": 1.0495667457310545, "learning_rate": 3.0494304394172446e-08, "loss": 0.1681, "step": 7163 }, { "epoch": 0.97, "grad_norm": 0.9538875134378775, "learning_rate": 3.0253962023097386e-08, "loss": 0.1546, "step": 7164 }, { "epoch": 0.97, "grad_norm": 0.7676624392891073, "learning_rate": 3.001456766397737e-08, "loss": 0.1301, "step": 7165 }, { "epoch": 0.97, "grad_norm": 0.8575544276570009, "learning_rate": 2.9776121362478095e-08, "loss": 0.1255, "step": 7166 }, { "epoch": 0.97, "grad_norm": 1.2077879297113376, "learning_rate": 2.95386231640854e-08, "loss": 0.1803, "step": 7167 }, { "epoch": 0.97, "grad_norm": 1.1869999781766283, "learning_rate": 2.9302073114103047e-08, "loss": 0.2136, "step": 7168 }, { "epoch": 0.97, "grad_norm": 1.0599898584056127, "learning_rate": 2.9066471257654384e-08, "loss": 0.1879, "step": 7169 }, { "epoch": 0.97, "grad_norm": 1.0912505396712935, "learning_rate": 2.8831817639682347e-08, "loss": 0.1818, "step": 7170 }, { "epoch": 0.97, "grad_norm": 1.2673451256923198, "learning_rate": 2.859811230494891e-08, "loss": 0.2063, "step": 7171 }, { "epoch": 0.97, "grad_norm": 0.7425769934436572, "learning_rate": 2.8365355298034525e-08, "loss": 0.1129, "step": 7172 }, { "epoch": 0.97, "grad_norm": 0.5497071382974267, "learning_rate": 2.8133546663339228e-08, "loss": 0.1447, "step": 7173 }, { "epoch": 0.97, "grad_norm": 0.7809700254729391, "learning_rate": 2.7902686445082648e-08, "loss": 0.1332, "step": 7174 }, { "epoch": 0.97, "grad_norm": 0.7839604332547094, "learning_rate": 2.767277468730123e-08, "loss": 0.1244, "step": 7175 }, { "epoch": 0.97, "grad_norm": 0.9290002981960046, "learning_rate": 2.744381143385433e-08, "loss": 0.1276, "step": 7176 }, { "epoch": 0.97, "grad_norm": 0.6519756782079191, "learning_rate": 2.7215796728416453e-08, "loss": 0.1087, "step": 7177 }, { "epoch": 0.97, "grad_norm": 0.8148680928790976, "learning_rate": 2.6988730614483927e-08, "loss": 0.1581, "step": 7178 }, { "epoch": 0.97, "grad_norm": 1.01240355340718, "learning_rate": 2.6762613135370984e-08, "loss": 0.1533, "step": 7179 }, { "epoch": 0.97, "grad_norm": 0.9768972303493653, "learning_rate": 2.653744433421035e-08, "loss": 0.1506, "step": 7180 }, { "epoch": 0.97, "grad_norm": 1.0130511256957622, "learning_rate": 2.6313224253955437e-08, "loss": 0.2188, "step": 7181 }, { "epoch": 0.97, "grad_norm": 1.0026290211568003, "learning_rate": 2.608995293737704e-08, "loss": 0.171, "step": 7182 }, { "epoch": 0.97, "grad_norm": 0.9479050653312758, "learning_rate": 2.5867630427065527e-08, "loss": 0.136, "step": 7183 }, { "epoch": 0.97, "grad_norm": 1.1745330928350917, "learning_rate": 2.564625676543142e-08, "loss": 0.2282, "step": 7184 }, { "epoch": 0.97, "grad_norm": 0.750369407768691, "learning_rate": 2.5425831994702055e-08, "loss": 0.0973, "step": 7185 }, { "epoch": 0.97, "grad_norm": 1.1555046253244834, "learning_rate": 2.5206356156924906e-08, "loss": 0.1633, "step": 7186 }, { "epoch": 0.97, "grad_norm": 0.9774818080683998, "learning_rate": 2.4987829293967036e-08, "loss": 0.1797, "step": 7187 }, { "epoch": 0.97, "grad_norm": 0.677692783631185, "learning_rate": 2.4770251447513992e-08, "loss": 0.1178, "step": 7188 }, { "epoch": 0.97, "grad_norm": 0.7593551966305777, "learning_rate": 2.4553622659070352e-08, "loss": 0.1131, "step": 7189 }, { "epoch": 0.97, "grad_norm": 0.9900820148602745, "learning_rate": 2.4337942969958618e-08, "loss": 0.1621, "step": 7190 }, { "epoch": 0.97, "grad_norm": 0.9186930840477887, "learning_rate": 2.412321242132143e-08, "loss": 0.1347, "step": 7191 }, { "epoch": 0.97, "grad_norm": 0.8633800034696428, "learning_rate": 2.390943105412047e-08, "loss": 0.1396, "step": 7192 }, { "epoch": 0.97, "grad_norm": 0.8770752937983549, "learning_rate": 2.3696598909135336e-08, "loss": 0.1334, "step": 7193 }, { "epoch": 0.97, "grad_norm": 0.7448222391558278, "learning_rate": 2.348471602696578e-08, "loss": 0.1211, "step": 7194 }, { "epoch": 0.97, "grad_norm": 0.8979615622741122, "learning_rate": 2.3273782448030024e-08, "loss": 0.1711, "step": 7195 }, { "epoch": 0.97, "grad_norm": 1.2793402219024927, "learning_rate": 2.3063798212564214e-08, "loss": 0.2351, "step": 7196 }, { "epoch": 0.97, "grad_norm": 1.0648073295227254, "learning_rate": 2.2854763360624088e-08, "loss": 0.19, "step": 7197 }, { "epoch": 0.97, "grad_norm": 1.114090649327106, "learning_rate": 2.2646677932085524e-08, "loss": 0.1891, "step": 7198 }, { "epoch": 0.97, "grad_norm": 1.1266808776654769, "learning_rate": 2.2439541966641775e-08, "loss": 0.152, "step": 7199 }, { "epoch": 0.97, "grad_norm": 0.7972374345818846, "learning_rate": 2.223335550380512e-08, "loss": 0.1495, "step": 7200 }, { "epoch": 0.97, "grad_norm": 1.010171096651144, "learning_rate": 2.2028118582906877e-08, "loss": 0.1682, "step": 7201 }, { "epoch": 0.97, "grad_norm": 0.842067508137843, "learning_rate": 2.182383124309795e-08, "loss": 0.1332, "step": 7202 }, { "epoch": 0.97, "grad_norm": 0.9704381048106576, "learning_rate": 2.1620493523347164e-08, "loss": 0.1488, "step": 7203 }, { "epoch": 0.97, "grad_norm": 1.1933360995515734, "learning_rate": 2.1418105462442383e-08, "loss": 0.2011, "step": 7204 }, { "epoch": 0.97, "grad_norm": 1.089040431988871, "learning_rate": 2.1216667098990508e-08, "loss": 0.1362, "step": 7205 }, { "epoch": 0.97, "grad_norm": 1.0373171487015365, "learning_rate": 2.101617847141746e-08, "loss": 0.1914, "step": 7206 }, { "epoch": 0.97, "grad_norm": 0.7544403257449053, "learning_rate": 2.0816639617967648e-08, "loss": 0.1319, "step": 7207 }, { "epoch": 0.97, "grad_norm": 1.0375810487948913, "learning_rate": 2.061805057670452e-08, "loss": 0.1614, "step": 7208 }, { "epoch": 0.97, "grad_norm": 1.027484278318831, "learning_rate": 2.042041138551054e-08, "loss": 0.1617, "step": 7209 }, { "epoch": 0.97, "grad_norm": 0.9209026914053957, "learning_rate": 2.0223722082085563e-08, "loss": 0.1543, "step": 7210 }, { "epoch": 0.97, "grad_norm": 1.0514316699325272, "learning_rate": 2.0027982703950676e-08, "loss": 0.1488, "step": 7211 }, { "epoch": 0.97, "grad_norm": 1.0098928158279667, "learning_rate": 1.9833193288443797e-08, "loss": 0.1685, "step": 7212 }, { "epoch": 0.97, "grad_norm": 0.8569993196651691, "learning_rate": 1.9639353872722422e-08, "loss": 0.1228, "step": 7213 }, { "epoch": 0.97, "grad_norm": 0.9464963849491995, "learning_rate": 1.944646449376253e-08, "loss": 0.1454, "step": 7214 }, { "epoch": 0.97, "grad_norm": 0.6481379278355931, "learning_rate": 1.9254525188359686e-08, "loss": 0.1375, "step": 7215 }, { "epoch": 0.97, "grad_norm": 0.9326711465592867, "learning_rate": 1.9063535993126826e-08, "loss": 0.1711, "step": 7216 }, { "epoch": 0.97, "grad_norm": 0.7846424844874648, "learning_rate": 1.887349694449647e-08, "loss": 0.1455, "step": 7217 }, { "epoch": 0.97, "grad_norm": 1.040236365964663, "learning_rate": 1.868440807872074e-08, "loss": 0.2096, "step": 7218 }, { "epoch": 0.97, "grad_norm": 1.2277799145822967, "learning_rate": 1.8496269431867996e-08, "loss": 0.1917, "step": 7219 }, { "epoch": 0.97, "grad_norm": 1.087620456197298, "learning_rate": 1.8309081039828424e-08, "loss": 0.1813, "step": 7220 }, { "epoch": 0.97, "grad_norm": 0.988527968024858, "learning_rate": 1.812284293830846e-08, "loss": 0.1693, "step": 7221 }, { "epoch": 0.97, "grad_norm": 1.174760467243767, "learning_rate": 1.793755516283524e-08, "loss": 0.1696, "step": 7222 }, { "epoch": 0.97, "grad_norm": 0.856082902665888, "learning_rate": 1.7753217748752717e-08, "loss": 0.1444, "step": 7223 }, { "epoch": 0.97, "grad_norm": 1.2434046270594465, "learning_rate": 1.7569830731224978e-08, "loss": 0.2321, "step": 7224 }, { "epoch": 0.97, "grad_norm": 0.9672506749297931, "learning_rate": 1.7387394145234048e-08, "loss": 0.1582, "step": 7225 }, { "epoch": 0.97, "grad_norm": 0.9261964630001277, "learning_rate": 1.7205908025580976e-08, "loss": 0.1554, "step": 7226 }, { "epoch": 0.97, "grad_norm": 0.7729255813328016, "learning_rate": 1.7025372406885287e-08, "loss": 0.1316, "step": 7227 }, { "epoch": 0.97, "grad_norm": 1.0059223979963525, "learning_rate": 1.684578732358555e-08, "loss": 0.1569, "step": 7228 }, { "epoch": 0.97, "grad_norm": 1.0446198285826305, "learning_rate": 1.6667152809938802e-08, "loss": 0.1595, "step": 7229 }, { "epoch": 0.97, "grad_norm": 1.0095543271374934, "learning_rate": 1.648946890002112e-08, "loss": 0.1918, "step": 7230 }, { "epoch": 0.98, "grad_norm": 1.3723155739933133, "learning_rate": 1.631273562772595e-08, "loss": 0.2075, "step": 7231 }, { "epoch": 0.98, "grad_norm": 0.9762424147694152, "learning_rate": 1.613695302676688e-08, "loss": 0.1497, "step": 7232 }, { "epoch": 0.98, "grad_norm": 1.029560168958185, "learning_rate": 1.5962121130675968e-08, "loss": 0.1285, "step": 7233 }, { "epoch": 0.98, "grad_norm": 0.5832255259110695, "learning_rate": 1.578823997280321e-08, "loss": 0.0971, "step": 7234 }, { "epoch": 0.98, "grad_norm": 0.7837626497995986, "learning_rate": 1.561530958631763e-08, "loss": 0.1148, "step": 7235 }, { "epoch": 0.98, "grad_norm": 0.9782952044692776, "learning_rate": 1.5443330004206724e-08, "loss": 0.1179, "step": 7236 }, { "epoch": 0.98, "grad_norm": 0.9750351367830732, "learning_rate": 1.527230125927648e-08, "loss": 0.1588, "step": 7237 }, { "epoch": 0.98, "grad_norm": 1.289382533541146, "learning_rate": 1.510222338415246e-08, "loss": 0.1741, "step": 7238 }, { "epoch": 0.98, "grad_norm": 0.9982328209354667, "learning_rate": 1.4933096411277605e-08, "loss": 0.1695, "step": 7239 }, { "epoch": 0.98, "grad_norm": 1.04918327919573, "learning_rate": 1.4764920372914437e-08, "loss": 0.2032, "step": 7240 }, { "epoch": 0.98, "grad_norm": 0.8385409122443841, "learning_rate": 1.459769530114341e-08, "loss": 0.1213, "step": 7241 }, { "epoch": 0.98, "grad_norm": 0.9173073402864916, "learning_rate": 1.4431421227863452e-08, "loss": 0.1477, "step": 7242 }, { "epoch": 0.98, "grad_norm": 0.9260723412031067, "learning_rate": 1.4266098184792521e-08, "loss": 0.1259, "step": 7243 }, { "epoch": 0.98, "grad_norm": 1.1246784388623132, "learning_rate": 1.410172620346817e-08, "loss": 0.1927, "step": 7244 }, { "epoch": 0.98, "grad_norm": 0.7363178390574152, "learning_rate": 1.3938305315244204e-08, "loss": 0.1188, "step": 7245 }, { "epoch": 0.98, "grad_norm": 1.187290787046644, "learning_rate": 1.377583555129458e-08, "loss": 0.1643, "step": 7246 }, { "epoch": 0.98, "grad_norm": 0.8644567440517933, "learning_rate": 1.3614316942611172e-08, "loss": 0.1745, "step": 7247 }, { "epoch": 0.98, "grad_norm": 1.1389402791725147, "learning_rate": 1.3453749520005999e-08, "loss": 0.1849, "step": 7248 }, { "epoch": 0.98, "grad_norm": 0.5376808021453415, "learning_rate": 1.3294133314106784e-08, "loss": 0.1154, "step": 7249 }, { "epoch": 0.98, "grad_norm": 0.964977313300608, "learning_rate": 1.3135468355361946e-08, "loss": 0.1339, "step": 7250 }, { "epoch": 0.98, "grad_norm": 0.8092263798219078, "learning_rate": 1.2977754674038389e-08, "loss": 0.1589, "step": 7251 }, { "epoch": 0.98, "grad_norm": 0.9753080140822372, "learning_rate": 1.2820992300220381e-08, "loss": 0.1768, "step": 7252 }, { "epoch": 0.98, "grad_norm": 1.157298772867146, "learning_rate": 1.2665181263812332e-08, "loss": 0.151, "step": 7253 }, { "epoch": 0.98, "grad_norm": 0.8593569325985204, "learning_rate": 1.2510321594534913e-08, "loss": 0.1592, "step": 7254 }, { "epoch": 0.98, "grad_norm": 0.8763379770282465, "learning_rate": 1.235641332192894e-08, "loss": 0.1391, "step": 7255 }, { "epoch": 0.98, "grad_norm": 1.1127012964508203, "learning_rate": 1.2203456475354258e-08, "loss": 0.2151, "step": 7256 }, { "epoch": 0.98, "grad_norm": 0.9515050685915679, "learning_rate": 1.2051451083988086e-08, "loss": 0.146, "step": 7257 }, { "epoch": 0.98, "grad_norm": 0.6718886105895752, "learning_rate": 1.1900397176825562e-08, "loss": 0.1052, "step": 7258 }, { "epoch": 0.98, "grad_norm": 1.0012577090661603, "learning_rate": 1.1750294782682525e-08, "loss": 0.1584, "step": 7259 }, { "epoch": 0.98, "grad_norm": 0.849355626647822, "learning_rate": 1.1601143930191072e-08, "loss": 0.1121, "step": 7260 }, { "epoch": 0.98, "grad_norm": 0.9037331862099334, "learning_rate": 1.1452944647802889e-08, "loss": 0.1462, "step": 7261 }, { "epoch": 0.98, "grad_norm": 1.1097003924124773, "learning_rate": 1.1305696963788693e-08, "loss": 0.1887, "step": 7262 }, { "epoch": 0.98, "grad_norm": 1.124875701096774, "learning_rate": 1.1159400906236018e-08, "loss": 0.1615, "step": 7263 }, { "epoch": 0.98, "grad_norm": 0.9528369971918839, "learning_rate": 1.1014056503051984e-08, "loss": 0.1464, "step": 7264 }, { "epoch": 0.98, "grad_norm": 0.8427228653486243, "learning_rate": 1.0869663781962191e-08, "loss": 0.1261, "step": 7265 }, { "epoch": 0.98, "grad_norm": 0.9144785428667102, "learning_rate": 1.072622277051072e-08, "loss": 0.1345, "step": 7266 }, { "epoch": 0.98, "grad_norm": 0.7500716329958492, "learning_rate": 1.0583733496059566e-08, "loss": 0.1432, "step": 7267 }, { "epoch": 0.98, "grad_norm": 1.0088395873869167, "learning_rate": 1.0442195985789771e-08, "loss": 0.1641, "step": 7268 }, { "epoch": 0.98, "grad_norm": 1.1239988962832834, "learning_rate": 1.0301610266700291e-08, "loss": 0.18, "step": 7269 }, { "epoch": 0.98, "grad_norm": 0.8319645276727875, "learning_rate": 1.0161976365609117e-08, "loss": 0.1256, "step": 7270 }, { "epoch": 0.98, "grad_norm": 1.0874139074927234, "learning_rate": 1.002329430915161e-08, "loss": 0.1777, "step": 7271 }, { "epoch": 0.98, "grad_norm": 0.8915205529451817, "learning_rate": 9.885564123783276e-09, "loss": 0.1141, "step": 7272 }, { "epoch": 0.98, "grad_norm": 1.092484900392333, "learning_rate": 9.748785835776431e-09, "loss": 0.1911, "step": 7273 }, { "epoch": 0.98, "grad_norm": 1.0762850586966235, "learning_rate": 9.612959471222428e-09, "loss": 0.16, "step": 7274 }, { "epoch": 0.98, "grad_norm": 1.2123503611139166, "learning_rate": 9.478085056031095e-09, "loss": 0.1792, "step": 7275 }, { "epoch": 0.98, "grad_norm": 0.8380386613981863, "learning_rate": 9.34416261593074e-09, "loss": 0.1535, "step": 7276 }, { "epoch": 0.98, "grad_norm": 0.9871674725544453, "learning_rate": 9.211192176468153e-09, "loss": 0.1628, "step": 7277 }, { "epoch": 0.98, "grad_norm": 0.8228464023622025, "learning_rate": 9.079173763007488e-09, "loss": 0.1461, "step": 7278 }, { "epoch": 0.98, "grad_norm": 0.8482191488217737, "learning_rate": 8.948107400733596e-09, "loss": 0.1664, "step": 7279 }, { "epoch": 0.98, "grad_norm": 0.8540544718796427, "learning_rate": 8.817993114647039e-09, "loss": 0.1423, "step": 7280 }, { "epoch": 0.98, "grad_norm": 1.3316424281721553, "learning_rate": 8.68883092956796e-09, "loss": 0.2056, "step": 7281 }, { "epoch": 0.98, "grad_norm": 0.7698594825504441, "learning_rate": 8.560620870136094e-09, "loss": 0.1205, "step": 7282 }, { "epoch": 0.98, "grad_norm": 0.9917504534722746, "learning_rate": 8.433362960806879e-09, "loss": 0.1349, "step": 7283 }, { "epoch": 0.98, "grad_norm": 1.1031158520198243, "learning_rate": 8.307057225856451e-09, "loss": 0.1633, "step": 7284 }, { "epoch": 0.98, "grad_norm": 1.0593188248946113, "learning_rate": 8.181703689378318e-09, "loss": 0.1819, "step": 7285 }, { "epoch": 0.98, "grad_norm": 0.8318870856893527, "learning_rate": 8.05730237528446e-09, "loss": 0.1257, "step": 7286 }, { "epoch": 0.98, "grad_norm": 0.9516334213539437, "learning_rate": 7.93385330730534e-09, "loss": 0.1761, "step": 7287 }, { "epoch": 0.98, "grad_norm": 1.024260309863008, "learning_rate": 7.811356508989343e-09, "loss": 0.1852, "step": 7288 }, { "epoch": 0.98, "grad_norm": 1.3534508501554439, "learning_rate": 7.689812003703889e-09, "loss": 0.2047, "step": 7289 }, { "epoch": 0.98, "grad_norm": 0.6709242278965117, "learning_rate": 7.569219814634877e-09, "loss": 0.1064, "step": 7290 }, { "epoch": 0.98, "grad_norm": 0.9889618753104136, "learning_rate": 7.4495799647850145e-09, "loss": 0.1622, "step": 7291 }, { "epoch": 0.98, "grad_norm": 0.7780964501208475, "learning_rate": 7.330892476976603e-09, "loss": 0.1603, "step": 7292 }, { "epoch": 0.98, "grad_norm": 0.9836105095780547, "learning_rate": 7.2131573738504215e-09, "loss": 0.1263, "step": 7293 }, { "epoch": 0.98, "grad_norm": 0.9207374576315601, "learning_rate": 7.096374677865725e-09, "loss": 0.1255, "step": 7294 }, { "epoch": 0.98, "grad_norm": 1.6140018417570645, "learning_rate": 6.980544411298584e-09, "loss": 0.2622, "step": 7295 }, { "epoch": 0.98, "grad_norm": 0.9766592591255876, "learning_rate": 6.865666596244658e-09, "loss": 0.1588, "step": 7296 }, { "epoch": 0.98, "grad_norm": 0.9876291005244302, "learning_rate": 6.7517412546180824e-09, "loss": 0.1427, "step": 7297 }, { "epoch": 0.98, "grad_norm": 0.9901286785854136, "learning_rate": 6.638768408150364e-09, "loss": 0.1608, "step": 7298 }, { "epoch": 0.98, "grad_norm": 1.2148659920416132, "learning_rate": 6.5267480783920425e-09, "loss": 0.125, "step": 7299 }, { "epoch": 0.98, "grad_norm": 0.9811744867323801, "learning_rate": 6.4156802867121334e-09, "loss": 0.1228, "step": 7300 }, { "epoch": 0.98, "grad_norm": 0.9399186308217071, "learning_rate": 6.305565054296469e-09, "loss": 0.1731, "step": 7301 }, { "epoch": 0.98, "grad_norm": 0.735161724371514, "learning_rate": 6.19640240215158e-09, "loss": 0.1387, "step": 7302 }, { "epoch": 0.98, "grad_norm": 1.0750703866323073, "learning_rate": 6.0881923511002525e-09, "loss": 0.1477, "step": 7303 }, { "epoch": 0.98, "grad_norm": 0.8462896118346415, "learning_rate": 5.980934921783754e-09, "loss": 0.1392, "step": 7304 }, { "epoch": 0.99, "grad_norm": 0.8748958631299202, "learning_rate": 5.874630134663495e-09, "loss": 0.1692, "step": 7305 }, { "epoch": 0.99, "grad_norm": 1.0011727154256957, "learning_rate": 5.769278010016588e-09, "loss": 0.1758, "step": 7306 }, { "epoch": 0.99, "grad_norm": 0.8701555751928727, "learning_rate": 5.664878567940291e-09, "loss": 0.1355, "step": 7307 }, { "epoch": 0.99, "grad_norm": 0.847642738538414, "learning_rate": 5.561431828349229e-09, "loss": 0.1187, "step": 7308 }, { "epoch": 0.99, "grad_norm": 1.0211249685906114, "learning_rate": 5.458937810977061e-09, "loss": 0.1819, "step": 7309 }, { "epoch": 0.99, "grad_norm": 1.095285407817043, "learning_rate": 5.35739653537426e-09, "loss": 0.1901, "step": 7310 }, { "epoch": 0.99, "grad_norm": 0.7631258917254583, "learning_rate": 5.256808020911441e-09, "loss": 0.1099, "step": 7311 }, { "epoch": 0.99, "grad_norm": 0.9709794182574086, "learning_rate": 5.15717228677548e-09, "loss": 0.1713, "step": 7312 }, { "epoch": 0.99, "grad_norm": 0.7240899504050898, "learning_rate": 5.05848935197395e-09, "loss": 0.0958, "step": 7313 }, { "epoch": 0.99, "grad_norm": 0.9478565351997758, "learning_rate": 4.960759235330126e-09, "loss": 0.1723, "step": 7314 }, { "epoch": 0.99, "grad_norm": 1.0383124331202234, "learning_rate": 4.863981955486874e-09, "loss": 0.181, "step": 7315 }, { "epoch": 0.99, "grad_norm": 0.9119123475860408, "learning_rate": 4.768157530905537e-09, "loss": 0.1336, "step": 7316 }, { "epoch": 0.99, "grad_norm": 1.1138665966261432, "learning_rate": 4.6732859798653825e-09, "loss": 0.1664, "step": 7317 }, { "epoch": 0.99, "grad_norm": 0.8862830376148857, "learning_rate": 4.579367320462491e-09, "loss": 0.1634, "step": 7318 }, { "epoch": 0.99, "grad_norm": 0.8772252008345854, "learning_rate": 4.486401570614196e-09, "loss": 0.1598, "step": 7319 }, { "epoch": 0.99, "grad_norm": 0.8481296613885574, "learning_rate": 4.39438874805298e-09, "loss": 0.114, "step": 7320 }, { "epoch": 0.99, "grad_norm": 1.0606002998023984, "learning_rate": 4.3033288703320245e-09, "loss": 0.1448, "step": 7321 }, { "epoch": 0.99, "grad_norm": 1.1362527032118839, "learning_rate": 4.213221954820212e-09, "loss": 0.187, "step": 7322 }, { "epoch": 0.99, "grad_norm": 1.0233662832087336, "learning_rate": 4.124068018707683e-09, "loss": 0.1689, "step": 7323 }, { "epoch": 0.99, "grad_norm": 0.7274149033223958, "learning_rate": 4.035867078999723e-09, "loss": 0.1442, "step": 7324 }, { "epoch": 0.99, "grad_norm": 1.0521711151182591, "learning_rate": 3.94861915252176e-09, "loss": 0.1665, "step": 7325 }, { "epoch": 0.99, "grad_norm": 0.9383749304079361, "learning_rate": 3.862324255917704e-09, "loss": 0.1755, "step": 7326 }, { "epoch": 0.99, "grad_norm": 1.1633300745439148, "learning_rate": 3.776982405647167e-09, "loss": 0.1629, "step": 7327 }, { "epoch": 0.99, "grad_norm": 1.1981072672178041, "learning_rate": 3.692593617991569e-09, "loss": 0.2145, "step": 7328 }, { "epoch": 0.99, "grad_norm": 0.9794479382122182, "learning_rate": 3.6091579090469233e-09, "loss": 0.1816, "step": 7329 }, { "epoch": 0.99, "grad_norm": 0.9570975839741919, "learning_rate": 3.526675294730497e-09, "loss": 0.1642, "step": 7330 }, { "epoch": 0.99, "grad_norm": 0.8883787274567082, "learning_rate": 3.4451457907758167e-09, "loss": 0.1359, "step": 7331 }, { "epoch": 0.99, "grad_norm": 1.0110702097677413, "learning_rate": 3.364569412734886e-09, "loss": 0.1682, "step": 7332 }, { "epoch": 0.99, "grad_norm": 0.5581022894592724, "learning_rate": 3.2849461759781877e-09, "loss": 0.0757, "step": 7333 }, { "epoch": 0.99, "grad_norm": 0.9247373091213249, "learning_rate": 3.2062760956952377e-09, "loss": 0.1656, "step": 7334 }, { "epoch": 0.99, "grad_norm": 1.050303215645153, "learning_rate": 3.128559186892366e-09, "loss": 0.1912, "step": 7335 }, { "epoch": 0.99, "grad_norm": 0.833293330729349, "learning_rate": 3.0517954643943802e-09, "loss": 0.1576, "step": 7336 }, { "epoch": 0.99, "grad_norm": 0.8419055023895928, "learning_rate": 2.9759849428445674e-09, "loss": 0.1448, "step": 7337 }, { "epoch": 0.99, "grad_norm": 0.9032495486336977, "learning_rate": 2.9011276367041374e-09, "loss": 0.1832, "step": 7338 }, { "epoch": 0.99, "grad_norm": 0.6673626163816087, "learning_rate": 2.827223560253334e-09, "loss": 0.0769, "step": 7339 }, { "epoch": 0.99, "grad_norm": 1.0815135840921177, "learning_rate": 2.7542727275886583e-09, "loss": 0.1765, "step": 7340 }, { "epoch": 0.99, "grad_norm": 0.8561218748411754, "learning_rate": 2.682275152627312e-09, "loss": 0.1647, "step": 7341 }, { "epoch": 0.99, "grad_norm": 1.1610374096440863, "learning_rate": 2.6112308491021978e-09, "loss": 0.1961, "step": 7342 }, { "epoch": 0.99, "grad_norm": 0.6974386501106745, "learning_rate": 2.5411398305663636e-09, "loss": 0.0788, "step": 7343 }, { "epoch": 0.99, "grad_norm": 1.2176600254128493, "learning_rate": 2.4720021103891155e-09, "loss": 0.198, "step": 7344 }, { "epoch": 0.99, "grad_norm": 0.9570140820143032, "learning_rate": 2.403817701759903e-09, "loss": 0.1701, "step": 7345 }, { "epoch": 0.99, "grad_norm": 0.7857036663351461, "learning_rate": 2.336586617684988e-09, "loss": 0.1254, "step": 7346 }, { "epoch": 0.99, "grad_norm": 0.6775817002517139, "learning_rate": 2.270308870989113e-09, "loss": 0.1115, "step": 7347 }, { "epoch": 0.99, "grad_norm": 1.0876478113727341, "learning_rate": 2.2049844743149416e-09, "loss": 0.143, "step": 7348 }, { "epoch": 0.99, "grad_norm": 0.9473478118799338, "learning_rate": 2.1406134401241728e-09, "loss": 0.1605, "step": 7349 }, { "epoch": 0.99, "grad_norm": 0.9114193696716774, "learning_rate": 2.077195780695318e-09, "loss": 0.1744, "step": 7350 }, { "epoch": 0.99, "grad_norm": 0.9412166446615565, "learning_rate": 2.0147315081259223e-09, "loss": 0.143, "step": 7351 }, { "epoch": 0.99, "grad_norm": 0.9853472283261606, "learning_rate": 1.953220634332009e-09, "loss": 0.1777, "step": 7352 }, { "epoch": 0.99, "grad_norm": 0.7457698400318497, "learning_rate": 1.892663171046416e-09, "loss": 0.132, "step": 7353 }, { "epoch": 0.99, "grad_norm": 0.7697231425471306, "learning_rate": 1.8330591298215684e-09, "loss": 0.0865, "step": 7354 }, { "epoch": 0.99, "grad_norm": 0.9304385715754818, "learning_rate": 1.7744085220267049e-09, "loss": 0.171, "step": 7355 }, { "epoch": 0.99, "grad_norm": 0.9120212585183252, "learning_rate": 1.7167113588500984e-09, "loss": 0.1492, "step": 7356 }, { "epoch": 0.99, "grad_norm": 1.196727409562461, "learning_rate": 1.6599676512979445e-09, "loss": 0.2217, "step": 7357 }, { "epoch": 0.99, "grad_norm": 0.82212991324107, "learning_rate": 1.604177410194363e-09, "loss": 0.1141, "step": 7358 }, { "epoch": 0.99, "grad_norm": 1.0225809684560312, "learning_rate": 1.5493406461813965e-09, "loss": 0.1905, "step": 7359 }, { "epoch": 0.99, "grad_norm": 0.9313745254500596, "learning_rate": 1.4954573697206764e-09, "loss": 0.1504, "step": 7360 }, { "epoch": 0.99, "grad_norm": 0.9540168990297494, "learning_rate": 1.4425275910889824e-09, "loss": 0.1413, "step": 7361 }, { "epoch": 0.99, "grad_norm": 1.0895854662339497, "learning_rate": 1.390551320384903e-09, "loss": 0.1442, "step": 7362 }, { "epoch": 0.99, "grad_norm": 0.740663540148301, "learning_rate": 1.3395285675216197e-09, "loss": 0.1118, "step": 7363 }, { "epoch": 0.99, "grad_norm": 1.0301102290498831, "learning_rate": 1.2894593422335677e-09, "loss": 0.1653, "step": 7364 }, { "epoch": 0.99, "grad_norm": 0.5927225765594528, "learning_rate": 1.2403436540703306e-09, "loss": 0.1281, "step": 7365 }, { "epoch": 0.99, "grad_norm": 0.9344783013356373, "learning_rate": 1.1921815124021906e-09, "loss": 0.1511, "step": 7366 }, { "epoch": 0.99, "grad_norm": 1.272488954068171, "learning_rate": 1.1449729264156883e-09, "loss": 0.143, "step": 7367 }, { "epoch": 0.99, "grad_norm": 0.8732665830035861, "learning_rate": 1.0987179051169528e-09, "loss": 0.1412, "step": 7368 }, { "epoch": 0.99, "grad_norm": 1.0200599366518175, "learning_rate": 1.0534164573283712e-09, "loss": 0.1671, "step": 7369 }, { "epoch": 0.99, "grad_norm": 0.8726796458450906, "learning_rate": 1.0090685916924746e-09, "loss": 0.155, "step": 7370 }, { "epoch": 0.99, "grad_norm": 1.2194908087329297, "learning_rate": 9.656743166680526e-10, "loss": 0.2373, "step": 7371 }, { "epoch": 0.99, "grad_norm": 1.1611830769482292, "learning_rate": 9.23233640533483e-10, "loss": 0.1392, "step": 7372 }, { "epoch": 0.99, "grad_norm": 0.8643668038490419, "learning_rate": 8.817465713839568e-10, "loss": 0.1358, "step": 7373 }, { "epoch": 0.99, "grad_norm": 0.9479602972180314, "learning_rate": 8.412131171348093e-10, "loss": 0.1563, "step": 7374 }, { "epoch": 0.99, "grad_norm": 1.0099703459035925, "learning_rate": 8.016332855165232e-10, "loss": 0.1442, "step": 7375 }, { "epoch": 0.99, "grad_norm": 0.8511616543262075, "learning_rate": 7.630070840797255e-10, "loss": 0.1417, "step": 7376 }, { "epoch": 0.99, "grad_norm": 1.1072196153609812, "learning_rate": 7.253345201924111e-10, "loss": 0.1713, "step": 7377 }, { "epoch": 0.99, "grad_norm": 0.9585863667482212, "learning_rate": 6.88615601041609e-10, "loss": 0.1781, "step": 7378 }, { "epoch": 1.0, "grad_norm": 0.8103151370175125, "learning_rate": 6.528503336311609e-10, "loss": 0.1547, "step": 7379 }, { "epoch": 1.0, "grad_norm": 1.265027966466253, "learning_rate": 6.180387247839426e-10, "loss": 0.196, "step": 7380 }, { "epoch": 1.0, "grad_norm": 1.2484218772950444, "learning_rate": 5.841807811396427e-10, "loss": 0.1844, "step": 7381 }, { "epoch": 1.0, "grad_norm": 1.1088687512433664, "learning_rate": 5.512765091575389e-10, "loss": 0.221, "step": 7382 }, { "epoch": 1.0, "grad_norm": 1.0203734573582837, "learning_rate": 5.193259151142771e-10, "loss": 0.1575, "step": 7383 }, { "epoch": 1.0, "grad_norm": 0.6966041603371325, "learning_rate": 4.883290051049816e-10, "loss": 0.0961, "step": 7384 }, { "epoch": 1.0, "grad_norm": 0.9273647952756037, "learning_rate": 4.5828578504159004e-10, "loss": 0.1899, "step": 7385 }, { "epoch": 1.0, "grad_norm": 1.0937148129790664, "learning_rate": 4.291962606556288e-10, "loss": 0.2017, "step": 7386 }, { "epoch": 1.0, "grad_norm": 1.2504495608465191, "learning_rate": 4.0106043749599256e-10, "loss": 0.2026, "step": 7387 }, { "epoch": 1.0, "grad_norm": 0.9078404868242123, "learning_rate": 3.738783209300545e-10, "loss": 0.1901, "step": 7388 }, { "epoch": 1.0, "grad_norm": 0.8615352085141567, "learning_rate": 3.4764991614255616e-10, "loss": 0.1256, "step": 7389 }, { "epoch": 1.0, "grad_norm": 0.9503880497530746, "learning_rate": 3.2237522813727273e-10, "loss": 0.1345, "step": 7390 }, { "epoch": 1.0, "grad_norm": 0.9190127242673465, "learning_rate": 2.9805426173479257e-10, "loss": 0.0995, "step": 7391 }, { "epoch": 1.0, "grad_norm": 1.0856927359291453, "learning_rate": 2.746870215752928e-10, "loss": 0.1671, "step": 7392 }, { "epoch": 1.0, "grad_norm": 0.8797208430439248, "learning_rate": 2.522735121157638e-10, "loss": 0.1502, "step": 7393 }, { "epoch": 1.0, "grad_norm": 1.1766230155997932, "learning_rate": 2.3081373763167436e-10, "loss": 0.2112, "step": 7394 }, { "epoch": 1.0, "grad_norm": 1.0034046694287782, "learning_rate": 2.1030770221641684e-10, "loss": 0.1717, "step": 7395 }, { "epoch": 1.0, "grad_norm": 1.0200840543863872, "learning_rate": 1.9075540978241714e-10, "loss": 0.1884, "step": 7396 }, { "epoch": 1.0, "grad_norm": 1.14572582015939, "learning_rate": 1.7215686405891442e-10, "loss": 0.1946, "step": 7397 }, { "epoch": 1.0, "grad_norm": 1.0505690416300588, "learning_rate": 1.5451206859418143e-10, "loss": 0.1694, "step": 7398 }, { "epoch": 1.0, "grad_norm": 0.8229424702458777, "learning_rate": 1.378210267527491e-10, "loss": 0.186, "step": 7399 }, { "epoch": 1.0, "grad_norm": 0.7758891303816176, "learning_rate": 1.220837417204024e-10, "loss": 0.1561, "step": 7400 }, { "epoch": 1.0, "grad_norm": 1.111874036608757, "learning_rate": 1.0730021649751899e-10, "loss": 0.1701, "step": 7401 }, { "epoch": 1.0, "grad_norm": 0.8714765681728576, "learning_rate": 9.347045390517561e-11, "loss": 0.1461, "step": 7402 }, { "epoch": 1.0, "grad_norm": 0.8452264516684526, "learning_rate": 8.059445658070708e-11, "loss": 0.1542, "step": 7403 }, { "epoch": 1.0, "grad_norm": 1.0527982172143069, "learning_rate": 6.867222698103693e-11, "loss": 0.1689, "step": 7404 }, { "epoch": 1.0, "grad_norm": 1.0765845134661567, "learning_rate": 5.770376738045702e-11, "loss": 0.1669, "step": 7405 }, { "epoch": 1.0, "grad_norm": 1.0996113919522528, "learning_rate": 4.768907987062754e-11, "loss": 0.1719, "step": 7406 }, { "epoch": 1.0, "grad_norm": 0.7948748653045761, "learning_rate": 3.8628166362242315e-11, "loss": 0.1764, "step": 7407 }, { "epoch": 1.0, "grad_norm": 1.1164557534378285, "learning_rate": 3.052102858336348e-11, "loss": 0.1277, "step": 7408 }, { "epoch": 1.0, "grad_norm": 1.0693356460774184, "learning_rate": 2.3367668081086813e-11, "loss": 0.1723, "step": 7409 }, { "epoch": 1.0, "grad_norm": 0.6104702938633813, "learning_rate": 1.7168086219876424e-11, "loss": 0.1016, "step": 7410 }, { "epoch": 1.0, "grad_norm": 0.8963425757551352, "learning_rate": 1.1922284181564714e-11, "loss": 0.1399, "step": 7411 }, { "epoch": 1.0, "grad_norm": 1.0374687282553519, "learning_rate": 7.630262968127966e-12, "loss": 0.1671, "step": 7412 }, { "epoch": 1.0, "grad_norm": 1.1719537044381103, "learning_rate": 4.292023397245437e-12, "loss": 0.2004, "step": 7413 }, { "epoch": 1.0, "grad_norm": 1.0656576361137509, "learning_rate": 1.9075661056300323e-12, "loss": 0.1787, "step": 7414 }, { "epoch": 1.0, "grad_norm": 1.3256955823670467, "learning_rate": 4.768915490283022e-13, "loss": 0.2237, "step": 7415 }, { "epoch": 1.0, "grad_norm": 0.82913469848174, "learning_rate": 0.0, "loss": 0.1521, "step": 7416 }, { "epoch": 1.0, "step": 7416, "total_flos": 1350402630340608.0, "train_loss": 0.18874426638986816, "train_runtime": 19625.7622, "train_samples_per_second": 3.023, "train_steps_per_second": 0.378 } ], "logging_steps": 1.0, "max_steps": 7416, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 1350402630340608.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }