{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999591319628918, "eval_steps": 500, "global_step": 12234, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 10.763204990230978, "learning_rate": 2.7173913043478262e-08, "loss": 1.9124, "step": 1 }, { "epoch": 0.0, "grad_norm": 10.252056662172615, "learning_rate": 5.4347826086956524e-08, "loss": 1.2475, "step": 2 }, { "epoch": 0.0, "grad_norm": 12.5031240421653, "learning_rate": 8.152173913043479e-08, "loss": 1.8097, "step": 3 }, { "epoch": 0.0, "grad_norm": 11.19022777352965, "learning_rate": 1.0869565217391305e-07, "loss": 1.7408, "step": 4 }, { "epoch": 0.0, "grad_norm": 8.74911389200089, "learning_rate": 1.3586956521739132e-07, "loss": 1.3384, "step": 5 }, { "epoch": 0.0, "grad_norm": 16.414301597091637, "learning_rate": 1.6304347826086958e-07, "loss": 2.1608, "step": 6 }, { "epoch": 0.0, "grad_norm": 9.441165175742352, "learning_rate": 1.9021739130434786e-07, "loss": 1.8799, "step": 7 }, { "epoch": 0.0, "grad_norm": 9.77584594887577, "learning_rate": 2.173913043478261e-07, "loss": 1.8447, "step": 8 }, { "epoch": 0.0, "grad_norm": 10.111184232798522, "learning_rate": 2.445652173913044e-07, "loss": 1.4988, "step": 9 }, { "epoch": 0.0, "grad_norm": 9.567135461407071, "learning_rate": 2.7173913043478264e-07, "loss": 1.5362, "step": 10 }, { "epoch": 0.0, "grad_norm": 10.00231116510922, "learning_rate": 2.989130434782609e-07, "loss": 1.2534, "step": 11 }, { "epoch": 0.0, "grad_norm": 7.806732616267708, "learning_rate": 3.2608695652173915e-07, "loss": 1.4756, "step": 12 }, { "epoch": 0.0, "grad_norm": 10.617002504828708, "learning_rate": 3.532608695652174e-07, "loss": 1.7337, "step": 13 }, { "epoch": 0.0, "grad_norm": 13.54765846286333, "learning_rate": 3.804347826086957e-07, "loss": 2.1608, "step": 14 }, { "epoch": 0.0, "grad_norm": 11.123744709657245, "learning_rate": 4.0760869565217393e-07, "loss": 1.8026, "step": 15 }, { "epoch": 0.0, "grad_norm": 7.142452879045726, "learning_rate": 4.347826086956522e-07, "loss": 1.38, "step": 16 }, { "epoch": 0.0, "grad_norm": 10.641416240703439, "learning_rate": 4.6195652173913045e-07, "loss": 1.439, "step": 17 }, { "epoch": 0.0, "grad_norm": 9.110065588139014, "learning_rate": 4.891304347826088e-07, "loss": 1.8888, "step": 18 }, { "epoch": 0.0, "grad_norm": 10.116752917147823, "learning_rate": 5.16304347826087e-07, "loss": 1.4248, "step": 19 }, { "epoch": 0.0, "grad_norm": 10.439796920171236, "learning_rate": 5.434782608695653e-07, "loss": 1.5432, "step": 20 }, { "epoch": 0.0, "grad_norm": 8.992746653280529, "learning_rate": 5.706521739130435e-07, "loss": 1.7672, "step": 21 }, { "epoch": 0.0, "grad_norm": 11.46654871784486, "learning_rate": 5.978260869565218e-07, "loss": 1.66, "step": 22 }, { "epoch": 0.0, "grad_norm": 10.103379665707928, "learning_rate": 6.25e-07, "loss": 1.5059, "step": 23 }, { "epoch": 0.0, "grad_norm": 8.922521662725512, "learning_rate": 6.521739130434783e-07, "loss": 1.3644, "step": 24 }, { "epoch": 0.0, "grad_norm": 9.482787698784223, "learning_rate": 6.793478260869566e-07, "loss": 1.7136, "step": 25 }, { "epoch": 0.0, "grad_norm": 10.131149963147784, "learning_rate": 7.065217391304348e-07, "loss": 1.5865, "step": 26 }, { "epoch": 0.0, "grad_norm": 7.553296695300296, "learning_rate": 7.336956521739132e-07, "loss": 1.0809, "step": 27 }, { "epoch": 0.0, "grad_norm": 7.447837089466901, "learning_rate": 7.608695652173914e-07, "loss": 1.3542, "step": 28 }, { "epoch": 0.0, "grad_norm": 6.646186450438408, "learning_rate": 7.880434782608697e-07, "loss": 1.3759, "step": 29 }, { "epoch": 0.0, "grad_norm": 11.363825771345514, "learning_rate": 8.152173913043479e-07, "loss": 1.3261, "step": 30 }, { "epoch": 0.0, "grad_norm": 6.707159536720378, "learning_rate": 8.423913043478261e-07, "loss": 1.2247, "step": 31 }, { "epoch": 0.0, "grad_norm": 7.5094610599711125, "learning_rate": 8.695652173913044e-07, "loss": 1.4841, "step": 32 }, { "epoch": 0.0, "grad_norm": 7.783281889567137, "learning_rate": 8.967391304347826e-07, "loss": 1.5589, "step": 33 }, { "epoch": 0.0, "grad_norm": 10.324049872893895, "learning_rate": 9.239130434782609e-07, "loss": 1.402, "step": 34 }, { "epoch": 0.0, "grad_norm": 8.307774271557626, "learning_rate": 9.510869565217393e-07, "loss": 1.4388, "step": 35 }, { "epoch": 0.0, "grad_norm": 8.48905313529863, "learning_rate": 9.782608695652175e-07, "loss": 1.5702, "step": 36 }, { "epoch": 0.0, "grad_norm": 8.834200227156316, "learning_rate": 1.0054347826086958e-06, "loss": 1.3819, "step": 37 }, { "epoch": 0.0, "grad_norm": 7.867659654983689, "learning_rate": 1.032608695652174e-06, "loss": 1.5146, "step": 38 }, { "epoch": 0.0, "grad_norm": 7.104395892275559, "learning_rate": 1.0597826086956523e-06, "loss": 1.2098, "step": 39 }, { "epoch": 0.0, "grad_norm": 8.485504914038025, "learning_rate": 1.0869565217391306e-06, "loss": 1.3955, "step": 40 }, { "epoch": 0.0, "grad_norm": 10.13078856345608, "learning_rate": 1.1141304347826088e-06, "loss": 1.4476, "step": 41 }, { "epoch": 0.0, "grad_norm": 8.66501688643045, "learning_rate": 1.141304347826087e-06, "loss": 1.1798, "step": 42 }, { "epoch": 0.0, "grad_norm": 7.056438781274291, "learning_rate": 1.1684782608695653e-06, "loss": 1.1171, "step": 43 }, { "epoch": 0.0, "grad_norm": 9.524989559260273, "learning_rate": 1.1956521739130436e-06, "loss": 1.2799, "step": 44 }, { "epoch": 0.0, "grad_norm": 5.207602666230073, "learning_rate": 1.2228260869565218e-06, "loss": 1.1379, "step": 45 }, { "epoch": 0.0, "grad_norm": 10.741407362172229, "learning_rate": 1.25e-06, "loss": 1.5437, "step": 46 }, { "epoch": 0.0, "grad_norm": 5.112828564926632, "learning_rate": 1.2771739130434786e-06, "loss": 0.9981, "step": 47 }, { "epoch": 0.0, "grad_norm": 5.503495238241442, "learning_rate": 1.3043478260869566e-06, "loss": 1.0219, "step": 48 }, { "epoch": 0.0, "grad_norm": 8.465625542087716, "learning_rate": 1.3315217391304349e-06, "loss": 1.4285, "step": 49 }, { "epoch": 0.0, "grad_norm": 7.756922552940579, "learning_rate": 1.3586956521739131e-06, "loss": 1.298, "step": 50 }, { "epoch": 0.0, "grad_norm": 6.191522642983566, "learning_rate": 1.3858695652173914e-06, "loss": 1.2136, "step": 51 }, { "epoch": 0.0, "grad_norm": 7.345904517953232, "learning_rate": 1.4130434782608697e-06, "loss": 1.2073, "step": 52 }, { "epoch": 0.0, "grad_norm": 6.782042045608577, "learning_rate": 1.440217391304348e-06, "loss": 1.197, "step": 53 }, { "epoch": 0.0, "grad_norm": 7.103653842504592, "learning_rate": 1.4673913043478264e-06, "loss": 1.1927, "step": 54 }, { "epoch": 0.0, "grad_norm": 9.288565917912903, "learning_rate": 1.4945652173913044e-06, "loss": 1.728, "step": 55 }, { "epoch": 0.0, "grad_norm": 6.496033995601524, "learning_rate": 1.521739130434783e-06, "loss": 1.1772, "step": 56 }, { "epoch": 0.0, "grad_norm": 8.70760468187945, "learning_rate": 1.548913043478261e-06, "loss": 1.0784, "step": 57 }, { "epoch": 0.0, "grad_norm": 5.164233229137625, "learning_rate": 1.5760869565217394e-06, "loss": 0.8166, "step": 58 }, { "epoch": 0.0, "grad_norm": 6.153545718589251, "learning_rate": 1.6032608695652175e-06, "loss": 1.0213, "step": 59 }, { "epoch": 0.0, "grad_norm": 9.224145677156148, "learning_rate": 1.6304347826086957e-06, "loss": 1.2201, "step": 60 }, { "epoch": 0.0, "grad_norm": 5.929420117992498, "learning_rate": 1.657608695652174e-06, "loss": 1.0981, "step": 61 }, { "epoch": 0.01, "grad_norm": 6.832090681262447, "learning_rate": 1.6847826086956522e-06, "loss": 1.1829, "step": 62 }, { "epoch": 0.01, "grad_norm": 7.827788334408938, "learning_rate": 1.7119565217391307e-06, "loss": 1.1005, "step": 63 }, { "epoch": 0.01, "grad_norm": 7.5134565802844415, "learning_rate": 1.7391304347826088e-06, "loss": 0.8607, "step": 64 }, { "epoch": 0.01, "grad_norm": 6.975177473268082, "learning_rate": 1.7663043478260872e-06, "loss": 1.0803, "step": 65 }, { "epoch": 0.01, "grad_norm": 8.425166183961684, "learning_rate": 1.7934782608695653e-06, "loss": 1.1596, "step": 66 }, { "epoch": 0.01, "grad_norm": 8.723229692609811, "learning_rate": 1.8206521739130437e-06, "loss": 1.2265, "step": 67 }, { "epoch": 0.01, "grad_norm": 6.7467229371397694, "learning_rate": 1.8478260869565218e-06, "loss": 0.7996, "step": 68 }, { "epoch": 0.01, "grad_norm": 3.146784934298589, "learning_rate": 1.8750000000000003e-06, "loss": 0.3544, "step": 69 }, { "epoch": 0.01, "grad_norm": 10.983495052283711, "learning_rate": 1.9021739130434785e-06, "loss": 1.5448, "step": 70 }, { "epoch": 0.01, "grad_norm": 6.528084910725228, "learning_rate": 1.9293478260869568e-06, "loss": 0.9641, "step": 71 }, { "epoch": 0.01, "grad_norm": 4.772154961625803, "learning_rate": 1.956521739130435e-06, "loss": 0.6387, "step": 72 }, { "epoch": 0.01, "grad_norm": 5.372610251132539, "learning_rate": 1.9836956521739133e-06, "loss": 0.7883, "step": 73 }, { "epoch": 0.01, "grad_norm": 5.3183594429603716, "learning_rate": 2.0108695652173916e-06, "loss": 0.6512, "step": 74 }, { "epoch": 0.01, "grad_norm": 5.804934889637244, "learning_rate": 2.03804347826087e-06, "loss": 0.8766, "step": 75 }, { "epoch": 0.01, "grad_norm": 6.42791079405579, "learning_rate": 2.065217391304348e-06, "loss": 0.8556, "step": 76 }, { "epoch": 0.01, "grad_norm": 8.519309928339856, "learning_rate": 2.0923913043478263e-06, "loss": 1.5128, "step": 77 }, { "epoch": 0.01, "grad_norm": 7.595715930891681, "learning_rate": 2.1195652173913046e-06, "loss": 1.3523, "step": 78 }, { "epoch": 0.01, "grad_norm": 5.709132076023693, "learning_rate": 2.146739130434783e-06, "loss": 0.8167, "step": 79 }, { "epoch": 0.01, "grad_norm": 5.721035794036643, "learning_rate": 2.173913043478261e-06, "loss": 0.8613, "step": 80 }, { "epoch": 0.01, "grad_norm": 9.955864530501376, "learning_rate": 2.2010869565217394e-06, "loss": 1.4923, "step": 81 }, { "epoch": 0.01, "grad_norm": 5.226117141588304, "learning_rate": 2.2282608695652176e-06, "loss": 1.0378, "step": 82 }, { "epoch": 0.01, "grad_norm": 6.578554182689265, "learning_rate": 2.255434782608696e-06, "loss": 1.2069, "step": 83 }, { "epoch": 0.01, "grad_norm": 4.854813831700824, "learning_rate": 2.282608695652174e-06, "loss": 0.8649, "step": 84 }, { "epoch": 0.01, "grad_norm": 9.154347006791653, "learning_rate": 2.3097826086956524e-06, "loss": 0.8471, "step": 85 }, { "epoch": 0.01, "grad_norm": 7.017133522659245, "learning_rate": 2.3369565217391307e-06, "loss": 1.1938, "step": 86 }, { "epoch": 0.01, "grad_norm": 6.026983949592643, "learning_rate": 2.364130434782609e-06, "loss": 0.8922, "step": 87 }, { "epoch": 0.01, "grad_norm": 8.405981979152875, "learning_rate": 2.391304347826087e-06, "loss": 1.4834, "step": 88 }, { "epoch": 0.01, "grad_norm": 6.5999817340984075, "learning_rate": 2.4184782608695654e-06, "loss": 1.0044, "step": 89 }, { "epoch": 0.01, "grad_norm": 9.162604562149298, "learning_rate": 2.4456521739130437e-06, "loss": 1.3329, "step": 90 }, { "epoch": 0.01, "grad_norm": 7.074359325975014, "learning_rate": 2.472826086956522e-06, "loss": 1.5377, "step": 91 }, { "epoch": 0.01, "grad_norm": 9.41639145189624, "learning_rate": 2.5e-06, "loss": 1.0232, "step": 92 }, { "epoch": 0.01, "grad_norm": 6.4783028052120395, "learning_rate": 2.5271739130434785e-06, "loss": 0.955, "step": 93 }, { "epoch": 0.01, "grad_norm": 5.739755004143212, "learning_rate": 2.554347826086957e-06, "loss": 0.753, "step": 94 }, { "epoch": 0.01, "grad_norm": 5.632644951561571, "learning_rate": 2.581521739130435e-06, "loss": 1.1089, "step": 95 }, { "epoch": 0.01, "grad_norm": 8.707986315608768, "learning_rate": 2.6086956521739132e-06, "loss": 1.6927, "step": 96 }, { "epoch": 0.01, "grad_norm": 5.928093931905623, "learning_rate": 2.6358695652173915e-06, "loss": 0.7765, "step": 97 }, { "epoch": 0.01, "grad_norm": 8.761801614748697, "learning_rate": 2.6630434782608698e-06, "loss": 1.3454, "step": 98 }, { "epoch": 0.01, "grad_norm": 8.71085682910734, "learning_rate": 2.6902173913043476e-06, "loss": 1.2272, "step": 99 }, { "epoch": 0.01, "grad_norm": 5.743239729296844, "learning_rate": 2.7173913043478263e-06, "loss": 0.8723, "step": 100 }, { "epoch": 0.01, "grad_norm": 6.8842539643647065, "learning_rate": 2.7445652173913045e-06, "loss": 1.1771, "step": 101 }, { "epoch": 0.01, "grad_norm": 6.6027040012838265, "learning_rate": 2.771739130434783e-06, "loss": 1.1343, "step": 102 }, { "epoch": 0.01, "grad_norm": 5.93038824774127, "learning_rate": 2.7989130434782615e-06, "loss": 0.8708, "step": 103 }, { "epoch": 0.01, "grad_norm": 5.992654839808304, "learning_rate": 2.8260869565217393e-06, "loss": 0.7075, "step": 104 }, { "epoch": 0.01, "grad_norm": 6.638844600072942, "learning_rate": 2.8532608695652176e-06, "loss": 0.8676, "step": 105 }, { "epoch": 0.01, "grad_norm": 6.919202754473433, "learning_rate": 2.880434782608696e-06, "loss": 1.1083, "step": 106 }, { "epoch": 0.01, "grad_norm": 5.808777508326425, "learning_rate": 2.9076086956521745e-06, "loss": 0.8974, "step": 107 }, { "epoch": 0.01, "grad_norm": 3.049341085789842, "learning_rate": 2.9347826086956528e-06, "loss": 0.4606, "step": 108 }, { "epoch": 0.01, "grad_norm": 5.734898712066466, "learning_rate": 2.9619565217391306e-06, "loss": 0.7338, "step": 109 }, { "epoch": 0.01, "grad_norm": 7.308385711610393, "learning_rate": 2.989130434782609e-06, "loss": 1.2931, "step": 110 }, { "epoch": 0.01, "grad_norm": 8.37792345474363, "learning_rate": 3.016304347826087e-06, "loss": 1.3357, "step": 111 }, { "epoch": 0.01, "grad_norm": 4.207211713951564, "learning_rate": 3.043478260869566e-06, "loss": 0.4874, "step": 112 }, { "epoch": 0.01, "grad_norm": 8.291411813399872, "learning_rate": 3.0706521739130436e-06, "loss": 1.3978, "step": 113 }, { "epoch": 0.01, "grad_norm": 8.685180426322534, "learning_rate": 3.097826086956522e-06, "loss": 0.8724, "step": 114 }, { "epoch": 0.01, "grad_norm": 6.104442131372095, "learning_rate": 3.125e-06, "loss": 1.0527, "step": 115 }, { "epoch": 0.01, "grad_norm": 7.478605850436171, "learning_rate": 3.152173913043479e-06, "loss": 1.4413, "step": 116 }, { "epoch": 0.01, "grad_norm": 7.531709196936938, "learning_rate": 3.179347826086957e-06, "loss": 1.6845, "step": 117 }, { "epoch": 0.01, "grad_norm": 5.4759958206666175, "learning_rate": 3.206521739130435e-06, "loss": 1.0112, "step": 118 }, { "epoch": 0.01, "grad_norm": 9.049621112602324, "learning_rate": 3.233695652173913e-06, "loss": 1.3741, "step": 119 }, { "epoch": 0.01, "grad_norm": 3.5817184153763173, "learning_rate": 3.2608695652173914e-06, "loss": 0.7584, "step": 120 }, { "epoch": 0.01, "grad_norm": 6.404142910694771, "learning_rate": 3.28804347826087e-06, "loss": 1.1528, "step": 121 }, { "epoch": 0.01, "grad_norm": 3.906408150238798, "learning_rate": 3.315217391304348e-06, "loss": 0.512, "step": 122 }, { "epoch": 0.01, "grad_norm": 6.488336599440538, "learning_rate": 3.3423913043478262e-06, "loss": 1.018, "step": 123 }, { "epoch": 0.01, "grad_norm": 6.981920682664463, "learning_rate": 3.3695652173913045e-06, "loss": 0.8915, "step": 124 }, { "epoch": 0.01, "grad_norm": 8.05588044254937, "learning_rate": 3.396739130434783e-06, "loss": 0.8899, "step": 125 }, { "epoch": 0.01, "grad_norm": 5.854430041120995, "learning_rate": 3.4239130434782614e-06, "loss": 1.0416, "step": 126 }, { "epoch": 0.01, "grad_norm": 10.556756679163271, "learning_rate": 3.4510869565217393e-06, "loss": 1.6121, "step": 127 }, { "epoch": 0.01, "grad_norm": 7.0566625292176495, "learning_rate": 3.4782608695652175e-06, "loss": 1.4485, "step": 128 }, { "epoch": 0.01, "grad_norm": 3.883945959657133, "learning_rate": 3.5054347826086958e-06, "loss": 0.6082, "step": 129 }, { "epoch": 0.01, "grad_norm": 7.701849857905996, "learning_rate": 3.5326086956521745e-06, "loss": 1.3073, "step": 130 }, { "epoch": 0.01, "grad_norm": 5.27587282635949, "learning_rate": 3.5597826086956527e-06, "loss": 0.6537, "step": 131 }, { "epoch": 0.01, "grad_norm": 3.3323783890401635, "learning_rate": 3.5869565217391305e-06, "loss": 0.5372, "step": 132 }, { "epoch": 0.01, "grad_norm": 7.843012301008066, "learning_rate": 3.614130434782609e-06, "loss": 1.243, "step": 133 }, { "epoch": 0.01, "grad_norm": 7.200673205505264, "learning_rate": 3.6413043478260875e-06, "loss": 1.2031, "step": 134 }, { "epoch": 0.01, "grad_norm": 4.178859040391603, "learning_rate": 3.6684782608695657e-06, "loss": 0.7575, "step": 135 }, { "epoch": 0.01, "grad_norm": 8.493390921146402, "learning_rate": 3.6956521739130436e-06, "loss": 1.2118, "step": 136 }, { "epoch": 0.01, "grad_norm": 6.517598073215942, "learning_rate": 3.722826086956522e-06, "loss": 1.0804, "step": 137 }, { "epoch": 0.01, "grad_norm": 5.872001208747307, "learning_rate": 3.7500000000000005e-06, "loss": 0.8877, "step": 138 }, { "epoch": 0.01, "grad_norm": 5.751199032083687, "learning_rate": 3.7771739130434788e-06, "loss": 1.0684, "step": 139 }, { "epoch": 0.01, "grad_norm": 8.814160757707059, "learning_rate": 3.804347826086957e-06, "loss": 1.4598, "step": 140 }, { "epoch": 0.01, "grad_norm": 2.7161018974223925, "learning_rate": 3.831521739130435e-06, "loss": 0.2583, "step": 141 }, { "epoch": 0.01, "grad_norm": 4.528220495890977, "learning_rate": 3.8586956521739136e-06, "loss": 0.9408, "step": 142 }, { "epoch": 0.01, "grad_norm": 3.7952554346766822, "learning_rate": 3.885869565217392e-06, "loss": 0.4543, "step": 143 }, { "epoch": 0.01, "grad_norm": 6.48531466674868, "learning_rate": 3.91304347826087e-06, "loss": 0.8372, "step": 144 }, { "epoch": 0.01, "grad_norm": 6.993848127713756, "learning_rate": 3.9402173913043475e-06, "loss": 1.1149, "step": 145 }, { "epoch": 0.01, "grad_norm": 7.60199739787131, "learning_rate": 3.967391304347827e-06, "loss": 1.4652, "step": 146 }, { "epoch": 0.01, "grad_norm": 2.180849053327101, "learning_rate": 3.994565217391305e-06, "loss": 0.3403, "step": 147 }, { "epoch": 0.01, "grad_norm": 6.675303114346352, "learning_rate": 4.021739130434783e-06, "loss": 1.1365, "step": 148 }, { "epoch": 0.01, "grad_norm": 5.618577228665263, "learning_rate": 4.048913043478261e-06, "loss": 1.067, "step": 149 }, { "epoch": 0.01, "grad_norm": 6.078694781710514, "learning_rate": 4.07608695652174e-06, "loss": 1.0104, "step": 150 }, { "epoch": 0.01, "grad_norm": 7.767941978563566, "learning_rate": 4.103260869565218e-06, "loss": 1.3224, "step": 151 }, { "epoch": 0.01, "grad_norm": 5.885594442243772, "learning_rate": 4.130434782608696e-06, "loss": 0.7112, "step": 152 }, { "epoch": 0.01, "grad_norm": 10.457674878448332, "learning_rate": 4.157608695652174e-06, "loss": 1.8655, "step": 153 }, { "epoch": 0.01, "grad_norm": 9.904533607873724, "learning_rate": 4.184782608695653e-06, "loss": 1.4322, "step": 154 }, { "epoch": 0.01, "grad_norm": 8.049829569078577, "learning_rate": 4.211956521739131e-06, "loss": 1.1242, "step": 155 }, { "epoch": 0.01, "grad_norm": 5.5836948972226566, "learning_rate": 4.239130434782609e-06, "loss": 0.6328, "step": 156 }, { "epoch": 0.01, "grad_norm": 6.511416011994131, "learning_rate": 4.2663043478260874e-06, "loss": 0.7455, "step": 157 }, { "epoch": 0.01, "grad_norm": 5.415113124345645, "learning_rate": 4.293478260869566e-06, "loss": 0.8246, "step": 158 }, { "epoch": 0.01, "grad_norm": 7.563836391840908, "learning_rate": 4.320652173913044e-06, "loss": 1.2982, "step": 159 }, { "epoch": 0.01, "grad_norm": 5.255558685888354, "learning_rate": 4.347826086956522e-06, "loss": 0.922, "step": 160 }, { "epoch": 0.01, "grad_norm": 5.8007257579161235, "learning_rate": 4.3750000000000005e-06, "loss": 0.6111, "step": 161 }, { "epoch": 0.01, "grad_norm": 6.138614187559548, "learning_rate": 4.402173913043479e-06, "loss": 1.0949, "step": 162 }, { "epoch": 0.01, "grad_norm": 4.9221676977686535, "learning_rate": 4.429347826086957e-06, "loss": 0.909, "step": 163 }, { "epoch": 0.01, "grad_norm": 4.942921539191711, "learning_rate": 4.456521739130435e-06, "loss": 0.7227, "step": 164 }, { "epoch": 0.01, "grad_norm": 5.900295438543103, "learning_rate": 4.4836956521739135e-06, "loss": 0.6749, "step": 165 }, { "epoch": 0.01, "grad_norm": 4.789778017522469, "learning_rate": 4.510869565217392e-06, "loss": 0.9947, "step": 166 }, { "epoch": 0.01, "grad_norm": 8.048409907918103, "learning_rate": 4.53804347826087e-06, "loss": 1.2421, "step": 167 }, { "epoch": 0.01, "grad_norm": 5.367898594382595, "learning_rate": 4.565217391304348e-06, "loss": 0.5863, "step": 168 }, { "epoch": 0.01, "grad_norm": 5.038484443485752, "learning_rate": 4.5923913043478265e-06, "loss": 0.8006, "step": 169 }, { "epoch": 0.01, "grad_norm": 6.980449471454865, "learning_rate": 4.619565217391305e-06, "loss": 1.2171, "step": 170 }, { "epoch": 0.01, "grad_norm": 6.291826025186352, "learning_rate": 4.646739130434783e-06, "loss": 0.969, "step": 171 }, { "epoch": 0.01, "grad_norm": 7.18831539912512, "learning_rate": 4.673913043478261e-06, "loss": 0.6138, "step": 172 }, { "epoch": 0.01, "grad_norm": 4.608150004360503, "learning_rate": 4.7010869565217396e-06, "loss": 0.7879, "step": 173 }, { "epoch": 0.01, "grad_norm": 2.410996394740663, "learning_rate": 4.728260869565218e-06, "loss": 0.4698, "step": 174 }, { "epoch": 0.01, "grad_norm": 6.1740205070337355, "learning_rate": 4.755434782608696e-06, "loss": 1.0853, "step": 175 }, { "epoch": 0.01, "grad_norm": 5.13726770207731, "learning_rate": 4.782608695652174e-06, "loss": 0.8261, "step": 176 }, { "epoch": 0.01, "grad_norm": 7.361680179795122, "learning_rate": 4.809782608695653e-06, "loss": 1.1934, "step": 177 }, { "epoch": 0.01, "grad_norm": 7.67123136273917, "learning_rate": 4.836956521739131e-06, "loss": 1.1739, "step": 178 }, { "epoch": 0.01, "grad_norm": 5.2229598582844945, "learning_rate": 4.864130434782609e-06, "loss": 1.0926, "step": 179 }, { "epoch": 0.01, "grad_norm": 4.7046608872567734, "learning_rate": 4.891304347826087e-06, "loss": 0.8426, "step": 180 }, { "epoch": 0.01, "grad_norm": 6.198449576398156, "learning_rate": 4.918478260869566e-06, "loss": 1.1743, "step": 181 }, { "epoch": 0.01, "grad_norm": 8.525933837362583, "learning_rate": 4.945652173913044e-06, "loss": 0.8965, "step": 182 }, { "epoch": 0.01, "grad_norm": 5.102006691809892, "learning_rate": 4.972826086956522e-06, "loss": 0.4219, "step": 183 }, { "epoch": 0.02, "grad_norm": 6.716964784019454, "learning_rate": 5e-06, "loss": 1.3908, "step": 184 }, { "epoch": 0.02, "grad_norm": 5.812408168037797, "learning_rate": 5.027173913043478e-06, "loss": 0.9765, "step": 185 }, { "epoch": 0.02, "grad_norm": 5.7300425111293345, "learning_rate": 5.054347826086957e-06, "loss": 0.7135, "step": 186 }, { "epoch": 0.02, "grad_norm": 5.29453599820772, "learning_rate": 5.081521739130435e-06, "loss": 0.6838, "step": 187 }, { "epoch": 0.02, "grad_norm": 4.3872345244405215, "learning_rate": 5.108695652173914e-06, "loss": 0.7755, "step": 188 }, { "epoch": 0.02, "grad_norm": 6.350885267174286, "learning_rate": 5.135869565217392e-06, "loss": 0.7441, "step": 189 }, { "epoch": 0.02, "grad_norm": 5.705837768030783, "learning_rate": 5.16304347826087e-06, "loss": 0.9734, "step": 190 }, { "epoch": 0.02, "grad_norm": 4.031565248567177, "learning_rate": 5.190217391304348e-06, "loss": 0.521, "step": 191 }, { "epoch": 0.02, "grad_norm": 9.231182683688848, "learning_rate": 5.2173913043478265e-06, "loss": 1.2869, "step": 192 }, { "epoch": 0.02, "grad_norm": 8.081212572731191, "learning_rate": 5.244565217391306e-06, "loss": 1.1, "step": 193 }, { "epoch": 0.02, "grad_norm": 1.923947857423653, "learning_rate": 5.271739130434783e-06, "loss": 0.1881, "step": 194 }, { "epoch": 0.02, "grad_norm": 5.654283650048554, "learning_rate": 5.298913043478261e-06, "loss": 0.9192, "step": 195 }, { "epoch": 0.02, "grad_norm": 6.772455621286309, "learning_rate": 5.3260869565217395e-06, "loss": 0.7107, "step": 196 }, { "epoch": 0.02, "grad_norm": 2.8759240797330703, "learning_rate": 5.353260869565218e-06, "loss": 0.3039, "step": 197 }, { "epoch": 0.02, "grad_norm": 4.517060510758465, "learning_rate": 5.380434782608695e-06, "loss": 0.8079, "step": 198 }, { "epoch": 0.02, "grad_norm": 6.702676517665291, "learning_rate": 5.407608695652174e-06, "loss": 1.3456, "step": 199 }, { "epoch": 0.02, "grad_norm": 6.5636098368903895, "learning_rate": 5.4347826086956525e-06, "loss": 0.9768, "step": 200 }, { "epoch": 0.02, "grad_norm": 6.958333903626813, "learning_rate": 5.461956521739132e-06, "loss": 0.7762, "step": 201 }, { "epoch": 0.02, "grad_norm": 5.581754010001346, "learning_rate": 5.489130434782609e-06, "loss": 0.7291, "step": 202 }, { "epoch": 0.02, "grad_norm": 5.923986836029897, "learning_rate": 5.516304347826087e-06, "loss": 0.9687, "step": 203 }, { "epoch": 0.02, "grad_norm": 6.081234676189072, "learning_rate": 5.543478260869566e-06, "loss": 0.8964, "step": 204 }, { "epoch": 0.02, "grad_norm": 2.390859736544384, "learning_rate": 5.570652173913044e-06, "loss": 0.3286, "step": 205 }, { "epoch": 0.02, "grad_norm": 6.417543485088989, "learning_rate": 5.597826086956523e-06, "loss": 1.6375, "step": 206 }, { "epoch": 0.02, "grad_norm": 5.992210187529088, "learning_rate": 5.625e-06, "loss": 0.8719, "step": 207 }, { "epoch": 0.02, "grad_norm": 6.392475147231553, "learning_rate": 5.652173913043479e-06, "loss": 0.9143, "step": 208 }, { "epoch": 0.02, "grad_norm": 6.174327311086821, "learning_rate": 5.679347826086957e-06, "loss": 0.6731, "step": 209 }, { "epoch": 0.02, "grad_norm": 3.743161076840916, "learning_rate": 5.706521739130435e-06, "loss": 0.6002, "step": 210 }, { "epoch": 0.02, "grad_norm": 6.316271470573057, "learning_rate": 5.733695652173914e-06, "loss": 0.9979, "step": 211 }, { "epoch": 0.02, "grad_norm": 5.871275447806914, "learning_rate": 5.760869565217392e-06, "loss": 0.8193, "step": 212 }, { "epoch": 0.02, "grad_norm": 7.000709896186412, "learning_rate": 5.78804347826087e-06, "loss": 0.8606, "step": 213 }, { "epoch": 0.02, "grad_norm": 4.24528787539437, "learning_rate": 5.815217391304349e-06, "loss": 0.7, "step": 214 }, { "epoch": 0.02, "grad_norm": 7.942273062178936, "learning_rate": 5.842391304347826e-06, "loss": 0.9734, "step": 215 }, { "epoch": 0.02, "grad_norm": 6.013164920105653, "learning_rate": 5.8695652173913055e-06, "loss": 0.9056, "step": 216 }, { "epoch": 0.02, "grad_norm": 6.385221534082321, "learning_rate": 5.896739130434783e-06, "loss": 0.6014, "step": 217 }, { "epoch": 0.02, "grad_norm": 8.202981310804969, "learning_rate": 5.923913043478261e-06, "loss": 1.2524, "step": 218 }, { "epoch": 0.02, "grad_norm": 7.673247533397, "learning_rate": 5.95108695652174e-06, "loss": 1.2265, "step": 219 }, { "epoch": 0.02, "grad_norm": 5.655756570467079, "learning_rate": 5.978260869565218e-06, "loss": 1.0721, "step": 220 }, { "epoch": 0.02, "grad_norm": 6.234835376562726, "learning_rate": 6.005434782608696e-06, "loss": 1.1082, "step": 221 }, { "epoch": 0.02, "grad_norm": 6.974427392129233, "learning_rate": 6.032608695652174e-06, "loss": 1.0017, "step": 222 }, { "epoch": 0.02, "grad_norm": 7.916925594285953, "learning_rate": 6.0597826086956525e-06, "loss": 1.1169, "step": 223 }, { "epoch": 0.02, "grad_norm": 7.069754222166293, "learning_rate": 6.086956521739132e-06, "loss": 1.1844, "step": 224 }, { "epoch": 0.02, "grad_norm": 7.5925567538816, "learning_rate": 6.114130434782609e-06, "loss": 1.3222, "step": 225 }, { "epoch": 0.02, "grad_norm": 6.406797145497836, "learning_rate": 6.141304347826087e-06, "loss": 0.7072, "step": 226 }, { "epoch": 0.02, "grad_norm": 5.960839328635132, "learning_rate": 6.1684782608695655e-06, "loss": 1.0292, "step": 227 }, { "epoch": 0.02, "grad_norm": 3.2287351299289333, "learning_rate": 6.195652173913044e-06, "loss": 0.4187, "step": 228 }, { "epoch": 0.02, "grad_norm": 5.6174718743187375, "learning_rate": 6.222826086956523e-06, "loss": 0.9114, "step": 229 }, { "epoch": 0.02, "grad_norm": 4.422614534330612, "learning_rate": 6.25e-06, "loss": 0.8558, "step": 230 }, { "epoch": 0.02, "grad_norm": 5.2228528011626585, "learning_rate": 6.2771739130434786e-06, "loss": 0.8636, "step": 231 }, { "epoch": 0.02, "grad_norm": 5.503777571503567, "learning_rate": 6.304347826086958e-06, "loss": 0.8099, "step": 232 }, { "epoch": 0.02, "grad_norm": 6.906673443489322, "learning_rate": 6.331521739130435e-06, "loss": 0.7918, "step": 233 }, { "epoch": 0.02, "grad_norm": 7.857207855313775, "learning_rate": 6.358695652173914e-06, "loss": 1.8075, "step": 234 }, { "epoch": 0.02, "grad_norm": 8.148563326223139, "learning_rate": 6.385869565217392e-06, "loss": 1.1465, "step": 235 }, { "epoch": 0.02, "grad_norm": 6.4249316447648654, "learning_rate": 6.41304347826087e-06, "loss": 1.1498, "step": 236 }, { "epoch": 0.02, "grad_norm": 6.644600409673028, "learning_rate": 6.440217391304349e-06, "loss": 0.9854, "step": 237 }, { "epoch": 0.02, "grad_norm": 4.008422155861959, "learning_rate": 6.467391304347826e-06, "loss": 0.4489, "step": 238 }, { "epoch": 0.02, "grad_norm": 7.686369840867863, "learning_rate": 6.4945652173913055e-06, "loss": 1.2196, "step": 239 }, { "epoch": 0.02, "grad_norm": 7.389822453643525, "learning_rate": 6.521739130434783e-06, "loss": 1.5096, "step": 240 }, { "epoch": 0.02, "grad_norm": 6.40452020190104, "learning_rate": 6.548913043478261e-06, "loss": 1.0686, "step": 241 }, { "epoch": 0.02, "grad_norm": 3.8513034466412925, "learning_rate": 6.57608695652174e-06, "loss": 0.5596, "step": 242 }, { "epoch": 0.02, "grad_norm": 7.0985386159169215, "learning_rate": 6.603260869565218e-06, "loss": 0.8382, "step": 243 }, { "epoch": 0.02, "grad_norm": 6.486981558596191, "learning_rate": 6.630434782608696e-06, "loss": 1.1326, "step": 244 }, { "epoch": 0.02, "grad_norm": 7.04189008786626, "learning_rate": 6.657608695652175e-06, "loss": 1.0986, "step": 245 }, { "epoch": 0.02, "grad_norm": 7.826697566070404, "learning_rate": 6.6847826086956524e-06, "loss": 1.224, "step": 246 }, { "epoch": 0.02, "grad_norm": 7.079863511831709, "learning_rate": 6.7119565217391315e-06, "loss": 0.9992, "step": 247 }, { "epoch": 0.02, "grad_norm": 5.103408839897494, "learning_rate": 6.739130434782609e-06, "loss": 0.71, "step": 248 }, { "epoch": 0.02, "grad_norm": 7.1228953020068, "learning_rate": 6.766304347826087e-06, "loss": 0.9402, "step": 249 }, { "epoch": 0.02, "grad_norm": 9.976070490177033, "learning_rate": 6.793478260869566e-06, "loss": 1.4335, "step": 250 }, { "epoch": 0.02, "grad_norm": 5.589589798539868, "learning_rate": 6.820652173913044e-06, "loss": 0.7684, "step": 251 }, { "epoch": 0.02, "grad_norm": 6.862103581613901, "learning_rate": 6.847826086956523e-06, "loss": 1.3673, "step": 252 }, { "epoch": 0.02, "grad_norm": 5.107711867291614, "learning_rate": 6.875e-06, "loss": 0.7849, "step": 253 }, { "epoch": 0.02, "grad_norm": 5.781420070769819, "learning_rate": 6.9021739130434785e-06, "loss": 1.1199, "step": 254 }, { "epoch": 0.02, "grad_norm": 7.074452327593561, "learning_rate": 6.929347826086958e-06, "loss": 0.9971, "step": 255 }, { "epoch": 0.02, "grad_norm": 4.882879030652116, "learning_rate": 6.956521739130435e-06, "loss": 0.7556, "step": 256 }, { "epoch": 0.02, "grad_norm": 5.997966863791385, "learning_rate": 6.983695652173914e-06, "loss": 1.1353, "step": 257 }, { "epoch": 0.02, "grad_norm": 7.651101842854983, "learning_rate": 7.0108695652173915e-06, "loss": 1.2194, "step": 258 }, { "epoch": 0.02, "grad_norm": 4.586971534165365, "learning_rate": 7.03804347826087e-06, "loss": 0.8826, "step": 259 }, { "epoch": 0.02, "grad_norm": 8.149993046803752, "learning_rate": 7.065217391304349e-06, "loss": 1.2882, "step": 260 }, { "epoch": 0.02, "grad_norm": 5.5061265280242075, "learning_rate": 7.092391304347826e-06, "loss": 0.625, "step": 261 }, { "epoch": 0.02, "grad_norm": 4.478712447315659, "learning_rate": 7.119565217391305e-06, "loss": 0.6542, "step": 262 }, { "epoch": 0.02, "grad_norm": 7.837884654347885, "learning_rate": 7.146739130434784e-06, "loss": 1.0407, "step": 263 }, { "epoch": 0.02, "grad_norm": 3.821659738606539, "learning_rate": 7.173913043478261e-06, "loss": 0.4997, "step": 264 }, { "epoch": 0.02, "grad_norm": 7.876378892024102, "learning_rate": 7.20108695652174e-06, "loss": 1.266, "step": 265 }, { "epoch": 0.02, "grad_norm": 4.167619207924342, "learning_rate": 7.228260869565218e-06, "loss": 0.5682, "step": 266 }, { "epoch": 0.02, "grad_norm": 6.240061974639415, "learning_rate": 7.255434782608696e-06, "loss": 0.9576, "step": 267 }, { "epoch": 0.02, "grad_norm": 3.287843463237732, "learning_rate": 7.282608695652175e-06, "loss": 0.4013, "step": 268 }, { "epoch": 0.02, "grad_norm": 5.593555393743031, "learning_rate": 7.309782608695652e-06, "loss": 0.799, "step": 269 }, { "epoch": 0.02, "grad_norm": 7.661077596444041, "learning_rate": 7.3369565217391315e-06, "loss": 1.6408, "step": 270 }, { "epoch": 0.02, "grad_norm": 5.307588962336696, "learning_rate": 7.364130434782609e-06, "loss": 0.749, "step": 271 }, { "epoch": 0.02, "grad_norm": 8.37410791120305, "learning_rate": 7.391304347826087e-06, "loss": 0.8649, "step": 272 }, { "epoch": 0.02, "grad_norm": 9.370566236174257, "learning_rate": 7.418478260869566e-06, "loss": 1.5156, "step": 273 }, { "epoch": 0.02, "grad_norm": 5.110290671960013, "learning_rate": 7.445652173913044e-06, "loss": 0.9931, "step": 274 }, { "epoch": 0.02, "grad_norm": 7.699925092422563, "learning_rate": 7.472826086956523e-06, "loss": 1.0836, "step": 275 }, { "epoch": 0.02, "grad_norm": 5.236535462488547, "learning_rate": 7.500000000000001e-06, "loss": 0.6872, "step": 276 }, { "epoch": 0.02, "grad_norm": 8.182701055682035, "learning_rate": 7.5271739130434784e-06, "loss": 1.3735, "step": 277 }, { "epoch": 0.02, "grad_norm": 5.376412752164515, "learning_rate": 7.5543478260869576e-06, "loss": 0.8981, "step": 278 }, { "epoch": 0.02, "grad_norm": 3.9740375817891804, "learning_rate": 7.581521739130435e-06, "loss": 0.5276, "step": 279 }, { "epoch": 0.02, "grad_norm": 6.909735996024911, "learning_rate": 7.608695652173914e-06, "loss": 0.8678, "step": 280 }, { "epoch": 0.02, "grad_norm": 5.243795496334634, "learning_rate": 7.635869565217392e-06, "loss": 0.8842, "step": 281 }, { "epoch": 0.02, "grad_norm": 2.985454133185386, "learning_rate": 7.66304347826087e-06, "loss": 0.3469, "step": 282 }, { "epoch": 0.02, "grad_norm": 4.329242088544714, "learning_rate": 7.690217391304349e-06, "loss": 0.8734, "step": 283 }, { "epoch": 0.02, "grad_norm": 6.438245468838116, "learning_rate": 7.717391304347827e-06, "loss": 1.1468, "step": 284 }, { "epoch": 0.02, "grad_norm": 4.965606920686939, "learning_rate": 7.744565217391305e-06, "loss": 0.9502, "step": 285 }, { "epoch": 0.02, "grad_norm": 7.078318625119426, "learning_rate": 7.771739130434784e-06, "loss": 1.2333, "step": 286 }, { "epoch": 0.02, "grad_norm": 6.360334548540601, "learning_rate": 7.798913043478262e-06, "loss": 0.7249, "step": 287 }, { "epoch": 0.02, "grad_norm": 2.3335049330687503, "learning_rate": 7.82608695652174e-06, "loss": 0.2457, "step": 288 }, { "epoch": 0.02, "grad_norm": 3.7362915593195667, "learning_rate": 7.853260869565218e-06, "loss": 0.516, "step": 289 }, { "epoch": 0.02, "grad_norm": 2.8781325692758477, "learning_rate": 7.880434782608695e-06, "loss": 0.3987, "step": 290 }, { "epoch": 0.02, "grad_norm": 3.936096915610338, "learning_rate": 7.907608695652175e-06, "loss": 0.6694, "step": 291 }, { "epoch": 0.02, "grad_norm": 5.903385405984172, "learning_rate": 7.934782608695653e-06, "loss": 0.9925, "step": 292 }, { "epoch": 0.02, "grad_norm": 2.4190886985795634, "learning_rate": 7.961956521739131e-06, "loss": 0.2049, "step": 293 }, { "epoch": 0.02, "grad_norm": 6.404488252552992, "learning_rate": 7.98913043478261e-06, "loss": 0.7957, "step": 294 }, { "epoch": 0.02, "grad_norm": 6.097551330154639, "learning_rate": 8.016304347826088e-06, "loss": 0.966, "step": 295 }, { "epoch": 0.02, "grad_norm": 7.976238038990829, "learning_rate": 8.043478260869566e-06, "loss": 1.1818, "step": 296 }, { "epoch": 0.02, "grad_norm": 8.421521826437415, "learning_rate": 8.070652173913044e-06, "loss": 1.4973, "step": 297 }, { "epoch": 0.02, "grad_norm": 4.077754124133221, "learning_rate": 8.097826086956523e-06, "loss": 0.5261, "step": 298 }, { "epoch": 0.02, "grad_norm": 6.4070308899500175, "learning_rate": 8.125000000000001e-06, "loss": 1.1101, "step": 299 }, { "epoch": 0.02, "grad_norm": 5.318753575824465, "learning_rate": 8.15217391304348e-06, "loss": 1.104, "step": 300 }, { "epoch": 0.02, "grad_norm": 4.284465274777319, "learning_rate": 8.179347826086957e-06, "loss": 0.7641, "step": 301 }, { "epoch": 0.02, "grad_norm": 4.625176035390117, "learning_rate": 8.206521739130436e-06, "loss": 0.8921, "step": 302 }, { "epoch": 0.02, "grad_norm": 6.339376984014242, "learning_rate": 8.233695652173914e-06, "loss": 1.2197, "step": 303 }, { "epoch": 0.02, "grad_norm": 6.157859884255341, "learning_rate": 8.260869565217392e-06, "loss": 0.9333, "step": 304 }, { "epoch": 0.02, "grad_norm": 6.3629598823237234, "learning_rate": 8.28804347826087e-06, "loss": 0.9097, "step": 305 }, { "epoch": 0.03, "grad_norm": 4.868071909896863, "learning_rate": 8.315217391304349e-06, "loss": 0.9008, "step": 306 }, { "epoch": 0.03, "grad_norm": 3.978604409945964, "learning_rate": 8.342391304347827e-06, "loss": 0.5327, "step": 307 }, { "epoch": 0.03, "grad_norm": 5.7437093095606695, "learning_rate": 8.369565217391305e-06, "loss": 0.8238, "step": 308 }, { "epoch": 0.03, "grad_norm": 3.454985051623668, "learning_rate": 8.396739130434784e-06, "loss": 0.6295, "step": 309 }, { "epoch": 0.03, "grad_norm": 6.614788356863233, "learning_rate": 8.423913043478262e-06, "loss": 0.9974, "step": 310 }, { "epoch": 0.03, "grad_norm": 5.608916913600715, "learning_rate": 8.45108695652174e-06, "loss": 1.1464, "step": 311 }, { "epoch": 0.03, "grad_norm": 4.436932595933803, "learning_rate": 8.478260869565218e-06, "loss": 0.4595, "step": 312 }, { "epoch": 0.03, "grad_norm": 4.57722024795812, "learning_rate": 8.505434782608697e-06, "loss": 0.5938, "step": 313 }, { "epoch": 0.03, "grad_norm": 6.677248597091288, "learning_rate": 8.532608695652175e-06, "loss": 1.1063, "step": 314 }, { "epoch": 0.03, "grad_norm": 8.034438340002367, "learning_rate": 8.559782608695653e-06, "loss": 1.6156, "step": 315 }, { "epoch": 0.03, "grad_norm": 3.2062511410316326, "learning_rate": 8.586956521739131e-06, "loss": 0.6021, "step": 316 }, { "epoch": 0.03, "grad_norm": 6.145723079176569, "learning_rate": 8.61413043478261e-06, "loss": 1.6035, "step": 317 }, { "epoch": 0.03, "grad_norm": 5.34202953981997, "learning_rate": 8.641304347826088e-06, "loss": 0.8125, "step": 318 }, { "epoch": 0.03, "grad_norm": 6.337229311201793, "learning_rate": 8.668478260869566e-06, "loss": 1.1929, "step": 319 }, { "epoch": 0.03, "grad_norm": 6.225481053597104, "learning_rate": 8.695652173913044e-06, "loss": 1.1157, "step": 320 }, { "epoch": 0.03, "grad_norm": 5.017710989732211, "learning_rate": 8.722826086956523e-06, "loss": 0.8339, "step": 321 }, { "epoch": 0.03, "grad_norm": 7.882373233059433, "learning_rate": 8.750000000000001e-06, "loss": 1.262, "step": 322 }, { "epoch": 0.03, "grad_norm": 4.066556353039955, "learning_rate": 8.77717391304348e-06, "loss": 0.2942, "step": 323 }, { "epoch": 0.03, "grad_norm": 7.770860273190021, "learning_rate": 8.804347826086957e-06, "loss": 1.4823, "step": 324 }, { "epoch": 0.03, "grad_norm": 6.7343550772185505, "learning_rate": 8.831521739130436e-06, "loss": 0.7482, "step": 325 }, { "epoch": 0.03, "grad_norm": 5.639168375286053, "learning_rate": 8.858695652173914e-06, "loss": 0.9131, "step": 326 }, { "epoch": 0.03, "grad_norm": 7.518462985960601, "learning_rate": 8.885869565217392e-06, "loss": 1.058, "step": 327 }, { "epoch": 0.03, "grad_norm": 7.54762146148649, "learning_rate": 8.91304347826087e-06, "loss": 1.2194, "step": 328 }, { "epoch": 0.03, "grad_norm": 7.871385299233604, "learning_rate": 8.940217391304349e-06, "loss": 1.0078, "step": 329 }, { "epoch": 0.03, "grad_norm": 4.359117961957575, "learning_rate": 8.967391304347827e-06, "loss": 0.7769, "step": 330 }, { "epoch": 0.03, "grad_norm": 5.0982811956203244, "learning_rate": 8.994565217391305e-06, "loss": 0.7377, "step": 331 }, { "epoch": 0.03, "grad_norm": 5.830234028030018, "learning_rate": 9.021739130434784e-06, "loss": 0.9978, "step": 332 }, { "epoch": 0.03, "grad_norm": 5.8181359163099655, "learning_rate": 9.048913043478262e-06, "loss": 0.5928, "step": 333 }, { "epoch": 0.03, "grad_norm": 5.681560068759461, "learning_rate": 9.07608695652174e-06, "loss": 1.0175, "step": 334 }, { "epoch": 0.03, "grad_norm": 6.80680243793632, "learning_rate": 9.103260869565218e-06, "loss": 1.1993, "step": 335 }, { "epoch": 0.03, "grad_norm": 6.186937644327905, "learning_rate": 9.130434782608697e-06, "loss": 0.6584, "step": 336 }, { "epoch": 0.03, "grad_norm": 6.560489553639137, "learning_rate": 9.157608695652175e-06, "loss": 1.1444, "step": 337 }, { "epoch": 0.03, "grad_norm": 8.058229643385323, "learning_rate": 9.184782608695653e-06, "loss": 1.1921, "step": 338 }, { "epoch": 0.03, "grad_norm": 4.668465254356496, "learning_rate": 9.211956521739131e-06, "loss": 0.7071, "step": 339 }, { "epoch": 0.03, "grad_norm": 6.35169558250226, "learning_rate": 9.23913043478261e-06, "loss": 0.9809, "step": 340 }, { "epoch": 0.03, "grad_norm": 4.837242529338085, "learning_rate": 9.266304347826088e-06, "loss": 0.9961, "step": 341 }, { "epoch": 0.03, "grad_norm": 6.198233376887303, "learning_rate": 9.293478260869566e-06, "loss": 1.0778, "step": 342 }, { "epoch": 0.03, "grad_norm": 2.4647916149997653, "learning_rate": 9.320652173913044e-06, "loss": 0.3007, "step": 343 }, { "epoch": 0.03, "grad_norm": 6.261137583211198, "learning_rate": 9.347826086956523e-06, "loss": 1.3224, "step": 344 }, { "epoch": 0.03, "grad_norm": 6.556407019061477, "learning_rate": 9.375000000000001e-06, "loss": 1.2377, "step": 345 }, { "epoch": 0.03, "grad_norm": 8.46885744874352, "learning_rate": 9.402173913043479e-06, "loss": 1.5947, "step": 346 }, { "epoch": 0.03, "grad_norm": 5.194630590437151, "learning_rate": 9.429347826086957e-06, "loss": 0.8361, "step": 347 }, { "epoch": 0.03, "grad_norm": 5.769530302655123, "learning_rate": 9.456521739130436e-06, "loss": 1.2915, "step": 348 }, { "epoch": 0.03, "grad_norm": 3.27229215981989, "learning_rate": 9.483695652173914e-06, "loss": 0.4573, "step": 349 }, { "epoch": 0.03, "grad_norm": 6.0281628916208145, "learning_rate": 9.510869565217392e-06, "loss": 0.9685, "step": 350 }, { "epoch": 0.03, "grad_norm": 7.614906153133044, "learning_rate": 9.53804347826087e-06, "loss": 2.14, "step": 351 }, { "epoch": 0.03, "grad_norm": 3.132861967845814, "learning_rate": 9.565217391304349e-06, "loss": 0.4094, "step": 352 }, { "epoch": 0.03, "grad_norm": 7.769971710510702, "learning_rate": 9.592391304347827e-06, "loss": 1.5559, "step": 353 }, { "epoch": 0.03, "grad_norm": 4.214541771357837, "learning_rate": 9.619565217391305e-06, "loss": 0.6542, "step": 354 }, { "epoch": 0.03, "grad_norm": 2.368108862701037, "learning_rate": 9.646739130434783e-06, "loss": 0.3184, "step": 355 }, { "epoch": 0.03, "grad_norm": 9.467183830886526, "learning_rate": 9.673913043478262e-06, "loss": 2.0, "step": 356 }, { "epoch": 0.03, "grad_norm": 4.148717928615864, "learning_rate": 9.70108695652174e-06, "loss": 0.7562, "step": 357 }, { "epoch": 0.03, "grad_norm": 4.699142596546084, "learning_rate": 9.728260869565218e-06, "loss": 0.5602, "step": 358 }, { "epoch": 0.03, "grad_norm": 5.778849120224696, "learning_rate": 9.755434782608696e-06, "loss": 1.285, "step": 359 }, { "epoch": 0.03, "grad_norm": 6.325812744530062, "learning_rate": 9.782608695652175e-06, "loss": 1.035, "step": 360 }, { "epoch": 0.03, "grad_norm": 6.270446437848442, "learning_rate": 9.809782608695653e-06, "loss": 1.1036, "step": 361 }, { "epoch": 0.03, "grad_norm": 5.098595435383664, "learning_rate": 9.836956521739131e-06, "loss": 0.7118, "step": 362 }, { "epoch": 0.03, "grad_norm": 5.120947794599988, "learning_rate": 9.86413043478261e-06, "loss": 1.0194, "step": 363 }, { "epoch": 0.03, "grad_norm": 4.753519153874003, "learning_rate": 9.891304347826088e-06, "loss": 0.8027, "step": 364 }, { "epoch": 0.03, "grad_norm": 5.54491484144688, "learning_rate": 9.918478260869566e-06, "loss": 0.9281, "step": 365 }, { "epoch": 0.03, "grad_norm": 4.88514814649314, "learning_rate": 9.945652173913044e-06, "loss": 0.8455, "step": 366 }, { "epoch": 0.03, "grad_norm": 5.574575608802646, "learning_rate": 9.972826086956523e-06, "loss": 1.2401, "step": 367 }, { "epoch": 0.03, "grad_norm": 5.295452016714822, "learning_rate": 1e-05, "loss": 0.7394, "step": 368 }, { "epoch": 0.03, "grad_norm": 5.827174479039339, "learning_rate": 9.999999824760881e-06, "loss": 0.912, "step": 369 }, { "epoch": 0.03, "grad_norm": 3.053210078293678, "learning_rate": 9.999999299043533e-06, "loss": 0.455, "step": 370 }, { "epoch": 0.03, "grad_norm": 4.67096244105842, "learning_rate": 9.999998422847994e-06, "loss": 0.8681, "step": 371 }, { "epoch": 0.03, "grad_norm": 5.554637130438726, "learning_rate": 9.999997196174327e-06, "loss": 0.7649, "step": 372 }, { "epoch": 0.03, "grad_norm": 3.5853043492657246, "learning_rate": 9.999995619022615e-06, "loss": 0.6937, "step": 373 }, { "epoch": 0.03, "grad_norm": 4.6670712091283875, "learning_rate": 9.999993691392973e-06, "loss": 0.8806, "step": 374 }, { "epoch": 0.03, "grad_norm": 5.168060014150088, "learning_rate": 9.99999141328553e-06, "loss": 0.8426, "step": 375 }, { "epoch": 0.03, "grad_norm": 7.843194545607991, "learning_rate": 9.999988784700451e-06, "loss": 1.1695, "step": 376 }, { "epoch": 0.03, "grad_norm": 5.168568665105547, "learning_rate": 9.999985805637917e-06, "loss": 0.7686, "step": 377 }, { "epoch": 0.03, "grad_norm": 3.894165959519665, "learning_rate": 9.999982476098138e-06, "loss": 0.7126, "step": 378 }, { "epoch": 0.03, "grad_norm": 9.08781693842811, "learning_rate": 9.99997879608135e-06, "loss": 0.9496, "step": 379 }, { "epoch": 0.03, "grad_norm": 6.7907899593817636, "learning_rate": 9.999974765587805e-06, "loss": 1.0241, "step": 380 }, { "epoch": 0.03, "grad_norm": 6.14564735190624, "learning_rate": 9.99997038461779e-06, "loss": 1.084, "step": 381 }, { "epoch": 0.03, "grad_norm": 4.632103552875158, "learning_rate": 9.99996565317161e-06, "loss": 0.6177, "step": 382 }, { "epoch": 0.03, "grad_norm": 7.06828195446152, "learning_rate": 9.999960571249599e-06, "loss": 1.2956, "step": 383 }, { "epoch": 0.03, "grad_norm": 5.651102377791331, "learning_rate": 9.999955138852111e-06, "loss": 1.3542, "step": 384 }, { "epoch": 0.03, "grad_norm": 4.042294337025638, "learning_rate": 9.99994935597953e-06, "loss": 0.6006, "step": 385 }, { "epoch": 0.03, "grad_norm": 3.402994414654537, "learning_rate": 9.999943222632258e-06, "loss": 0.4791, "step": 386 }, { "epoch": 0.03, "grad_norm": 4.965056543386612, "learning_rate": 9.999936738810725e-06, "loss": 1.0879, "step": 387 }, { "epoch": 0.03, "grad_norm": 6.1713896321431605, "learning_rate": 9.999929904515386e-06, "loss": 1.053, "step": 388 }, { "epoch": 0.03, "grad_norm": 6.145030010793328, "learning_rate": 9.99992271974672e-06, "loss": 0.7253, "step": 389 }, { "epoch": 0.03, "grad_norm": 5.336075295397067, "learning_rate": 9.999915184505233e-06, "loss": 0.7442, "step": 390 }, { "epoch": 0.03, "grad_norm": 5.5450996060354, "learning_rate": 9.99990729879145e-06, "loss": 1.277, "step": 391 }, { "epoch": 0.03, "grad_norm": 7.636158624112354, "learning_rate": 9.999899062605928e-06, "loss": 1.239, "step": 392 }, { "epoch": 0.03, "grad_norm": 5.477488726410057, "learning_rate": 9.999890475949236e-06, "loss": 0.9772, "step": 393 }, { "epoch": 0.03, "grad_norm": 5.051495747823013, "learning_rate": 9.999881538821985e-06, "loss": 0.5469, "step": 394 }, { "epoch": 0.03, "grad_norm": 4.300752608556988, "learning_rate": 9.999872251224796e-06, "loss": 0.7399, "step": 395 }, { "epoch": 0.03, "grad_norm": 3.5526737909232016, "learning_rate": 9.999862613158323e-06, "loss": 0.513, "step": 396 }, { "epoch": 0.03, "grad_norm": 4.933537207255718, "learning_rate": 9.99985262462324e-06, "loss": 0.8352, "step": 397 }, { "epoch": 0.03, "grad_norm": 4.738769227354006, "learning_rate": 9.999842285620247e-06, "loss": 1.0434, "step": 398 }, { "epoch": 0.03, "grad_norm": 2.762740426870926, "learning_rate": 9.999831596150069e-06, "loss": 0.3968, "step": 399 }, { "epoch": 0.03, "grad_norm": 4.1264125436257375, "learning_rate": 9.999820556213455e-06, "loss": 0.7019, "step": 400 }, { "epoch": 0.03, "grad_norm": 6.500334497583717, "learning_rate": 9.99980916581118e-06, "loss": 0.8607, "step": 401 }, { "epoch": 0.03, "grad_norm": 4.481620748046511, "learning_rate": 9.999797424944041e-06, "loss": 0.8718, "step": 402 }, { "epoch": 0.03, "grad_norm": 3.151238330741133, "learning_rate": 9.999785333612863e-06, "loss": 0.7545, "step": 403 }, { "epoch": 0.03, "grad_norm": 5.976320787775345, "learning_rate": 9.999772891818493e-06, "loss": 1.2427, "step": 404 }, { "epoch": 0.03, "grad_norm": 5.000150498697293, "learning_rate": 9.999760099561802e-06, "loss": 0.9778, "step": 405 }, { "epoch": 0.03, "grad_norm": 3.6502724932396453, "learning_rate": 9.999746956843685e-06, "loss": 0.6976, "step": 406 }, { "epoch": 0.03, "grad_norm": 5.193776525977321, "learning_rate": 9.999733463665067e-06, "loss": 0.8279, "step": 407 }, { "epoch": 0.03, "grad_norm": 7.239212491469085, "learning_rate": 9.999719620026891e-06, "loss": 1.3266, "step": 408 }, { "epoch": 0.03, "grad_norm": 5.596325626752294, "learning_rate": 9.99970542593013e-06, "loss": 1.1974, "step": 409 }, { "epoch": 0.03, "grad_norm": 6.706543574475022, "learning_rate": 9.999690881375777e-06, "loss": 1.2101, "step": 410 }, { "epoch": 0.03, "grad_norm": 4.644646579385012, "learning_rate": 9.99967598636485e-06, "loss": 0.9091, "step": 411 }, { "epoch": 0.03, "grad_norm": 3.9175230022782546, "learning_rate": 9.999660740898397e-06, "loss": 0.498, "step": 412 }, { "epoch": 0.03, "grad_norm": 5.173890194845122, "learning_rate": 9.999645144977483e-06, "loss": 0.7663, "step": 413 }, { "epoch": 0.03, "grad_norm": 4.748467260869798, "learning_rate": 9.999629198603205e-06, "loss": 0.7728, "step": 414 }, { "epoch": 0.03, "grad_norm": 5.778886530785687, "learning_rate": 9.999612901776678e-06, "loss": 1.2248, "step": 415 }, { "epoch": 0.03, "grad_norm": 4.935476197785821, "learning_rate": 9.999596254499044e-06, "loss": 0.6129, "step": 416 }, { "epoch": 0.03, "grad_norm": 4.02337258831855, "learning_rate": 9.999579256771473e-06, "loss": 0.6155, "step": 417 }, { "epoch": 0.03, "grad_norm": 3.8062484181428986, "learning_rate": 9.999561908595153e-06, "loss": 0.6679, "step": 418 }, { "epoch": 0.03, "grad_norm": 4.8954362755783265, "learning_rate": 9.999544209971299e-06, "loss": 0.66, "step": 419 }, { "epoch": 0.03, "grad_norm": 4.099290865501664, "learning_rate": 9.999526160901156e-06, "loss": 0.8223, "step": 420 }, { "epoch": 0.03, "grad_norm": 4.863034209422073, "learning_rate": 9.999507761385989e-06, "loss": 0.6422, "step": 421 }, { "epoch": 0.03, "grad_norm": 5.083975787293613, "learning_rate": 9.999489011427084e-06, "loss": 0.7041, "step": 422 }, { "epoch": 0.03, "grad_norm": 3.3178956260126076, "learning_rate": 9.999469911025756e-06, "loss": 0.5422, "step": 423 }, { "epoch": 0.03, "grad_norm": 6.650247498520858, "learning_rate": 9.999450460183347e-06, "loss": 1.0725, "step": 424 }, { "epoch": 0.03, "grad_norm": 5.95259074049211, "learning_rate": 9.999430658901217e-06, "loss": 0.9106, "step": 425 }, { "epoch": 0.03, "grad_norm": 7.71544477110711, "learning_rate": 9.999410507180757e-06, "loss": 1.443, "step": 426 }, { "epoch": 0.03, "grad_norm": 5.346969103609945, "learning_rate": 9.999390005023377e-06, "loss": 1.1114, "step": 427 }, { "epoch": 0.03, "grad_norm": 6.434687806214507, "learning_rate": 9.999369152430514e-06, "loss": 1.0029, "step": 428 }, { "epoch": 0.04, "grad_norm": 6.2244364829348955, "learning_rate": 9.999347949403633e-06, "loss": 0.8434, "step": 429 }, { "epoch": 0.04, "grad_norm": 5.439228351059763, "learning_rate": 9.999326395944217e-06, "loss": 1.0175, "step": 430 }, { "epoch": 0.04, "grad_norm": 5.899604465621711, "learning_rate": 9.999304492053777e-06, "loss": 0.9335, "step": 431 }, { "epoch": 0.04, "grad_norm": 5.3167350747899755, "learning_rate": 9.999282237733849e-06, "loss": 1.1625, "step": 432 }, { "epoch": 0.04, "grad_norm": 4.770326297294352, "learning_rate": 9.999259632985996e-06, "loss": 0.896, "step": 433 }, { "epoch": 0.04, "grad_norm": 5.427169696584599, "learning_rate": 9.999236677811796e-06, "loss": 0.8969, "step": 434 }, { "epoch": 0.04, "grad_norm": 6.297151047850789, "learning_rate": 9.999213372212863e-06, "loss": 0.9043, "step": 435 }, { "epoch": 0.04, "grad_norm": 6.20610963603229, "learning_rate": 9.99918971619083e-06, "loss": 1.2859, "step": 436 }, { "epoch": 0.04, "grad_norm": 4.8465537937699406, "learning_rate": 9.999165709747353e-06, "loss": 1.0852, "step": 437 }, { "epoch": 0.04, "grad_norm": 4.657766614343768, "learning_rate": 9.999141352884118e-06, "loss": 1.0622, "step": 438 }, { "epoch": 0.04, "grad_norm": 4.9847198110116775, "learning_rate": 9.999116645602828e-06, "loss": 0.9139, "step": 439 }, { "epoch": 0.04, "grad_norm": 4.4219398024801, "learning_rate": 9.99909158790522e-06, "loss": 0.8118, "step": 440 }, { "epoch": 0.04, "grad_norm": 6.208092284451877, "learning_rate": 9.999066179793047e-06, "loss": 1.0925, "step": 441 }, { "epoch": 0.04, "grad_norm": 5.381473249926374, "learning_rate": 9.99904042126809e-06, "loss": 1.1494, "step": 442 }, { "epoch": 0.04, "grad_norm": 5.171395550274936, "learning_rate": 9.999014312332156e-06, "loss": 1.1686, "step": 443 }, { "epoch": 0.04, "grad_norm": 4.07792792749062, "learning_rate": 9.998987852987074e-06, "loss": 0.9116, "step": 444 }, { "epoch": 0.04, "grad_norm": 7.609250844455039, "learning_rate": 9.9989610432347e-06, "loss": 1.7275, "step": 445 }, { "epoch": 0.04, "grad_norm": 6.042050660855812, "learning_rate": 9.998933883076912e-06, "loss": 1.1827, "step": 446 }, { "epoch": 0.04, "grad_norm": 4.387281639470219, "learning_rate": 9.998906372515615e-06, "loss": 0.9077, "step": 447 }, { "epoch": 0.04, "grad_norm": 4.807479350295363, "learning_rate": 9.998878511552734e-06, "loss": 0.9013, "step": 448 }, { "epoch": 0.04, "grad_norm": 4.673383829189851, "learning_rate": 9.998850300190226e-06, "loss": 0.7067, "step": 449 }, { "epoch": 0.04, "grad_norm": 5.482918184763173, "learning_rate": 9.99882173843007e-06, "loss": 1.2614, "step": 450 }, { "epoch": 0.04, "grad_norm": 4.972131746168448, "learning_rate": 9.99879282627426e-06, "loss": 0.911, "step": 451 }, { "epoch": 0.04, "grad_norm": 7.2324493611028275, "learning_rate": 9.998763563724831e-06, "loss": 1.1946, "step": 452 }, { "epoch": 0.04, "grad_norm": 5.36145192963037, "learning_rate": 9.99873395078383e-06, "loss": 1.0718, "step": 453 }, { "epoch": 0.04, "grad_norm": 6.139122292070844, "learning_rate": 9.998703987453334e-06, "loss": 0.8382, "step": 454 }, { "epoch": 0.04, "grad_norm": 6.657219504198628, "learning_rate": 9.998673673735442e-06, "loss": 1.268, "step": 455 }, { "epoch": 0.04, "grad_norm": 6.009202914261718, "learning_rate": 9.998643009632281e-06, "loss": 0.7224, "step": 456 }, { "epoch": 0.04, "grad_norm": 4.475889755505141, "learning_rate": 9.998611995145997e-06, "loss": 0.7266, "step": 457 }, { "epoch": 0.04, "grad_norm": 1.9204812272815377, "learning_rate": 9.99858063027877e-06, "loss": 0.2579, "step": 458 }, { "epoch": 0.04, "grad_norm": 5.499067475167554, "learning_rate": 9.99854891503279e-06, "loss": 1.1072, "step": 459 }, { "epoch": 0.04, "grad_norm": 2.41932921975513, "learning_rate": 9.998516849410287e-06, "loss": 0.4482, "step": 460 }, { "epoch": 0.04, "grad_norm": 4.714106357668273, "learning_rate": 9.998484433413507e-06, "loss": 0.7622, "step": 461 }, { "epoch": 0.04, "grad_norm": 5.043566688600208, "learning_rate": 9.998451667044721e-06, "loss": 0.6495, "step": 462 }, { "epoch": 0.04, "grad_norm": 5.919820905006889, "learning_rate": 9.998418550306228e-06, "loss": 0.8238, "step": 463 }, { "epoch": 0.04, "grad_norm": 5.294308724711821, "learning_rate": 9.998385083200346e-06, "loss": 0.5751, "step": 464 }, { "epoch": 0.04, "grad_norm": 4.301981205204581, "learning_rate": 9.998351265729423e-06, "loss": 0.4563, "step": 465 }, { "epoch": 0.04, "grad_norm": 6.89628096529308, "learning_rate": 9.99831709789583e-06, "loss": 1.39, "step": 466 }, { "epoch": 0.04, "grad_norm": 4.64203438591841, "learning_rate": 9.99828257970196e-06, "loss": 0.8958, "step": 467 }, { "epoch": 0.04, "grad_norm": 4.151009132010121, "learning_rate": 9.998247711150235e-06, "loss": 0.4676, "step": 468 }, { "epoch": 0.04, "grad_norm": 6.055988336093534, "learning_rate": 9.998212492243099e-06, "loss": 1.0388, "step": 469 }, { "epoch": 0.04, "grad_norm": 4.290773029422374, "learning_rate": 9.998176922983017e-06, "loss": 1.0765, "step": 470 }, { "epoch": 0.04, "grad_norm": 5.447842072761543, "learning_rate": 9.998141003372486e-06, "loss": 1.2072, "step": 471 }, { "epoch": 0.04, "grad_norm": 5.586951220708749, "learning_rate": 9.998104733414022e-06, "loss": 1.0186, "step": 472 }, { "epoch": 0.04, "grad_norm": 5.485052880067488, "learning_rate": 9.998068113110168e-06, "loss": 0.9249, "step": 473 }, { "epoch": 0.04, "grad_norm": 6.381424161053315, "learning_rate": 9.99803114246349e-06, "loss": 1.4598, "step": 474 }, { "epoch": 0.04, "grad_norm": 3.656368392426574, "learning_rate": 9.997993821476583e-06, "loss": 0.7038, "step": 475 }, { "epoch": 0.04, "grad_norm": 4.572194725600882, "learning_rate": 9.99795615015206e-06, "loss": 1.0368, "step": 476 }, { "epoch": 0.04, "grad_norm": 4.633066802629584, "learning_rate": 9.99791812849256e-06, "loss": 1.0351, "step": 477 }, { "epoch": 0.04, "grad_norm": 5.647468770382145, "learning_rate": 9.997879756500752e-06, "loss": 1.2678, "step": 478 }, { "epoch": 0.04, "grad_norm": 4.661062102561084, "learning_rate": 9.997841034179323e-06, "loss": 0.879, "step": 479 }, { "epoch": 0.04, "grad_norm": 4.5448288381588355, "learning_rate": 9.997801961530989e-06, "loss": 0.8834, "step": 480 }, { "epoch": 0.04, "grad_norm": 6.746344735056418, "learning_rate": 9.997762538558488e-06, "loss": 1.4655, "step": 481 }, { "epoch": 0.04, "grad_norm": 3.7462834947998007, "learning_rate": 9.997722765264582e-06, "loss": 0.7281, "step": 482 }, { "epoch": 0.04, "grad_norm": 5.7286846769815325, "learning_rate": 9.997682641652062e-06, "loss": 1.1582, "step": 483 }, { "epoch": 0.04, "grad_norm": 5.03167073385437, "learning_rate": 9.997642167723737e-06, "loss": 0.9327, "step": 484 }, { "epoch": 0.04, "grad_norm": 6.232339007741685, "learning_rate": 9.997601343482448e-06, "loss": 1.1108, "step": 485 }, { "epoch": 0.04, "grad_norm": 6.245419578495987, "learning_rate": 9.997560168931053e-06, "loss": 0.8701, "step": 486 }, { "epoch": 0.04, "grad_norm": 6.9573609801548555, "learning_rate": 9.99751864407244e-06, "loss": 1.6929, "step": 487 }, { "epoch": 0.04, "grad_norm": 4.6903216396074745, "learning_rate": 9.99747676890952e-06, "loss": 1.0227, "step": 488 }, { "epoch": 0.04, "grad_norm": 5.879451561124423, "learning_rate": 9.997434543445227e-06, "loss": 1.1881, "step": 489 }, { "epoch": 0.04, "grad_norm": 4.016960042499101, "learning_rate": 9.997391967682522e-06, "loss": 0.6582, "step": 490 }, { "epoch": 0.04, "grad_norm": 6.015495969337913, "learning_rate": 9.997349041624387e-06, "loss": 1.2102, "step": 491 }, { "epoch": 0.04, "grad_norm": 4.627930447954648, "learning_rate": 9.997305765273834e-06, "loss": 0.6894, "step": 492 }, { "epoch": 0.04, "grad_norm": 4.403979572529196, "learning_rate": 9.997262138633895e-06, "loss": 0.9797, "step": 493 }, { "epoch": 0.04, "grad_norm": 5.264874126549467, "learning_rate": 9.99721816170763e-06, "loss": 1.0349, "step": 494 }, { "epoch": 0.04, "grad_norm": 2.456314950766531, "learning_rate": 9.997173834498118e-06, "loss": 0.2876, "step": 495 }, { "epoch": 0.04, "grad_norm": 7.390226223155865, "learning_rate": 9.997129157008467e-06, "loss": 0.9932, "step": 496 }, { "epoch": 0.04, "grad_norm": 5.220155764196953, "learning_rate": 9.99708412924181e-06, "loss": 0.9571, "step": 497 }, { "epoch": 0.04, "grad_norm": 5.090257690615265, "learning_rate": 9.997038751201305e-06, "loss": 0.6411, "step": 498 }, { "epoch": 0.04, "grad_norm": 3.4198073238913955, "learning_rate": 9.996993022890129e-06, "loss": 0.4335, "step": 499 }, { "epoch": 0.04, "grad_norm": 2.9754266714771402, "learning_rate": 9.99694694431149e-06, "loss": 0.5786, "step": 500 }, { "epoch": 0.04, "grad_norm": 3.862299420547906, "learning_rate": 9.996900515468614e-06, "loss": 0.4861, "step": 501 }, { "epoch": 0.04, "grad_norm": 4.749216334168513, "learning_rate": 9.996853736364763e-06, "loss": 1.0879, "step": 502 }, { "epoch": 0.04, "grad_norm": 5.357431072223263, "learning_rate": 9.996806607003209e-06, "loss": 1.2146, "step": 503 }, { "epoch": 0.04, "grad_norm": 3.591655212241847, "learning_rate": 9.996759127387259e-06, "loss": 0.7087, "step": 504 }, { "epoch": 0.04, "grad_norm": 6.45088704039877, "learning_rate": 9.996711297520238e-06, "loss": 1.3822, "step": 505 }, { "epoch": 0.04, "grad_norm": 7.018084335327446, "learning_rate": 9.996663117405503e-06, "loss": 1.5407, "step": 506 }, { "epoch": 0.04, "grad_norm": 4.427680298582249, "learning_rate": 9.99661458704643e-06, "loss": 0.5793, "step": 507 }, { "epoch": 0.04, "grad_norm": 7.765130560248742, "learning_rate": 9.996565706446418e-06, "loss": 1.67, "step": 508 }, { "epoch": 0.04, "grad_norm": 5.8745700844606406, "learning_rate": 9.996516475608894e-06, "loss": 1.421, "step": 509 }, { "epoch": 0.04, "grad_norm": 6.075923799198178, "learning_rate": 9.996466894537311e-06, "loss": 1.1038, "step": 510 }, { "epoch": 0.04, "grad_norm": 2.259111833152261, "learning_rate": 9.996416963235144e-06, "loss": 0.2819, "step": 511 }, { "epoch": 0.04, "grad_norm": 1.9609336099359, "learning_rate": 9.996366681705892e-06, "loss": 0.4535, "step": 512 }, { "epoch": 0.04, "grad_norm": 4.9779750310059985, "learning_rate": 9.99631604995308e-06, "loss": 0.9532, "step": 513 }, { "epoch": 0.04, "grad_norm": 3.5643470817068112, "learning_rate": 9.996265067980256e-06, "loss": 0.4561, "step": 514 }, { "epoch": 0.04, "grad_norm": 6.865293599732065, "learning_rate": 9.996213735790995e-06, "loss": 1.4386, "step": 515 }, { "epoch": 0.04, "grad_norm": 6.698458945782838, "learning_rate": 9.996162053388895e-06, "loss": 1.2189, "step": 516 }, { "epoch": 0.04, "grad_norm": 5.947033920225605, "learning_rate": 9.996110020777579e-06, "loss": 0.9818, "step": 517 }, { "epoch": 0.04, "grad_norm": 5.38030957812177, "learning_rate": 9.996057637960694e-06, "loss": 0.8517, "step": 518 }, { "epoch": 0.04, "grad_norm": 5.3778991466873185, "learning_rate": 9.996004904941911e-06, "loss": 1.012, "step": 519 }, { "epoch": 0.04, "grad_norm": 5.495684553725476, "learning_rate": 9.995951821724926e-06, "loss": 1.1764, "step": 520 }, { "epoch": 0.04, "grad_norm": 6.828192139209808, "learning_rate": 9.99589838831346e-06, "loss": 1.6443, "step": 521 }, { "epoch": 0.04, "grad_norm": 5.584025659582305, "learning_rate": 9.995844604711262e-06, "loss": 0.8156, "step": 522 }, { "epoch": 0.04, "grad_norm": 3.378898352225901, "learning_rate": 9.995790470922098e-06, "loss": 0.7585, "step": 523 }, { "epoch": 0.04, "grad_norm": 4.815193156427421, "learning_rate": 9.995735986949763e-06, "loss": 1.1676, "step": 524 }, { "epoch": 0.04, "grad_norm": 4.110317832939963, "learning_rate": 9.995681152798079e-06, "loss": 0.6391, "step": 525 }, { "epoch": 0.04, "grad_norm": 3.39861470191864, "learning_rate": 9.995625968470883e-06, "loss": 0.7007, "step": 526 }, { "epoch": 0.04, "grad_norm": 7.833803015864922, "learning_rate": 9.995570433972051e-06, "loss": 1.3191, "step": 527 }, { "epoch": 0.04, "grad_norm": 6.314688180821015, "learning_rate": 9.995514549305472e-06, "loss": 1.2388, "step": 528 }, { "epoch": 0.04, "grad_norm": 6.017090408912602, "learning_rate": 9.995458314475064e-06, "loss": 0.9468, "step": 529 }, { "epoch": 0.04, "grad_norm": 5.001208522010045, "learning_rate": 9.995401729484768e-06, "loss": 0.831, "step": 530 }, { "epoch": 0.04, "grad_norm": 4.782985672211555, "learning_rate": 9.995344794338551e-06, "loss": 0.8729, "step": 531 }, { "epoch": 0.04, "grad_norm": 4.543829103793221, "learning_rate": 9.995287509040403e-06, "loss": 1.1005, "step": 532 }, { "epoch": 0.04, "grad_norm": 4.598299872795798, "learning_rate": 9.99522987359434e-06, "loss": 1.0555, "step": 533 }, { "epoch": 0.04, "grad_norm": 4.2787368892690845, "learning_rate": 9.995171888004403e-06, "loss": 1.141, "step": 534 }, { "epoch": 0.04, "grad_norm": 2.8012577004433523, "learning_rate": 9.995113552274656e-06, "loss": 0.7279, "step": 535 }, { "epoch": 0.04, "grad_norm": 5.805953330549673, "learning_rate": 9.995054866409186e-06, "loss": 1.3504, "step": 536 }, { "epoch": 0.04, "grad_norm": 5.725355607677452, "learning_rate": 9.99499583041211e-06, "loss": 1.1677, "step": 537 }, { "epoch": 0.04, "grad_norm": 5.409679139961245, "learning_rate": 9.994936444287565e-06, "loss": 1.1266, "step": 538 }, { "epoch": 0.04, "grad_norm": 6.340503058191312, "learning_rate": 9.994876708039712e-06, "loss": 0.924, "step": 539 }, { "epoch": 0.04, "grad_norm": 3.635724815694596, "learning_rate": 9.99481662167274e-06, "loss": 0.6758, "step": 540 }, { "epoch": 0.04, "grad_norm": 5.137018574224754, "learning_rate": 9.99475618519086e-06, "loss": 1.488, "step": 541 }, { "epoch": 0.04, "grad_norm": 4.69956991439474, "learning_rate": 9.99469539859831e-06, "loss": 0.823, "step": 542 }, { "epoch": 0.04, "grad_norm": 5.019347119841303, "learning_rate": 9.994634261899347e-06, "loss": 0.9059, "step": 543 }, { "epoch": 0.04, "grad_norm": 6.7141592038245035, "learning_rate": 9.994572775098262e-06, "loss": 1.0794, "step": 544 }, { "epoch": 0.04, "grad_norm": 7.810662155227189, "learning_rate": 9.99451093819936e-06, "loss": 1.2191, "step": 545 }, { "epoch": 0.04, "grad_norm": 1.6379710591200758, "learning_rate": 9.994448751206978e-06, "loss": 0.2519, "step": 546 }, { "epoch": 0.04, "grad_norm": 6.276706835772696, "learning_rate": 9.994386214125476e-06, "loss": 1.4129, "step": 547 }, { "epoch": 0.04, "grad_norm": 7.014552652694709, "learning_rate": 9.994323326959234e-06, "loss": 1.2461, "step": 548 }, { "epoch": 0.04, "grad_norm": 4.620540826198909, "learning_rate": 9.994260089712662e-06, "loss": 1.0425, "step": 549 }, { "epoch": 0.04, "grad_norm": 8.080536553202029, "learning_rate": 9.994196502390194e-06, "loss": 1.7665, "step": 550 }, { "epoch": 0.05, "grad_norm": 6.637864424017921, "learning_rate": 9.994132564996284e-06, "loss": 0.9307, "step": 551 }, { "epoch": 0.05, "grad_norm": 4.781137741702556, "learning_rate": 9.994068277535418e-06, "loss": 0.8607, "step": 552 }, { "epoch": 0.05, "grad_norm": 1.1510246907020736, "learning_rate": 9.994003640012099e-06, "loss": 0.2683, "step": 553 }, { "epoch": 0.05, "grad_norm": 4.250424983427129, "learning_rate": 9.99393865243086e-06, "loss": 0.6378, "step": 554 }, { "epoch": 0.05, "grad_norm": 6.319957939313974, "learning_rate": 9.993873314796253e-06, "loss": 0.8753, "step": 555 }, { "epoch": 0.05, "grad_norm": 6.405556982487025, "learning_rate": 9.993807627112862e-06, "loss": 1.3312, "step": 556 }, { "epoch": 0.05, "grad_norm": 4.160310787271504, "learning_rate": 9.993741589385287e-06, "loss": 0.7164, "step": 557 }, { "epoch": 0.05, "grad_norm": 4.749100097319363, "learning_rate": 9.993675201618162e-06, "loss": 1.0449, "step": 558 }, { "epoch": 0.05, "grad_norm": 6.796749991766679, "learning_rate": 9.993608463816137e-06, "loss": 1.0364, "step": 559 }, { "epoch": 0.05, "grad_norm": 3.0955792124834423, "learning_rate": 9.99354137598389e-06, "loss": 0.5138, "step": 560 }, { "epoch": 0.05, "grad_norm": 5.177971692772812, "learning_rate": 9.993473938126126e-06, "loss": 1.0678, "step": 561 }, { "epoch": 0.05, "grad_norm": 5.299457668753542, "learning_rate": 9.99340615024757e-06, "loss": 1.4612, "step": 562 }, { "epoch": 0.05, "grad_norm": 4.471041279551884, "learning_rate": 9.993338012352973e-06, "loss": 0.8609, "step": 563 }, { "epoch": 0.05, "grad_norm": 4.131374831065728, "learning_rate": 9.993269524447115e-06, "loss": 0.8387, "step": 564 }, { "epoch": 0.05, "grad_norm": 5.95767345172936, "learning_rate": 9.993200686534793e-06, "loss": 1.1389, "step": 565 }, { "epoch": 0.05, "grad_norm": 4.787905925023107, "learning_rate": 9.993131498620833e-06, "loss": 1.0385, "step": 566 }, { "epoch": 0.05, "grad_norm": 5.709708416684444, "learning_rate": 9.993061960710084e-06, "loss": 1.262, "step": 567 }, { "epoch": 0.05, "grad_norm": 4.7600247581497, "learning_rate": 9.992992072807424e-06, "loss": 0.7766, "step": 568 }, { "epoch": 0.05, "grad_norm": 6.040306502808157, "learning_rate": 9.992921834917748e-06, "loss": 1.0075, "step": 569 }, { "epoch": 0.05, "grad_norm": 5.765888597714929, "learning_rate": 9.99285124704598e-06, "loss": 1.1774, "step": 570 }, { "epoch": 0.05, "grad_norm": 4.251268554699921, "learning_rate": 9.99278030919707e-06, "loss": 0.8978, "step": 571 }, { "epoch": 0.05, "grad_norm": 4.687606443004777, "learning_rate": 9.992709021375987e-06, "loss": 0.6764, "step": 572 }, { "epoch": 0.05, "grad_norm": 5.7975790585142155, "learning_rate": 9.992637383587731e-06, "loss": 1.3707, "step": 573 }, { "epoch": 0.05, "grad_norm": 6.216275180993614, "learning_rate": 9.992565395837323e-06, "loss": 1.0971, "step": 574 }, { "epoch": 0.05, "grad_norm": 5.248741788140497, "learning_rate": 9.992493058129808e-06, "loss": 0.7766, "step": 575 }, { "epoch": 0.05, "grad_norm": 5.605246423497219, "learning_rate": 9.992420370470257e-06, "loss": 0.7862, "step": 576 }, { "epoch": 0.05, "grad_norm": 5.2123803620145885, "learning_rate": 9.992347332863766e-06, "loss": 1.1643, "step": 577 }, { "epoch": 0.05, "grad_norm": 5.017266738904277, "learning_rate": 9.992273945315451e-06, "loss": 0.9691, "step": 578 }, { "epoch": 0.05, "grad_norm": 3.856181346386722, "learning_rate": 9.992200207830461e-06, "loss": 0.6863, "step": 579 }, { "epoch": 0.05, "grad_norm": 4.208324579382292, "learning_rate": 9.992126120413963e-06, "loss": 0.8127, "step": 580 }, { "epoch": 0.05, "grad_norm": 4.758991816274172, "learning_rate": 9.99205168307115e-06, "loss": 0.7493, "step": 581 }, { "epoch": 0.05, "grad_norm": 4.657302028990941, "learning_rate": 9.991976895807237e-06, "loss": 0.9181, "step": 582 }, { "epoch": 0.05, "grad_norm": 3.7383748965946557, "learning_rate": 9.99190175862747e-06, "loss": 0.5278, "step": 583 }, { "epoch": 0.05, "grad_norm": 6.017967974387647, "learning_rate": 9.991826271537115e-06, "loss": 1.0367, "step": 584 }, { "epoch": 0.05, "grad_norm": 5.830582460586334, "learning_rate": 9.991750434541463e-06, "loss": 1.2415, "step": 585 }, { "epoch": 0.05, "grad_norm": 4.414455961267636, "learning_rate": 9.99167424764583e-06, "loss": 0.823, "step": 586 }, { "epoch": 0.05, "grad_norm": 6.008113109219425, "learning_rate": 9.991597710855555e-06, "loss": 1.4785, "step": 587 }, { "epoch": 0.05, "grad_norm": 5.6219971020775015, "learning_rate": 9.991520824176004e-06, "loss": 1.1196, "step": 588 }, { "epoch": 0.05, "grad_norm": 3.2924239603883967, "learning_rate": 9.991443587612568e-06, "loss": 0.4934, "step": 589 }, { "epoch": 0.05, "grad_norm": 5.8783937859474875, "learning_rate": 9.991366001170656e-06, "loss": 1.0913, "step": 590 }, { "epoch": 0.05, "grad_norm": 4.6596365717238735, "learning_rate": 9.991288064855713e-06, "loss": 0.8888, "step": 591 }, { "epoch": 0.05, "grad_norm": 1.9574292806593845, "learning_rate": 9.991209778673199e-06, "loss": 0.241, "step": 592 }, { "epoch": 0.05, "grad_norm": 4.958682017079736, "learning_rate": 9.991131142628601e-06, "loss": 1.2773, "step": 593 }, { "epoch": 0.05, "grad_norm": 4.497872038290533, "learning_rate": 9.99105215672743e-06, "loss": 0.9163, "step": 594 }, { "epoch": 0.05, "grad_norm": 4.356815585248249, "learning_rate": 9.990972820975224e-06, "loss": 0.9554, "step": 595 }, { "epoch": 0.05, "grad_norm": 3.823751138948768, "learning_rate": 9.990893135377544e-06, "loss": 0.6938, "step": 596 }, { "epoch": 0.05, "grad_norm": 5.869332541879883, "learning_rate": 9.990813099939977e-06, "loss": 0.9904, "step": 597 }, { "epoch": 0.05, "grad_norm": 2.557426669936456, "learning_rate": 9.990732714668132e-06, "loss": 0.4088, "step": 598 }, { "epoch": 0.05, "grad_norm": 4.994165849060579, "learning_rate": 9.99065197956764e-06, "loss": 0.5122, "step": 599 }, { "epoch": 0.05, "grad_norm": 5.441289707355455, "learning_rate": 9.990570894644168e-06, "loss": 1.1079, "step": 600 }, { "epoch": 0.05, "grad_norm": 4.361083611031531, "learning_rate": 9.990489459903391e-06, "loss": 0.7912, "step": 601 }, { "epoch": 0.05, "grad_norm": 4.907837876833044, "learning_rate": 9.990407675351027e-06, "loss": 1.0607, "step": 602 }, { "epoch": 0.05, "grad_norm": 4.583072805757161, "learning_rate": 9.990325540992798e-06, "loss": 1.0303, "step": 603 }, { "epoch": 0.05, "grad_norm": 6.063504134304614, "learning_rate": 9.99024305683447e-06, "loss": 0.9656, "step": 604 }, { "epoch": 0.05, "grad_norm": 6.3739718312431455, "learning_rate": 9.99016022288182e-06, "loss": 1.9083, "step": 605 }, { "epoch": 0.05, "grad_norm": 3.3717803890143556, "learning_rate": 9.990077039140655e-06, "loss": 0.6516, "step": 606 }, { "epoch": 0.05, "grad_norm": 3.8385892151528815, "learning_rate": 9.989993505616807e-06, "loss": 0.7832, "step": 607 }, { "epoch": 0.05, "grad_norm": 6.415013525607286, "learning_rate": 9.989909622316132e-06, "loss": 0.9748, "step": 608 }, { "epoch": 0.05, "grad_norm": 5.30838058965553, "learning_rate": 9.989825389244508e-06, "loss": 1.1293, "step": 609 }, { "epoch": 0.05, "grad_norm": 3.9641337437991773, "learning_rate": 9.989740806407839e-06, "loss": 0.7073, "step": 610 }, { "epoch": 0.05, "grad_norm": 4.539025450895296, "learning_rate": 9.989655873812054e-06, "loss": 0.4529, "step": 611 }, { "epoch": 0.05, "grad_norm": 8.404028963526692, "learning_rate": 9.98957059146311e-06, "loss": 1.2211, "step": 612 }, { "epoch": 0.05, "grad_norm": 5.464389130013617, "learning_rate": 9.989484959366981e-06, "loss": 0.9611, "step": 613 }, { "epoch": 0.05, "grad_norm": 6.268794174288258, "learning_rate": 9.98939897752967e-06, "loss": 1.5645, "step": 614 }, { "epoch": 0.05, "grad_norm": 6.190325579863303, "learning_rate": 9.989312645957206e-06, "loss": 0.9776, "step": 615 }, { "epoch": 0.05, "grad_norm": 4.519508259471203, "learning_rate": 9.989225964655638e-06, "loss": 0.7953, "step": 616 }, { "epoch": 0.05, "grad_norm": 3.031059724485583, "learning_rate": 9.989138933631042e-06, "loss": 0.5061, "step": 617 }, { "epoch": 0.05, "grad_norm": 6.168095468868191, "learning_rate": 9.989051552889521e-06, "loss": 1.3652, "step": 618 }, { "epoch": 0.05, "grad_norm": 4.7765541732269625, "learning_rate": 9.9889638224372e-06, "loss": 0.8034, "step": 619 }, { "epoch": 0.05, "grad_norm": 5.109554857741848, "learning_rate": 9.988875742280225e-06, "loss": 0.9353, "step": 620 }, { "epoch": 0.05, "grad_norm": 4.082910298532423, "learning_rate": 9.988787312424773e-06, "loss": 0.8639, "step": 621 }, { "epoch": 0.05, "grad_norm": 2.8718090115890407, "learning_rate": 9.98869853287704e-06, "loss": 0.6346, "step": 622 }, { "epoch": 0.05, "grad_norm": 5.209133554282209, "learning_rate": 9.988609403643254e-06, "loss": 1.0097, "step": 623 }, { "epoch": 0.05, "grad_norm": 4.2825195486259995, "learning_rate": 9.988519924729658e-06, "loss": 1.0552, "step": 624 }, { "epoch": 0.05, "grad_norm": 4.090137938891286, "learning_rate": 9.988430096142523e-06, "loss": 0.9436, "step": 625 }, { "epoch": 0.05, "grad_norm": 2.669478879594225, "learning_rate": 9.98833991788815e-06, "loss": 0.4304, "step": 626 }, { "epoch": 0.05, "grad_norm": 2.7927076917738867, "learning_rate": 9.988249389972859e-06, "loss": 0.4305, "step": 627 }, { "epoch": 0.05, "grad_norm": 5.556812199763877, "learning_rate": 9.988158512402993e-06, "loss": 1.3901, "step": 628 }, { "epoch": 0.05, "grad_norm": 2.615049583677811, "learning_rate": 9.988067285184924e-06, "loss": 0.5666, "step": 629 }, { "epoch": 0.05, "grad_norm": 4.2011693520645546, "learning_rate": 9.987975708325048e-06, "loss": 0.7677, "step": 630 }, { "epoch": 0.05, "grad_norm": 3.2099903765384887, "learning_rate": 9.98788378182978e-06, "loss": 0.5586, "step": 631 }, { "epoch": 0.05, "grad_norm": 1.2163063910389378, "learning_rate": 9.987791505705568e-06, "loss": 0.2413, "step": 632 }, { "epoch": 0.05, "grad_norm": 4.488636294376988, "learning_rate": 9.987698879958879e-06, "loss": 0.9507, "step": 633 }, { "epoch": 0.05, "grad_norm": 2.9033659480857534, "learning_rate": 9.987605904596203e-06, "loss": 0.2726, "step": 634 }, { "epoch": 0.05, "grad_norm": 4.480796139908746, "learning_rate": 9.987512579624061e-06, "loss": 0.8255, "step": 635 }, { "epoch": 0.05, "grad_norm": 5.527278591142934, "learning_rate": 9.987418905048993e-06, "loss": 0.9837, "step": 636 }, { "epoch": 0.05, "grad_norm": 3.120787858966511, "learning_rate": 9.987324880877564e-06, "loss": 0.6009, "step": 637 }, { "epoch": 0.05, "grad_norm": 4.656164287100769, "learning_rate": 9.987230507116366e-06, "loss": 0.951, "step": 638 }, { "epoch": 0.05, "grad_norm": 4.721155595685185, "learning_rate": 9.987135783772014e-06, "loss": 1.1902, "step": 639 }, { "epoch": 0.05, "grad_norm": 5.837561241031265, "learning_rate": 9.987040710851148e-06, "loss": 1.3766, "step": 640 }, { "epoch": 0.05, "grad_norm": 3.4364747725629443, "learning_rate": 9.986945288360431e-06, "loss": 0.5502, "step": 641 }, { "epoch": 0.05, "grad_norm": 6.505635828322947, "learning_rate": 9.986849516306554e-06, "loss": 1.3267, "step": 642 }, { "epoch": 0.05, "grad_norm": 4.644333592153027, "learning_rate": 9.986753394696227e-06, "loss": 0.6654, "step": 643 }, { "epoch": 0.05, "grad_norm": 5.0669290688594195, "learning_rate": 9.98665692353619e-06, "loss": 1.1344, "step": 644 }, { "epoch": 0.05, "grad_norm": 4.344739819105405, "learning_rate": 9.986560102833206e-06, "loss": 0.9141, "step": 645 }, { "epoch": 0.05, "grad_norm": 3.8933191624197305, "learning_rate": 9.986462932594059e-06, "loss": 0.487, "step": 646 }, { "epoch": 0.05, "grad_norm": 4.096586790266863, "learning_rate": 9.986365412825562e-06, "loss": 0.7367, "step": 647 }, { "epoch": 0.05, "grad_norm": 5.303554708517804, "learning_rate": 9.98626754353455e-06, "loss": 0.8811, "step": 648 }, { "epoch": 0.05, "grad_norm": 3.3709015554748842, "learning_rate": 9.986169324727883e-06, "loss": 0.617, "step": 649 }, { "epoch": 0.05, "grad_norm": 2.4762846409644337, "learning_rate": 9.986070756412447e-06, "loss": 0.4911, "step": 650 }, { "epoch": 0.05, "grad_norm": 4.601479155779438, "learning_rate": 9.98597183859515e-06, "loss": 0.6638, "step": 651 }, { "epoch": 0.05, "grad_norm": 4.004344661451242, "learning_rate": 9.985872571282927e-06, "loss": 0.6485, "step": 652 }, { "epoch": 0.05, "grad_norm": 6.256974623442099, "learning_rate": 9.985772954482736e-06, "loss": 1.076, "step": 653 }, { "epoch": 0.05, "grad_norm": 5.072351574263503, "learning_rate": 9.985672988201557e-06, "loss": 1.0198, "step": 654 }, { "epoch": 0.05, "grad_norm": 3.2702241501346734, "learning_rate": 9.9855726724464e-06, "loss": 0.6186, "step": 655 }, { "epoch": 0.05, "grad_norm": 5.057842319740918, "learning_rate": 9.985472007224296e-06, "loss": 1.0415, "step": 656 }, { "epoch": 0.05, "grad_norm": 6.149812927136185, "learning_rate": 9.9853709925423e-06, "loss": 1.3504, "step": 657 }, { "epoch": 0.05, "grad_norm": 6.477356519779247, "learning_rate": 9.985269628407497e-06, "loss": 0.957, "step": 658 }, { "epoch": 0.05, "grad_norm": 4.9499863604689995, "learning_rate": 9.985167914826986e-06, "loss": 0.9287, "step": 659 }, { "epoch": 0.05, "grad_norm": 3.666449716748098, "learning_rate": 9.9850658518079e-06, "loss": 1.204, "step": 660 }, { "epoch": 0.05, "grad_norm": 3.5396153481196504, "learning_rate": 9.984963439357395e-06, "loss": 0.4128, "step": 661 }, { "epoch": 0.05, "grad_norm": 3.9862693153077973, "learning_rate": 9.984860677482646e-06, "loss": 0.531, "step": 662 }, { "epoch": 0.05, "grad_norm": 3.3886584701639064, "learning_rate": 9.984757566190856e-06, "loss": 0.5735, "step": 663 }, { "epoch": 0.05, "grad_norm": 6.718073410688344, "learning_rate": 9.984654105489258e-06, "loss": 1.4752, "step": 664 }, { "epoch": 0.05, "grad_norm": 1.6470470212912942, "learning_rate": 9.984550295385097e-06, "loss": 0.1962, "step": 665 }, { "epoch": 0.05, "grad_norm": 4.677375978952525, "learning_rate": 9.984446135885657e-06, "loss": 1.2061, "step": 666 }, { "epoch": 0.05, "grad_norm": 6.1751126586821545, "learning_rate": 9.984341626998234e-06, "loss": 1.6522, "step": 667 }, { "epoch": 0.05, "grad_norm": 4.198772872513048, "learning_rate": 9.984236768730152e-06, "loss": 0.9486, "step": 668 }, { "epoch": 0.05, "grad_norm": 4.550318005535857, "learning_rate": 9.984131561088766e-06, "loss": 0.8527, "step": 669 }, { "epoch": 0.05, "grad_norm": 3.8438636208886927, "learning_rate": 9.98402600408145e-06, "loss": 0.6577, "step": 670 }, { "epoch": 0.05, "grad_norm": 4.408862532659198, "learning_rate": 9.983920097715599e-06, "loss": 0.9216, "step": 671 }, { "epoch": 0.05, "grad_norm": 4.077809477522914, "learning_rate": 9.983813841998639e-06, "loss": 1.0509, "step": 672 }, { "epoch": 0.06, "grad_norm": 3.2653328768983725, "learning_rate": 9.98370723693802e-06, "loss": 0.567, "step": 673 }, { "epoch": 0.06, "grad_norm": 5.705621516826731, "learning_rate": 9.983600282541213e-06, "loss": 1.3581, "step": 674 }, { "epoch": 0.06, "grad_norm": 4.5134736946043725, "learning_rate": 9.983492978815716e-06, "loss": 0.7735, "step": 675 }, { "epoch": 0.06, "grad_norm": 3.973311993907203, "learning_rate": 9.983385325769047e-06, "loss": 0.7616, "step": 676 }, { "epoch": 0.06, "grad_norm": 6.666889227432155, "learning_rate": 9.983277323408755e-06, "loss": 1.2404, "step": 677 }, { "epoch": 0.06, "grad_norm": 5.986813884345034, "learning_rate": 9.983168971742411e-06, "loss": 1.295, "step": 678 }, { "epoch": 0.06, "grad_norm": 5.530210032015996, "learning_rate": 9.983060270777607e-06, "loss": 1.124, "step": 679 }, { "epoch": 0.06, "grad_norm": 5.079685519249418, "learning_rate": 9.982951220521965e-06, "loss": 1.0623, "step": 680 }, { "epoch": 0.06, "grad_norm": 6.617538825588403, "learning_rate": 9.98284182098313e-06, "loss": 1.4321, "step": 681 }, { "epoch": 0.06, "grad_norm": 4.736195546177727, "learning_rate": 9.982732072168768e-06, "loss": 0.9926, "step": 682 }, { "epoch": 0.06, "grad_norm": 4.911523379676589, "learning_rate": 9.982621974086572e-06, "loss": 1.1862, "step": 683 }, { "epoch": 0.06, "grad_norm": 6.679514520970694, "learning_rate": 9.98251152674426e-06, "loss": 1.266, "step": 684 }, { "epoch": 0.06, "grad_norm": 5.072991628720345, "learning_rate": 9.982400730149574e-06, "loss": 0.9329, "step": 685 }, { "epoch": 0.06, "grad_norm": 4.69136136575507, "learning_rate": 9.98228958431028e-06, "loss": 0.9406, "step": 686 }, { "epoch": 0.06, "grad_norm": 5.591915346611839, "learning_rate": 9.982178089234171e-06, "loss": 1.3627, "step": 687 }, { "epoch": 0.06, "grad_norm": 5.747909856781231, "learning_rate": 9.982066244929058e-06, "loss": 1.2625, "step": 688 }, { "epoch": 0.06, "grad_norm": 7.065528326230627, "learning_rate": 9.981954051402785e-06, "loss": 1.0663, "step": 689 }, { "epoch": 0.06, "grad_norm": 4.0001909990159215, "learning_rate": 9.981841508663214e-06, "loss": 0.7058, "step": 690 }, { "epoch": 0.06, "grad_norm": 3.3998445578928598, "learning_rate": 9.981728616718234e-06, "loss": 0.7326, "step": 691 }, { "epoch": 0.06, "grad_norm": 3.855144959078903, "learning_rate": 9.98161537557576e-06, "loss": 0.6568, "step": 692 }, { "epoch": 0.06, "grad_norm": 5.066439170079921, "learning_rate": 9.981501785243725e-06, "loss": 1.1099, "step": 693 }, { "epoch": 0.06, "grad_norm": 4.380398469657443, "learning_rate": 9.981387845730097e-06, "loss": 1.0579, "step": 694 }, { "epoch": 0.06, "grad_norm": 4.897195008908236, "learning_rate": 9.981273557042861e-06, "loss": 1.1187, "step": 695 }, { "epoch": 0.06, "grad_norm": 6.252493751213305, "learning_rate": 9.981158919190024e-06, "loss": 1.4001, "step": 696 }, { "epoch": 0.06, "grad_norm": 3.075321407345318, "learning_rate": 9.981043932179629e-06, "loss": 0.654, "step": 697 }, { "epoch": 0.06, "grad_norm": 4.737293080752598, "learning_rate": 9.980928596019727e-06, "loss": 0.9636, "step": 698 }, { "epoch": 0.06, "grad_norm": 5.220734829615724, "learning_rate": 9.98081291071841e-06, "loss": 1.1473, "step": 699 }, { "epoch": 0.06, "grad_norm": 4.338659674445185, "learning_rate": 9.980696876283785e-06, "loss": 0.7959, "step": 700 }, { "epoch": 0.06, "grad_norm": 3.4732476751320323, "learning_rate": 9.980580492723984e-06, "loss": 0.6809, "step": 701 }, { "epoch": 0.06, "grad_norm": 4.506473008904325, "learning_rate": 9.980463760047167e-06, "loss": 0.853, "step": 702 }, { "epoch": 0.06, "grad_norm": 5.132017256774828, "learning_rate": 9.980346678261515e-06, "loss": 0.9536, "step": 703 }, { "epoch": 0.06, "grad_norm": 2.583142094061532, "learning_rate": 9.980229247375236e-06, "loss": 0.447, "step": 704 }, { "epoch": 0.06, "grad_norm": 3.83052530122639, "learning_rate": 9.980111467396561e-06, "loss": 0.389, "step": 705 }, { "epoch": 0.06, "grad_norm": 6.794608506821916, "learning_rate": 9.979993338333745e-06, "loss": 1.4973, "step": 706 }, { "epoch": 0.06, "grad_norm": 4.703224751249668, "learning_rate": 9.979874860195068e-06, "loss": 0.8389, "step": 707 }, { "epoch": 0.06, "grad_norm": 3.1532072810420435, "learning_rate": 9.979756032988837e-06, "loss": 0.6571, "step": 708 }, { "epoch": 0.06, "grad_norm": 6.015780174494101, "learning_rate": 9.979636856723379e-06, "loss": 1.3718, "step": 709 }, { "epoch": 0.06, "grad_norm": 5.2193398459983555, "learning_rate": 9.97951733140705e-06, "loss": 0.8092, "step": 710 }, { "epoch": 0.06, "grad_norm": 4.705786850338479, "learning_rate": 9.979397457048226e-06, "loss": 0.9143, "step": 711 }, { "epoch": 0.06, "grad_norm": 3.9202053826916483, "learning_rate": 9.97927723365531e-06, "loss": 0.7494, "step": 712 }, { "epoch": 0.06, "grad_norm": 4.633324076574433, "learning_rate": 9.979156661236733e-06, "loss": 0.8238, "step": 713 }, { "epoch": 0.06, "grad_norm": 2.2712391089999926, "learning_rate": 9.97903573980094e-06, "loss": 0.4017, "step": 714 }, { "epoch": 0.06, "grad_norm": 4.71236457496792, "learning_rate": 9.978914469356413e-06, "loss": 0.8248, "step": 715 }, { "epoch": 0.06, "grad_norm": 5.412765845449585, "learning_rate": 9.97879284991165e-06, "loss": 1.0794, "step": 716 }, { "epoch": 0.06, "grad_norm": 3.969742110862581, "learning_rate": 9.978670881475173e-06, "loss": 0.832, "step": 717 }, { "epoch": 0.06, "grad_norm": 5.931801381819527, "learning_rate": 9.978548564055537e-06, "loss": 0.9876, "step": 718 }, { "epoch": 0.06, "grad_norm": 5.316852797200995, "learning_rate": 9.978425897661312e-06, "loss": 0.8552, "step": 719 }, { "epoch": 0.06, "grad_norm": 1.2826681827328124, "learning_rate": 9.978302882301098e-06, "loss": 0.1575, "step": 720 }, { "epoch": 0.06, "grad_norm": 6.760416074010551, "learning_rate": 9.97817951798352e-06, "loss": 1.1668, "step": 721 }, { "epoch": 0.06, "grad_norm": 4.310256577274185, "learning_rate": 9.97805580471722e-06, "loss": 1.0728, "step": 722 }, { "epoch": 0.06, "grad_norm": 6.802609025604089, "learning_rate": 9.977931742510873e-06, "loss": 1.4633, "step": 723 }, { "epoch": 0.06, "grad_norm": 1.8533623374374455, "learning_rate": 9.977807331373176e-06, "loss": 0.2901, "step": 724 }, { "epoch": 0.06, "grad_norm": 2.6339167914487835, "learning_rate": 9.977682571312847e-06, "loss": 0.5014, "step": 725 }, { "epoch": 0.06, "grad_norm": 4.062861416110086, "learning_rate": 9.977557462338635e-06, "loss": 0.7564, "step": 726 }, { "epoch": 0.06, "grad_norm": 3.059168335509616, "learning_rate": 9.977432004459306e-06, "loss": 0.549, "step": 727 }, { "epoch": 0.06, "grad_norm": 5.702152763867874, "learning_rate": 9.977306197683656e-06, "loss": 1.2333, "step": 728 }, { "epoch": 0.06, "grad_norm": 6.630310813593966, "learning_rate": 9.977180042020502e-06, "loss": 1.1659, "step": 729 }, { "epoch": 0.06, "grad_norm": 5.511468366268989, "learning_rate": 9.977053537478686e-06, "loss": 1.0232, "step": 730 }, { "epoch": 0.06, "grad_norm": 3.0047996416749823, "learning_rate": 9.976926684067082e-06, "loss": 0.4844, "step": 731 }, { "epoch": 0.06, "grad_norm": 3.247586868009353, "learning_rate": 9.976799481794573e-06, "loss": 0.4085, "step": 732 }, { "epoch": 0.06, "grad_norm": 3.131277338264275, "learning_rate": 9.976671930670081e-06, "loss": 0.6309, "step": 733 }, { "epoch": 0.06, "grad_norm": 4.2916927804561515, "learning_rate": 9.976544030702546e-06, "loss": 0.8517, "step": 734 }, { "epoch": 0.06, "grad_norm": 4.487087260109036, "learning_rate": 9.97641578190093e-06, "loss": 0.7722, "step": 735 }, { "epoch": 0.06, "grad_norm": 3.2674515774054473, "learning_rate": 9.976287184274228e-06, "loss": 0.5029, "step": 736 }, { "epoch": 0.06, "grad_norm": 3.887167598931923, "learning_rate": 9.976158237831449e-06, "loss": 0.5151, "step": 737 }, { "epoch": 0.06, "grad_norm": 4.117472870245781, "learning_rate": 9.976028942581636e-06, "loss": 0.9479, "step": 738 }, { "epoch": 0.06, "grad_norm": 4.499321395252827, "learning_rate": 9.975899298533848e-06, "loss": 0.5442, "step": 739 }, { "epoch": 0.06, "grad_norm": 2.571042528541903, "learning_rate": 9.975769305697174e-06, "loss": 0.5613, "step": 740 }, { "epoch": 0.06, "grad_norm": 4.101831596434222, "learning_rate": 9.975638964080727e-06, "loss": 0.9055, "step": 741 }, { "epoch": 0.06, "grad_norm": 6.5100041104182385, "learning_rate": 9.975508273693643e-06, "loss": 1.3315, "step": 742 }, { "epoch": 0.06, "grad_norm": 3.0554279292222346, "learning_rate": 9.975377234545083e-06, "loss": 0.6267, "step": 743 }, { "epoch": 0.06, "grad_norm": 5.89859289725527, "learning_rate": 9.97524584664423e-06, "loss": 1.6799, "step": 744 }, { "epoch": 0.06, "grad_norm": 4.430271664446078, "learning_rate": 9.975114110000297e-06, "loss": 0.8808, "step": 745 }, { "epoch": 0.06, "grad_norm": 5.731882166706876, "learning_rate": 9.974982024622517e-06, "loss": 1.1066, "step": 746 }, { "epoch": 0.06, "grad_norm": 3.459257443847804, "learning_rate": 9.974849590520148e-06, "loss": 0.7659, "step": 747 }, { "epoch": 0.06, "grad_norm": 4.939537799631192, "learning_rate": 9.974716807702473e-06, "loss": 0.8695, "step": 748 }, { "epoch": 0.06, "grad_norm": 4.520153583674207, "learning_rate": 9.9745836761788e-06, "loss": 0.8502, "step": 749 }, { "epoch": 0.06, "grad_norm": 3.8020858847345793, "learning_rate": 9.974450195958459e-06, "loss": 0.6382, "step": 750 }, { "epoch": 0.06, "grad_norm": 3.8753527561070307, "learning_rate": 9.97431636705081e-06, "loss": 0.7334, "step": 751 }, { "epoch": 0.06, "grad_norm": 4.723888177512531, "learning_rate": 9.974182189465232e-06, "loss": 0.7886, "step": 752 }, { "epoch": 0.06, "grad_norm": 6.128138154428111, "learning_rate": 9.974047663211131e-06, "loss": 1.7984, "step": 753 }, { "epoch": 0.06, "grad_norm": 5.773109744828223, "learning_rate": 9.973912788297933e-06, "loss": 1.306, "step": 754 }, { "epoch": 0.06, "grad_norm": 4.00723529489966, "learning_rate": 9.973777564735097e-06, "loss": 0.8355, "step": 755 }, { "epoch": 0.06, "grad_norm": 2.9820207602050672, "learning_rate": 9.973641992532099e-06, "loss": 0.4745, "step": 756 }, { "epoch": 0.06, "grad_norm": 4.715125249193091, "learning_rate": 9.973506071698444e-06, "loss": 1.0481, "step": 757 }, { "epoch": 0.06, "grad_norm": 2.439607792847339, "learning_rate": 9.973369802243658e-06, "loss": 0.4661, "step": 758 }, { "epoch": 0.06, "grad_norm": 4.530386457711915, "learning_rate": 9.97323318417729e-06, "loss": 0.9343, "step": 759 }, { "epoch": 0.06, "grad_norm": 3.740555661029211, "learning_rate": 9.973096217508925e-06, "loss": 0.7943, "step": 760 }, { "epoch": 0.06, "grad_norm": 2.7744732026579766, "learning_rate": 9.972958902248153e-06, "loss": 0.6054, "step": 761 }, { "epoch": 0.06, "grad_norm": 1.929079530607702, "learning_rate": 9.972821238404607e-06, "loss": 0.3151, "step": 762 }, { "epoch": 0.06, "grad_norm": 4.495765793088585, "learning_rate": 9.972683225987933e-06, "loss": 1.0715, "step": 763 }, { "epoch": 0.06, "grad_norm": 5.467686535685932, "learning_rate": 9.972544865007807e-06, "loss": 0.6673, "step": 764 }, { "epoch": 0.06, "grad_norm": 4.146818085401882, "learning_rate": 9.972406155473925e-06, "loss": 0.8161, "step": 765 }, { "epoch": 0.06, "grad_norm": 5.526183507517289, "learning_rate": 9.972267097396013e-06, "loss": 1.1339, "step": 766 }, { "epoch": 0.06, "grad_norm": 5.391115260744661, "learning_rate": 9.972127690783815e-06, "loss": 1.6191, "step": 767 }, { "epoch": 0.06, "grad_norm": 5.089685092999949, "learning_rate": 9.971987935647106e-06, "loss": 1.0668, "step": 768 }, { "epoch": 0.06, "grad_norm": 5.520289400789311, "learning_rate": 9.97184783199568e-06, "loss": 0.8992, "step": 769 }, { "epoch": 0.06, "grad_norm": 5.625631151177899, "learning_rate": 9.97170737983936e-06, "loss": 1.1493, "step": 770 }, { "epoch": 0.06, "grad_norm": 4.97478303005252, "learning_rate": 9.971566579187988e-06, "loss": 1.1023, "step": 771 }, { "epoch": 0.06, "grad_norm": 4.547320317403259, "learning_rate": 9.971425430051437e-06, "loss": 1.0619, "step": 772 }, { "epoch": 0.06, "grad_norm": 5.636700625448609, "learning_rate": 9.971283932439597e-06, "loss": 1.1449, "step": 773 }, { "epoch": 0.06, "grad_norm": 5.769386076301568, "learning_rate": 9.971142086362392e-06, "loss": 1.3077, "step": 774 }, { "epoch": 0.06, "grad_norm": 4.635243672503049, "learning_rate": 9.970999891829757e-06, "loss": 1.1862, "step": 775 }, { "epoch": 0.06, "grad_norm": 5.848083854458284, "learning_rate": 9.970857348851667e-06, "loss": 0.9362, "step": 776 }, { "epoch": 0.06, "grad_norm": 3.4745444676936006, "learning_rate": 9.970714457438106e-06, "loss": 0.5107, "step": 777 }, { "epoch": 0.06, "grad_norm": 5.489989337493255, "learning_rate": 9.970571217599096e-06, "loss": 0.9107, "step": 778 }, { "epoch": 0.06, "grad_norm": 3.096443541651896, "learning_rate": 9.970427629344676e-06, "loss": 0.4118, "step": 779 }, { "epoch": 0.06, "grad_norm": 4.530946936104318, "learning_rate": 9.970283692684911e-06, "loss": 0.8377, "step": 780 }, { "epoch": 0.06, "grad_norm": 5.22289394368659, "learning_rate": 9.97013940762989e-06, "loss": 1.3088, "step": 781 }, { "epoch": 0.06, "grad_norm": 5.453490008200965, "learning_rate": 9.969994774189726e-06, "loss": 1.451, "step": 782 }, { "epoch": 0.06, "grad_norm": 4.76093404920054, "learning_rate": 9.969849792374558e-06, "loss": 1.0252, "step": 783 }, { "epoch": 0.06, "grad_norm": 5.57953489925236, "learning_rate": 9.969704462194549e-06, "loss": 1.4084, "step": 784 }, { "epoch": 0.06, "grad_norm": 3.4399668508195123, "learning_rate": 9.969558783659884e-06, "loss": 0.5534, "step": 785 }, { "epoch": 0.06, "grad_norm": 4.7564780956274015, "learning_rate": 9.969412756780776e-06, "loss": 1.0238, "step": 786 }, { "epoch": 0.06, "grad_norm": 4.826815046400798, "learning_rate": 9.969266381567462e-06, "loss": 0.94, "step": 787 }, { "epoch": 0.06, "grad_norm": 4.080648998198349, "learning_rate": 9.9691196580302e-06, "loss": 0.897, "step": 788 }, { "epoch": 0.06, "grad_norm": 3.6470320822489994, "learning_rate": 9.968972586179275e-06, "loss": 0.8945, "step": 789 }, { "epoch": 0.06, "grad_norm": 3.408120703606715, "learning_rate": 9.968825166024999e-06, "loss": 0.4984, "step": 790 }, { "epoch": 0.06, "grad_norm": 7.859349302027108, "learning_rate": 9.968677397577701e-06, "loss": 1.8087, "step": 791 }, { "epoch": 0.06, "grad_norm": 2.3390940932775894, "learning_rate": 9.968529280847743e-06, "loss": 0.474, "step": 792 }, { "epoch": 0.06, "grad_norm": 3.1126987657491423, "learning_rate": 9.968380815845504e-06, "loss": 0.6885, "step": 793 }, { "epoch": 0.06, "grad_norm": 4.074268950114531, "learning_rate": 9.968232002581394e-06, "loss": 0.8573, "step": 794 }, { "epoch": 0.06, "grad_norm": 5.5693261133819005, "learning_rate": 9.96808284106584e-06, "loss": 1.0474, "step": 795 }, { "epoch": 0.07, "grad_norm": 4.392596789543054, "learning_rate": 9.967933331309302e-06, "loss": 0.7573, "step": 796 }, { "epoch": 0.07, "grad_norm": 3.7715217973867903, "learning_rate": 9.967783473322258e-06, "loss": 0.6199, "step": 797 }, { "epoch": 0.07, "grad_norm": 4.074185896638036, "learning_rate": 9.96763326711521e-06, "loss": 0.4578, "step": 798 }, { "epoch": 0.07, "grad_norm": 4.176116951214148, "learning_rate": 9.967482712698694e-06, "loss": 0.6773, "step": 799 }, { "epoch": 0.07, "grad_norm": 4.76439860919025, "learning_rate": 9.967331810083254e-06, "loss": 1.1874, "step": 800 }, { "epoch": 0.07, "grad_norm": 3.582020589446579, "learning_rate": 9.967180559279472e-06, "loss": 0.9202, "step": 801 }, { "epoch": 0.07, "grad_norm": 2.0977116361481984, "learning_rate": 9.967028960297954e-06, "loss": 0.4643, "step": 802 }, { "epoch": 0.07, "grad_norm": 3.098250568478063, "learning_rate": 9.966877013149319e-06, "loss": 0.4353, "step": 803 }, { "epoch": 0.07, "grad_norm": 4.337788120873333, "learning_rate": 9.966724717844222e-06, "loss": 0.7149, "step": 804 }, { "epoch": 0.07, "grad_norm": 5.694222423633388, "learning_rate": 9.966572074393337e-06, "loss": 1.0502, "step": 805 }, { "epoch": 0.07, "grad_norm": 2.776286287183273, "learning_rate": 9.966419082807366e-06, "loss": 0.5987, "step": 806 }, { "epoch": 0.07, "grad_norm": 3.21966864717937, "learning_rate": 9.96626574309703e-06, "loss": 0.4904, "step": 807 }, { "epoch": 0.07, "grad_norm": 5.924492528369482, "learning_rate": 9.966112055273078e-06, "loss": 1.1346, "step": 808 }, { "epoch": 0.07, "grad_norm": 4.1712724961958045, "learning_rate": 9.965958019346284e-06, "loss": 0.6334, "step": 809 }, { "epoch": 0.07, "grad_norm": 3.8522795318151353, "learning_rate": 9.965803635327445e-06, "loss": 0.7694, "step": 810 }, { "epoch": 0.07, "grad_norm": 1.574605370309623, "learning_rate": 9.965648903227383e-06, "loss": 0.261, "step": 811 }, { "epoch": 0.07, "grad_norm": 4.705029403781401, "learning_rate": 9.965493823056943e-06, "loss": 0.6301, "step": 812 }, { "epoch": 0.07, "grad_norm": 5.734011167965443, "learning_rate": 9.965338394826995e-06, "loss": 1.5186, "step": 813 }, { "epoch": 0.07, "grad_norm": 2.9008709218185853, "learning_rate": 9.965182618548437e-06, "loss": 0.5122, "step": 814 }, { "epoch": 0.07, "grad_norm": 2.156045034777844, "learning_rate": 9.965026494232184e-06, "loss": 0.2935, "step": 815 }, { "epoch": 0.07, "grad_norm": 4.339119067796793, "learning_rate": 9.964870021889181e-06, "loss": 1.0165, "step": 816 }, { "epoch": 0.07, "grad_norm": 5.134637446860939, "learning_rate": 9.964713201530399e-06, "loss": 0.7808, "step": 817 }, { "epoch": 0.07, "grad_norm": 5.597641511494216, "learning_rate": 9.964556033166826e-06, "loss": 0.9558, "step": 818 }, { "epoch": 0.07, "grad_norm": 3.3933383102397845, "learning_rate": 9.964398516809482e-06, "loss": 0.7344, "step": 819 }, { "epoch": 0.07, "grad_norm": 5.0015608056776495, "learning_rate": 9.964240652469408e-06, "loss": 0.6732, "step": 820 }, { "epoch": 0.07, "grad_norm": 3.065452808847677, "learning_rate": 9.964082440157668e-06, "loss": 0.6751, "step": 821 }, { "epoch": 0.07, "grad_norm": 3.916773694484184, "learning_rate": 9.963923879885351e-06, "loss": 0.6768, "step": 822 }, { "epoch": 0.07, "grad_norm": 2.630978787822194, "learning_rate": 9.963764971663575e-06, "loss": 0.4979, "step": 823 }, { "epoch": 0.07, "grad_norm": 6.170772583289991, "learning_rate": 9.963605715503477e-06, "loss": 1.2307, "step": 824 }, { "epoch": 0.07, "grad_norm": 4.211658909815121, "learning_rate": 9.96344611141622e-06, "loss": 0.9501, "step": 825 }, { "epoch": 0.07, "grad_norm": 6.606969981147839, "learning_rate": 9.96328615941299e-06, "loss": 1.4042, "step": 826 }, { "epoch": 0.07, "grad_norm": 3.4391829172628925, "learning_rate": 9.963125859505e-06, "loss": 0.604, "step": 827 }, { "epoch": 0.07, "grad_norm": 2.6553164080327796, "learning_rate": 9.96296521170349e-06, "loss": 0.4192, "step": 828 }, { "epoch": 0.07, "grad_norm": 2.426383397582067, "learning_rate": 9.962804216019715e-06, "loss": 0.3705, "step": 829 }, { "epoch": 0.07, "grad_norm": 5.210417920800824, "learning_rate": 9.962642872464964e-06, "loss": 1.218, "step": 830 }, { "epoch": 0.07, "grad_norm": 5.627215980781415, "learning_rate": 9.962481181050544e-06, "loss": 1.1948, "step": 831 }, { "epoch": 0.07, "grad_norm": 4.6291433620745694, "learning_rate": 9.96231914178779e-06, "loss": 0.9308, "step": 832 }, { "epoch": 0.07, "grad_norm": 5.467086133481989, "learning_rate": 9.962156754688062e-06, "loss": 1.166, "step": 833 }, { "epoch": 0.07, "grad_norm": 3.5665160649933267, "learning_rate": 9.96199401976274e-06, "loss": 0.7913, "step": 834 }, { "epoch": 0.07, "grad_norm": 3.3983297626025517, "learning_rate": 9.96183093702323e-06, "loss": 0.6424, "step": 835 }, { "epoch": 0.07, "grad_norm": 6.069701322695037, "learning_rate": 9.961667506480967e-06, "loss": 1.5727, "step": 836 }, { "epoch": 0.07, "grad_norm": 6.402442563901685, "learning_rate": 9.961503728147405e-06, "loss": 1.4001, "step": 837 }, { "epoch": 0.07, "grad_norm": 4.720680137374831, "learning_rate": 9.961339602034026e-06, "loss": 1.0963, "step": 838 }, { "epoch": 0.07, "grad_norm": 3.0981566758358903, "learning_rate": 9.96117512815233e-06, "loss": 0.4581, "step": 839 }, { "epoch": 0.07, "grad_norm": 3.2511679821777086, "learning_rate": 9.96101030651385e-06, "loss": 0.4581, "step": 840 }, { "epoch": 0.07, "grad_norm": 4.790266578683795, "learning_rate": 9.960845137130137e-06, "loss": 0.7372, "step": 841 }, { "epoch": 0.07, "grad_norm": 4.982491338777422, "learning_rate": 9.96067962001277e-06, "loss": 0.972, "step": 842 }, { "epoch": 0.07, "grad_norm": 3.7105151457719914, "learning_rate": 9.96051375517335e-06, "loss": 0.8763, "step": 843 }, { "epoch": 0.07, "grad_norm": 4.185155104774775, "learning_rate": 9.960347542623506e-06, "loss": 0.5442, "step": 844 }, { "epoch": 0.07, "grad_norm": 3.156382246620666, "learning_rate": 9.960180982374884e-06, "loss": 0.4527, "step": 845 }, { "epoch": 0.07, "grad_norm": 3.4217843784477244, "learning_rate": 9.960014074439164e-06, "loss": 0.5007, "step": 846 }, { "epoch": 0.07, "grad_norm": 5.391806637780469, "learning_rate": 9.959846818828041e-06, "loss": 1.3905, "step": 847 }, { "epoch": 0.07, "grad_norm": 3.459019607298156, "learning_rate": 9.959679215553244e-06, "loss": 0.6361, "step": 848 }, { "epoch": 0.07, "grad_norm": 4.619323467927718, "learning_rate": 9.959511264626518e-06, "loss": 0.7784, "step": 849 }, { "epoch": 0.07, "grad_norm": 4.728782464117148, "learning_rate": 9.959342966059636e-06, "loss": 1.2141, "step": 850 }, { "epoch": 0.07, "grad_norm": 4.738622637108753, "learning_rate": 9.959174319864395e-06, "loss": 1.1925, "step": 851 }, { "epoch": 0.07, "grad_norm": 5.479658138287649, "learning_rate": 9.959005326052615e-06, "loss": 1.0201, "step": 852 }, { "epoch": 0.07, "grad_norm": 5.614085673476612, "learning_rate": 9.958835984636146e-06, "loss": 1.165, "step": 853 }, { "epoch": 0.07, "grad_norm": 3.5184482099574366, "learning_rate": 9.958666295626854e-06, "loss": 0.8741, "step": 854 }, { "epoch": 0.07, "grad_norm": 7.138268357473538, "learning_rate": 9.958496259036635e-06, "loss": 1.638, "step": 855 }, { "epoch": 0.07, "grad_norm": 3.042464483331498, "learning_rate": 9.958325874877408e-06, "loss": 0.7941, "step": 856 }, { "epoch": 0.07, "grad_norm": 4.184549738302962, "learning_rate": 9.958155143161115e-06, "loss": 0.915, "step": 857 }, { "epoch": 0.07, "grad_norm": 5.664162701019237, "learning_rate": 9.957984063899727e-06, "loss": 1.1962, "step": 858 }, { "epoch": 0.07, "grad_norm": 1.8646671334584424, "learning_rate": 9.95781263710523e-06, "loss": 0.308, "step": 859 }, { "epoch": 0.07, "grad_norm": 6.639081617480649, "learning_rate": 9.957640862789644e-06, "loss": 1.092, "step": 860 }, { "epoch": 0.07, "grad_norm": 3.5841789111561875, "learning_rate": 9.95746874096501e-06, "loss": 0.5315, "step": 861 }, { "epoch": 0.07, "grad_norm": 4.234477795410627, "learning_rate": 9.957296271643393e-06, "loss": 0.769, "step": 862 }, { "epoch": 0.07, "grad_norm": 5.313150775350866, "learning_rate": 9.957123454836882e-06, "loss": 0.9755, "step": 863 }, { "epoch": 0.07, "grad_norm": 5.115863019680228, "learning_rate": 9.95695029055759e-06, "loss": 1.3551, "step": 864 }, { "epoch": 0.07, "grad_norm": 5.635816693720637, "learning_rate": 9.956776778817654e-06, "loss": 1.222, "step": 865 }, { "epoch": 0.07, "grad_norm": 3.4986707461344753, "learning_rate": 9.956602919629239e-06, "loss": 0.6475, "step": 866 }, { "epoch": 0.07, "grad_norm": 3.7358230723201142, "learning_rate": 9.956428713004529e-06, "loss": 0.5906, "step": 867 }, { "epoch": 0.07, "grad_norm": 6.528042168147081, "learning_rate": 9.956254158955738e-06, "loss": 1.3471, "step": 868 }, { "epoch": 0.07, "grad_norm": 4.613230901572128, "learning_rate": 9.9560792574951e-06, "loss": 0.9629, "step": 869 }, { "epoch": 0.07, "grad_norm": 5.86962822781077, "learning_rate": 9.955904008634876e-06, "loss": 1.3635, "step": 870 }, { "epoch": 0.07, "grad_norm": 5.695620277128768, "learning_rate": 9.955728412387347e-06, "loss": 1.0614, "step": 871 }, { "epoch": 0.07, "grad_norm": 4.794859671216474, "learning_rate": 9.955552468764825e-06, "loss": 1.0326, "step": 872 }, { "epoch": 0.07, "grad_norm": 5.291780152477322, "learning_rate": 9.955376177779641e-06, "loss": 0.6374, "step": 873 }, { "epoch": 0.07, "grad_norm": 3.777384653007664, "learning_rate": 9.955199539444154e-06, "loss": 0.5032, "step": 874 }, { "epoch": 0.07, "grad_norm": 4.269996080253249, "learning_rate": 9.955022553770743e-06, "loss": 0.7474, "step": 875 }, { "epoch": 0.07, "grad_norm": 4.790840369737783, "learning_rate": 9.954845220771816e-06, "loss": 0.9783, "step": 876 }, { "epoch": 0.07, "grad_norm": 6.356400418330578, "learning_rate": 9.954667540459802e-06, "loss": 1.2314, "step": 877 }, { "epoch": 0.07, "grad_norm": 5.2532838304563505, "learning_rate": 9.954489512847156e-06, "loss": 0.8081, "step": 878 }, { "epoch": 0.07, "grad_norm": 3.461855774922967, "learning_rate": 9.954311137946358e-06, "loss": 0.5668, "step": 879 }, { "epoch": 0.07, "grad_norm": 4.000711677503919, "learning_rate": 9.954132415769911e-06, "loss": 0.8234, "step": 880 }, { "epoch": 0.07, "grad_norm": 4.01348735820718, "learning_rate": 9.95395334633034e-06, "loss": 0.8112, "step": 881 }, { "epoch": 0.07, "grad_norm": 4.796895609166545, "learning_rate": 9.953773929640202e-06, "loss": 0.9285, "step": 882 }, { "epoch": 0.07, "grad_norm": 3.523348062987123, "learning_rate": 9.953594165712068e-06, "loss": 0.3327, "step": 883 }, { "epoch": 0.07, "grad_norm": 4.060091186298477, "learning_rate": 9.953414054558543e-06, "loss": 0.7849, "step": 884 }, { "epoch": 0.07, "grad_norm": 5.531720184448185, "learning_rate": 9.95323359619225e-06, "loss": 1.4627, "step": 885 }, { "epoch": 0.07, "grad_norm": 5.124767255143433, "learning_rate": 9.953052790625835e-06, "loss": 1.147, "step": 886 }, { "epoch": 0.07, "grad_norm": 4.174673608668578, "learning_rate": 9.95287163787198e-06, "loss": 1.0221, "step": 887 }, { "epoch": 0.07, "grad_norm": 4.759573418537057, "learning_rate": 9.952690137943374e-06, "loss": 0.7192, "step": 888 }, { "epoch": 0.07, "grad_norm": 4.32649581631083, "learning_rate": 9.952508290852746e-06, "loss": 0.9132, "step": 889 }, { "epoch": 0.07, "grad_norm": 4.49609553767445, "learning_rate": 9.95232609661284e-06, "loss": 0.9195, "step": 890 }, { "epoch": 0.07, "grad_norm": 4.744183694556523, "learning_rate": 9.952143555236426e-06, "loss": 0.5019, "step": 891 }, { "epoch": 0.07, "grad_norm": 5.346790453736447, "learning_rate": 9.9519606667363e-06, "loss": 1.061, "step": 892 }, { "epoch": 0.07, "grad_norm": 3.8526268520642506, "learning_rate": 9.951777431125285e-06, "loss": 0.7299, "step": 893 }, { "epoch": 0.07, "grad_norm": 4.280723702096715, "learning_rate": 9.95159384841622e-06, "loss": 0.8639, "step": 894 }, { "epoch": 0.07, "grad_norm": 5.5124974801196025, "learning_rate": 9.951409918621977e-06, "loss": 1.5656, "step": 895 }, { "epoch": 0.07, "grad_norm": 4.493827302226007, "learning_rate": 9.951225641755447e-06, "loss": 1.2438, "step": 896 }, { "epoch": 0.07, "grad_norm": 4.500693687023412, "learning_rate": 9.951041017829546e-06, "loss": 1.3041, "step": 897 }, { "epoch": 0.07, "grad_norm": 5.256144142745983, "learning_rate": 9.950856046857218e-06, "loss": 1.1518, "step": 898 }, { "epoch": 0.07, "grad_norm": 5.0082769764561474, "learning_rate": 9.950670728851428e-06, "loss": 1.2247, "step": 899 }, { "epoch": 0.07, "grad_norm": 3.9888071975117203, "learning_rate": 9.950485063825164e-06, "loss": 0.8726, "step": 900 }, { "epoch": 0.07, "grad_norm": 3.6636039422830637, "learning_rate": 9.950299051791442e-06, "loss": 0.6408, "step": 901 }, { "epoch": 0.07, "grad_norm": 5.611758379413739, "learning_rate": 9.9501126927633e-06, "loss": 1.0135, "step": 902 }, { "epoch": 0.07, "grad_norm": 5.193649777684185, "learning_rate": 9.949925986753801e-06, "loss": 0.975, "step": 903 }, { "epoch": 0.07, "grad_norm": 1.2652496595795037, "learning_rate": 9.949738933776034e-06, "loss": 0.2191, "step": 904 }, { "epoch": 0.07, "grad_norm": 5.00394153281254, "learning_rate": 9.949551533843108e-06, "loss": 1.1315, "step": 905 }, { "epoch": 0.07, "grad_norm": 4.376078409437867, "learning_rate": 9.949363786968161e-06, "loss": 0.8514, "step": 906 }, { "epoch": 0.07, "grad_norm": 3.0947758925959357, "learning_rate": 9.94917569316435e-06, "loss": 0.4318, "step": 907 }, { "epoch": 0.07, "grad_norm": 3.8579514061113698, "learning_rate": 9.948987252444863e-06, "loss": 0.8145, "step": 908 }, { "epoch": 0.07, "grad_norm": 4.341532052086995, "learning_rate": 9.948798464822908e-06, "loss": 1.0982, "step": 909 }, { "epoch": 0.07, "grad_norm": 5.061424905844848, "learning_rate": 9.948609330311717e-06, "loss": 1.2016, "step": 910 }, { "epoch": 0.07, "grad_norm": 6.975840773315984, "learning_rate": 9.94841984892455e-06, "loss": 1.241, "step": 911 }, { "epoch": 0.07, "grad_norm": 2.617213604441644, "learning_rate": 9.948230020674685e-06, "loss": 0.409, "step": 912 }, { "epoch": 0.07, "grad_norm": 5.751113151861608, "learning_rate": 9.948039845575433e-06, "loss": 1.0532, "step": 913 }, { "epoch": 0.07, "grad_norm": 4.045895048439603, "learning_rate": 9.947849323640119e-06, "loss": 1.2309, "step": 914 }, { "epoch": 0.07, "grad_norm": 5.5548904890189315, "learning_rate": 9.947658454882102e-06, "loss": 1.2756, "step": 915 }, { "epoch": 0.07, "grad_norm": 5.567909285488109, "learning_rate": 9.947467239314759e-06, "loss": 1.6017, "step": 916 }, { "epoch": 0.07, "grad_norm": 4.686842726080642, "learning_rate": 9.947275676951493e-06, "loss": 0.9778, "step": 917 }, { "epoch": 0.08, "grad_norm": 6.261416529520622, "learning_rate": 9.947083767805736e-06, "loss": 1.2024, "step": 918 }, { "epoch": 0.08, "grad_norm": 2.1007870673806903, "learning_rate": 9.946891511890934e-06, "loss": 0.3884, "step": 919 }, { "epoch": 0.08, "grad_norm": 3.098588168379796, "learning_rate": 9.946698909220567e-06, "loss": 0.5807, "step": 920 }, { "epoch": 0.08, "grad_norm": 4.367683093526055, "learning_rate": 9.946505959808133e-06, "loss": 0.9718, "step": 921 }, { "epoch": 0.08, "grad_norm": 3.9092380880050563, "learning_rate": 9.94631266366716e-06, "loss": 0.6939, "step": 922 }, { "epoch": 0.08, "grad_norm": 6.2437644983208935, "learning_rate": 9.946119020811196e-06, "loss": 1.6135, "step": 923 }, { "epoch": 0.08, "grad_norm": 3.559272956140806, "learning_rate": 9.945925031253814e-06, "loss": 0.5448, "step": 924 }, { "epoch": 0.08, "grad_norm": 3.824143064798661, "learning_rate": 9.945730695008611e-06, "loss": 0.9135, "step": 925 }, { "epoch": 0.08, "grad_norm": 4.722676137253339, "learning_rate": 9.94553601208921e-06, "loss": 1.153, "step": 926 }, { "epoch": 0.08, "grad_norm": 3.396639886547055, "learning_rate": 9.94534098250926e-06, "loss": 0.6553, "step": 927 }, { "epoch": 0.08, "grad_norm": 5.255260531499106, "learning_rate": 9.945145606282427e-06, "loss": 1.4183, "step": 928 }, { "epoch": 0.08, "grad_norm": 3.9560741446234613, "learning_rate": 9.944949883422409e-06, "loss": 0.8848, "step": 929 }, { "epoch": 0.08, "grad_norm": 4.44099530624096, "learning_rate": 9.944753813942924e-06, "loss": 1.0808, "step": 930 }, { "epoch": 0.08, "grad_norm": 5.612367341298937, "learning_rate": 9.944557397857717e-06, "loss": 1.7301, "step": 931 }, { "epoch": 0.08, "grad_norm": 5.774377825851068, "learning_rate": 9.944360635180554e-06, "loss": 1.2681, "step": 932 }, { "epoch": 0.08, "grad_norm": 4.4851455553756505, "learning_rate": 9.94416352592523e-06, "loss": 1.0088, "step": 933 }, { "epoch": 0.08, "grad_norm": 3.6727752379053444, "learning_rate": 9.94396607010556e-06, "loss": 0.799, "step": 934 }, { "epoch": 0.08, "grad_norm": 5.661571569893697, "learning_rate": 9.943768267735384e-06, "loss": 1.2836, "step": 935 }, { "epoch": 0.08, "grad_norm": 4.73506630653504, "learning_rate": 9.943570118828569e-06, "loss": 1.107, "step": 936 }, { "epoch": 0.08, "grad_norm": 5.216817841660681, "learning_rate": 9.943371623399001e-06, "loss": 1.3673, "step": 937 }, { "epoch": 0.08, "grad_norm": 4.588521825500617, "learning_rate": 9.943172781460596e-06, "loss": 0.9086, "step": 938 }, { "epoch": 0.08, "grad_norm": 4.840878788819319, "learning_rate": 9.942973593027295e-06, "loss": 0.8751, "step": 939 }, { "epoch": 0.08, "grad_norm": 5.159323698725976, "learning_rate": 9.942774058113053e-06, "loss": 0.969, "step": 940 }, { "epoch": 0.08, "grad_norm": 4.500036698511603, "learning_rate": 9.942574176731863e-06, "loss": 0.9783, "step": 941 }, { "epoch": 0.08, "grad_norm": 4.637897868620929, "learning_rate": 9.942373948897732e-06, "loss": 1.1139, "step": 942 }, { "epoch": 0.08, "grad_norm": 5.260102017631009, "learning_rate": 9.942173374624697e-06, "loss": 1.0704, "step": 943 }, { "epoch": 0.08, "grad_norm": 2.6440686740791235, "learning_rate": 9.941972453926817e-06, "loss": 0.3392, "step": 944 }, { "epoch": 0.08, "grad_norm": 3.4619149305092707, "learning_rate": 9.941771186818176e-06, "loss": 0.7033, "step": 945 }, { "epoch": 0.08, "grad_norm": 4.2853503596861815, "learning_rate": 9.941569573312882e-06, "loss": 0.7557, "step": 946 }, { "epoch": 0.08, "grad_norm": 3.4535644407032398, "learning_rate": 9.941367613425064e-06, "loss": 0.5132, "step": 947 }, { "epoch": 0.08, "grad_norm": 7.988273824309304, "learning_rate": 9.941165307168883e-06, "loss": 1.0222, "step": 948 }, { "epoch": 0.08, "grad_norm": 4.797267330365143, "learning_rate": 9.94096265455852e-06, "loss": 0.8054, "step": 949 }, { "epoch": 0.08, "grad_norm": 5.451973933847118, "learning_rate": 9.940759655608174e-06, "loss": 1.2013, "step": 950 }, { "epoch": 0.08, "grad_norm": 4.937150092892459, "learning_rate": 9.940556310332081e-06, "loss": 1.0607, "step": 951 }, { "epoch": 0.08, "grad_norm": 5.48017657805159, "learning_rate": 9.94035261874449e-06, "loss": 1.2165, "step": 952 }, { "epoch": 0.08, "grad_norm": 6.6979240049013375, "learning_rate": 9.940148580859684e-06, "loss": 1.169, "step": 953 }, { "epoch": 0.08, "grad_norm": 5.048990796591552, "learning_rate": 9.93994419669196e-06, "loss": 1.193, "step": 954 }, { "epoch": 0.08, "grad_norm": 7.120682486408936, "learning_rate": 9.939739466255646e-06, "loss": 1.006, "step": 955 }, { "epoch": 0.08, "grad_norm": 4.25295719086572, "learning_rate": 9.939534389565096e-06, "loss": 0.5601, "step": 956 }, { "epoch": 0.08, "grad_norm": 2.9677365404338754, "learning_rate": 9.939328966634679e-06, "loss": 0.5047, "step": 957 }, { "epoch": 0.08, "grad_norm": 4.720941728762174, "learning_rate": 9.9391231974788e-06, "loss": 1.2215, "step": 958 }, { "epoch": 0.08, "grad_norm": 1.7910568012463464, "learning_rate": 9.938917082111878e-06, "loss": 0.2296, "step": 959 }, { "epoch": 0.08, "grad_norm": 4.461023834809202, "learning_rate": 9.938710620548363e-06, "loss": 0.8519, "step": 960 }, { "epoch": 0.08, "grad_norm": 3.880190533803183, "learning_rate": 9.938503812802726e-06, "loss": 0.8242, "step": 961 }, { "epoch": 0.08, "grad_norm": 3.93656669188537, "learning_rate": 9.938296658889467e-06, "loss": 0.7574, "step": 962 }, { "epoch": 0.08, "grad_norm": 5.2002274052516855, "learning_rate": 9.938089158823101e-06, "loss": 0.9766, "step": 963 }, { "epoch": 0.08, "grad_norm": 3.8287118106163005, "learning_rate": 9.937881312618178e-06, "loss": 0.984, "step": 964 }, { "epoch": 0.08, "grad_norm": 6.032433365734891, "learning_rate": 9.937673120289264e-06, "loss": 1.3364, "step": 965 }, { "epoch": 0.08, "grad_norm": 1.624076949651301, "learning_rate": 9.937464581850952e-06, "loss": 0.2272, "step": 966 }, { "epoch": 0.08, "grad_norm": 2.46448601887881, "learning_rate": 9.937255697317862e-06, "loss": 0.6506, "step": 967 }, { "epoch": 0.08, "grad_norm": 4.687183703503713, "learning_rate": 9.937046466704635e-06, "loss": 1.1185, "step": 968 }, { "epoch": 0.08, "grad_norm": 3.12344443076618, "learning_rate": 9.936836890025934e-06, "loss": 0.6552, "step": 969 }, { "epoch": 0.08, "grad_norm": 4.008246852671144, "learning_rate": 9.936626967296454e-06, "loss": 0.3855, "step": 970 }, { "epoch": 0.08, "grad_norm": 5.727741706894561, "learning_rate": 9.936416698530908e-06, "loss": 1.3848, "step": 971 }, { "epoch": 0.08, "grad_norm": 3.0058232533086335, "learning_rate": 9.936206083744036e-06, "loss": 0.607, "step": 972 }, { "epoch": 0.08, "grad_norm": 4.474463900134665, "learning_rate": 9.935995122950597e-06, "loss": 0.9998, "step": 973 }, { "epoch": 0.08, "grad_norm": 2.71840698955997, "learning_rate": 9.935783816165384e-06, "loss": 0.4255, "step": 974 }, { "epoch": 0.08, "grad_norm": 4.69033891376098, "learning_rate": 9.935572163403205e-06, "loss": 1.203, "step": 975 }, { "epoch": 0.08, "grad_norm": 4.058363801460739, "learning_rate": 9.935360164678897e-06, "loss": 0.9688, "step": 976 }, { "epoch": 0.08, "grad_norm": 4.1298057644289985, "learning_rate": 9.93514782000732e-06, "loss": 0.8337, "step": 977 }, { "epoch": 0.08, "grad_norm": 3.248683450242094, "learning_rate": 9.93493512940336e-06, "loss": 0.718, "step": 978 }, { "epoch": 0.08, "grad_norm": 3.004927639727986, "learning_rate": 9.934722092881923e-06, "loss": 0.3778, "step": 979 }, { "epoch": 0.08, "grad_norm": 6.349465302935186, "learning_rate": 9.934508710457944e-06, "loss": 1.3507, "step": 980 }, { "epoch": 0.08, "grad_norm": 3.6202710526830284, "learning_rate": 9.934294982146379e-06, "loss": 0.5398, "step": 981 }, { "epoch": 0.08, "grad_norm": 4.969030239131703, "learning_rate": 9.93408090796221e-06, "loss": 1.1796, "step": 982 }, { "epoch": 0.08, "grad_norm": 5.911435850540964, "learning_rate": 9.933866487920443e-06, "loss": 1.5327, "step": 983 }, { "epoch": 0.08, "grad_norm": 5.304625028599958, "learning_rate": 9.933651722036106e-06, "loss": 1.1508, "step": 984 }, { "epoch": 0.08, "grad_norm": 3.8855327016276147, "learning_rate": 9.933436610324256e-06, "loss": 0.7818, "step": 985 }, { "epoch": 0.08, "grad_norm": 2.284180123451619, "learning_rate": 9.93322115279997e-06, "loss": 0.505, "step": 986 }, { "epoch": 0.08, "grad_norm": 3.1541005528884596, "learning_rate": 9.93300534947835e-06, "loss": 0.7007, "step": 987 }, { "epoch": 0.08, "grad_norm": 4.915895095443689, "learning_rate": 9.932789200374525e-06, "loss": 1.5247, "step": 988 }, { "epoch": 0.08, "grad_norm": 4.442887963856919, "learning_rate": 9.93257270550364e-06, "loss": 0.6883, "step": 989 }, { "epoch": 0.08, "grad_norm": 4.345430763632468, "learning_rate": 9.93235586488088e-06, "loss": 1.083, "step": 990 }, { "epoch": 0.08, "grad_norm": 5.6329279194096324, "learning_rate": 9.932138678521438e-06, "loss": 1.4216, "step": 991 }, { "epoch": 0.08, "grad_norm": 3.8404128106622193, "learning_rate": 9.93192114644054e-06, "loss": 0.5213, "step": 992 }, { "epoch": 0.08, "grad_norm": 3.829742066244959, "learning_rate": 9.931703268653431e-06, "loss": 0.9371, "step": 993 }, { "epoch": 0.08, "grad_norm": 5.9845704625784375, "learning_rate": 9.931485045175388e-06, "loss": 1.1885, "step": 994 }, { "epoch": 0.08, "grad_norm": 3.3190081677989216, "learning_rate": 9.931266476021704e-06, "loss": 0.6295, "step": 995 }, { "epoch": 0.08, "grad_norm": 5.910239429035196, "learning_rate": 9.931047561207702e-06, "loss": 1.2699, "step": 996 }, { "epoch": 0.08, "grad_norm": 3.619849252844896, "learning_rate": 9.930828300748726e-06, "loss": 0.7838, "step": 997 }, { "epoch": 0.08, "grad_norm": 5.235519631028344, "learning_rate": 9.930608694660144e-06, "loss": 1.2416, "step": 998 }, { "epoch": 0.08, "grad_norm": 4.353341612052121, "learning_rate": 9.930388742957351e-06, "loss": 0.939, "step": 999 }, { "epoch": 0.08, "grad_norm": 3.028762046178202, "learning_rate": 9.930168445655766e-06, "loss": 0.4832, "step": 1000 }, { "epoch": 0.08, "grad_norm": 5.591371052059989, "learning_rate": 9.929947802770827e-06, "loss": 1.2762, "step": 1001 }, { "epoch": 0.08, "grad_norm": 2.697433120159279, "learning_rate": 9.929726814318004e-06, "loss": 0.2835, "step": 1002 }, { "epoch": 0.08, "grad_norm": 4.835924666363637, "learning_rate": 9.929505480312785e-06, "loss": 0.9394, "step": 1003 }, { "epoch": 0.08, "grad_norm": 2.3460503065278155, "learning_rate": 9.929283800770684e-06, "loss": 0.4421, "step": 1004 }, { "epoch": 0.08, "grad_norm": 4.235110616209546, "learning_rate": 9.92906177570724e-06, "loss": 0.7821, "step": 1005 }, { "epoch": 0.08, "grad_norm": 5.546859033042787, "learning_rate": 9.92883940513802e-06, "loss": 1.023, "step": 1006 }, { "epoch": 0.08, "grad_norm": 4.881188064635342, "learning_rate": 9.928616689078605e-06, "loss": 1.1696, "step": 1007 }, { "epoch": 0.08, "grad_norm": 4.414790877336852, "learning_rate": 9.928393627544612e-06, "loss": 0.9386, "step": 1008 }, { "epoch": 0.08, "grad_norm": 5.553507370975231, "learning_rate": 9.928170220551671e-06, "loss": 1.222, "step": 1009 }, { "epoch": 0.08, "grad_norm": 3.6732592042961465, "learning_rate": 9.927946468115448e-06, "loss": 0.7668, "step": 1010 }, { "epoch": 0.08, "grad_norm": 5.3890264197841145, "learning_rate": 9.927722370251623e-06, "loss": 0.8403, "step": 1011 }, { "epoch": 0.08, "grad_norm": 5.415266088346729, "learning_rate": 9.927497926975906e-06, "loss": 1.3521, "step": 1012 }, { "epoch": 0.08, "grad_norm": 3.0549480173712524, "learning_rate": 9.927273138304028e-06, "loss": 0.691, "step": 1013 }, { "epoch": 0.08, "grad_norm": 4.4185227553692865, "learning_rate": 9.927048004251748e-06, "loss": 0.7587, "step": 1014 }, { "epoch": 0.08, "grad_norm": 1.5284307873609142, "learning_rate": 9.926822524834845e-06, "loss": 0.2633, "step": 1015 }, { "epoch": 0.08, "grad_norm": 5.855098239343265, "learning_rate": 9.926596700069122e-06, "loss": 0.8446, "step": 1016 }, { "epoch": 0.08, "grad_norm": 4.706309854028427, "learning_rate": 9.926370529970414e-06, "loss": 1.1894, "step": 1017 }, { "epoch": 0.08, "grad_norm": 4.751806675384341, "learning_rate": 9.92614401455457e-06, "loss": 0.7973, "step": 1018 }, { "epoch": 0.08, "grad_norm": 3.9276380261145714, "learning_rate": 9.925917153837469e-06, "loss": 0.792, "step": 1019 }, { "epoch": 0.08, "grad_norm": 2.120171889646719, "learning_rate": 9.925689947835015e-06, "loss": 0.412, "step": 1020 }, { "epoch": 0.08, "grad_norm": 3.509134466109772, "learning_rate": 9.925462396563131e-06, "loss": 0.7342, "step": 1021 }, { "epoch": 0.08, "grad_norm": 5.275264743662667, "learning_rate": 9.925234500037768e-06, "loss": 1.1055, "step": 1022 }, { "epoch": 0.08, "grad_norm": 3.706494826186237, "learning_rate": 9.925006258274903e-06, "loss": 0.6293, "step": 1023 }, { "epoch": 0.08, "grad_norm": 4.601024212740757, "learning_rate": 9.924777671290532e-06, "loss": 1.1678, "step": 1024 }, { "epoch": 0.08, "grad_norm": 3.604621793451188, "learning_rate": 9.92454873910068e-06, "loss": 0.443, "step": 1025 }, { "epoch": 0.08, "grad_norm": 3.552121826306486, "learning_rate": 9.924319461721391e-06, "loss": 0.7045, "step": 1026 }, { "epoch": 0.08, "grad_norm": 2.5575856771387238, "learning_rate": 9.92408983916874e-06, "loss": 0.4696, "step": 1027 }, { "epoch": 0.08, "grad_norm": 5.289654417823295, "learning_rate": 9.92385987145882e-06, "loss": 1.1612, "step": 1028 }, { "epoch": 0.08, "grad_norm": 4.014493264742926, "learning_rate": 9.923629558607753e-06, "loss": 1.0631, "step": 1029 }, { "epoch": 0.08, "grad_norm": 2.69656964091769, "learning_rate": 9.923398900631681e-06, "loss": 0.7831, "step": 1030 }, { "epoch": 0.08, "grad_norm": 1.7779160202979185, "learning_rate": 9.923167897546773e-06, "loss": 0.4389, "step": 1031 }, { "epoch": 0.08, "grad_norm": 4.123388231016368, "learning_rate": 9.92293654936922e-06, "loss": 1.0849, "step": 1032 }, { "epoch": 0.08, "grad_norm": 4.652202727799504, "learning_rate": 9.922704856115241e-06, "loss": 0.7509, "step": 1033 }, { "epoch": 0.08, "grad_norm": 1.9835718893039749, "learning_rate": 9.922472817801075e-06, "loss": 0.4422, "step": 1034 }, { "epoch": 0.08, "grad_norm": 5.67324732639257, "learning_rate": 9.922240434442988e-06, "loss": 1.484, "step": 1035 }, { "epoch": 0.08, "grad_norm": 3.1649733381598733, "learning_rate": 9.922007706057266e-06, "loss": 0.4445, "step": 1036 }, { "epoch": 0.08, "grad_norm": 2.977160755506409, "learning_rate": 9.921774632660226e-06, "loss": 0.6295, "step": 1037 }, { "epoch": 0.08, "grad_norm": 2.9528745949766493, "learning_rate": 9.921541214268202e-06, "loss": 0.5489, "step": 1038 }, { "epoch": 0.08, "grad_norm": 3.978548604532141, "learning_rate": 9.92130745089756e-06, "loss": 0.5643, "step": 1039 }, { "epoch": 0.09, "grad_norm": 3.542451724141814, "learning_rate": 9.921073342564681e-06, "loss": 0.781, "step": 1040 }, { "epoch": 0.09, "grad_norm": 3.3721141526352976, "learning_rate": 9.920838889285979e-06, "loss": 0.5458, "step": 1041 }, { "epoch": 0.09, "grad_norm": 2.7847159815128384, "learning_rate": 9.920604091077886e-06, "loss": 0.6578, "step": 1042 }, { "epoch": 0.09, "grad_norm": 2.9754391055259912, "learning_rate": 9.92036894795686e-06, "loss": 0.613, "step": 1043 }, { "epoch": 0.09, "grad_norm": 4.030356464555068, "learning_rate": 9.920133459939385e-06, "loss": 0.6361, "step": 1044 }, { "epoch": 0.09, "grad_norm": 4.880834558249092, "learning_rate": 9.919897627041967e-06, "loss": 0.8937, "step": 1045 }, { "epoch": 0.09, "grad_norm": 1.2906676140310624, "learning_rate": 9.919661449281136e-06, "loss": 0.288, "step": 1046 }, { "epoch": 0.09, "grad_norm": 3.284278659461712, "learning_rate": 9.919424926673449e-06, "loss": 0.635, "step": 1047 }, { "epoch": 0.09, "grad_norm": 4.82577780947523, "learning_rate": 9.919188059235483e-06, "loss": 0.9913, "step": 1048 }, { "epoch": 0.09, "grad_norm": 1.758298021782454, "learning_rate": 9.918950846983844e-06, "loss": 0.4224, "step": 1049 }, { "epoch": 0.09, "grad_norm": 5.663385005526883, "learning_rate": 9.918713289935156e-06, "loss": 1.3562, "step": 1050 }, { "epoch": 0.09, "grad_norm": 3.746255654862295, "learning_rate": 9.918475388106075e-06, "loss": 0.7948, "step": 1051 }, { "epoch": 0.09, "grad_norm": 4.1445840998076156, "learning_rate": 9.918237141513272e-06, "loss": 0.687, "step": 1052 }, { "epoch": 0.09, "grad_norm": 2.729431954155433, "learning_rate": 9.917998550173451e-06, "loss": 0.6632, "step": 1053 }, { "epoch": 0.09, "grad_norm": 3.7846370807915215, "learning_rate": 9.917759614103335e-06, "loss": 0.6811, "step": 1054 }, { "epoch": 0.09, "grad_norm": 5.817056381788165, "learning_rate": 9.917520333319671e-06, "loss": 0.9256, "step": 1055 }, { "epoch": 0.09, "grad_norm": 2.366125843476706, "learning_rate": 9.917280707839235e-06, "loss": 0.4559, "step": 1056 }, { "epoch": 0.09, "grad_norm": 2.2462937271026755, "learning_rate": 9.91704073767882e-06, "loss": 0.5, "step": 1057 }, { "epoch": 0.09, "grad_norm": 4.598722852680578, "learning_rate": 9.916800422855247e-06, "loss": 0.8063, "step": 1058 }, { "epoch": 0.09, "grad_norm": 6.463919076864924, "learning_rate": 9.916559763385364e-06, "loss": 1.3674, "step": 1059 }, { "epoch": 0.09, "grad_norm": 3.0754104305945087, "learning_rate": 9.916318759286039e-06, "loss": 0.5969, "step": 1060 }, { "epoch": 0.09, "grad_norm": 5.634585435044843, "learning_rate": 9.916077410574163e-06, "loss": 1.1031, "step": 1061 }, { "epoch": 0.09, "grad_norm": 3.7528878203710736, "learning_rate": 9.915835717266658e-06, "loss": 0.7968, "step": 1062 }, { "epoch": 0.09, "grad_norm": 4.66701414858373, "learning_rate": 9.91559367938046e-06, "loss": 0.8395, "step": 1063 }, { "epoch": 0.09, "grad_norm": 6.168129947730295, "learning_rate": 9.91535129693254e-06, "loss": 1.4167, "step": 1064 }, { "epoch": 0.09, "grad_norm": 3.6400228917682234, "learning_rate": 9.915108569939884e-06, "loss": 0.8876, "step": 1065 }, { "epoch": 0.09, "grad_norm": 2.3615172096238357, "learning_rate": 9.91486549841951e-06, "loss": 0.3196, "step": 1066 }, { "epoch": 0.09, "grad_norm": 6.1590255485854595, "learning_rate": 9.914622082388452e-06, "loss": 1.7367, "step": 1067 }, { "epoch": 0.09, "grad_norm": 1.5833506445418588, "learning_rate": 9.914378321863776e-06, "loss": 0.2484, "step": 1068 }, { "epoch": 0.09, "grad_norm": 3.9244870590910623, "learning_rate": 9.914134216862568e-06, "loss": 0.9296, "step": 1069 }, { "epoch": 0.09, "grad_norm": 4.118313589125725, "learning_rate": 9.913889767401935e-06, "loss": 1.171, "step": 1070 }, { "epoch": 0.09, "grad_norm": 4.112181123473643, "learning_rate": 9.913644973499017e-06, "loss": 0.726, "step": 1071 }, { "epoch": 0.09, "grad_norm": 2.9211444194643703, "learning_rate": 9.913399835170969e-06, "loss": 0.4475, "step": 1072 }, { "epoch": 0.09, "grad_norm": 3.2639064043344446, "learning_rate": 9.913154352434977e-06, "loss": 0.7201, "step": 1073 }, { "epoch": 0.09, "grad_norm": 2.9042396813762323, "learning_rate": 9.912908525308246e-06, "loss": 0.6094, "step": 1074 }, { "epoch": 0.09, "grad_norm": 4.129947465502224, "learning_rate": 9.912662353808009e-06, "loss": 0.783, "step": 1075 }, { "epoch": 0.09, "grad_norm": 5.217328334333304, "learning_rate": 9.912415837951522e-06, "loss": 0.9254, "step": 1076 }, { "epoch": 0.09, "grad_norm": 5.371558982797147, "learning_rate": 9.912168977756061e-06, "loss": 0.9842, "step": 1077 }, { "epoch": 0.09, "grad_norm": 4.104153333889138, "learning_rate": 9.911921773238935e-06, "loss": 0.9036, "step": 1078 }, { "epoch": 0.09, "grad_norm": 5.151522115800109, "learning_rate": 9.911674224417469e-06, "loss": 1.0193, "step": 1079 }, { "epoch": 0.09, "grad_norm": 5.746469156002981, "learning_rate": 9.911426331309013e-06, "loss": 1.1992, "step": 1080 }, { "epoch": 0.09, "grad_norm": 5.258407588455397, "learning_rate": 9.911178093930949e-06, "loss": 1.3186, "step": 1081 }, { "epoch": 0.09, "grad_norm": 4.235760757939858, "learning_rate": 9.910929512300673e-06, "loss": 1.0851, "step": 1082 }, { "epoch": 0.09, "grad_norm": 4.453081187996927, "learning_rate": 9.91068058643561e-06, "loss": 0.737, "step": 1083 }, { "epoch": 0.09, "grad_norm": 4.71536401568929, "learning_rate": 9.91043131635321e-06, "loss": 1.3866, "step": 1084 }, { "epoch": 0.09, "grad_norm": 5.547313676110811, "learning_rate": 9.910181702070944e-06, "loss": 1.1275, "step": 1085 }, { "epoch": 0.09, "grad_norm": 3.2128405071826625, "learning_rate": 9.90993174360631e-06, "loss": 0.5183, "step": 1086 }, { "epoch": 0.09, "grad_norm": 4.711441991976482, "learning_rate": 9.90968144097683e-06, "loss": 0.9171, "step": 1087 }, { "epoch": 0.09, "grad_norm": 3.8161551402219795, "learning_rate": 9.909430794200047e-06, "loss": 0.6247, "step": 1088 }, { "epoch": 0.09, "grad_norm": 4.613553679871445, "learning_rate": 9.909179803293532e-06, "loss": 0.9455, "step": 1089 }, { "epoch": 0.09, "grad_norm": 4.1355714333717275, "learning_rate": 9.90892846827488e-06, "loss": 1.2161, "step": 1090 }, { "epoch": 0.09, "grad_norm": 5.0312869401343745, "learning_rate": 9.908676789161701e-06, "loss": 1.0728, "step": 1091 }, { "epoch": 0.09, "grad_norm": 2.8283986450010477, "learning_rate": 9.908424765971644e-06, "loss": 0.5282, "step": 1092 }, { "epoch": 0.09, "grad_norm": 2.571969875090206, "learning_rate": 9.908172398722374e-06, "loss": 0.518, "step": 1093 }, { "epoch": 0.09, "grad_norm": 4.920075402725496, "learning_rate": 9.907919687431578e-06, "loss": 0.8735, "step": 1094 }, { "epoch": 0.09, "grad_norm": 3.4311968729395437, "learning_rate": 9.90766663211697e-06, "loss": 0.8832, "step": 1095 }, { "epoch": 0.09, "grad_norm": 4.73982103420756, "learning_rate": 9.90741323279629e-06, "loss": 1.2395, "step": 1096 }, { "epoch": 0.09, "grad_norm": 4.256664032664819, "learning_rate": 9.907159489487301e-06, "loss": 0.913, "step": 1097 }, { "epoch": 0.09, "grad_norm": 5.182530238530806, "learning_rate": 9.906905402207786e-06, "loss": 1.1338, "step": 1098 }, { "epoch": 0.09, "grad_norm": 5.10551835294458, "learning_rate": 9.90665097097556e-06, "loss": 0.9437, "step": 1099 }, { "epoch": 0.09, "grad_norm": 2.9631556450062604, "learning_rate": 9.906396195808452e-06, "loss": 0.6628, "step": 1100 }, { "epoch": 0.09, "grad_norm": 4.946634427167888, "learning_rate": 9.906141076724324e-06, "loss": 1.1265, "step": 1101 }, { "epoch": 0.09, "grad_norm": 4.369504579349011, "learning_rate": 9.905885613741058e-06, "loss": 0.6546, "step": 1102 }, { "epoch": 0.09, "grad_norm": 3.9024786940759517, "learning_rate": 9.905629806876562e-06, "loss": 0.6797, "step": 1103 }, { "epoch": 0.09, "grad_norm": 2.6359571441599225, "learning_rate": 9.905373656148765e-06, "loss": 0.3359, "step": 1104 }, { "epoch": 0.09, "grad_norm": 5.629817569105054, "learning_rate": 9.905117161575622e-06, "loss": 1.1071, "step": 1105 }, { "epoch": 0.09, "grad_norm": 4.820538418080594, "learning_rate": 9.904860323175114e-06, "loss": 0.6526, "step": 1106 }, { "epoch": 0.09, "grad_norm": 2.639546998032372, "learning_rate": 9.904603140965244e-06, "loss": 0.3731, "step": 1107 }, { "epoch": 0.09, "grad_norm": 5.60234746760394, "learning_rate": 9.904345614964039e-06, "loss": 1.2051, "step": 1108 }, { "epoch": 0.09, "grad_norm": 3.467763997576738, "learning_rate": 9.90408774518955e-06, "loss": 0.498, "step": 1109 }, { "epoch": 0.09, "grad_norm": 3.706513283245458, "learning_rate": 9.903829531659853e-06, "loss": 0.6948, "step": 1110 }, { "epoch": 0.09, "grad_norm": 5.228780493202378, "learning_rate": 9.903570974393044e-06, "loss": 0.6231, "step": 1111 }, { "epoch": 0.09, "grad_norm": 4.23269091182088, "learning_rate": 9.903312073407255e-06, "loss": 0.808, "step": 1112 }, { "epoch": 0.09, "grad_norm": 3.5909195757076335, "learning_rate": 9.903052828720626e-06, "loss": 0.9084, "step": 1113 }, { "epoch": 0.09, "grad_norm": 4.665407647763041, "learning_rate": 9.902793240351332e-06, "loss": 1.209, "step": 1114 }, { "epoch": 0.09, "grad_norm": 4.181500368016984, "learning_rate": 9.90253330831757e-06, "loss": 0.9321, "step": 1115 }, { "epoch": 0.09, "grad_norm": 4.007003219308571, "learning_rate": 9.902273032637558e-06, "loss": 0.6731, "step": 1116 }, { "epoch": 0.09, "grad_norm": 5.249991934849754, "learning_rate": 9.902012413329541e-06, "loss": 0.9785, "step": 1117 }, { "epoch": 0.09, "grad_norm": 3.636337888119685, "learning_rate": 9.901751450411787e-06, "loss": 0.5601, "step": 1118 }, { "epoch": 0.09, "grad_norm": 3.845419166719933, "learning_rate": 9.90149014390259e-06, "loss": 0.9011, "step": 1119 }, { "epoch": 0.09, "grad_norm": 4.201928266545957, "learning_rate": 9.901228493820265e-06, "loss": 1.3249, "step": 1120 }, { "epoch": 0.09, "grad_norm": 4.578795797853542, "learning_rate": 9.900966500183153e-06, "loss": 1.1478, "step": 1121 }, { "epoch": 0.09, "grad_norm": 4.6095756033428, "learning_rate": 9.90070416300962e-06, "loss": 0.6756, "step": 1122 }, { "epoch": 0.09, "grad_norm": 4.911041891172558, "learning_rate": 9.900441482318051e-06, "loss": 1.011, "step": 1123 }, { "epoch": 0.09, "grad_norm": 2.284294682149408, "learning_rate": 9.900178458126862e-06, "loss": 0.4008, "step": 1124 }, { "epoch": 0.09, "grad_norm": 3.7326206756730653, "learning_rate": 9.899915090454487e-06, "loss": 0.6352, "step": 1125 }, { "epoch": 0.09, "grad_norm": 5.428099213081668, "learning_rate": 9.89965137931939e-06, "loss": 0.8726, "step": 1126 }, { "epoch": 0.09, "grad_norm": 3.3340338283313966, "learning_rate": 9.899387324740053e-06, "loss": 0.813, "step": 1127 }, { "epoch": 0.09, "grad_norm": 2.746073944845481, "learning_rate": 9.899122926734988e-06, "loss": 0.6223, "step": 1128 }, { "epoch": 0.09, "grad_norm": 4.590965204756769, "learning_rate": 9.898858185322728e-06, "loss": 0.8611, "step": 1129 }, { "epoch": 0.09, "grad_norm": 5.894240050324802, "learning_rate": 9.898593100521828e-06, "loss": 0.8581, "step": 1130 }, { "epoch": 0.09, "grad_norm": 3.7572285946477177, "learning_rate": 9.898327672350871e-06, "loss": 0.6341, "step": 1131 }, { "epoch": 0.09, "grad_norm": 3.4296680237638593, "learning_rate": 9.898061900828461e-06, "loss": 0.7285, "step": 1132 }, { "epoch": 0.09, "grad_norm": 4.7157870658361665, "learning_rate": 9.897795785973227e-06, "loss": 0.9815, "step": 1133 }, { "epoch": 0.09, "grad_norm": 4.270557295796642, "learning_rate": 9.897529327803825e-06, "loss": 0.7319, "step": 1134 }, { "epoch": 0.09, "grad_norm": 2.906704857168554, "learning_rate": 9.897262526338933e-06, "loss": 0.436, "step": 1135 }, { "epoch": 0.09, "grad_norm": 2.3758294790859398, "learning_rate": 9.896995381597248e-06, "loss": 0.4856, "step": 1136 }, { "epoch": 0.09, "grad_norm": 3.349959256999474, "learning_rate": 9.8967278935975e-06, "loss": 0.6012, "step": 1137 }, { "epoch": 0.09, "grad_norm": 2.3521814313627614, "learning_rate": 9.896460062358437e-06, "loss": 0.3664, "step": 1138 }, { "epoch": 0.09, "grad_norm": 4.938674501026468, "learning_rate": 9.896191887898833e-06, "loss": 0.7762, "step": 1139 }, { "epoch": 0.09, "grad_norm": 4.476274030084168, "learning_rate": 9.895923370237487e-06, "loss": 0.8053, "step": 1140 }, { "epoch": 0.09, "grad_norm": 5.477353094729364, "learning_rate": 9.895654509393219e-06, "loss": 1.5004, "step": 1141 }, { "epoch": 0.09, "grad_norm": 3.7789096830346423, "learning_rate": 9.895385305384875e-06, "loss": 0.9327, "step": 1142 }, { "epoch": 0.09, "grad_norm": 4.052117060974299, "learning_rate": 9.895115758231327e-06, "loss": 0.7655, "step": 1143 }, { "epoch": 0.09, "grad_norm": 4.311851068484635, "learning_rate": 9.894845867951468e-06, "loss": 0.7396, "step": 1144 }, { "epoch": 0.09, "grad_norm": 3.7923414529988455, "learning_rate": 9.894575634564216e-06, "loss": 0.7855, "step": 1145 }, { "epoch": 0.09, "grad_norm": 3.945249613145775, "learning_rate": 9.894305058088513e-06, "loss": 0.9153, "step": 1146 }, { "epoch": 0.09, "grad_norm": 4.7840576172347395, "learning_rate": 9.894034138543325e-06, "loss": 1.0292, "step": 1147 }, { "epoch": 0.09, "grad_norm": 2.93236236975966, "learning_rate": 9.893762875947643e-06, "loss": 0.4059, "step": 1148 }, { "epoch": 0.09, "grad_norm": 4.58890168616701, "learning_rate": 9.893491270320482e-06, "loss": 1.0852, "step": 1149 }, { "epoch": 0.09, "grad_norm": 4.559899603280683, "learning_rate": 9.89321932168088e-06, "loss": 1.1237, "step": 1150 }, { "epoch": 0.09, "grad_norm": 4.60214704397996, "learning_rate": 9.892947030047897e-06, "loss": 0.9579, "step": 1151 }, { "epoch": 0.09, "grad_norm": 3.435288516889513, "learning_rate": 9.892674395440623e-06, "loss": 0.7672, "step": 1152 }, { "epoch": 0.09, "grad_norm": 3.966391987672413, "learning_rate": 9.892401417878166e-06, "loss": 0.494, "step": 1153 }, { "epoch": 0.09, "grad_norm": 3.5165349113611164, "learning_rate": 9.89212809737966e-06, "loss": 0.6433, "step": 1154 }, { "epoch": 0.09, "grad_norm": 4.964943785418872, "learning_rate": 9.891854433964268e-06, "loss": 1.0597, "step": 1155 }, { "epoch": 0.09, "grad_norm": 2.7599503203593763, "learning_rate": 9.891580427651169e-06, "loss": 0.6986, "step": 1156 }, { "epoch": 0.09, "grad_norm": 4.787335060739045, "learning_rate": 9.891306078459569e-06, "loss": 1.4073, "step": 1157 }, { "epoch": 0.09, "grad_norm": 4.2838187991127645, "learning_rate": 9.8910313864087e-06, "loss": 0.7886, "step": 1158 }, { "epoch": 0.09, "grad_norm": 3.4466029728678587, "learning_rate": 9.890756351517816e-06, "loss": 0.6352, "step": 1159 }, { "epoch": 0.09, "grad_norm": 4.956320751045181, "learning_rate": 9.890480973806198e-06, "loss": 1.3651, "step": 1160 }, { "epoch": 0.09, "grad_norm": 5.405188553619671, "learning_rate": 9.890205253293145e-06, "loss": 0.6927, "step": 1161 }, { "epoch": 0.09, "grad_norm": 4.331448218013707, "learning_rate": 9.889929189997988e-06, "loss": 1.1553, "step": 1162 }, { "epoch": 0.1, "grad_norm": 4.717470241828111, "learning_rate": 9.889652783940075e-06, "loss": 1.2707, "step": 1163 }, { "epoch": 0.1, "grad_norm": 3.3074133176826, "learning_rate": 9.889376035138782e-06, "loss": 0.8255, "step": 1164 }, { "epoch": 0.1, "grad_norm": 2.8692669892049047, "learning_rate": 9.889098943613508e-06, "loss": 0.669, "step": 1165 }, { "epoch": 0.1, "grad_norm": 3.0717586645649155, "learning_rate": 9.888821509383676e-06, "loss": 0.6628, "step": 1166 }, { "epoch": 0.1, "grad_norm": 4.781072878569787, "learning_rate": 9.888543732468732e-06, "loss": 1.0705, "step": 1167 }, { "epoch": 0.1, "grad_norm": 2.952219904396231, "learning_rate": 9.888265612888145e-06, "loss": 0.5253, "step": 1168 }, { "epoch": 0.1, "grad_norm": 5.000881351619064, "learning_rate": 9.887987150661415e-06, "loss": 1.2732, "step": 1169 }, { "epoch": 0.1, "grad_norm": 4.308052200362575, "learning_rate": 9.887708345808059e-06, "loss": 0.8583, "step": 1170 }, { "epoch": 0.1, "grad_norm": 4.8577523911579465, "learning_rate": 9.887429198347617e-06, "loss": 1.3483, "step": 1171 }, { "epoch": 0.1, "grad_norm": 3.898494233387562, "learning_rate": 9.88714970829966e-06, "loss": 0.9511, "step": 1172 }, { "epoch": 0.1, "grad_norm": 4.794041871561278, "learning_rate": 9.886869875683776e-06, "loss": 0.6661, "step": 1173 }, { "epoch": 0.1, "grad_norm": 2.9150265779956146, "learning_rate": 9.886589700519583e-06, "loss": 0.3034, "step": 1174 }, { "epoch": 0.1, "grad_norm": 4.368498537449657, "learning_rate": 9.886309182826717e-06, "loss": 0.5748, "step": 1175 }, { "epoch": 0.1, "grad_norm": 4.713244358718991, "learning_rate": 9.886028322624843e-06, "loss": 1.1596, "step": 1176 }, { "epoch": 0.1, "grad_norm": 4.28797089150776, "learning_rate": 9.885747119933648e-06, "loss": 1.0055, "step": 1177 }, { "epoch": 0.1, "grad_norm": 5.465500959031329, "learning_rate": 9.885465574772842e-06, "loss": 1.1441, "step": 1178 }, { "epoch": 0.1, "grad_norm": 3.163076994144076, "learning_rate": 9.885183687162162e-06, "loss": 0.4571, "step": 1179 }, { "epoch": 0.1, "grad_norm": 3.7490054918364697, "learning_rate": 9.884901457121366e-06, "loss": 0.6162, "step": 1180 }, { "epoch": 0.1, "grad_norm": 4.299431696370699, "learning_rate": 9.884618884670236e-06, "loss": 0.9722, "step": 1181 }, { "epoch": 0.1, "grad_norm": 3.29584811568989, "learning_rate": 9.88433596982858e-06, "loss": 0.6991, "step": 1182 }, { "epoch": 0.1, "grad_norm": 4.759772526614289, "learning_rate": 9.88405271261623e-06, "loss": 1.0675, "step": 1183 }, { "epoch": 0.1, "grad_norm": 1.3720315128058174, "learning_rate": 9.883769113053039e-06, "loss": 0.149, "step": 1184 }, { "epoch": 0.1, "grad_norm": 3.66518376225917, "learning_rate": 9.883485171158889e-06, "loss": 0.8875, "step": 1185 }, { "epoch": 0.1, "grad_norm": 5.227971159935246, "learning_rate": 9.883200886953682e-06, "loss": 1.3924, "step": 1186 }, { "epoch": 0.1, "grad_norm": 6.02754405998497, "learning_rate": 9.882916260457343e-06, "loss": 1.6309, "step": 1187 }, { "epoch": 0.1, "grad_norm": 4.07747958412001, "learning_rate": 9.882631291689823e-06, "loss": 0.8492, "step": 1188 }, { "epoch": 0.1, "grad_norm": 3.2134197448319526, "learning_rate": 9.882345980671102e-06, "loss": 0.6647, "step": 1189 }, { "epoch": 0.1, "grad_norm": 5.346689217592907, "learning_rate": 9.882060327421174e-06, "loss": 1.1335, "step": 1190 }, { "epoch": 0.1, "grad_norm": 2.263765075179125, "learning_rate": 9.881774331960065e-06, "loss": 0.3391, "step": 1191 }, { "epoch": 0.1, "grad_norm": 2.641436335366062, "learning_rate": 9.88148799430782e-06, "loss": 0.4934, "step": 1192 }, { "epoch": 0.1, "grad_norm": 6.032362831368957, "learning_rate": 9.881201314484513e-06, "loss": 0.8404, "step": 1193 }, { "epoch": 0.1, "grad_norm": 2.9748639666149304, "learning_rate": 9.880914292510233e-06, "loss": 0.7072, "step": 1194 }, { "epoch": 0.1, "grad_norm": 3.8610638245991957, "learning_rate": 9.880626928405106e-06, "loss": 1.1508, "step": 1195 }, { "epoch": 0.1, "grad_norm": 2.3376495404426785, "learning_rate": 9.88033922218927e-06, "loss": 0.4453, "step": 1196 }, { "epoch": 0.1, "grad_norm": 5.859625847530836, "learning_rate": 9.880051173882896e-06, "loss": 1.4039, "step": 1197 }, { "epoch": 0.1, "grad_norm": 4.202714929903976, "learning_rate": 9.879762783506172e-06, "loss": 0.9029, "step": 1198 }, { "epoch": 0.1, "grad_norm": 4.083474294001203, "learning_rate": 9.879474051079312e-06, "loss": 0.9305, "step": 1199 }, { "epoch": 0.1, "grad_norm": 4.199315543955551, "learning_rate": 9.879184976622557e-06, "loss": 1.0684, "step": 1200 }, { "epoch": 0.1, "grad_norm": 4.131337594808835, "learning_rate": 9.878895560156172e-06, "loss": 0.9519, "step": 1201 }, { "epoch": 0.1, "grad_norm": 3.1636870069489524, "learning_rate": 9.87860580170044e-06, "loss": 0.4102, "step": 1202 }, { "epoch": 0.1, "grad_norm": 5.486130674054726, "learning_rate": 9.878315701275671e-06, "loss": 1.1242, "step": 1203 }, { "epoch": 0.1, "grad_norm": 4.040979726468128, "learning_rate": 9.878025258902204e-06, "loss": 0.8364, "step": 1204 }, { "epoch": 0.1, "grad_norm": 5.454378469994424, "learning_rate": 9.877734474600395e-06, "loss": 1.3415, "step": 1205 }, { "epoch": 0.1, "grad_norm": 5.1153828727308985, "learning_rate": 9.877443348390627e-06, "loss": 1.0923, "step": 1206 }, { "epoch": 0.1, "grad_norm": 5.157714411270862, "learning_rate": 9.877151880293307e-06, "loss": 1.1689, "step": 1207 }, { "epoch": 0.1, "grad_norm": 3.205116285973389, "learning_rate": 9.876860070328867e-06, "loss": 0.7162, "step": 1208 }, { "epoch": 0.1, "grad_norm": 3.125812556804762, "learning_rate": 9.876567918517759e-06, "loss": 0.6812, "step": 1209 }, { "epoch": 0.1, "grad_norm": 2.136997839119193, "learning_rate": 9.876275424880463e-06, "loss": 0.3998, "step": 1210 }, { "epoch": 0.1, "grad_norm": 3.9726971596754406, "learning_rate": 9.875982589437481e-06, "loss": 0.9348, "step": 1211 }, { "epoch": 0.1, "grad_norm": 3.295857980061875, "learning_rate": 9.87568941220934e-06, "loss": 0.5959, "step": 1212 }, { "epoch": 0.1, "grad_norm": 6.049165049333135, "learning_rate": 9.87539589321659e-06, "loss": 1.2373, "step": 1213 }, { "epoch": 0.1, "grad_norm": 4.855626558039712, "learning_rate": 9.875102032479807e-06, "loss": 0.7339, "step": 1214 }, { "epoch": 0.1, "grad_norm": 3.826138284182142, "learning_rate": 9.874807830019586e-06, "loss": 0.6591, "step": 1215 }, { "epoch": 0.1, "grad_norm": 5.83684207885743, "learning_rate": 9.874513285856553e-06, "loss": 0.7731, "step": 1216 }, { "epoch": 0.1, "grad_norm": 3.933104564450693, "learning_rate": 9.874218400011352e-06, "loss": 0.8364, "step": 1217 }, { "epoch": 0.1, "grad_norm": 1.4001799848197887, "learning_rate": 9.873923172504653e-06, "loss": 0.18, "step": 1218 }, { "epoch": 0.1, "grad_norm": 5.252528244067028, "learning_rate": 9.873627603357152e-06, "loss": 0.8766, "step": 1219 }, { "epoch": 0.1, "grad_norm": 3.988190929247905, "learning_rate": 9.873331692589566e-06, "loss": 0.8168, "step": 1220 }, { "epoch": 0.1, "grad_norm": 4.0318100849354, "learning_rate": 9.873035440222638e-06, "loss": 0.8017, "step": 1221 }, { "epoch": 0.1, "grad_norm": 2.756505224009058, "learning_rate": 9.872738846277133e-06, "loss": 0.6414, "step": 1222 }, { "epoch": 0.1, "grad_norm": 4.069998346474113, "learning_rate": 9.87244191077384e-06, "loss": 0.7499, "step": 1223 }, { "epoch": 0.1, "grad_norm": 2.5809666241236684, "learning_rate": 9.872144633733573e-06, "loss": 0.739, "step": 1224 }, { "epoch": 0.1, "grad_norm": 3.0947168680195705, "learning_rate": 9.871847015177173e-06, "loss": 0.5898, "step": 1225 }, { "epoch": 0.1, "grad_norm": 3.1358159711006333, "learning_rate": 9.8715490551255e-06, "loss": 0.4362, "step": 1226 }, { "epoch": 0.1, "grad_norm": 5.202307254243295, "learning_rate": 9.871250753599438e-06, "loss": 0.8291, "step": 1227 }, { "epoch": 0.1, "grad_norm": 4.560667754171791, "learning_rate": 9.870952110619899e-06, "loss": 1.2004, "step": 1228 }, { "epoch": 0.1, "grad_norm": 4.839448570202385, "learning_rate": 9.870653126207813e-06, "loss": 1.1906, "step": 1229 }, { "epoch": 0.1, "grad_norm": 4.587820228326079, "learning_rate": 9.870353800384142e-06, "loss": 1.1577, "step": 1230 }, { "epoch": 0.1, "grad_norm": 2.369399402473251, "learning_rate": 9.870054133169864e-06, "loss": 0.3833, "step": 1231 }, { "epoch": 0.1, "grad_norm": 4.332705145152846, "learning_rate": 9.869754124585988e-06, "loss": 0.9675, "step": 1232 }, { "epoch": 0.1, "grad_norm": 5.892316115188248, "learning_rate": 9.869453774653539e-06, "loss": 1.351, "step": 1233 }, { "epoch": 0.1, "grad_norm": 5.125532292329167, "learning_rate": 9.869153083393573e-06, "loss": 0.9729, "step": 1234 }, { "epoch": 0.1, "grad_norm": 3.038649194505703, "learning_rate": 9.868852050827167e-06, "loss": 0.6525, "step": 1235 }, { "epoch": 0.1, "grad_norm": 2.5708776653671337, "learning_rate": 9.868550676975422e-06, "loss": 0.7557, "step": 1236 }, { "epoch": 0.1, "grad_norm": 2.9247024667032946, "learning_rate": 9.86824896185946e-06, "loss": 0.6781, "step": 1237 }, { "epoch": 0.1, "grad_norm": 3.0777651806845143, "learning_rate": 9.867946905500437e-06, "loss": 0.3866, "step": 1238 }, { "epoch": 0.1, "grad_norm": 3.8429779511171898, "learning_rate": 9.867644507919518e-06, "loss": 0.7838, "step": 1239 }, { "epoch": 0.1, "grad_norm": 2.9761613135796563, "learning_rate": 9.867341769137902e-06, "loss": 0.7101, "step": 1240 }, { "epoch": 0.1, "grad_norm": 6.379783725511916, "learning_rate": 9.867038689176814e-06, "loss": 1.2627, "step": 1241 }, { "epoch": 0.1, "grad_norm": 2.9441494398725743, "learning_rate": 9.866735268057492e-06, "loss": 0.6645, "step": 1242 }, { "epoch": 0.1, "grad_norm": 4.3048297328129035, "learning_rate": 9.86643150580121e-06, "loss": 0.7912, "step": 1243 }, { "epoch": 0.1, "grad_norm": 4.212674300762759, "learning_rate": 9.866127402429257e-06, "loss": 0.5092, "step": 1244 }, { "epoch": 0.1, "grad_norm": 4.867708233325816, "learning_rate": 9.865822957962952e-06, "loss": 1.2898, "step": 1245 }, { "epoch": 0.1, "grad_norm": 4.9895126564047425, "learning_rate": 9.865518172423634e-06, "loss": 0.8157, "step": 1246 }, { "epoch": 0.1, "grad_norm": 3.1574728111342276, "learning_rate": 9.865213045832664e-06, "loss": 0.613, "step": 1247 }, { "epoch": 0.1, "grad_norm": 4.308032016001108, "learning_rate": 9.864907578211436e-06, "loss": 0.9943, "step": 1248 }, { "epoch": 0.1, "grad_norm": 3.978772964634741, "learning_rate": 9.864601769581357e-06, "loss": 0.5542, "step": 1249 }, { "epoch": 0.1, "grad_norm": 3.5859393692610575, "learning_rate": 9.864295619963866e-06, "loss": 0.708, "step": 1250 }, { "epoch": 0.1, "grad_norm": 3.2253481480588952, "learning_rate": 9.863989129380421e-06, "loss": 0.5619, "step": 1251 }, { "epoch": 0.1, "grad_norm": 5.248573746610271, "learning_rate": 9.863682297852506e-06, "loss": 1.1753, "step": 1252 }, { "epoch": 0.1, "grad_norm": 3.7692726514908133, "learning_rate": 9.86337512540163e-06, "loss": 0.785, "step": 1253 }, { "epoch": 0.1, "grad_norm": 4.140073443579133, "learning_rate": 9.863067612049321e-06, "loss": 0.7547, "step": 1254 }, { "epoch": 0.1, "grad_norm": 1.219012600204615, "learning_rate": 9.862759757817138e-06, "loss": 0.2145, "step": 1255 }, { "epoch": 0.1, "grad_norm": 4.692923749310751, "learning_rate": 9.862451562726659e-06, "loss": 1.0327, "step": 1256 }, { "epoch": 0.1, "grad_norm": 5.142416334886868, "learning_rate": 9.862143026799486e-06, "loss": 1.1972, "step": 1257 }, { "epoch": 0.1, "grad_norm": 2.215708570787776, "learning_rate": 9.861834150057247e-06, "loss": 0.462, "step": 1258 }, { "epoch": 0.1, "grad_norm": 4.209753111231955, "learning_rate": 9.861524932521595e-06, "loss": 0.9367, "step": 1259 }, { "epoch": 0.1, "grad_norm": 3.4564385364514285, "learning_rate": 9.8612153742142e-06, "loss": 0.701, "step": 1260 }, { "epoch": 0.1, "grad_norm": 3.7263112565077634, "learning_rate": 9.860905475156765e-06, "loss": 0.8455, "step": 1261 }, { "epoch": 0.1, "grad_norm": 4.164659038297447, "learning_rate": 9.86059523537101e-06, "loss": 0.7103, "step": 1262 }, { "epoch": 0.1, "grad_norm": 3.4221185950858892, "learning_rate": 9.860284654878682e-06, "loss": 0.5031, "step": 1263 }, { "epoch": 0.1, "grad_norm": 3.8565077383119695, "learning_rate": 9.859973733701553e-06, "loss": 0.7379, "step": 1264 }, { "epoch": 0.1, "grad_norm": 5.610438004102319, "learning_rate": 9.859662471861415e-06, "loss": 1.1086, "step": 1265 }, { "epoch": 0.1, "grad_norm": 3.0726742543457335, "learning_rate": 9.859350869380086e-06, "loss": 0.6462, "step": 1266 }, { "epoch": 0.1, "grad_norm": 5.0570579775243845, "learning_rate": 9.859038926279412e-06, "loss": 1.3292, "step": 1267 }, { "epoch": 0.1, "grad_norm": 3.549728041559905, "learning_rate": 9.858726642581253e-06, "loss": 0.7154, "step": 1268 }, { "epoch": 0.1, "grad_norm": 5.013422156375804, "learning_rate": 9.858414018307503e-06, "loss": 1.2002, "step": 1269 }, { "epoch": 0.1, "grad_norm": 3.2470568492629837, "learning_rate": 9.858101053480074e-06, "loss": 0.5817, "step": 1270 }, { "epoch": 0.1, "grad_norm": 3.8005829520202603, "learning_rate": 9.857787748120904e-06, "loss": 0.9949, "step": 1271 }, { "epoch": 0.1, "grad_norm": 4.094339621776428, "learning_rate": 9.857474102251955e-06, "loss": 0.7578, "step": 1272 }, { "epoch": 0.1, "grad_norm": 4.525640812991137, "learning_rate": 9.857160115895208e-06, "loss": 0.7344, "step": 1273 }, { "epoch": 0.1, "grad_norm": 4.002900024886897, "learning_rate": 9.856845789072678e-06, "loss": 0.9144, "step": 1274 }, { "epoch": 0.1, "grad_norm": 4.72727273650834, "learning_rate": 9.856531121806395e-06, "loss": 1.2397, "step": 1275 }, { "epoch": 0.1, "grad_norm": 3.5075716444438942, "learning_rate": 9.856216114118416e-06, "loss": 0.5811, "step": 1276 }, { "epoch": 0.1, "grad_norm": 4.847247096507675, "learning_rate": 9.855900766030819e-06, "loss": 0.9566, "step": 1277 }, { "epoch": 0.1, "grad_norm": 2.5594697153204096, "learning_rate": 9.855585077565714e-06, "loss": 0.4128, "step": 1278 }, { "epoch": 0.1, "grad_norm": 4.821428414387075, "learning_rate": 9.855269048745227e-06, "loss": 0.9853, "step": 1279 }, { "epoch": 0.1, "grad_norm": 2.722415247680657, "learning_rate": 9.854952679591508e-06, "loss": 0.4608, "step": 1280 }, { "epoch": 0.1, "grad_norm": 3.2581275069199576, "learning_rate": 9.854635970126738e-06, "loss": 0.4616, "step": 1281 }, { "epoch": 0.1, "grad_norm": 4.600979724613779, "learning_rate": 9.854318920373111e-06, "loss": 0.869, "step": 1282 }, { "epoch": 0.1, "grad_norm": 3.5221907799442045, "learning_rate": 9.854001530352855e-06, "loss": 0.6486, "step": 1283 }, { "epoch": 0.1, "grad_norm": 3.6404075289214157, "learning_rate": 9.853683800088217e-06, "loss": 0.7015, "step": 1284 }, { "epoch": 0.11, "grad_norm": 3.650201614991193, "learning_rate": 9.853365729601465e-06, "loss": 0.7917, "step": 1285 }, { "epoch": 0.11, "grad_norm": 4.977376505203249, "learning_rate": 9.853047318914898e-06, "loss": 0.9086, "step": 1286 }, { "epoch": 0.11, "grad_norm": 4.186796749935245, "learning_rate": 9.852728568050838e-06, "loss": 0.7049, "step": 1287 }, { "epoch": 0.11, "grad_norm": 5.395589551710405, "learning_rate": 9.852409477031621e-06, "loss": 1.4272, "step": 1288 }, { "epoch": 0.11, "grad_norm": 5.224627049772095, "learning_rate": 9.852090045879619e-06, "loss": 0.9042, "step": 1289 }, { "epoch": 0.11, "grad_norm": 3.4495366289399074, "learning_rate": 9.85177027461722e-06, "loss": 0.6737, "step": 1290 }, { "epoch": 0.11, "grad_norm": 5.118977930027873, "learning_rate": 9.851450163266843e-06, "loss": 1.3165, "step": 1291 }, { "epoch": 0.11, "grad_norm": 2.946791989038946, "learning_rate": 9.85112971185092e-06, "loss": 0.5781, "step": 1292 }, { "epoch": 0.11, "grad_norm": 3.9952593447159943, "learning_rate": 9.850808920391917e-06, "loss": 0.9511, "step": 1293 }, { "epoch": 0.11, "grad_norm": 4.293125196572581, "learning_rate": 9.850487788912319e-06, "loss": 0.8557, "step": 1294 }, { "epoch": 0.11, "grad_norm": 3.410250168793054, "learning_rate": 9.850166317434638e-06, "loss": 0.6575, "step": 1295 }, { "epoch": 0.11, "grad_norm": 3.481123343866197, "learning_rate": 9.849844505981405e-06, "loss": 0.5825, "step": 1296 }, { "epoch": 0.11, "grad_norm": 5.3876454567069585, "learning_rate": 9.849522354575178e-06, "loss": 1.014, "step": 1297 }, { "epoch": 0.11, "grad_norm": 2.192263677148237, "learning_rate": 9.84919986323854e-06, "loss": 0.3586, "step": 1298 }, { "epoch": 0.11, "grad_norm": 3.3341819516293008, "learning_rate": 9.848877031994095e-06, "loss": 0.6102, "step": 1299 }, { "epoch": 0.11, "grad_norm": 3.2551287600956305, "learning_rate": 9.848553860864474e-06, "loss": 0.5703, "step": 1300 }, { "epoch": 0.11, "grad_norm": 3.7703916932902035, "learning_rate": 9.848230349872326e-06, "loss": 1.0168, "step": 1301 }, { "epoch": 0.11, "grad_norm": 4.053425404359997, "learning_rate": 9.847906499040332e-06, "loss": 0.9807, "step": 1302 }, { "epoch": 0.11, "grad_norm": 5.072379777925076, "learning_rate": 9.847582308391189e-06, "loss": 1.0596, "step": 1303 }, { "epoch": 0.11, "grad_norm": 3.207348153568482, "learning_rate": 9.847257777947624e-06, "loss": 0.5372, "step": 1304 }, { "epoch": 0.11, "grad_norm": 5.7962584417662635, "learning_rate": 9.846932907732383e-06, "loss": 1.4428, "step": 1305 }, { "epoch": 0.11, "grad_norm": 3.9570045662665962, "learning_rate": 9.84660769776824e-06, "loss": 0.9904, "step": 1306 }, { "epoch": 0.11, "grad_norm": 4.37422552493355, "learning_rate": 9.84628214807799e-06, "loss": 1.3554, "step": 1307 }, { "epoch": 0.11, "grad_norm": 5.344126050620689, "learning_rate": 9.845956258684453e-06, "loss": 1.1511, "step": 1308 }, { "epoch": 0.11, "grad_norm": 3.878686990867304, "learning_rate": 9.84563002961047e-06, "loss": 0.9766, "step": 1309 }, { "epoch": 0.11, "grad_norm": 3.2087546006341934, "learning_rate": 9.845303460878913e-06, "loss": 0.5185, "step": 1310 }, { "epoch": 0.11, "grad_norm": 6.443723713364844, "learning_rate": 9.844976552512669e-06, "loss": 2.064, "step": 1311 }, { "epoch": 0.11, "grad_norm": 1.9195624220575043, "learning_rate": 9.844649304534653e-06, "loss": 0.2766, "step": 1312 }, { "epoch": 0.11, "grad_norm": 3.073045313682807, "learning_rate": 9.844321716967805e-06, "loss": 0.6306, "step": 1313 }, { "epoch": 0.11, "grad_norm": 3.6814448226780194, "learning_rate": 9.843993789835088e-06, "loss": 0.8284, "step": 1314 }, { "epoch": 0.11, "grad_norm": 4.174265067657053, "learning_rate": 9.843665523159488e-06, "loss": 0.9003, "step": 1315 }, { "epoch": 0.11, "grad_norm": 4.8093957573304404, "learning_rate": 9.843336916964012e-06, "loss": 1.1277, "step": 1316 }, { "epoch": 0.11, "grad_norm": 4.1501702688347955, "learning_rate": 9.8430079712717e-06, "loss": 0.9939, "step": 1317 }, { "epoch": 0.11, "grad_norm": 3.180878812579681, "learning_rate": 9.842678686105603e-06, "loss": 0.6883, "step": 1318 }, { "epoch": 0.11, "grad_norm": 3.9089167772488325, "learning_rate": 9.842349061488805e-06, "loss": 1.0128, "step": 1319 }, { "epoch": 0.11, "grad_norm": 4.7583039485954535, "learning_rate": 9.842019097444414e-06, "loss": 1.3914, "step": 1320 }, { "epoch": 0.11, "grad_norm": 5.575696603618623, "learning_rate": 9.841688793995556e-06, "loss": 1.1302, "step": 1321 }, { "epoch": 0.11, "grad_norm": 2.32329831084482, "learning_rate": 9.841358151165385e-06, "loss": 0.4465, "step": 1322 }, { "epoch": 0.11, "grad_norm": 4.344382550224537, "learning_rate": 9.841027168977078e-06, "loss": 1.4557, "step": 1323 }, { "epoch": 0.11, "grad_norm": 3.7431485492243355, "learning_rate": 9.840695847453833e-06, "loss": 0.901, "step": 1324 }, { "epoch": 0.11, "grad_norm": 4.841450732154974, "learning_rate": 9.840364186618876e-06, "loss": 0.8751, "step": 1325 }, { "epoch": 0.11, "grad_norm": 0.86898813254035, "learning_rate": 9.840032186495457e-06, "loss": 0.1148, "step": 1326 }, { "epoch": 0.11, "grad_norm": 4.354391039173452, "learning_rate": 9.839699847106843e-06, "loss": 0.9702, "step": 1327 }, { "epoch": 0.11, "grad_norm": 3.6741596542471933, "learning_rate": 9.839367168476333e-06, "loss": 0.7526, "step": 1328 }, { "epoch": 0.11, "grad_norm": 4.16879096917253, "learning_rate": 9.839034150627245e-06, "loss": 0.7054, "step": 1329 }, { "epoch": 0.11, "grad_norm": 4.765591481592283, "learning_rate": 9.838700793582925e-06, "loss": 0.8889, "step": 1330 }, { "epoch": 0.11, "grad_norm": 3.38145209692892, "learning_rate": 9.838367097366734e-06, "loss": 0.6064, "step": 1331 }, { "epoch": 0.11, "grad_norm": 4.854729782273505, "learning_rate": 9.83803306200207e-06, "loss": 1.133, "step": 1332 }, { "epoch": 0.11, "grad_norm": 5.116468267131302, "learning_rate": 9.83769868751234e-06, "loss": 1.1587, "step": 1333 }, { "epoch": 0.11, "grad_norm": 6.275829287323761, "learning_rate": 9.837363973920989e-06, "loss": 1.2242, "step": 1334 }, { "epoch": 0.11, "grad_norm": 2.645549953945881, "learning_rate": 9.837028921251472e-06, "loss": 0.4298, "step": 1335 }, { "epoch": 0.11, "grad_norm": 4.623616570720286, "learning_rate": 9.836693529527281e-06, "loss": 0.7962, "step": 1336 }, { "epoch": 0.11, "grad_norm": 4.405795229808539, "learning_rate": 9.836357798771922e-06, "loss": 1.0292, "step": 1337 }, { "epoch": 0.11, "grad_norm": 3.8284035944571864, "learning_rate": 9.83602172900893e-06, "loss": 0.7384, "step": 1338 }, { "epoch": 0.11, "grad_norm": 4.405684722902709, "learning_rate": 9.83568532026186e-06, "loss": 1.0122, "step": 1339 }, { "epoch": 0.11, "grad_norm": 4.241662354295038, "learning_rate": 9.835348572554296e-06, "loss": 0.8407, "step": 1340 }, { "epoch": 0.11, "grad_norm": 4.2711045608163785, "learning_rate": 9.835011485909837e-06, "loss": 0.9705, "step": 1341 }, { "epoch": 0.11, "grad_norm": 5.518128074176903, "learning_rate": 9.834674060352119e-06, "loss": 1.445, "step": 1342 }, { "epoch": 0.11, "grad_norm": 3.9578130578482336, "learning_rate": 9.834336295904787e-06, "loss": 0.5462, "step": 1343 }, { "epoch": 0.11, "grad_norm": 4.230932342502631, "learning_rate": 9.83399819259152e-06, "loss": 0.7917, "step": 1344 }, { "epoch": 0.11, "grad_norm": 3.904182482171247, "learning_rate": 9.83365975043602e-06, "loss": 0.8205, "step": 1345 }, { "epoch": 0.11, "grad_norm": 4.052625787870512, "learning_rate": 9.833320969462006e-06, "loss": 0.9524, "step": 1346 }, { "epoch": 0.11, "grad_norm": 2.9363570141585504, "learning_rate": 9.832981849693226e-06, "loss": 0.6908, "step": 1347 }, { "epoch": 0.11, "grad_norm": 4.845194021761832, "learning_rate": 9.832642391153452e-06, "loss": 0.9929, "step": 1348 }, { "epoch": 0.11, "grad_norm": 4.250043298251886, "learning_rate": 9.832302593866478e-06, "loss": 1.3876, "step": 1349 }, { "epoch": 0.11, "grad_norm": 3.9347958176876774, "learning_rate": 9.831962457856124e-06, "loss": 0.897, "step": 1350 }, { "epoch": 0.11, "grad_norm": 5.7234753317121525, "learning_rate": 9.831621983146227e-06, "loss": 1.291, "step": 1351 }, { "epoch": 0.11, "grad_norm": 2.657411952759672, "learning_rate": 9.83128116976066e-06, "loss": 0.6593, "step": 1352 }, { "epoch": 0.11, "grad_norm": 3.4986134293129334, "learning_rate": 9.830940017723308e-06, "loss": 0.6853, "step": 1353 }, { "epoch": 0.11, "grad_norm": 3.6022819684060194, "learning_rate": 9.830598527058083e-06, "loss": 0.9227, "step": 1354 }, { "epoch": 0.11, "grad_norm": 3.0415847920356756, "learning_rate": 9.830256697788924e-06, "loss": 0.6046, "step": 1355 }, { "epoch": 0.11, "grad_norm": 4.222301158214487, "learning_rate": 9.829914529939794e-06, "loss": 0.6136, "step": 1356 }, { "epoch": 0.11, "grad_norm": 4.498417460325489, "learning_rate": 9.829572023534675e-06, "loss": 0.8575, "step": 1357 }, { "epoch": 0.11, "grad_norm": 5.3827152069059965, "learning_rate": 9.829229178597575e-06, "loss": 0.9866, "step": 1358 }, { "epoch": 0.11, "grad_norm": 4.531474859604405, "learning_rate": 9.828885995152525e-06, "loss": 1.1519, "step": 1359 }, { "epoch": 0.11, "grad_norm": 3.3485362372809284, "learning_rate": 9.828542473223586e-06, "loss": 0.4419, "step": 1360 }, { "epoch": 0.11, "grad_norm": 4.76206496778344, "learning_rate": 9.82819861283483e-06, "loss": 0.9233, "step": 1361 }, { "epoch": 0.11, "grad_norm": 6.539558811679356, "learning_rate": 9.827854414010366e-06, "loss": 1.7233, "step": 1362 }, { "epoch": 0.11, "grad_norm": 5.815566721141583, "learning_rate": 9.827509876774315e-06, "loss": 0.6861, "step": 1363 }, { "epoch": 0.11, "grad_norm": 5.757901899383783, "learning_rate": 9.827165001150834e-06, "loss": 1.1095, "step": 1364 }, { "epoch": 0.11, "grad_norm": 2.886305903447551, "learning_rate": 9.826819787164095e-06, "loss": 0.44, "step": 1365 }, { "epoch": 0.11, "grad_norm": 2.926176324529388, "learning_rate": 9.826474234838293e-06, "loss": 0.6576, "step": 1366 }, { "epoch": 0.11, "grad_norm": 5.459558687055677, "learning_rate": 9.826128344197653e-06, "loss": 1.2845, "step": 1367 }, { "epoch": 0.11, "grad_norm": 4.384367792526328, "learning_rate": 9.82578211526642e-06, "loss": 0.9965, "step": 1368 }, { "epoch": 0.11, "grad_norm": 3.3568468480970686, "learning_rate": 9.825435548068862e-06, "loss": 0.6548, "step": 1369 }, { "epoch": 0.11, "grad_norm": 4.872095489295021, "learning_rate": 9.825088642629271e-06, "loss": 1.259, "step": 1370 }, { "epoch": 0.11, "grad_norm": 4.260165583117883, "learning_rate": 9.824741398971966e-06, "loss": 0.9074, "step": 1371 }, { "epoch": 0.11, "grad_norm": 5.568108760017305, "learning_rate": 9.824393817121288e-06, "loss": 1.2388, "step": 1372 }, { "epoch": 0.11, "grad_norm": 3.952084072267941, "learning_rate": 9.824045897101598e-06, "loss": 0.7787, "step": 1373 }, { "epoch": 0.11, "grad_norm": 6.423914169789909, "learning_rate": 9.823697638937283e-06, "loss": 1.3275, "step": 1374 }, { "epoch": 0.11, "grad_norm": 3.7116454839421893, "learning_rate": 9.82334904265276e-06, "loss": 0.7016, "step": 1375 }, { "epoch": 0.11, "grad_norm": 2.2828456033633255, "learning_rate": 9.823000108272458e-06, "loss": 0.397, "step": 1376 }, { "epoch": 0.11, "grad_norm": 3.5598140162103995, "learning_rate": 9.82265083582084e-06, "loss": 0.7259, "step": 1377 }, { "epoch": 0.11, "grad_norm": 4.4343896057203995, "learning_rate": 9.822301225322384e-06, "loss": 0.9835, "step": 1378 }, { "epoch": 0.11, "grad_norm": 3.0649876892148185, "learning_rate": 9.8219512768016e-06, "loss": 0.6065, "step": 1379 }, { "epoch": 0.11, "grad_norm": 2.373673238593863, "learning_rate": 9.821600990283018e-06, "loss": 0.3573, "step": 1380 }, { "epoch": 0.11, "grad_norm": 4.275665587049736, "learning_rate": 9.821250365791189e-06, "loss": 1.1575, "step": 1381 }, { "epoch": 0.11, "grad_norm": 4.8296355375812245, "learning_rate": 9.820899403350693e-06, "loss": 1.0401, "step": 1382 }, { "epoch": 0.11, "grad_norm": 4.274330476790679, "learning_rate": 9.820548102986126e-06, "loss": 0.7298, "step": 1383 }, { "epoch": 0.11, "grad_norm": 3.54174599316339, "learning_rate": 9.820196464722118e-06, "loss": 0.5905, "step": 1384 }, { "epoch": 0.11, "grad_norm": 4.140626710176374, "learning_rate": 9.819844488583316e-06, "loss": 0.9191, "step": 1385 }, { "epoch": 0.11, "grad_norm": 1.9979127187144647, "learning_rate": 9.819492174594391e-06, "loss": 0.4385, "step": 1386 }, { "epoch": 0.11, "grad_norm": 3.5767283557263294, "learning_rate": 9.819139522780038e-06, "loss": 0.6833, "step": 1387 }, { "epoch": 0.11, "grad_norm": 2.796622823599924, "learning_rate": 9.81878653316498e-06, "loss": 0.5222, "step": 1388 }, { "epoch": 0.11, "grad_norm": 5.207140533365846, "learning_rate": 9.818433205773957e-06, "loss": 1.5635, "step": 1389 }, { "epoch": 0.11, "grad_norm": 3.294209540551426, "learning_rate": 9.818079540631732e-06, "loss": 0.4936, "step": 1390 }, { "epoch": 0.11, "grad_norm": 3.614697231475761, "learning_rate": 9.817725537763105e-06, "loss": 0.6669, "step": 1391 }, { "epoch": 0.11, "grad_norm": 4.692084220618287, "learning_rate": 9.817371197192883e-06, "loss": 1.22, "step": 1392 }, { "epoch": 0.11, "grad_norm": 3.752787361806504, "learning_rate": 9.817016518945904e-06, "loss": 0.7487, "step": 1393 }, { "epoch": 0.11, "grad_norm": 4.355449957476081, "learning_rate": 9.816661503047032e-06, "loss": 0.8125, "step": 1394 }, { "epoch": 0.11, "grad_norm": 4.128535214061091, "learning_rate": 9.816306149521149e-06, "loss": 0.8633, "step": 1395 }, { "epoch": 0.11, "grad_norm": 3.5263385505708444, "learning_rate": 9.815950458393166e-06, "loss": 0.932, "step": 1396 }, { "epoch": 0.11, "grad_norm": 4.112181680433037, "learning_rate": 9.815594429688015e-06, "loss": 0.9579, "step": 1397 }, { "epoch": 0.11, "grad_norm": 6.645733663557989, "learning_rate": 9.815238063430655e-06, "loss": 1.6471, "step": 1398 }, { "epoch": 0.11, "grad_norm": 3.1645593081286645, "learning_rate": 9.81488135964606e-06, "loss": 0.5587, "step": 1399 }, { "epoch": 0.11, "grad_norm": 2.099895044129971, "learning_rate": 9.814524318359235e-06, "loss": 0.3865, "step": 1400 }, { "epoch": 0.11, "grad_norm": 3.3226922818944016, "learning_rate": 9.81416693959521e-06, "loss": 0.6881, "step": 1401 }, { "epoch": 0.11, "grad_norm": 4.234298859212859, "learning_rate": 9.813809223379035e-06, "loss": 0.7269, "step": 1402 }, { "epoch": 0.11, "grad_norm": 4.8633112567954715, "learning_rate": 9.813451169735781e-06, "loss": 1.3484, "step": 1403 }, { "epoch": 0.11, "grad_norm": 5.249634519658076, "learning_rate": 9.813092778690549e-06, "loss": 1.1644, "step": 1404 }, { "epoch": 0.11, "grad_norm": 1.9572073819275848, "learning_rate": 9.81273405026846e-06, "loss": 0.3299, "step": 1405 }, { "epoch": 0.11, "grad_norm": 3.362360907175296, "learning_rate": 9.81237498449466e-06, "loss": 0.374, "step": 1406 }, { "epoch": 0.12, "grad_norm": 3.593558326449794, "learning_rate": 9.812015581394316e-06, "loss": 0.8593, "step": 1407 }, { "epoch": 0.12, "grad_norm": 5.463677247987641, "learning_rate": 9.811655840992621e-06, "loss": 1.2366, "step": 1408 }, { "epoch": 0.12, "grad_norm": 4.32660737602931, "learning_rate": 9.811295763314793e-06, "loss": 0.9649, "step": 1409 }, { "epoch": 0.12, "grad_norm": 2.616667588532532, "learning_rate": 9.810935348386071e-06, "loss": 0.6619, "step": 1410 }, { "epoch": 0.12, "grad_norm": 3.5831618295972625, "learning_rate": 9.810574596231717e-06, "loss": 0.9357, "step": 1411 }, { "epoch": 0.12, "grad_norm": 3.107669813938388, "learning_rate": 9.810213506877021e-06, "loss": 0.4273, "step": 1412 }, { "epoch": 0.12, "grad_norm": 4.507060032810869, "learning_rate": 9.80985208034729e-06, "loss": 1.0523, "step": 1413 }, { "epoch": 0.12, "grad_norm": 4.778321036304939, "learning_rate": 9.809490316667864e-06, "loss": 1.1447, "step": 1414 }, { "epoch": 0.12, "grad_norm": 3.34623077029894, "learning_rate": 9.809128215864096e-06, "loss": 0.4395, "step": 1415 }, { "epoch": 0.12, "grad_norm": 5.176105641856664, "learning_rate": 9.80876577796137e-06, "loss": 1.2677, "step": 1416 }, { "epoch": 0.12, "grad_norm": 2.7614486871129627, "learning_rate": 9.808403002985089e-06, "loss": 0.5742, "step": 1417 }, { "epoch": 0.12, "grad_norm": 4.620567580821391, "learning_rate": 9.808039890960687e-06, "loss": 0.7539, "step": 1418 }, { "epoch": 0.12, "grad_norm": 2.6874968173664113, "learning_rate": 9.807676441913611e-06, "loss": 0.4365, "step": 1419 }, { "epoch": 0.12, "grad_norm": 4.541845640368986, "learning_rate": 9.80731265586934e-06, "loss": 0.9808, "step": 1420 }, { "epoch": 0.12, "grad_norm": 7.123859209612732, "learning_rate": 9.806948532853373e-06, "loss": 1.6505, "step": 1421 }, { "epoch": 0.12, "grad_norm": 4.068838188795062, "learning_rate": 9.806584072891234e-06, "loss": 0.8492, "step": 1422 }, { "epoch": 0.12, "grad_norm": 4.347877420451936, "learning_rate": 9.80621927600847e-06, "loss": 0.7774, "step": 1423 }, { "epoch": 0.12, "grad_norm": 5.895252251166327, "learning_rate": 9.805854142230652e-06, "loss": 1.3487, "step": 1424 }, { "epoch": 0.12, "grad_norm": 2.267175221265885, "learning_rate": 9.805488671583372e-06, "loss": 0.3279, "step": 1425 }, { "epoch": 0.12, "grad_norm": 5.513202645820931, "learning_rate": 9.80512286409225e-06, "loss": 0.8466, "step": 1426 }, { "epoch": 0.12, "grad_norm": 4.423363671659631, "learning_rate": 9.80475671978293e-06, "loss": 0.7481, "step": 1427 }, { "epoch": 0.12, "grad_norm": 5.215881653445727, "learning_rate": 9.804390238681072e-06, "loss": 1.2405, "step": 1428 }, { "epoch": 0.12, "grad_norm": 1.3735270370900747, "learning_rate": 9.804023420812368e-06, "loss": 0.2282, "step": 1429 }, { "epoch": 0.12, "grad_norm": 3.832715397250428, "learning_rate": 9.803656266202528e-06, "loss": 0.7381, "step": 1430 }, { "epoch": 0.12, "grad_norm": 4.34847388319156, "learning_rate": 9.80328877487729e-06, "loss": 0.8616, "step": 1431 }, { "epoch": 0.12, "grad_norm": 4.360354162722593, "learning_rate": 9.802920946862413e-06, "loss": 0.9632, "step": 1432 }, { "epoch": 0.12, "grad_norm": 4.954599355658937, "learning_rate": 9.802552782183682e-06, "loss": 1.3467, "step": 1433 }, { "epoch": 0.12, "grad_norm": 4.53958496323328, "learning_rate": 9.802184280866898e-06, "loss": 1.011, "step": 1434 }, { "epoch": 0.12, "grad_norm": 5.434045092648887, "learning_rate": 9.801815442937897e-06, "loss": 1.3907, "step": 1435 }, { "epoch": 0.12, "grad_norm": 3.6551014885191977, "learning_rate": 9.80144626842253e-06, "loss": 0.8479, "step": 1436 }, { "epoch": 0.12, "grad_norm": 3.6799219829730974, "learning_rate": 9.801076757346677e-06, "loss": 0.9056, "step": 1437 }, { "epoch": 0.12, "grad_norm": 5.763827428568593, "learning_rate": 9.800706909736237e-06, "loss": 1.3745, "step": 1438 }, { "epoch": 0.12, "grad_norm": 4.907388733876458, "learning_rate": 9.800336725617136e-06, "loss": 1.1375, "step": 1439 }, { "epoch": 0.12, "grad_norm": 5.367826033412014, "learning_rate": 9.79996620501532e-06, "loss": 1.2576, "step": 1440 }, { "epoch": 0.12, "grad_norm": 5.256085637336182, "learning_rate": 9.799595347956764e-06, "loss": 1.3381, "step": 1441 }, { "epoch": 0.12, "grad_norm": 3.1059074280871393, "learning_rate": 9.79922415446746e-06, "loss": 0.7916, "step": 1442 }, { "epoch": 0.12, "grad_norm": 4.329508448979159, "learning_rate": 9.798852624573432e-06, "loss": 0.9223, "step": 1443 }, { "epoch": 0.12, "grad_norm": 3.8386067688568892, "learning_rate": 9.79848075830072e-06, "loss": 0.5353, "step": 1444 }, { "epoch": 0.12, "grad_norm": 4.926444301146794, "learning_rate": 9.798108555675388e-06, "loss": 0.9793, "step": 1445 }, { "epoch": 0.12, "grad_norm": 5.056237294722397, "learning_rate": 9.797736016723527e-06, "loss": 0.8944, "step": 1446 }, { "epoch": 0.12, "grad_norm": 2.7066633631885098, "learning_rate": 9.797363141471252e-06, "loss": 0.5836, "step": 1447 }, { "epoch": 0.12, "grad_norm": 1.5653471821648113, "learning_rate": 9.796989929944699e-06, "loss": 0.2457, "step": 1448 }, { "epoch": 0.12, "grad_norm": 4.171565444891341, "learning_rate": 9.796616382170028e-06, "loss": 1.3466, "step": 1449 }, { "epoch": 0.12, "grad_norm": 5.8421857025707835, "learning_rate": 9.796242498173425e-06, "loss": 1.4725, "step": 1450 }, { "epoch": 0.12, "grad_norm": 1.1240727792271181, "learning_rate": 9.795868277981095e-06, "loss": 0.1594, "step": 1451 }, { "epoch": 0.12, "grad_norm": 3.2964525728779703, "learning_rate": 9.795493721619271e-06, "loss": 0.7353, "step": 1452 }, { "epoch": 0.12, "grad_norm": 3.073169173071545, "learning_rate": 9.795118829114205e-06, "loss": 0.5664, "step": 1453 }, { "epoch": 0.12, "grad_norm": 4.921321716329079, "learning_rate": 9.79474360049218e-06, "loss": 0.8704, "step": 1454 }, { "epoch": 0.12, "grad_norm": 4.326213766913278, "learning_rate": 9.794368035779496e-06, "loss": 0.6958, "step": 1455 }, { "epoch": 0.12, "grad_norm": 5.670985328015172, "learning_rate": 9.793992135002476e-06, "loss": 1.1243, "step": 1456 }, { "epoch": 0.12, "grad_norm": 5.921221306493759, "learning_rate": 9.793615898187473e-06, "loss": 1.4096, "step": 1457 }, { "epoch": 0.12, "grad_norm": 4.652832396557077, "learning_rate": 9.793239325360855e-06, "loss": 1.0637, "step": 1458 }, { "epoch": 0.12, "grad_norm": 3.7588457089114455, "learning_rate": 9.792862416549021e-06, "loss": 0.6599, "step": 1459 }, { "epoch": 0.12, "grad_norm": 2.13310023429643, "learning_rate": 9.792485171778389e-06, "loss": 0.3432, "step": 1460 }, { "epoch": 0.12, "grad_norm": 3.7093931763306576, "learning_rate": 9.792107591075406e-06, "loss": 0.8885, "step": 1461 }, { "epoch": 0.12, "grad_norm": 3.21724107443512, "learning_rate": 9.791729674466534e-06, "loss": 0.6405, "step": 1462 }, { "epoch": 0.12, "grad_norm": 5.805812423017493, "learning_rate": 9.791351421978269e-06, "loss": 1.5855, "step": 1463 }, { "epoch": 0.12, "grad_norm": 3.1123464014190056, "learning_rate": 9.790972833637118e-06, "loss": 0.3719, "step": 1464 }, { "epoch": 0.12, "grad_norm": 2.7958239629043624, "learning_rate": 9.790593909469623e-06, "loss": 0.4876, "step": 1465 }, { "epoch": 0.12, "grad_norm": 4.37017892442726, "learning_rate": 9.790214649502343e-06, "loss": 1.2543, "step": 1466 }, { "epoch": 0.12, "grad_norm": 3.9239535160362804, "learning_rate": 9.789835053761865e-06, "loss": 0.8073, "step": 1467 }, { "epoch": 0.12, "grad_norm": 4.555633739107286, "learning_rate": 9.789455122274793e-06, "loss": 1.0265, "step": 1468 }, { "epoch": 0.12, "grad_norm": 4.869738418739143, "learning_rate": 9.789074855067761e-06, "loss": 1.1461, "step": 1469 }, { "epoch": 0.12, "grad_norm": 3.8066477350594425, "learning_rate": 9.788694252167424e-06, "loss": 0.6731, "step": 1470 }, { "epoch": 0.12, "grad_norm": 2.9441755106990506, "learning_rate": 9.788313313600462e-06, "loss": 0.6184, "step": 1471 }, { "epoch": 0.12, "grad_norm": 3.9209845177925553, "learning_rate": 9.787932039393574e-06, "loss": 0.7527, "step": 1472 }, { "epoch": 0.12, "grad_norm": 3.5411521898176197, "learning_rate": 9.787550429573487e-06, "loss": 0.6581, "step": 1473 }, { "epoch": 0.12, "grad_norm": 4.588217832482472, "learning_rate": 9.78716848416695e-06, "loss": 1.0138, "step": 1474 }, { "epoch": 0.12, "grad_norm": 3.908534892628834, "learning_rate": 9.786786203200738e-06, "loss": 1.1612, "step": 1475 }, { "epoch": 0.12, "grad_norm": 3.6631338624314527, "learning_rate": 9.786403586701643e-06, "loss": 0.9183, "step": 1476 }, { "epoch": 0.12, "grad_norm": 5.24287728720431, "learning_rate": 9.786020634696489e-06, "loss": 1.4494, "step": 1477 }, { "epoch": 0.12, "grad_norm": 5.355388150801754, "learning_rate": 9.785637347212117e-06, "loss": 1.1414, "step": 1478 }, { "epoch": 0.12, "grad_norm": 5.574465500450339, "learning_rate": 9.785253724275394e-06, "loss": 0.948, "step": 1479 }, { "epoch": 0.12, "grad_norm": 3.8645555875140043, "learning_rate": 9.78486976591321e-06, "loss": 1.345, "step": 1480 }, { "epoch": 0.12, "grad_norm": 4.0871937132066, "learning_rate": 9.784485472152479e-06, "loss": 0.7189, "step": 1481 }, { "epoch": 0.12, "grad_norm": 4.754552849267925, "learning_rate": 9.784100843020139e-06, "loss": 1.1957, "step": 1482 }, { "epoch": 0.12, "grad_norm": 3.231759982859807, "learning_rate": 9.783715878543149e-06, "loss": 0.5092, "step": 1483 }, { "epoch": 0.12, "grad_norm": 3.9490285385860218, "learning_rate": 9.783330578748497e-06, "loss": 0.8589, "step": 1484 }, { "epoch": 0.12, "grad_norm": 3.2352180597670688, "learning_rate": 9.782944943663187e-06, "loss": 0.6635, "step": 1485 }, { "epoch": 0.12, "grad_norm": 2.184309454595811, "learning_rate": 9.782558973314254e-06, "loss": 0.41, "step": 1486 }, { "epoch": 0.12, "grad_norm": 4.461287240576972, "learning_rate": 9.782172667728748e-06, "loss": 1.5705, "step": 1487 }, { "epoch": 0.12, "grad_norm": 2.9641417501280753, "learning_rate": 9.781786026933752e-06, "loss": 0.3327, "step": 1488 }, { "epoch": 0.12, "grad_norm": 2.7825304552740193, "learning_rate": 9.781399050956364e-06, "loss": 0.5386, "step": 1489 }, { "epoch": 0.12, "grad_norm": 5.403847438502365, "learning_rate": 9.781011739823715e-06, "loss": 0.9521, "step": 1490 }, { "epoch": 0.12, "grad_norm": 2.746341743522744, "learning_rate": 9.780624093562944e-06, "loss": 0.8428, "step": 1491 }, { "epoch": 0.12, "grad_norm": 4.220237980809123, "learning_rate": 9.780236112201235e-06, "loss": 0.9644, "step": 1492 }, { "epoch": 0.12, "grad_norm": 1.9812539161163594, "learning_rate": 9.779847795765776e-06, "loss": 0.3911, "step": 1493 }, { "epoch": 0.12, "grad_norm": 3.3386624342418085, "learning_rate": 9.779459144283788e-06, "loss": 0.8402, "step": 1494 }, { "epoch": 0.12, "grad_norm": 3.488310991588463, "learning_rate": 9.779070157782515e-06, "loss": 0.7609, "step": 1495 }, { "epoch": 0.12, "grad_norm": 2.805387887932308, "learning_rate": 9.778680836289222e-06, "loss": 0.5685, "step": 1496 }, { "epoch": 0.12, "grad_norm": 3.6968205267244985, "learning_rate": 9.778291179831201e-06, "loss": 0.9227, "step": 1497 }, { "epoch": 0.12, "grad_norm": 3.1847305569872852, "learning_rate": 9.777901188435762e-06, "loss": 0.6676, "step": 1498 }, { "epoch": 0.12, "grad_norm": 4.209245828758046, "learning_rate": 9.777510862130242e-06, "loss": 1.0329, "step": 1499 }, { "epoch": 0.12, "grad_norm": 3.037052184090017, "learning_rate": 9.777120200942004e-06, "loss": 0.6037, "step": 1500 }, { "epoch": 0.12, "grad_norm": 2.672518759031155, "learning_rate": 9.77672920489843e-06, "loss": 0.5179, "step": 1501 }, { "epoch": 0.12, "grad_norm": 3.6466801675576104, "learning_rate": 9.776337874026926e-06, "loss": 0.7091, "step": 1502 }, { "epoch": 0.12, "grad_norm": 5.411916495301089, "learning_rate": 9.775946208354924e-06, "loss": 1.0544, "step": 1503 }, { "epoch": 0.12, "grad_norm": 4.060051829029946, "learning_rate": 9.775554207909879e-06, "loss": 0.9035, "step": 1504 }, { "epoch": 0.12, "grad_norm": 3.07841372268714, "learning_rate": 9.775161872719268e-06, "loss": 0.7718, "step": 1505 }, { "epoch": 0.12, "grad_norm": 3.4655063677047417, "learning_rate": 9.77476920281059e-06, "loss": 0.6952, "step": 1506 }, { "epoch": 0.12, "grad_norm": 4.008834167325511, "learning_rate": 9.77437619821137e-06, "loss": 0.4992, "step": 1507 }, { "epoch": 0.12, "grad_norm": 5.618330088193883, "learning_rate": 9.773982858949157e-06, "loss": 0.8979, "step": 1508 }, { "epoch": 0.12, "grad_norm": 4.409746682340359, "learning_rate": 9.773589185051522e-06, "loss": 1.0297, "step": 1509 }, { "epoch": 0.12, "grad_norm": 4.7310014040891994, "learning_rate": 9.77319517654606e-06, "loss": 1.2448, "step": 1510 }, { "epoch": 0.12, "grad_norm": 3.9619728725901817, "learning_rate": 9.77280083346039e-06, "loss": 0.5431, "step": 1511 }, { "epoch": 0.12, "grad_norm": 4.205388385519706, "learning_rate": 9.772406155822152e-06, "loss": 0.773, "step": 1512 }, { "epoch": 0.12, "grad_norm": 3.5967486073724833, "learning_rate": 9.772011143659013e-06, "loss": 0.805, "step": 1513 }, { "epoch": 0.12, "grad_norm": 3.3716225881025577, "learning_rate": 9.77161579699866e-06, "loss": 0.5813, "step": 1514 }, { "epoch": 0.12, "grad_norm": 2.0722565727923925, "learning_rate": 9.771220115868805e-06, "loss": 0.3726, "step": 1515 }, { "epoch": 0.12, "grad_norm": 5.1345323779270755, "learning_rate": 9.770824100297185e-06, "loss": 1.6538, "step": 1516 }, { "epoch": 0.12, "grad_norm": 4.337949991423968, "learning_rate": 9.770427750311557e-06, "loss": 1.219, "step": 1517 }, { "epoch": 0.12, "grad_norm": 4.0302519135261194, "learning_rate": 9.770031065939707e-06, "loss": 1.1707, "step": 1518 }, { "epoch": 0.12, "grad_norm": 4.807159466088738, "learning_rate": 9.769634047209437e-06, "loss": 1.3163, "step": 1519 }, { "epoch": 0.12, "grad_norm": 3.4439580054897303, "learning_rate": 9.769236694148579e-06, "loss": 0.4129, "step": 1520 }, { "epoch": 0.12, "grad_norm": 4.4429376768979445, "learning_rate": 9.768839006784983e-06, "loss": 0.9111, "step": 1521 }, { "epoch": 0.12, "grad_norm": 4.056315866869216, "learning_rate": 9.768440985146529e-06, "loss": 0.9465, "step": 1522 }, { "epoch": 0.12, "grad_norm": 2.043077745088097, "learning_rate": 9.768042629261112e-06, "loss": 0.4399, "step": 1523 }, { "epoch": 0.12, "grad_norm": 3.3192423340805197, "learning_rate": 9.767643939156658e-06, "loss": 0.5992, "step": 1524 }, { "epoch": 0.12, "grad_norm": 2.724102578486323, "learning_rate": 9.767244914861114e-06, "loss": 0.7851, "step": 1525 }, { "epoch": 0.12, "grad_norm": 3.960661209852135, "learning_rate": 9.766845556402447e-06, "loss": 0.853, "step": 1526 }, { "epoch": 0.12, "grad_norm": 3.132502240121899, "learning_rate": 9.766445863808652e-06, "loss": 0.8542, "step": 1527 }, { "epoch": 0.12, "grad_norm": 4.464818791787063, "learning_rate": 9.766045837107745e-06, "loss": 0.6607, "step": 1528 }, { "epoch": 0.12, "grad_norm": 2.9151827316498458, "learning_rate": 9.765645476327768e-06, "loss": 0.6962, "step": 1529 }, { "epoch": 0.13, "grad_norm": 5.397921765887368, "learning_rate": 9.765244781496783e-06, "loss": 1.2484, "step": 1530 }, { "epoch": 0.13, "grad_norm": 4.647934850478616, "learning_rate": 9.764843752642876e-06, "loss": 1.2478, "step": 1531 }, { "epoch": 0.13, "grad_norm": 2.3059946410621546, "learning_rate": 9.76444238979416e-06, "loss": 0.4285, "step": 1532 }, { "epoch": 0.13, "grad_norm": 3.8796632727172904, "learning_rate": 9.764040692978767e-06, "loss": 1.0721, "step": 1533 }, { "epoch": 0.13, "grad_norm": 4.532900000889822, "learning_rate": 9.763638662224854e-06, "loss": 0.8132, "step": 1534 }, { "epoch": 0.13, "grad_norm": 2.640399343376495, "learning_rate": 9.763236297560603e-06, "loss": 0.7806, "step": 1535 }, { "epoch": 0.13, "grad_norm": 3.1418755348948055, "learning_rate": 9.762833599014219e-06, "loss": 0.5318, "step": 1536 }, { "epoch": 0.13, "grad_norm": 4.586786253813165, "learning_rate": 9.762430566613925e-06, "loss": 0.9011, "step": 1537 }, { "epoch": 0.13, "grad_norm": 2.8280442642483803, "learning_rate": 9.762027200387974e-06, "loss": 0.5989, "step": 1538 }, { "epoch": 0.13, "grad_norm": 3.916761256288874, "learning_rate": 9.761623500364643e-06, "loss": 1.084, "step": 1539 }, { "epoch": 0.13, "grad_norm": 3.1449798089696324, "learning_rate": 9.761219466572227e-06, "loss": 0.8712, "step": 1540 }, { "epoch": 0.13, "grad_norm": 3.0647901636925643, "learning_rate": 9.760815099039045e-06, "loss": 0.6931, "step": 1541 }, { "epoch": 0.13, "grad_norm": 4.645838963488648, "learning_rate": 9.760410397793445e-06, "loss": 1.1038, "step": 1542 }, { "epoch": 0.13, "grad_norm": 4.045311760642244, "learning_rate": 9.760005362863794e-06, "loss": 0.8072, "step": 1543 }, { "epoch": 0.13, "grad_norm": 3.1799765839075955, "learning_rate": 9.759599994278481e-06, "loss": 0.6287, "step": 1544 }, { "epoch": 0.13, "grad_norm": 3.814868528193712, "learning_rate": 9.759194292065925e-06, "loss": 0.8318, "step": 1545 }, { "epoch": 0.13, "grad_norm": 5.554545532458709, "learning_rate": 9.758788256254559e-06, "loss": 1.5856, "step": 1546 }, { "epoch": 0.13, "grad_norm": 4.196075660470384, "learning_rate": 9.758381886872848e-06, "loss": 0.958, "step": 1547 }, { "epoch": 0.13, "grad_norm": 1.0660407285250322, "learning_rate": 9.757975183949275e-06, "loss": 0.1906, "step": 1548 }, { "epoch": 0.13, "grad_norm": 4.842620018989324, "learning_rate": 9.757568147512347e-06, "loss": 1.0968, "step": 1549 }, { "epoch": 0.13, "grad_norm": 4.776522241854018, "learning_rate": 9.757160777590597e-06, "loss": 0.9168, "step": 1550 }, { "epoch": 0.13, "grad_norm": 5.409198973311985, "learning_rate": 9.756753074212581e-06, "loss": 1.8172, "step": 1551 }, { "epoch": 0.13, "grad_norm": 3.1874379937760207, "learning_rate": 9.756345037406876e-06, "loss": 0.5296, "step": 1552 }, { "epoch": 0.13, "grad_norm": 4.191603881891276, "learning_rate": 9.755936667202084e-06, "loss": 1.0245, "step": 1553 }, { "epoch": 0.13, "grad_norm": 3.9092492108199655, "learning_rate": 9.755527963626828e-06, "loss": 0.8931, "step": 1554 }, { "epoch": 0.13, "grad_norm": 4.429272274630198, "learning_rate": 9.755118926709757e-06, "loss": 0.6388, "step": 1555 }, { "epoch": 0.13, "grad_norm": 2.9498809549310594, "learning_rate": 9.754709556479546e-06, "loss": 0.6189, "step": 1556 }, { "epoch": 0.13, "grad_norm": 4.36139121660757, "learning_rate": 9.754299852964886e-06, "loss": 0.8474, "step": 1557 }, { "epoch": 0.13, "grad_norm": 5.489779862984102, "learning_rate": 9.753889816194498e-06, "loss": 0.7664, "step": 1558 }, { "epoch": 0.13, "grad_norm": 4.718174282412993, "learning_rate": 9.753479446197122e-06, "loss": 0.9349, "step": 1559 }, { "epoch": 0.13, "grad_norm": 4.191140162879363, "learning_rate": 9.753068743001525e-06, "loss": 0.5427, "step": 1560 }, { "epoch": 0.13, "grad_norm": 3.3149836151747762, "learning_rate": 9.752657706636494e-06, "loss": 0.4822, "step": 1561 }, { "epoch": 0.13, "grad_norm": 6.986035668649894, "learning_rate": 9.752246337130841e-06, "loss": 0.9761, "step": 1562 }, { "epoch": 0.13, "grad_norm": 2.6889568491015656, "learning_rate": 9.751834634513404e-06, "loss": 0.6154, "step": 1563 }, { "epoch": 0.13, "grad_norm": 3.1432527726484345, "learning_rate": 9.751422598813037e-06, "loss": 0.5122, "step": 1564 }, { "epoch": 0.13, "grad_norm": 3.3942869359327195, "learning_rate": 9.751010230058624e-06, "loss": 0.7232, "step": 1565 }, { "epoch": 0.13, "grad_norm": 2.3100380532903135, "learning_rate": 9.750597528279072e-06, "loss": 0.5781, "step": 1566 }, { "epoch": 0.13, "grad_norm": 5.7308147712033355, "learning_rate": 9.750184493503306e-06, "loss": 1.463, "step": 1567 }, { "epoch": 0.13, "grad_norm": 2.8363396138495314, "learning_rate": 9.749771125760281e-06, "loss": 0.623, "step": 1568 }, { "epoch": 0.13, "grad_norm": 4.067053229913921, "learning_rate": 9.74935742507897e-06, "loss": 0.8514, "step": 1569 }, { "epoch": 0.13, "grad_norm": 4.582862830286583, "learning_rate": 9.748943391488374e-06, "loss": 1.1104, "step": 1570 }, { "epoch": 0.13, "grad_norm": 3.7410002095176718, "learning_rate": 9.748529025017512e-06, "loss": 0.649, "step": 1571 }, { "epoch": 0.13, "grad_norm": 3.59880905028096, "learning_rate": 9.748114325695433e-06, "loss": 0.8195, "step": 1572 }, { "epoch": 0.13, "grad_norm": 4.108543123341428, "learning_rate": 9.747699293551203e-06, "loss": 0.6387, "step": 1573 }, { "epoch": 0.13, "grad_norm": 4.757953511646958, "learning_rate": 9.747283928613915e-06, "loss": 1.1136, "step": 1574 }, { "epoch": 0.13, "grad_norm": 3.635770244508862, "learning_rate": 9.746868230912683e-06, "loss": 0.6198, "step": 1575 }, { "epoch": 0.13, "grad_norm": 2.8130446795283235, "learning_rate": 9.746452200476647e-06, "loss": 0.6654, "step": 1576 }, { "epoch": 0.13, "grad_norm": 4.229086650299244, "learning_rate": 9.74603583733497e-06, "loss": 0.8575, "step": 1577 }, { "epoch": 0.13, "grad_norm": 4.877197051217789, "learning_rate": 9.745619141516833e-06, "loss": 0.7622, "step": 1578 }, { "epoch": 0.13, "grad_norm": 4.832704704597798, "learning_rate": 9.745202113051448e-06, "loss": 1.442, "step": 1579 }, { "epoch": 0.13, "grad_norm": 4.83291036043407, "learning_rate": 9.744784751968046e-06, "loss": 1.1734, "step": 1580 }, { "epoch": 0.13, "grad_norm": 5.766668118698729, "learning_rate": 9.744367058295881e-06, "loss": 1.0921, "step": 1581 }, { "epoch": 0.13, "grad_norm": 3.7452921009672466, "learning_rate": 9.743949032064235e-06, "loss": 0.8583, "step": 1582 }, { "epoch": 0.13, "grad_norm": 3.5324161965107184, "learning_rate": 9.743530673302407e-06, "loss": 0.615, "step": 1583 }, { "epoch": 0.13, "grad_norm": 4.96485593458066, "learning_rate": 9.743111982039721e-06, "loss": 1.2621, "step": 1584 }, { "epoch": 0.13, "grad_norm": 3.404087488808251, "learning_rate": 9.742692958305528e-06, "loss": 0.6942, "step": 1585 }, { "epoch": 0.13, "grad_norm": 5.4955877818953915, "learning_rate": 9.742273602129201e-06, "loss": 1.2858, "step": 1586 }, { "epoch": 0.13, "grad_norm": 5.213928992509133, "learning_rate": 9.741853913540132e-06, "loss": 1.1965, "step": 1587 }, { "epoch": 0.13, "grad_norm": 3.190641501195003, "learning_rate": 9.74143389256774e-06, "loss": 0.7693, "step": 1588 }, { "epoch": 0.13, "grad_norm": 3.70693513925525, "learning_rate": 9.741013539241467e-06, "loss": 0.6004, "step": 1589 }, { "epoch": 0.13, "grad_norm": 3.8013575804530837, "learning_rate": 9.740592853590776e-06, "loss": 0.7414, "step": 1590 }, { "epoch": 0.13, "grad_norm": 3.125539338774779, "learning_rate": 9.740171835645158e-06, "loss": 0.7715, "step": 1591 }, { "epoch": 0.13, "grad_norm": 1.8688054107462326, "learning_rate": 9.739750485434126e-06, "loss": 0.4439, "step": 1592 }, { "epoch": 0.13, "grad_norm": 3.3397393731341016, "learning_rate": 9.739328802987209e-06, "loss": 0.6942, "step": 1593 }, { "epoch": 0.13, "grad_norm": 1.3694727764828176, "learning_rate": 9.738906788333971e-06, "loss": 0.2148, "step": 1594 }, { "epoch": 0.13, "grad_norm": 2.298421483587479, "learning_rate": 9.738484441503989e-06, "loss": 0.4418, "step": 1595 }, { "epoch": 0.13, "grad_norm": 5.0267036380367305, "learning_rate": 9.738061762526871e-06, "loss": 0.6679, "step": 1596 }, { "epoch": 0.13, "grad_norm": 4.346525534672905, "learning_rate": 9.737638751432244e-06, "loss": 0.7686, "step": 1597 }, { "epoch": 0.13, "grad_norm": 5.250728585271418, "learning_rate": 9.737215408249757e-06, "loss": 1.2046, "step": 1598 }, { "epoch": 0.13, "grad_norm": 4.722914124483091, "learning_rate": 9.736791733009087e-06, "loss": 0.9467, "step": 1599 }, { "epoch": 0.13, "grad_norm": 5.902253643134758, "learning_rate": 9.736367725739932e-06, "loss": 1.284, "step": 1600 }, { "epoch": 0.13, "grad_norm": 4.683735001274794, "learning_rate": 9.735943386472012e-06, "loss": 1.4378, "step": 1601 }, { "epoch": 0.13, "grad_norm": 4.20889208970552, "learning_rate": 9.73551871523507e-06, "loss": 0.7832, "step": 1602 }, { "epoch": 0.13, "grad_norm": 3.0226202867704597, "learning_rate": 9.735093712058876e-06, "loss": 0.5958, "step": 1603 }, { "epoch": 0.13, "grad_norm": 2.3969939040986987, "learning_rate": 9.73466837697322e-06, "loss": 0.3861, "step": 1604 }, { "epoch": 0.13, "grad_norm": 2.404473157535664, "learning_rate": 9.734242710007918e-06, "loss": 0.2924, "step": 1605 }, { "epoch": 0.13, "grad_norm": 2.237250360859097, "learning_rate": 9.733816711192803e-06, "loss": 0.3559, "step": 1606 }, { "epoch": 0.13, "grad_norm": 2.6456248141718715, "learning_rate": 9.733390380557739e-06, "loss": 0.4604, "step": 1607 }, { "epoch": 0.13, "grad_norm": 4.761247734859934, "learning_rate": 9.732963718132609e-06, "loss": 1.1434, "step": 1608 }, { "epoch": 0.13, "grad_norm": 3.707625528974655, "learning_rate": 9.73253672394732e-06, "loss": 0.8052, "step": 1609 }, { "epoch": 0.13, "grad_norm": 3.0816386035698535, "learning_rate": 9.732109398031804e-06, "loss": 0.5118, "step": 1610 }, { "epoch": 0.13, "grad_norm": 5.1755136938926745, "learning_rate": 9.731681740416012e-06, "loss": 1.3818, "step": 1611 }, { "epoch": 0.13, "grad_norm": 2.469392922393817, "learning_rate": 9.731253751129923e-06, "loss": 0.3968, "step": 1612 }, { "epoch": 0.13, "grad_norm": 3.3961943593224126, "learning_rate": 9.730825430203536e-06, "loss": 0.7839, "step": 1613 }, { "epoch": 0.13, "grad_norm": 4.042124620832441, "learning_rate": 9.730396777666875e-06, "loss": 0.8608, "step": 1614 }, { "epoch": 0.13, "grad_norm": 5.685312270195895, "learning_rate": 9.729967793549987e-06, "loss": 0.968, "step": 1615 }, { "epoch": 0.13, "grad_norm": 4.494774242668652, "learning_rate": 9.729538477882942e-06, "loss": 0.896, "step": 1616 }, { "epoch": 0.13, "grad_norm": 4.48705535782037, "learning_rate": 9.729108830695833e-06, "loss": 1.0858, "step": 1617 }, { "epoch": 0.13, "grad_norm": 2.079053530713893, "learning_rate": 9.728678852018775e-06, "loss": 0.3735, "step": 1618 }, { "epoch": 0.13, "grad_norm": 3.522198117801278, "learning_rate": 9.728248541881909e-06, "loss": 0.7978, "step": 1619 }, { "epoch": 0.13, "grad_norm": 3.6673248515297145, "learning_rate": 9.727817900315399e-06, "loss": 0.7038, "step": 1620 }, { "epoch": 0.13, "grad_norm": 2.3868342089022954, "learning_rate": 9.727386927349427e-06, "loss": 0.4164, "step": 1621 }, { "epoch": 0.13, "grad_norm": 4.096149179181914, "learning_rate": 9.726955623014207e-06, "loss": 0.7268, "step": 1622 }, { "epoch": 0.13, "grad_norm": 4.1892463351865405, "learning_rate": 9.72652398733997e-06, "loss": 0.999, "step": 1623 }, { "epoch": 0.13, "grad_norm": 4.111134640592722, "learning_rate": 9.72609202035697e-06, "loss": 0.8955, "step": 1624 }, { "epoch": 0.13, "grad_norm": 3.619508841593312, "learning_rate": 9.725659722095488e-06, "loss": 0.8145, "step": 1625 }, { "epoch": 0.13, "grad_norm": 3.284117540605217, "learning_rate": 9.725227092585824e-06, "loss": 0.7883, "step": 1626 }, { "epoch": 0.13, "grad_norm": 1.3053035878084933, "learning_rate": 9.724794131858309e-06, "loss": 0.2017, "step": 1627 }, { "epoch": 0.13, "grad_norm": 4.787383239731883, "learning_rate": 9.724360839943285e-06, "loss": 0.7844, "step": 1628 }, { "epoch": 0.13, "grad_norm": 2.566361724602944, "learning_rate": 9.723927216871127e-06, "loss": 0.623, "step": 1629 }, { "epoch": 0.13, "grad_norm": 1.4600819399127305, "learning_rate": 9.723493262672229e-06, "loss": 0.2211, "step": 1630 }, { "epoch": 0.13, "grad_norm": 5.008762197460526, "learning_rate": 9.723058977377012e-06, "loss": 1.1448, "step": 1631 }, { "epoch": 0.13, "grad_norm": 6.186521654143597, "learning_rate": 9.722624361015913e-06, "loss": 1.0704, "step": 1632 }, { "epoch": 0.13, "grad_norm": 5.255225640204374, "learning_rate": 9.7221894136194e-06, "loss": 1.6081, "step": 1633 }, { "epoch": 0.13, "grad_norm": 6.173596004247248, "learning_rate": 9.72175413521796e-06, "loss": 1.2399, "step": 1634 }, { "epoch": 0.13, "grad_norm": 2.385440698351537, "learning_rate": 9.721318525842105e-06, "loss": 0.467, "step": 1635 }, { "epoch": 0.13, "grad_norm": 4.450899861913443, "learning_rate": 9.720882585522368e-06, "loss": 0.7838, "step": 1636 }, { "epoch": 0.13, "grad_norm": 4.001750450563991, "learning_rate": 9.720446314289309e-06, "loss": 0.7508, "step": 1637 }, { "epoch": 0.13, "grad_norm": 2.809705500312421, "learning_rate": 9.720009712173504e-06, "loss": 0.7776, "step": 1638 }, { "epoch": 0.13, "grad_norm": 3.0593428034581707, "learning_rate": 9.719572779205562e-06, "loss": 0.4057, "step": 1639 }, { "epoch": 0.13, "grad_norm": 6.409124956173562, "learning_rate": 9.719135515416107e-06, "loss": 1.4807, "step": 1640 }, { "epoch": 0.13, "grad_norm": 2.9182037085275163, "learning_rate": 9.71869792083579e-06, "loss": 0.5006, "step": 1641 }, { "epoch": 0.13, "grad_norm": 1.8982368084936982, "learning_rate": 9.718259995495284e-06, "loss": 0.3411, "step": 1642 }, { "epoch": 0.13, "grad_norm": 3.6506744249878254, "learning_rate": 9.717821739425286e-06, "loss": 0.6956, "step": 1643 }, { "epoch": 0.13, "grad_norm": 3.7552476156750574, "learning_rate": 9.717383152656518e-06, "loss": 1.046, "step": 1644 }, { "epoch": 0.13, "grad_norm": 2.0608675911426966, "learning_rate": 9.71694423521972e-06, "loss": 0.4058, "step": 1645 }, { "epoch": 0.13, "grad_norm": 5.030380844163063, "learning_rate": 9.716504987145658e-06, "loss": 1.2439, "step": 1646 }, { "epoch": 0.13, "grad_norm": 3.742621411829504, "learning_rate": 9.716065408465124e-06, "loss": 0.8299, "step": 1647 }, { "epoch": 0.13, "grad_norm": 4.194510187995256, "learning_rate": 9.715625499208931e-06, "loss": 1.0898, "step": 1648 }, { "epoch": 0.13, "grad_norm": 3.2683273114298403, "learning_rate": 9.715185259407911e-06, "loss": 0.8357, "step": 1649 }, { "epoch": 0.13, "grad_norm": 5.4839620674400145, "learning_rate": 9.714744689092925e-06, "loss": 1.0901, "step": 1650 }, { "epoch": 0.13, "grad_norm": 3.131210682499025, "learning_rate": 9.714303788294854e-06, "loss": 0.8754, "step": 1651 }, { "epoch": 0.14, "grad_norm": 2.6719171196306264, "learning_rate": 9.713862557044607e-06, "loss": 0.4085, "step": 1652 }, { "epoch": 0.14, "grad_norm": 2.524196692690876, "learning_rate": 9.713420995373108e-06, "loss": 0.6716, "step": 1653 }, { "epoch": 0.14, "grad_norm": 1.3654936297880624, "learning_rate": 9.712979103311308e-06, "loss": 0.2285, "step": 1654 }, { "epoch": 0.14, "grad_norm": 4.852407882522216, "learning_rate": 9.712536880890186e-06, "loss": 1.2415, "step": 1655 }, { "epoch": 0.14, "grad_norm": 5.363295234003041, "learning_rate": 9.712094328140738e-06, "loss": 1.155, "step": 1656 }, { "epoch": 0.14, "grad_norm": 5.119060952663525, "learning_rate": 9.711651445093984e-06, "loss": 1.4858, "step": 1657 }, { "epoch": 0.14, "grad_norm": 6.07263896623256, "learning_rate": 9.711208231780969e-06, "loss": 1.5487, "step": 1658 }, { "epoch": 0.14, "grad_norm": 4.417118269313506, "learning_rate": 9.71076468823276e-06, "loss": 1.0961, "step": 1659 }, { "epoch": 0.14, "grad_norm": 4.240636340765624, "learning_rate": 9.710320814480448e-06, "loss": 1.0508, "step": 1660 }, { "epoch": 0.14, "grad_norm": 2.7871496737720873, "learning_rate": 9.709876610555148e-06, "loss": 0.7364, "step": 1661 }, { "epoch": 0.14, "grad_norm": 2.7911961749043086, "learning_rate": 9.709432076487991e-06, "loss": 0.3766, "step": 1662 }, { "epoch": 0.14, "grad_norm": 4.285382931590067, "learning_rate": 9.708987212310144e-06, "loss": 0.7827, "step": 1663 }, { "epoch": 0.14, "grad_norm": 1.770461234555658, "learning_rate": 9.708542018052786e-06, "loss": 0.378, "step": 1664 }, { "epoch": 0.14, "grad_norm": 2.6635570174102163, "learning_rate": 9.708096493747123e-06, "loss": 0.6873, "step": 1665 }, { "epoch": 0.14, "grad_norm": 6.309510039579954, "learning_rate": 9.70765063942439e-06, "loss": 1.3979, "step": 1666 }, { "epoch": 0.14, "grad_norm": 4.5408722162646455, "learning_rate": 9.707204455115829e-06, "loss": 0.9899, "step": 1667 }, { "epoch": 0.14, "grad_norm": 2.983879844016031, "learning_rate": 9.706757940852724e-06, "loss": 0.6502, "step": 1668 }, { "epoch": 0.14, "grad_norm": 3.611735659887384, "learning_rate": 9.706311096666372e-06, "loss": 0.8234, "step": 1669 }, { "epoch": 0.14, "grad_norm": 2.342774436992476, "learning_rate": 9.705863922588093e-06, "loss": 0.5715, "step": 1670 }, { "epoch": 0.14, "grad_norm": 4.347075365810961, "learning_rate": 9.705416418649233e-06, "loss": 0.685, "step": 1671 }, { "epoch": 0.14, "grad_norm": 4.097322467099301, "learning_rate": 9.704968584881163e-06, "loss": 0.957, "step": 1672 }, { "epoch": 0.14, "grad_norm": 2.284651690740073, "learning_rate": 9.704520421315268e-06, "loss": 0.412, "step": 1673 }, { "epoch": 0.14, "grad_norm": 3.9964240169463348, "learning_rate": 9.704071927982966e-06, "loss": 0.9967, "step": 1674 }, { "epoch": 0.14, "grad_norm": 0.8789215636177454, "learning_rate": 9.703623104915696e-06, "loss": 0.1869, "step": 1675 }, { "epoch": 0.14, "grad_norm": 3.3620035126073153, "learning_rate": 9.703173952144915e-06, "loss": 0.3587, "step": 1676 }, { "epoch": 0.14, "grad_norm": 3.0358459729420963, "learning_rate": 9.702724469702107e-06, "loss": 0.5392, "step": 1677 }, { "epoch": 0.14, "grad_norm": 4.952662389243149, "learning_rate": 9.702274657618781e-06, "loss": 1.0653, "step": 1678 }, { "epoch": 0.14, "grad_norm": 5.242149136790763, "learning_rate": 9.701824515926469e-06, "loss": 0.9788, "step": 1679 }, { "epoch": 0.14, "grad_norm": 4.708365030480298, "learning_rate": 9.701374044656716e-06, "loss": 0.793, "step": 1680 }, { "epoch": 0.14, "grad_norm": 3.764014596285324, "learning_rate": 9.700923243841106e-06, "loss": 1.0391, "step": 1681 }, { "epoch": 0.14, "grad_norm": 3.482193777342618, "learning_rate": 9.700472113511234e-06, "loss": 0.6562, "step": 1682 }, { "epoch": 0.14, "grad_norm": 5.047861768371731, "learning_rate": 9.700020653698722e-06, "loss": 1.6494, "step": 1683 }, { "epoch": 0.14, "grad_norm": 5.170079677755622, "learning_rate": 9.699568864435219e-06, "loss": 1.4652, "step": 1684 }, { "epoch": 0.14, "grad_norm": 4.076027692146769, "learning_rate": 9.69911674575239e-06, "loss": 0.9265, "step": 1685 }, { "epoch": 0.14, "grad_norm": 2.85699929880583, "learning_rate": 9.698664297681929e-06, "loss": 0.4537, "step": 1686 }, { "epoch": 0.14, "grad_norm": 2.0338154983394117, "learning_rate": 9.698211520255549e-06, "loss": 0.2813, "step": 1687 }, { "epoch": 0.14, "grad_norm": 2.875842754777342, "learning_rate": 9.697758413504987e-06, "loss": 0.3438, "step": 1688 }, { "epoch": 0.14, "grad_norm": 4.029530453781569, "learning_rate": 9.697304977462005e-06, "loss": 0.9498, "step": 1689 }, { "epoch": 0.14, "grad_norm": 3.754246632963344, "learning_rate": 9.696851212158388e-06, "loss": 1.0082, "step": 1690 }, { "epoch": 0.14, "grad_norm": 4.549144461758611, "learning_rate": 9.696397117625942e-06, "loss": 0.6994, "step": 1691 }, { "epoch": 0.14, "grad_norm": 3.530806771898034, "learning_rate": 9.695942693896495e-06, "loss": 0.6279, "step": 1692 }, { "epoch": 0.14, "grad_norm": 4.794353861007372, "learning_rate": 9.695487941001905e-06, "loss": 0.8341, "step": 1693 }, { "epoch": 0.14, "grad_norm": 2.96207377085134, "learning_rate": 9.695032858974042e-06, "loss": 0.755, "step": 1694 }, { "epoch": 0.14, "grad_norm": 3.6405072518293555, "learning_rate": 9.694577447844809e-06, "loss": 1.0264, "step": 1695 }, { "epoch": 0.14, "grad_norm": 3.4635637170023017, "learning_rate": 9.694121707646129e-06, "loss": 0.8758, "step": 1696 }, { "epoch": 0.14, "grad_norm": 5.624796096775306, "learning_rate": 9.693665638409946e-06, "loss": 1.5846, "step": 1697 }, { "epoch": 0.14, "grad_norm": 3.3551310744837854, "learning_rate": 9.693209240168227e-06, "loss": 0.6217, "step": 1698 }, { "epoch": 0.14, "grad_norm": 3.6274131018894096, "learning_rate": 9.692752512952968e-06, "loss": 0.9201, "step": 1699 }, { "epoch": 0.14, "grad_norm": 4.944204078548009, "learning_rate": 9.692295456796178e-06, "loss": 1.2045, "step": 1700 }, { "epoch": 0.14, "grad_norm": 4.46614524346582, "learning_rate": 9.691838071729899e-06, "loss": 1.0025, "step": 1701 }, { "epoch": 0.14, "grad_norm": 3.707777143872066, "learning_rate": 9.691380357786189e-06, "loss": 0.8076, "step": 1702 }, { "epoch": 0.14, "grad_norm": 4.450657216952367, "learning_rate": 9.690922314997132e-06, "loss": 0.9325, "step": 1703 }, { "epoch": 0.14, "grad_norm": 3.1716327050722235, "learning_rate": 9.69046394339484e-06, "loss": 0.663, "step": 1704 }, { "epoch": 0.14, "grad_norm": 2.7394523838000957, "learning_rate": 9.690005243011436e-06, "loss": 0.7823, "step": 1705 }, { "epoch": 0.14, "grad_norm": 3.4636145269488483, "learning_rate": 9.689546213879074e-06, "loss": 0.6237, "step": 1706 }, { "epoch": 0.14, "grad_norm": 3.690978397307599, "learning_rate": 9.689086856029931e-06, "loss": 0.7688, "step": 1707 }, { "epoch": 0.14, "grad_norm": 4.86435667426149, "learning_rate": 9.68862716949621e-06, "loss": 0.8823, "step": 1708 }, { "epoch": 0.14, "grad_norm": 6.120164203164415, "learning_rate": 9.688167154310127e-06, "loss": 1.3421, "step": 1709 }, { "epoch": 0.14, "grad_norm": 3.418183466759219, "learning_rate": 9.68770681050393e-06, "loss": 0.8123, "step": 1710 }, { "epoch": 0.14, "grad_norm": 2.6492709004623847, "learning_rate": 9.687246138109888e-06, "loss": 0.5541, "step": 1711 }, { "epoch": 0.14, "grad_norm": 2.604298506319751, "learning_rate": 9.686785137160287e-06, "loss": 0.5288, "step": 1712 }, { "epoch": 0.14, "grad_norm": 4.706652364518963, "learning_rate": 9.686323807687447e-06, "loss": 1.2949, "step": 1713 }, { "epoch": 0.14, "grad_norm": 3.5539009429313473, "learning_rate": 9.685862149723703e-06, "loss": 0.8843, "step": 1714 }, { "epoch": 0.14, "grad_norm": 3.9510422841886905, "learning_rate": 9.685400163301415e-06, "loss": 0.7165, "step": 1715 }, { "epoch": 0.14, "grad_norm": 1.4029927629261745, "learning_rate": 9.684937848452966e-06, "loss": 0.2267, "step": 1716 }, { "epoch": 0.14, "grad_norm": 2.8532270273503113, "learning_rate": 9.684475205210764e-06, "loss": 0.4159, "step": 1717 }, { "epoch": 0.14, "grad_norm": 3.683097077822043, "learning_rate": 9.684012233607237e-06, "loss": 0.7487, "step": 1718 }, { "epoch": 0.14, "grad_norm": 3.564688641828072, "learning_rate": 9.683548933674837e-06, "loss": 0.6828, "step": 1719 }, { "epoch": 0.14, "grad_norm": 6.669827615735416, "learning_rate": 9.683085305446038e-06, "loss": 1.514, "step": 1720 }, { "epoch": 0.14, "grad_norm": 5.390248451343604, "learning_rate": 9.682621348953343e-06, "loss": 0.8943, "step": 1721 }, { "epoch": 0.14, "grad_norm": 3.465928623189289, "learning_rate": 9.682157064229268e-06, "loss": 0.777, "step": 1722 }, { "epoch": 0.14, "grad_norm": 3.4567929831863875, "learning_rate": 9.68169245130636e-06, "loss": 0.9962, "step": 1723 }, { "epoch": 0.14, "grad_norm": 5.5170790192773405, "learning_rate": 9.681227510217186e-06, "loss": 0.7667, "step": 1724 }, { "epoch": 0.14, "grad_norm": 3.8671609039319743, "learning_rate": 9.680762240994336e-06, "loss": 0.8341, "step": 1725 }, { "epoch": 0.14, "grad_norm": 3.090436071858452, "learning_rate": 9.680296643670425e-06, "loss": 0.4865, "step": 1726 }, { "epoch": 0.14, "grad_norm": 3.830201007655817, "learning_rate": 9.679830718278087e-06, "loss": 0.884, "step": 1727 }, { "epoch": 0.14, "grad_norm": 6.80538832131407, "learning_rate": 9.679364464849983e-06, "loss": 1.8187, "step": 1728 }, { "epoch": 0.14, "grad_norm": 2.6906925076304757, "learning_rate": 9.678897883418794e-06, "loss": 0.4176, "step": 1729 }, { "epoch": 0.14, "grad_norm": 5.0343769521331945, "learning_rate": 9.678430974017226e-06, "loss": 1.252, "step": 1730 }, { "epoch": 0.14, "grad_norm": 2.8324493094817256, "learning_rate": 9.677963736678007e-06, "loss": 0.4308, "step": 1731 }, { "epoch": 0.14, "grad_norm": 4.4653845451836105, "learning_rate": 9.677496171433889e-06, "loss": 0.9912, "step": 1732 }, { "epoch": 0.14, "grad_norm": 3.9840595685560345, "learning_rate": 9.677028278317646e-06, "loss": 0.7753, "step": 1733 }, { "epoch": 0.14, "grad_norm": 2.8594742130196154, "learning_rate": 9.676560057362076e-06, "loss": 0.7411, "step": 1734 }, { "epoch": 0.14, "grad_norm": 3.5574909671573813, "learning_rate": 9.676091508599995e-06, "loss": 0.8224, "step": 1735 }, { "epoch": 0.14, "grad_norm": 4.469295454039877, "learning_rate": 9.675622632064255e-06, "loss": 1.0785, "step": 1736 }, { "epoch": 0.14, "grad_norm": 5.839232896804892, "learning_rate": 9.675153427787713e-06, "loss": 1.437, "step": 1737 }, { "epoch": 0.14, "grad_norm": 3.4990413210459708, "learning_rate": 9.674683895803262e-06, "loss": 0.7086, "step": 1738 }, { "epoch": 0.14, "grad_norm": 6.6088857858705525, "learning_rate": 9.674214036143817e-06, "loss": 1.5871, "step": 1739 }, { "epoch": 0.14, "grad_norm": 2.3806875521861675, "learning_rate": 9.673743848842309e-06, "loss": 0.291, "step": 1740 }, { "epoch": 0.14, "grad_norm": 4.384847468400944, "learning_rate": 9.673273333931696e-06, "loss": 1.3201, "step": 1741 }, { "epoch": 0.14, "grad_norm": 2.7623502944249165, "learning_rate": 9.672802491444962e-06, "loss": 0.4727, "step": 1742 }, { "epoch": 0.14, "grad_norm": 3.839881108249043, "learning_rate": 9.672331321415109e-06, "loss": 0.8002, "step": 1743 }, { "epoch": 0.14, "grad_norm": 4.267489571195822, "learning_rate": 9.671859823875166e-06, "loss": 0.6568, "step": 1744 }, { "epoch": 0.14, "grad_norm": 4.02003689875668, "learning_rate": 9.671387998858178e-06, "loss": 1.0033, "step": 1745 }, { "epoch": 0.14, "grad_norm": 3.334547483328481, "learning_rate": 9.670915846397224e-06, "loss": 1.0469, "step": 1746 }, { "epoch": 0.14, "grad_norm": 2.894229760592162, "learning_rate": 9.670443366525396e-06, "loss": 0.5112, "step": 1747 }, { "epoch": 0.14, "grad_norm": 5.335584294099776, "learning_rate": 9.669970559275814e-06, "loss": 1.3517, "step": 1748 }, { "epoch": 0.14, "grad_norm": 3.3717056903934015, "learning_rate": 9.66949742468162e-06, "loss": 0.7089, "step": 1749 }, { "epoch": 0.14, "grad_norm": 3.627215878970121, "learning_rate": 9.669023962775976e-06, "loss": 0.7062, "step": 1750 }, { "epoch": 0.14, "grad_norm": 4.289061741779872, "learning_rate": 9.668550173592075e-06, "loss": 0.8946, "step": 1751 }, { "epoch": 0.14, "grad_norm": 4.224035246700839, "learning_rate": 9.668076057163122e-06, "loss": 0.8385, "step": 1752 }, { "epoch": 0.14, "grad_norm": 3.429426490400277, "learning_rate": 9.667601613522355e-06, "loss": 0.8882, "step": 1753 }, { "epoch": 0.14, "grad_norm": 1.511297438492241, "learning_rate": 9.667126842703027e-06, "loss": 0.2118, "step": 1754 }, { "epoch": 0.14, "grad_norm": 4.094554570312316, "learning_rate": 9.666651744738417e-06, "loss": 0.7532, "step": 1755 }, { "epoch": 0.14, "grad_norm": 5.468245134556565, "learning_rate": 9.666176319661833e-06, "loss": 0.8349, "step": 1756 }, { "epoch": 0.14, "grad_norm": 3.9396583629732396, "learning_rate": 9.665700567506594e-06, "loss": 1.0694, "step": 1757 }, { "epoch": 0.14, "grad_norm": 4.172716573001196, "learning_rate": 9.66522448830605e-06, "loss": 1.1011, "step": 1758 }, { "epoch": 0.14, "grad_norm": 3.2948059164102803, "learning_rate": 9.664748082093573e-06, "loss": 0.84, "step": 1759 }, { "epoch": 0.14, "grad_norm": 3.6774929634029596, "learning_rate": 9.664271348902558e-06, "loss": 0.3558, "step": 1760 }, { "epoch": 0.14, "grad_norm": 4.293291501424655, "learning_rate": 9.66379428876642e-06, "loss": 1.0833, "step": 1761 }, { "epoch": 0.14, "grad_norm": 4.8630268243168615, "learning_rate": 9.663316901718599e-06, "loss": 1.419, "step": 1762 }, { "epoch": 0.14, "grad_norm": 3.144353130832779, "learning_rate": 9.662839187792556e-06, "loss": 0.5933, "step": 1763 }, { "epoch": 0.14, "grad_norm": 3.4750146714606323, "learning_rate": 9.66236114702178e-06, "loss": 0.888, "step": 1764 }, { "epoch": 0.14, "grad_norm": 3.391849057094597, "learning_rate": 9.661882779439778e-06, "loss": 0.5401, "step": 1765 }, { "epoch": 0.14, "grad_norm": 2.330429068652904, "learning_rate": 9.661404085080082e-06, "loss": 0.47, "step": 1766 }, { "epoch": 0.14, "grad_norm": 4.802510374591477, "learning_rate": 9.660925063976247e-06, "loss": 1.0848, "step": 1767 }, { "epoch": 0.14, "grad_norm": 3.8303554655188647, "learning_rate": 9.660445716161849e-06, "loss": 1.1645, "step": 1768 }, { "epoch": 0.14, "grad_norm": 5.332359385121991, "learning_rate": 9.659966041670488e-06, "loss": 1.3822, "step": 1769 }, { "epoch": 0.14, "grad_norm": 4.49826770270501, "learning_rate": 9.659486040535788e-06, "loss": 0.75, "step": 1770 }, { "epoch": 0.14, "grad_norm": 4.625414454564575, "learning_rate": 9.659005712791394e-06, "loss": 1.0039, "step": 1771 }, { "epoch": 0.14, "grad_norm": 3.5687891069349362, "learning_rate": 9.658525058470977e-06, "loss": 0.7928, "step": 1772 }, { "epoch": 0.14, "grad_norm": 3.0856531814552612, "learning_rate": 9.658044077608227e-06, "loss": 0.9806, "step": 1773 }, { "epoch": 0.14, "grad_norm": 2.0481731703439325, "learning_rate": 9.657562770236857e-06, "loss": 0.3284, "step": 1774 }, { "epoch": 0.15, "grad_norm": 3.477448597774813, "learning_rate": 9.65708113639061e-06, "loss": 0.684, "step": 1775 }, { "epoch": 0.15, "grad_norm": 5.786223392707992, "learning_rate": 9.656599176103241e-06, "loss": 1.5706, "step": 1776 }, { "epoch": 0.15, "grad_norm": 3.4299279077369706, "learning_rate": 9.656116889408536e-06, "loss": 0.9167, "step": 1777 }, { "epoch": 0.15, "grad_norm": 3.845132154284454, "learning_rate": 9.6556342763403e-06, "loss": 0.6764, "step": 1778 }, { "epoch": 0.15, "grad_norm": 5.990118489197662, "learning_rate": 9.655151336932362e-06, "loss": 1.2797, "step": 1779 }, { "epoch": 0.15, "grad_norm": 2.883998603403723, "learning_rate": 9.654668071218576e-06, "loss": 0.5424, "step": 1780 }, { "epoch": 0.15, "grad_norm": 4.976081723771014, "learning_rate": 9.654184479232815e-06, "loss": 1.2058, "step": 1781 }, { "epoch": 0.15, "grad_norm": 4.612574873209907, "learning_rate": 9.653700561008976e-06, "loss": 1.0735, "step": 1782 }, { "epoch": 0.15, "grad_norm": 3.153755087512503, "learning_rate": 9.653216316580983e-06, "loss": 0.9932, "step": 1783 }, { "epoch": 0.15, "grad_norm": 4.250713760253444, "learning_rate": 9.652731745982775e-06, "loss": 1.0884, "step": 1784 }, { "epoch": 0.15, "grad_norm": 3.5887055213448864, "learning_rate": 9.652246849248321e-06, "loss": 0.9022, "step": 1785 }, { "epoch": 0.15, "grad_norm": 4.760788781957178, "learning_rate": 9.651761626411608e-06, "loss": 1.2023, "step": 1786 }, { "epoch": 0.15, "grad_norm": 3.00386777303116, "learning_rate": 9.651276077506653e-06, "loss": 0.6427, "step": 1787 }, { "epoch": 0.15, "grad_norm": 4.734213588146607, "learning_rate": 9.650790202567484e-06, "loss": 1.0691, "step": 1788 }, { "epoch": 0.15, "grad_norm": 4.153949352710521, "learning_rate": 9.650304001628164e-06, "loss": 1.1805, "step": 1789 }, { "epoch": 0.15, "grad_norm": 4.59076553094134, "learning_rate": 9.649817474722772e-06, "loss": 1.0221, "step": 1790 }, { "epoch": 0.15, "grad_norm": 3.26331738370133, "learning_rate": 9.64933062188541e-06, "loss": 0.7572, "step": 1791 }, { "epoch": 0.15, "grad_norm": 3.920409775213098, "learning_rate": 9.648843443150203e-06, "loss": 0.727, "step": 1792 }, { "epoch": 0.15, "grad_norm": 4.005900979192892, "learning_rate": 9.648355938551307e-06, "loss": 0.5756, "step": 1793 }, { "epoch": 0.15, "grad_norm": 3.8375256998421667, "learning_rate": 9.647868108122887e-06, "loss": 0.9936, "step": 1794 }, { "epoch": 0.15, "grad_norm": 3.9040876808362346, "learning_rate": 9.64737995189914e-06, "loss": 0.7834, "step": 1795 }, { "epoch": 0.15, "grad_norm": 4.041394694735733, "learning_rate": 9.646891469914285e-06, "loss": 0.8196, "step": 1796 }, { "epoch": 0.15, "grad_norm": 4.082808833427863, "learning_rate": 9.646402662202562e-06, "loss": 0.8898, "step": 1797 }, { "epoch": 0.15, "grad_norm": 1.8436244812636793, "learning_rate": 9.645913528798231e-06, "loss": 0.4136, "step": 1798 }, { "epoch": 0.15, "grad_norm": 4.493558306583951, "learning_rate": 9.645424069735582e-06, "loss": 1.0796, "step": 1799 }, { "epoch": 0.15, "grad_norm": 4.195389897526096, "learning_rate": 9.644934285048924e-06, "loss": 0.8139, "step": 1800 }, { "epoch": 0.15, "grad_norm": 4.203540340494399, "learning_rate": 9.644444174772586e-06, "loss": 0.9193, "step": 1801 }, { "epoch": 0.15, "grad_norm": 3.9164410427299736, "learning_rate": 9.643953738940926e-06, "loss": 0.6488, "step": 1802 }, { "epoch": 0.15, "grad_norm": 3.565239558464744, "learning_rate": 9.643462977588315e-06, "loss": 0.9911, "step": 1803 }, { "epoch": 0.15, "grad_norm": 3.6880077819346773, "learning_rate": 9.642971890749163e-06, "loss": 0.8051, "step": 1804 }, { "epoch": 0.15, "grad_norm": 4.401719903746576, "learning_rate": 9.642480478457883e-06, "loss": 0.7522, "step": 1805 }, { "epoch": 0.15, "grad_norm": 4.275589998252871, "learning_rate": 9.64198874074893e-06, "loss": 0.9793, "step": 1806 }, { "epoch": 0.15, "grad_norm": 5.114652840279528, "learning_rate": 9.641496677656766e-06, "loss": 0.9217, "step": 1807 }, { "epoch": 0.15, "grad_norm": 3.0759207295978834, "learning_rate": 9.641004289215884e-06, "loss": 0.4599, "step": 1808 }, { "epoch": 0.15, "grad_norm": 4.237292245483329, "learning_rate": 9.6405115754608e-06, "loss": 0.6787, "step": 1809 }, { "epoch": 0.15, "grad_norm": 3.9325714220187704, "learning_rate": 9.64001853642605e-06, "loss": 0.708, "step": 1810 }, { "epoch": 0.15, "grad_norm": 3.4734493440378533, "learning_rate": 9.639525172146194e-06, "loss": 0.8007, "step": 1811 }, { "epoch": 0.15, "grad_norm": 1.3238979248148512, "learning_rate": 9.639031482655814e-06, "loss": 0.1702, "step": 1812 }, { "epoch": 0.15, "grad_norm": 4.147979100312514, "learning_rate": 9.638537467989517e-06, "loss": 0.832, "step": 1813 }, { "epoch": 0.15, "grad_norm": 3.5306368535026618, "learning_rate": 9.63804312818193e-06, "loss": 0.7108, "step": 1814 }, { "epoch": 0.15, "grad_norm": 3.354011197916151, "learning_rate": 9.637548463267705e-06, "loss": 0.6388, "step": 1815 }, { "epoch": 0.15, "grad_norm": 3.3059057159340535, "learning_rate": 9.637053473281517e-06, "loss": 0.5239, "step": 1816 }, { "epoch": 0.15, "grad_norm": 3.2267324783997355, "learning_rate": 9.636558158258057e-06, "loss": 0.6933, "step": 1817 }, { "epoch": 0.15, "grad_norm": 1.7180296092373988, "learning_rate": 9.636062518232052e-06, "loss": 0.3191, "step": 1818 }, { "epoch": 0.15, "grad_norm": 2.910215876952668, "learning_rate": 9.63556655323824e-06, "loss": 0.6652, "step": 1819 }, { "epoch": 0.15, "grad_norm": 2.5030198200823057, "learning_rate": 9.635070263311386e-06, "loss": 0.5872, "step": 1820 }, { "epoch": 0.15, "grad_norm": 4.245126438609335, "learning_rate": 9.634573648486278e-06, "loss": 0.8985, "step": 1821 }, { "epoch": 0.15, "grad_norm": 4.960095535317417, "learning_rate": 9.634076708797728e-06, "loss": 0.6347, "step": 1822 }, { "epoch": 0.15, "grad_norm": 4.189854565011895, "learning_rate": 9.633579444280568e-06, "loss": 0.7024, "step": 1823 }, { "epoch": 0.15, "grad_norm": 4.554196019272498, "learning_rate": 9.633081854969655e-06, "loss": 0.7781, "step": 1824 }, { "epoch": 0.15, "grad_norm": 4.304378751681649, "learning_rate": 9.632583940899866e-06, "loss": 1.4871, "step": 1825 }, { "epoch": 0.15, "grad_norm": 3.481617498792958, "learning_rate": 9.632085702106104e-06, "loss": 0.6789, "step": 1826 }, { "epoch": 0.15, "grad_norm": 2.4928801318769964, "learning_rate": 9.631587138623295e-06, "loss": 0.4831, "step": 1827 }, { "epoch": 0.15, "grad_norm": 3.0021844643847864, "learning_rate": 9.631088250486383e-06, "loss": 0.4137, "step": 1828 }, { "epoch": 0.15, "grad_norm": 3.4877779902389694, "learning_rate": 9.630589037730338e-06, "loss": 0.4901, "step": 1829 }, { "epoch": 0.15, "grad_norm": 3.6749886358582238, "learning_rate": 9.630089500390154e-06, "loss": 0.9204, "step": 1830 }, { "epoch": 0.15, "grad_norm": 4.7402582348689775, "learning_rate": 9.629589638500849e-06, "loss": 1.2322, "step": 1831 }, { "epoch": 0.15, "grad_norm": 4.4426550739107515, "learning_rate": 9.629089452097455e-06, "loss": 0.7329, "step": 1832 }, { "epoch": 0.15, "grad_norm": 2.6734999505376194, "learning_rate": 9.628588941215037e-06, "loss": 0.7417, "step": 1833 }, { "epoch": 0.15, "grad_norm": 4.942397568879872, "learning_rate": 9.62808810588868e-06, "loss": 0.9746, "step": 1834 }, { "epoch": 0.15, "grad_norm": 3.175304781579661, "learning_rate": 9.627586946153487e-06, "loss": 0.5455, "step": 1835 }, { "epoch": 0.15, "grad_norm": 3.486209116943911, "learning_rate": 9.62708546204459e-06, "loss": 0.4457, "step": 1836 }, { "epoch": 0.15, "grad_norm": 3.699947940635257, "learning_rate": 9.626583653597136e-06, "loss": 0.7648, "step": 1837 }, { "epoch": 0.15, "grad_norm": 3.563861331354613, "learning_rate": 9.626081520846304e-06, "loss": 0.9206, "step": 1838 }, { "epoch": 0.15, "grad_norm": 3.9746637208939366, "learning_rate": 9.625579063827291e-06, "loss": 0.9338, "step": 1839 }, { "epoch": 0.15, "grad_norm": 2.993335491725338, "learning_rate": 9.625076282575317e-06, "loss": 0.7197, "step": 1840 }, { "epoch": 0.15, "grad_norm": 3.7751650590973203, "learning_rate": 9.624573177125623e-06, "loss": 0.6854, "step": 1841 }, { "epoch": 0.15, "grad_norm": 3.6287066239927293, "learning_rate": 9.624069747513474e-06, "loss": 0.8383, "step": 1842 }, { "epoch": 0.15, "grad_norm": 5.466341283669408, "learning_rate": 9.623565993774161e-06, "loss": 1.379, "step": 1843 }, { "epoch": 0.15, "grad_norm": 3.9565044600451404, "learning_rate": 9.623061915942994e-06, "loss": 0.8075, "step": 1844 }, { "epoch": 0.15, "grad_norm": 4.353500384494615, "learning_rate": 9.622557514055306e-06, "loss": 0.8647, "step": 1845 }, { "epoch": 0.15, "grad_norm": 2.7802807636965956, "learning_rate": 9.622052788146454e-06, "loss": 0.7, "step": 1846 }, { "epoch": 0.15, "grad_norm": 3.7243261490814894, "learning_rate": 9.621547738251816e-06, "loss": 0.6569, "step": 1847 }, { "epoch": 0.15, "grad_norm": 3.5979273143338655, "learning_rate": 9.621042364406796e-06, "loss": 0.6535, "step": 1848 }, { "epoch": 0.15, "grad_norm": 3.4155454455842635, "learning_rate": 9.620536666646816e-06, "loss": 0.5346, "step": 1849 }, { "epoch": 0.15, "grad_norm": 5.346796067656894, "learning_rate": 9.620030645007324e-06, "loss": 1.1118, "step": 1850 }, { "epoch": 0.15, "grad_norm": 2.065844455634222, "learning_rate": 9.61952429952379e-06, "loss": 0.4093, "step": 1851 }, { "epoch": 0.15, "grad_norm": 2.0422151702096683, "learning_rate": 9.619017630231709e-06, "loss": 0.3572, "step": 1852 }, { "epoch": 0.15, "grad_norm": 3.384252750841941, "learning_rate": 9.618510637166591e-06, "loss": 0.6136, "step": 1853 }, { "epoch": 0.15, "grad_norm": 5.710009165195783, "learning_rate": 9.61800332036398e-06, "loss": 0.9828, "step": 1854 }, { "epoch": 0.15, "grad_norm": 2.274612438483947, "learning_rate": 9.617495679859432e-06, "loss": 0.3935, "step": 1855 }, { "epoch": 0.15, "grad_norm": 4.685079377159412, "learning_rate": 9.616987715688534e-06, "loss": 1.2142, "step": 1856 }, { "epoch": 0.15, "grad_norm": 3.7327329404216423, "learning_rate": 9.616479427886889e-06, "loss": 1.0473, "step": 1857 }, { "epoch": 0.15, "grad_norm": 4.633754836253732, "learning_rate": 9.615970816490127e-06, "loss": 1.088, "step": 1858 }, { "epoch": 0.15, "grad_norm": 2.6267774185656303, "learning_rate": 9.6154618815339e-06, "loss": 0.3738, "step": 1859 }, { "epoch": 0.15, "grad_norm": 2.683276366996444, "learning_rate": 9.61495262305388e-06, "loss": 0.7404, "step": 1860 }, { "epoch": 0.15, "grad_norm": 2.4397339031584555, "learning_rate": 9.614443041085768e-06, "loss": 0.5179, "step": 1861 }, { "epoch": 0.15, "grad_norm": 4.080227084199175, "learning_rate": 9.613933135665281e-06, "loss": 0.8409, "step": 1862 }, { "epoch": 0.15, "grad_norm": 2.720888044583893, "learning_rate": 9.613422906828161e-06, "loss": 0.5713, "step": 1863 }, { "epoch": 0.15, "grad_norm": 4.605411201655714, "learning_rate": 9.61291235461017e-06, "loss": 1.1279, "step": 1864 }, { "epoch": 0.15, "grad_norm": 3.6743922503335473, "learning_rate": 9.612401479047102e-06, "loss": 0.5532, "step": 1865 }, { "epoch": 0.15, "grad_norm": 2.458010962051488, "learning_rate": 9.611890280174761e-06, "loss": 0.4904, "step": 1866 }, { "epoch": 0.15, "grad_norm": 3.590096886998366, "learning_rate": 9.611378758028984e-06, "loss": 0.5157, "step": 1867 }, { "epoch": 0.15, "grad_norm": 4.786726498871613, "learning_rate": 9.610866912645624e-06, "loss": 0.9906, "step": 1868 }, { "epoch": 0.15, "grad_norm": 2.520165208333526, "learning_rate": 9.61035474406056e-06, "loss": 0.49, "step": 1869 }, { "epoch": 0.15, "grad_norm": 3.2529485159125437, "learning_rate": 9.609842252309694e-06, "loss": 0.9277, "step": 1870 }, { "epoch": 0.15, "grad_norm": 2.702654732411725, "learning_rate": 9.609329437428946e-06, "loss": 0.6127, "step": 1871 }, { "epoch": 0.15, "grad_norm": 4.388100944422168, "learning_rate": 9.608816299454267e-06, "loss": 0.4555, "step": 1872 }, { "epoch": 0.15, "grad_norm": 4.987626503763048, "learning_rate": 9.608302838421622e-06, "loss": 1.1882, "step": 1873 }, { "epoch": 0.15, "grad_norm": 4.92680809173823, "learning_rate": 9.607789054367e-06, "loss": 1.6455, "step": 1874 }, { "epoch": 0.15, "grad_norm": 2.933180856469748, "learning_rate": 9.607274947326423e-06, "loss": 0.2325, "step": 1875 }, { "epoch": 0.15, "grad_norm": 3.7454435218935003, "learning_rate": 9.606760517335923e-06, "loss": 0.6886, "step": 1876 }, { "epoch": 0.15, "grad_norm": 3.7601473733544006, "learning_rate": 9.606245764431557e-06, "loss": 1.0919, "step": 1877 }, { "epoch": 0.15, "grad_norm": 1.963578919378046, "learning_rate": 9.605730688649411e-06, "loss": 0.4018, "step": 1878 }, { "epoch": 0.15, "grad_norm": 4.094419034446812, "learning_rate": 9.605215290025587e-06, "loss": 0.8573, "step": 1879 }, { "epoch": 0.15, "grad_norm": 4.232518157536914, "learning_rate": 9.604699568596211e-06, "loss": 1.1854, "step": 1880 }, { "epoch": 0.15, "grad_norm": 5.330752275162666, "learning_rate": 9.604183524397439e-06, "loss": 1.1544, "step": 1881 }, { "epoch": 0.15, "grad_norm": 5.738236543702836, "learning_rate": 9.603667157465436e-06, "loss": 1.2742, "step": 1882 }, { "epoch": 0.15, "grad_norm": 4.500833585139346, "learning_rate": 9.603150467836403e-06, "loss": 1.0557, "step": 1883 }, { "epoch": 0.15, "grad_norm": 4.348349354005987, "learning_rate": 9.602633455546556e-06, "loss": 0.8161, "step": 1884 }, { "epoch": 0.15, "grad_norm": 3.345114650968165, "learning_rate": 9.602116120632133e-06, "loss": 0.9003, "step": 1885 }, { "epoch": 0.15, "grad_norm": 4.535743444943094, "learning_rate": 9.601598463129398e-06, "loss": 1.003, "step": 1886 }, { "epoch": 0.15, "grad_norm": 3.9347754032019444, "learning_rate": 9.601080483074637e-06, "loss": 0.7201, "step": 1887 }, { "epoch": 0.15, "grad_norm": 4.512700028513524, "learning_rate": 9.600562180504158e-06, "loss": 0.6555, "step": 1888 }, { "epoch": 0.15, "grad_norm": 4.831953765720359, "learning_rate": 9.600043555454292e-06, "loss": 1.2459, "step": 1889 }, { "epoch": 0.15, "grad_norm": 3.908436927960402, "learning_rate": 9.599524607961394e-06, "loss": 1.0104, "step": 1890 }, { "epoch": 0.15, "grad_norm": 5.103109745089075, "learning_rate": 9.599005338061835e-06, "loss": 1.1343, "step": 1891 }, { "epoch": 0.15, "grad_norm": 4.718322584210091, "learning_rate": 9.598485745792019e-06, "loss": 0.8079, "step": 1892 }, { "epoch": 0.15, "grad_norm": 4.160552069027906, "learning_rate": 9.597965831188365e-06, "loss": 0.8904, "step": 1893 }, { "epoch": 0.15, "grad_norm": 4.893236221583511, "learning_rate": 9.597445594287315e-06, "loss": 0.8749, "step": 1894 }, { "epoch": 0.15, "grad_norm": 3.8989511257441816, "learning_rate": 9.596925035125338e-06, "loss": 0.8513, "step": 1895 }, { "epoch": 0.15, "grad_norm": 5.502391681117759, "learning_rate": 9.596404153738922e-06, "loss": 1.2835, "step": 1896 }, { "epoch": 0.16, "grad_norm": 1.5096032919902738, "learning_rate": 9.595882950164579e-06, "loss": 0.2231, "step": 1897 }, { "epoch": 0.16, "grad_norm": 3.3940828284046365, "learning_rate": 9.595361424438841e-06, "loss": 0.8311, "step": 1898 }, { "epoch": 0.16, "grad_norm": 3.1565144371734566, "learning_rate": 9.594839576598267e-06, "loss": 0.7746, "step": 1899 }, { "epoch": 0.16, "grad_norm": 4.861681899339718, "learning_rate": 9.594317406679436e-06, "loss": 1.4082, "step": 1900 }, { "epoch": 0.16, "grad_norm": 4.3701934090314865, "learning_rate": 9.593794914718948e-06, "loss": 1.0056, "step": 1901 }, { "epoch": 0.16, "grad_norm": 1.4361844931507608, "learning_rate": 9.593272100753431e-06, "loss": 0.183, "step": 1902 }, { "epoch": 0.16, "grad_norm": 5.341261409056521, "learning_rate": 9.592748964819528e-06, "loss": 0.9966, "step": 1903 }, { "epoch": 0.16, "grad_norm": 4.246802133865648, "learning_rate": 9.592225506953911e-06, "loss": 0.8687, "step": 1904 }, { "epoch": 0.16, "grad_norm": 2.536665860148006, "learning_rate": 9.591701727193272e-06, "loss": 0.4399, "step": 1905 }, { "epoch": 0.16, "grad_norm": 3.3123258067487007, "learning_rate": 9.591177625574322e-06, "loss": 0.4896, "step": 1906 }, { "epoch": 0.16, "grad_norm": 4.273373552745985, "learning_rate": 9.590653202133804e-06, "loss": 0.9065, "step": 1907 }, { "epoch": 0.16, "grad_norm": 5.014798967567046, "learning_rate": 9.590128456908476e-06, "loss": 0.96, "step": 1908 }, { "epoch": 0.16, "grad_norm": 2.4186995200654637, "learning_rate": 9.589603389935118e-06, "loss": 0.5375, "step": 1909 }, { "epoch": 0.16, "grad_norm": 3.0198594591627925, "learning_rate": 9.589078001250537e-06, "loss": 0.6707, "step": 1910 }, { "epoch": 0.16, "grad_norm": 4.1364010070572155, "learning_rate": 9.588552290891557e-06, "loss": 1.1784, "step": 1911 }, { "epoch": 0.16, "grad_norm": 5.158358601291325, "learning_rate": 9.588026258895034e-06, "loss": 0.9459, "step": 1912 }, { "epoch": 0.16, "grad_norm": 4.444787446353252, "learning_rate": 9.587499905297836e-06, "loss": 1.0501, "step": 1913 }, { "epoch": 0.16, "grad_norm": 3.357310424227119, "learning_rate": 9.58697323013686e-06, "loss": 0.906, "step": 1914 }, { "epoch": 0.16, "grad_norm": 2.340254000033742, "learning_rate": 9.586446233449024e-06, "loss": 0.539, "step": 1915 }, { "epoch": 0.16, "grad_norm": 1.3421768612808074, "learning_rate": 9.585918915271267e-06, "loss": 0.1963, "step": 1916 }, { "epoch": 0.16, "grad_norm": 5.200976251872486, "learning_rate": 9.58539127564055e-06, "loss": 0.9611, "step": 1917 }, { "epoch": 0.16, "grad_norm": 5.14363328588231, "learning_rate": 9.584863314593862e-06, "loss": 1.1128, "step": 1918 }, { "epoch": 0.16, "grad_norm": 4.767959520821083, "learning_rate": 9.584335032168209e-06, "loss": 1.2102, "step": 1919 }, { "epoch": 0.16, "grad_norm": 4.0603362286037, "learning_rate": 9.58380642840062e-06, "loss": 0.8753, "step": 1920 }, { "epoch": 0.16, "grad_norm": 2.784097276200648, "learning_rate": 9.583277503328152e-06, "loss": 0.6181, "step": 1921 }, { "epoch": 0.16, "grad_norm": 3.3628590333982915, "learning_rate": 9.582748256987875e-06, "loss": 0.3996, "step": 1922 }, { "epoch": 0.16, "grad_norm": 3.1992436604635115, "learning_rate": 9.58221868941689e-06, "loss": 0.8356, "step": 1923 }, { "epoch": 0.16, "grad_norm": 2.5690561422258256, "learning_rate": 9.581688800652317e-06, "loss": 0.5881, "step": 1924 }, { "epoch": 0.16, "grad_norm": 3.9962006736267157, "learning_rate": 9.581158590731298e-06, "loss": 0.6964, "step": 1925 }, { "epoch": 0.16, "grad_norm": 4.838643693846644, "learning_rate": 9.580628059691e-06, "loss": 1.1755, "step": 1926 }, { "epoch": 0.16, "grad_norm": 5.645525539871668, "learning_rate": 9.58009720756861e-06, "loss": 1.3661, "step": 1927 }, { "epoch": 0.16, "grad_norm": 5.369531010137947, "learning_rate": 9.57956603440134e-06, "loss": 1.1669, "step": 1928 }, { "epoch": 0.16, "grad_norm": 2.0261167417220345, "learning_rate": 9.579034540226417e-06, "loss": 0.4221, "step": 1929 }, { "epoch": 0.16, "grad_norm": 2.805961619998852, "learning_rate": 9.578502725081105e-06, "loss": 0.4036, "step": 1930 }, { "epoch": 0.16, "grad_norm": 4.7343364076028704, "learning_rate": 9.577970589002674e-06, "loss": 0.8297, "step": 1931 }, { "epoch": 0.16, "grad_norm": 3.115812274867747, "learning_rate": 9.577438132028431e-06, "loss": 0.5971, "step": 1932 }, { "epoch": 0.16, "grad_norm": 4.013164357851665, "learning_rate": 9.576905354195695e-06, "loss": 0.7369, "step": 1933 }, { "epoch": 0.16, "grad_norm": 5.044032284551135, "learning_rate": 9.576372255541812e-06, "loss": 1.0083, "step": 1934 }, { "epoch": 0.16, "grad_norm": 4.314036059653204, "learning_rate": 9.575838836104152e-06, "loss": 1.1176, "step": 1935 }, { "epoch": 0.16, "grad_norm": 3.9222150730303875, "learning_rate": 9.575305095920101e-06, "loss": 0.8157, "step": 1936 }, { "epoch": 0.16, "grad_norm": 3.5652621046635025, "learning_rate": 9.574771035027077e-06, "loss": 0.826, "step": 1937 }, { "epoch": 0.16, "grad_norm": 2.9630771554858146, "learning_rate": 9.574236653462511e-06, "loss": 0.4971, "step": 1938 }, { "epoch": 0.16, "grad_norm": 5.412336714047085, "learning_rate": 9.573701951263862e-06, "loss": 1.0939, "step": 1939 }, { "epoch": 0.16, "grad_norm": 4.6333530332442, "learning_rate": 9.573166928468615e-06, "loss": 1.4336, "step": 1940 }, { "epoch": 0.16, "grad_norm": 4.62204995393872, "learning_rate": 9.572631585114267e-06, "loss": 1.1644, "step": 1941 }, { "epoch": 0.16, "grad_norm": 3.1738069100273223, "learning_rate": 9.572095921238343e-06, "loss": 0.7047, "step": 1942 }, { "epoch": 0.16, "grad_norm": 3.8525663295063906, "learning_rate": 9.571559936878394e-06, "loss": 0.9256, "step": 1943 }, { "epoch": 0.16, "grad_norm": 1.4944576959377527, "learning_rate": 9.571023632071989e-06, "loss": 0.2304, "step": 1944 }, { "epoch": 0.16, "grad_norm": 3.812644322997221, "learning_rate": 9.570487006856722e-06, "loss": 0.6744, "step": 1945 }, { "epoch": 0.16, "grad_norm": 3.1507359526587972, "learning_rate": 9.569950061270204e-06, "loss": 0.7059, "step": 1946 }, { "epoch": 0.16, "grad_norm": 2.624674249680327, "learning_rate": 9.569412795350076e-06, "loss": 0.6104, "step": 1947 }, { "epoch": 0.16, "grad_norm": 4.31256529328677, "learning_rate": 9.568875209133999e-06, "loss": 1.0901, "step": 1948 }, { "epoch": 0.16, "grad_norm": 3.861101663997485, "learning_rate": 9.568337302659652e-06, "loss": 0.7332, "step": 1949 }, { "epoch": 0.16, "grad_norm": 3.252967963194855, "learning_rate": 9.567799075964743e-06, "loss": 0.473, "step": 1950 }, { "epoch": 0.16, "grad_norm": 3.7954437726383152, "learning_rate": 9.567260529086997e-06, "loss": 0.8397, "step": 1951 }, { "epoch": 0.16, "grad_norm": 2.0570487471770336, "learning_rate": 9.566721662064164e-06, "loss": 0.4776, "step": 1952 }, { "epoch": 0.16, "grad_norm": 4.598324280894434, "learning_rate": 9.566182474934017e-06, "loss": 1.0364, "step": 1953 }, { "epoch": 0.16, "grad_norm": 3.9893068037392205, "learning_rate": 9.565642967734351e-06, "loss": 0.8069, "step": 1954 }, { "epoch": 0.16, "grad_norm": 2.4689637147814354, "learning_rate": 9.565103140502982e-06, "loss": 0.4132, "step": 1955 }, { "epoch": 0.16, "grad_norm": 5.9962889936616355, "learning_rate": 9.564562993277752e-06, "loss": 1.2551, "step": 1956 }, { "epoch": 0.16, "grad_norm": 4.037837016283461, "learning_rate": 9.564022526096521e-06, "loss": 0.6973, "step": 1957 }, { "epoch": 0.16, "grad_norm": 4.623565841563198, "learning_rate": 9.563481738997172e-06, "loss": 1.2694, "step": 1958 }, { "epoch": 0.16, "grad_norm": 3.7142809741906966, "learning_rate": 9.562940632017614e-06, "loss": 0.565, "step": 1959 }, { "epoch": 0.16, "grad_norm": 3.5410216749125705, "learning_rate": 9.562399205195775e-06, "loss": 0.6941, "step": 1960 }, { "epoch": 0.16, "grad_norm": 3.9151566880983317, "learning_rate": 9.56185745856961e-06, "loss": 0.9364, "step": 1961 }, { "epoch": 0.16, "grad_norm": 5.682734437236148, "learning_rate": 9.561315392177089e-06, "loss": 0.767, "step": 1962 }, { "epoch": 0.16, "grad_norm": 3.6613759123423164, "learning_rate": 9.56077300605621e-06, "loss": 1.0541, "step": 1963 }, { "epoch": 0.16, "grad_norm": 4.110581249062556, "learning_rate": 9.56023030024499e-06, "loss": 0.8277, "step": 1964 }, { "epoch": 0.16, "grad_norm": 4.288487454511914, "learning_rate": 9.559687274781475e-06, "loss": 0.768, "step": 1965 }, { "epoch": 0.16, "grad_norm": 3.8223670341511733, "learning_rate": 9.559143929703724e-06, "loss": 0.9807, "step": 1966 }, { "epoch": 0.16, "grad_norm": 4.108908693851089, "learning_rate": 9.558600265049825e-06, "loss": 0.8094, "step": 1967 }, { "epoch": 0.16, "grad_norm": 3.630304005106419, "learning_rate": 9.558056280857887e-06, "loss": 0.7759, "step": 1968 }, { "epoch": 0.16, "grad_norm": 3.8460453116763738, "learning_rate": 9.55751197716604e-06, "loss": 0.8335, "step": 1969 }, { "epoch": 0.16, "grad_norm": 4.365489981874306, "learning_rate": 9.556967354012438e-06, "loss": 0.8814, "step": 1970 }, { "epoch": 0.16, "grad_norm": 4.194935912677049, "learning_rate": 9.556422411435257e-06, "loss": 1.0257, "step": 1971 }, { "epoch": 0.16, "grad_norm": 3.9574399155708813, "learning_rate": 9.555877149472695e-06, "loss": 0.8627, "step": 1972 }, { "epoch": 0.16, "grad_norm": 4.040316351809699, "learning_rate": 9.555331568162972e-06, "loss": 0.8235, "step": 1973 }, { "epoch": 0.16, "grad_norm": 2.273221383752521, "learning_rate": 9.554785667544329e-06, "loss": 0.6658, "step": 1974 }, { "epoch": 0.16, "grad_norm": 4.044878071923383, "learning_rate": 9.554239447655036e-06, "loss": 0.9372, "step": 1975 }, { "epoch": 0.16, "grad_norm": 7.246557282396511, "learning_rate": 9.553692908533375e-06, "loss": 1.4103, "step": 1976 }, { "epoch": 0.16, "grad_norm": 4.274490525562516, "learning_rate": 9.55314605021766e-06, "loss": 1.3265, "step": 1977 }, { "epoch": 0.16, "grad_norm": 3.784351877267063, "learning_rate": 9.552598872746223e-06, "loss": 0.7399, "step": 1978 }, { "epoch": 0.16, "grad_norm": 5.510740649222768, "learning_rate": 9.552051376157417e-06, "loss": 1.1919, "step": 1979 }, { "epoch": 0.16, "grad_norm": 4.682551788596266, "learning_rate": 9.55150356048962e-06, "loss": 1.2566, "step": 1980 }, { "epoch": 0.16, "grad_norm": 3.308435854041583, "learning_rate": 9.550955425781234e-06, "loss": 0.5514, "step": 1981 }, { "epoch": 0.16, "grad_norm": 3.5546470124450935, "learning_rate": 9.550406972070676e-06, "loss": 0.7517, "step": 1982 }, { "epoch": 0.16, "grad_norm": 3.847516559239803, "learning_rate": 9.549858199396394e-06, "loss": 0.8813, "step": 1983 }, { "epoch": 0.16, "grad_norm": 3.2468457030579745, "learning_rate": 9.549309107796852e-06, "loss": 0.9317, "step": 1984 }, { "epoch": 0.16, "grad_norm": 2.7988168500030888, "learning_rate": 9.54875969731054e-06, "loss": 0.5256, "step": 1985 }, { "epoch": 0.16, "grad_norm": 4.034857576281372, "learning_rate": 9.54820996797597e-06, "loss": 0.9549, "step": 1986 }, { "epoch": 0.16, "grad_norm": 4.8790834699851295, "learning_rate": 9.547659919831676e-06, "loss": 1.6624, "step": 1987 }, { "epoch": 0.16, "grad_norm": 4.05241404605833, "learning_rate": 9.547109552916211e-06, "loss": 1.2909, "step": 1988 }, { "epoch": 0.16, "grad_norm": 2.9117588210126377, "learning_rate": 9.546558867268159e-06, "loss": 0.6867, "step": 1989 }, { "epoch": 0.16, "grad_norm": 3.309118950701456, "learning_rate": 9.546007862926115e-06, "loss": 0.6384, "step": 1990 }, { "epoch": 0.16, "grad_norm": 4.116069205074591, "learning_rate": 9.545456539928704e-06, "loss": 1.443, "step": 1991 }, { "epoch": 0.16, "grad_norm": 4.9087098585219024, "learning_rate": 9.544904898314572e-06, "loss": 1.0374, "step": 1992 }, { "epoch": 0.16, "grad_norm": 3.9309719792696702, "learning_rate": 9.544352938122384e-06, "loss": 0.8559, "step": 1993 }, { "epoch": 0.16, "grad_norm": 4.469895891148484, "learning_rate": 9.543800659390835e-06, "loss": 0.8993, "step": 1994 }, { "epoch": 0.16, "grad_norm": 4.278454789623266, "learning_rate": 9.543248062158632e-06, "loss": 1.0097, "step": 1995 }, { "epoch": 0.16, "grad_norm": 4.529235977103577, "learning_rate": 9.542695146464513e-06, "loss": 1.0865, "step": 1996 }, { "epoch": 0.16, "grad_norm": 4.169795889365141, "learning_rate": 9.542141912347236e-06, "loss": 1.018, "step": 1997 }, { "epoch": 0.16, "grad_norm": 5.51563326069593, "learning_rate": 9.541588359845575e-06, "loss": 1.0636, "step": 1998 }, { "epoch": 0.16, "grad_norm": 5.479455931358969, "learning_rate": 9.541034488998338e-06, "loss": 0.9901, "step": 1999 }, { "epoch": 0.16, "grad_norm": 3.372716248240536, "learning_rate": 9.540480299844345e-06, "loss": 0.6926, "step": 2000 }, { "epoch": 0.16, "grad_norm": 5.747085234992996, "learning_rate": 9.539925792422443e-06, "loss": 1.0609, "step": 2001 }, { "epoch": 0.16, "grad_norm": 1.269631520809824, "learning_rate": 9.5393709667715e-06, "loss": 0.209, "step": 2002 }, { "epoch": 0.16, "grad_norm": 4.8841818602939, "learning_rate": 9.53881582293041e-06, "loss": 0.8058, "step": 2003 }, { "epoch": 0.16, "grad_norm": 4.405695660762923, "learning_rate": 9.538260360938081e-06, "loss": 0.8038, "step": 2004 }, { "epoch": 0.16, "grad_norm": 2.771381211989904, "learning_rate": 9.537704580833453e-06, "loss": 0.4068, "step": 2005 }, { "epoch": 0.16, "grad_norm": 3.26310212288867, "learning_rate": 9.537148482655482e-06, "loss": 0.9612, "step": 2006 }, { "epoch": 0.16, "grad_norm": 3.976865740042055, "learning_rate": 9.536592066443147e-06, "loss": 1.0408, "step": 2007 }, { "epoch": 0.16, "grad_norm": 4.71800834789916, "learning_rate": 9.536035332235454e-06, "loss": 0.9944, "step": 2008 }, { "epoch": 0.16, "grad_norm": 5.622149280558416, "learning_rate": 9.535478280071422e-06, "loss": 0.9308, "step": 2009 }, { "epoch": 0.16, "grad_norm": 6.105878039691027, "learning_rate": 9.534920909990101e-06, "loss": 1.3476, "step": 2010 }, { "epoch": 0.16, "grad_norm": 4.166972114310374, "learning_rate": 9.534363222030563e-06, "loss": 0.8512, "step": 2011 }, { "epoch": 0.16, "grad_norm": 4.628377013586853, "learning_rate": 9.533805216231894e-06, "loss": 0.9158, "step": 2012 }, { "epoch": 0.16, "grad_norm": 5.980529070798636, "learning_rate": 9.533246892633213e-06, "loss": 0.7445, "step": 2013 }, { "epoch": 0.16, "grad_norm": 3.4338879751047973, "learning_rate": 9.532688251273654e-06, "loss": 0.5876, "step": 2014 }, { "epoch": 0.16, "grad_norm": 3.2946132643396164, "learning_rate": 9.532129292192373e-06, "loss": 0.7603, "step": 2015 }, { "epoch": 0.16, "grad_norm": 3.900269383522834, "learning_rate": 9.531570015428553e-06, "loss": 0.7026, "step": 2016 }, { "epoch": 0.16, "grad_norm": 4.469620357210813, "learning_rate": 9.531010421021396e-06, "loss": 1.2024, "step": 2017 }, { "epoch": 0.16, "grad_norm": 5.3814539431713895, "learning_rate": 9.530450509010128e-06, "loss": 1.1572, "step": 2018 }, { "epoch": 0.17, "grad_norm": 4.214817152693931, "learning_rate": 9.529890279433995e-06, "loss": 0.72, "step": 2019 }, { "epoch": 0.17, "grad_norm": 2.520489716408855, "learning_rate": 9.529329732332268e-06, "loss": 0.3485, "step": 2020 }, { "epoch": 0.17, "grad_norm": 3.803091814303413, "learning_rate": 9.52876886774424e-06, "loss": 1.1041, "step": 2021 }, { "epoch": 0.17, "grad_norm": 4.6421407248221, "learning_rate": 9.528207685709221e-06, "loss": 1.2171, "step": 2022 }, { "epoch": 0.17, "grad_norm": 4.23653445974646, "learning_rate": 9.52764618626655e-06, "loss": 1.1003, "step": 2023 }, { "epoch": 0.17, "grad_norm": 4.885493299123106, "learning_rate": 9.527084369455589e-06, "loss": 1.0119, "step": 2024 }, { "epoch": 0.17, "grad_norm": 4.731147655154812, "learning_rate": 9.526522235315713e-06, "loss": 1.1316, "step": 2025 }, { "epoch": 0.17, "grad_norm": 2.8303744896202905, "learning_rate": 9.525959783886329e-06, "loss": 0.7168, "step": 2026 }, { "epoch": 0.17, "grad_norm": 4.79528547028082, "learning_rate": 9.525397015206861e-06, "loss": 0.8626, "step": 2027 }, { "epoch": 0.17, "grad_norm": 4.403137139314899, "learning_rate": 9.524833929316758e-06, "loss": 0.7673, "step": 2028 }, { "epoch": 0.17, "grad_norm": 5.632791389070294, "learning_rate": 9.524270526255486e-06, "loss": 1.6007, "step": 2029 }, { "epoch": 0.17, "grad_norm": 4.910686152455089, "learning_rate": 9.523706806062541e-06, "loss": 1.383, "step": 2030 }, { "epoch": 0.17, "grad_norm": 5.2941777100821295, "learning_rate": 9.523142768777435e-06, "loss": 1.4508, "step": 2031 }, { "epoch": 0.17, "grad_norm": 4.2257729295585085, "learning_rate": 9.522578414439707e-06, "loss": 0.7555, "step": 2032 }, { "epoch": 0.17, "grad_norm": 5.177167550722004, "learning_rate": 9.522013743088916e-06, "loss": 0.9737, "step": 2033 }, { "epoch": 0.17, "grad_norm": 4.878369880410306, "learning_rate": 9.52144875476464e-06, "loss": 1.3448, "step": 2034 }, { "epoch": 0.17, "grad_norm": 5.307328908557558, "learning_rate": 9.520883449506483e-06, "loss": 1.0423, "step": 2035 }, { "epoch": 0.17, "grad_norm": 3.7938378285887997, "learning_rate": 9.52031782735407e-06, "loss": 0.5762, "step": 2036 }, { "epoch": 0.17, "grad_norm": 4.410318130606425, "learning_rate": 9.519751888347053e-06, "loss": 0.9626, "step": 2037 }, { "epoch": 0.17, "grad_norm": 4.689113064551304, "learning_rate": 9.519185632525097e-06, "loss": 1.0112, "step": 2038 }, { "epoch": 0.17, "grad_norm": 3.771765049703681, "learning_rate": 9.518619059927895e-06, "loss": 0.4313, "step": 2039 }, { "epoch": 0.17, "grad_norm": 3.524515596196058, "learning_rate": 9.518052170595165e-06, "loss": 0.9909, "step": 2040 }, { "epoch": 0.17, "grad_norm": 3.1098921131187756, "learning_rate": 9.517484964566637e-06, "loss": 0.4388, "step": 2041 }, { "epoch": 0.17, "grad_norm": 3.840815725983443, "learning_rate": 9.516917441882074e-06, "loss": 0.8934, "step": 2042 }, { "epoch": 0.17, "grad_norm": 4.737988001113275, "learning_rate": 9.516349602581256e-06, "loss": 0.9319, "step": 2043 }, { "epoch": 0.17, "grad_norm": 5.233006946267547, "learning_rate": 9.515781446703988e-06, "loss": 0.9586, "step": 2044 }, { "epoch": 0.17, "grad_norm": 5.338332469127597, "learning_rate": 9.51521297429009e-06, "loss": 1.1064, "step": 2045 }, { "epoch": 0.17, "grad_norm": 2.181487637050818, "learning_rate": 9.514644185379416e-06, "loss": 0.3214, "step": 2046 }, { "epoch": 0.17, "grad_norm": 4.820616144216788, "learning_rate": 9.51407508001183e-06, "loss": 1.0967, "step": 2047 }, { "epoch": 0.17, "grad_norm": 2.3995605186361595, "learning_rate": 9.51350565822723e-06, "loss": 0.46, "step": 2048 }, { "epoch": 0.17, "grad_norm": 4.189027154695834, "learning_rate": 9.512935920065523e-06, "loss": 1.0905, "step": 2049 }, { "epoch": 0.17, "grad_norm": 4.208339630062005, "learning_rate": 9.512365865566648e-06, "loss": 0.7666, "step": 2050 }, { "epoch": 0.17, "grad_norm": 4.44626051074097, "learning_rate": 9.511795494770563e-06, "loss": 0.9391, "step": 2051 }, { "epoch": 0.17, "grad_norm": 4.014287763227923, "learning_rate": 9.511224807717253e-06, "loss": 0.4553, "step": 2052 }, { "epoch": 0.17, "grad_norm": 3.5272840701887276, "learning_rate": 9.510653804446714e-06, "loss": 0.98, "step": 2053 }, { "epoch": 0.17, "grad_norm": 2.5804379646472637, "learning_rate": 9.510082484998975e-06, "loss": 0.5724, "step": 2054 }, { "epoch": 0.17, "grad_norm": 3.0664109603373664, "learning_rate": 9.50951084941408e-06, "loss": 0.777, "step": 2055 }, { "epoch": 0.17, "grad_norm": 5.9548121149971385, "learning_rate": 9.508938897732101e-06, "loss": 1.1975, "step": 2056 }, { "epoch": 0.17, "grad_norm": 3.704230560766375, "learning_rate": 9.508366629993129e-06, "loss": 0.8394, "step": 2057 }, { "epoch": 0.17, "grad_norm": 3.0182372497951966, "learning_rate": 9.507794046237275e-06, "loss": 0.4947, "step": 2058 }, { "epoch": 0.17, "grad_norm": 4.3287807453200715, "learning_rate": 9.507221146504679e-06, "loss": 0.5632, "step": 2059 }, { "epoch": 0.17, "grad_norm": 4.986043890091539, "learning_rate": 9.506647930835494e-06, "loss": 1.1423, "step": 2060 }, { "epoch": 0.17, "grad_norm": 4.958680811383781, "learning_rate": 9.506074399269902e-06, "loss": 1.0541, "step": 2061 }, { "epoch": 0.17, "grad_norm": 4.1924867873842455, "learning_rate": 9.505500551848105e-06, "loss": 0.7383, "step": 2062 }, { "epoch": 0.17, "grad_norm": 2.192658274030753, "learning_rate": 9.50492638861033e-06, "loss": 0.4288, "step": 2063 }, { "epoch": 0.17, "grad_norm": 3.0884543370481534, "learning_rate": 9.504351909596818e-06, "loss": 0.6757, "step": 2064 }, { "epoch": 0.17, "grad_norm": 2.299401261090491, "learning_rate": 9.503777114847841e-06, "loss": 0.5118, "step": 2065 }, { "epoch": 0.17, "grad_norm": 1.9253161949191624, "learning_rate": 9.503202004403688e-06, "loss": 0.4369, "step": 2066 }, { "epoch": 0.17, "grad_norm": 4.028626104070394, "learning_rate": 9.502626578304673e-06, "loss": 0.7785, "step": 2067 }, { "epoch": 0.17, "grad_norm": 4.1122624328146475, "learning_rate": 9.50205083659113e-06, "loss": 0.7224, "step": 2068 }, { "epoch": 0.17, "grad_norm": 3.2818337720691417, "learning_rate": 9.501474779303416e-06, "loss": 0.6204, "step": 2069 }, { "epoch": 0.17, "grad_norm": 4.771811140932043, "learning_rate": 9.500898406481911e-06, "loss": 1.1285, "step": 2070 }, { "epoch": 0.17, "grad_norm": 3.8057931128703726, "learning_rate": 9.500321718167017e-06, "loss": 0.8784, "step": 2071 }, { "epoch": 0.17, "grad_norm": 3.5515639183361767, "learning_rate": 9.499744714399155e-06, "loss": 0.899, "step": 2072 }, { "epoch": 0.17, "grad_norm": 5.122862374078744, "learning_rate": 9.499167395218772e-06, "loss": 0.9491, "step": 2073 }, { "epoch": 0.17, "grad_norm": 3.7491156377139907, "learning_rate": 9.498589760666333e-06, "loss": 1.0864, "step": 2074 }, { "epoch": 0.17, "grad_norm": 3.448237666496974, "learning_rate": 9.498011810782332e-06, "loss": 0.7363, "step": 2075 }, { "epoch": 0.17, "grad_norm": 2.865420210192231, "learning_rate": 9.497433545607278e-06, "loss": 0.6435, "step": 2076 }, { "epoch": 0.17, "grad_norm": 3.8746356846623153, "learning_rate": 9.496854965181705e-06, "loss": 1.1468, "step": 2077 }, { "epoch": 0.17, "grad_norm": 4.349451863770051, "learning_rate": 9.496276069546169e-06, "loss": 0.998, "step": 2078 }, { "epoch": 0.17, "grad_norm": 5.444977958151714, "learning_rate": 9.495696858741249e-06, "loss": 0.9305, "step": 2079 }, { "epoch": 0.17, "grad_norm": 4.643233439483295, "learning_rate": 9.495117332807542e-06, "loss": 0.9184, "step": 2080 }, { "epoch": 0.17, "grad_norm": 3.447022066674226, "learning_rate": 9.494537491785676e-06, "loss": 0.7931, "step": 2081 }, { "epoch": 0.17, "grad_norm": 3.9125196189107183, "learning_rate": 9.493957335716291e-06, "loss": 0.9897, "step": 2082 }, { "epoch": 0.17, "grad_norm": 6.285592320779023, "learning_rate": 9.493376864640054e-06, "loss": 1.3372, "step": 2083 }, { "epoch": 0.17, "grad_norm": 3.0882900515413283, "learning_rate": 9.492796078597655e-06, "loss": 0.527, "step": 2084 }, { "epoch": 0.17, "grad_norm": 4.573554407494425, "learning_rate": 9.492214977629804e-06, "loss": 0.9002, "step": 2085 }, { "epoch": 0.17, "grad_norm": 5.834869538459952, "learning_rate": 9.491633561777232e-06, "loss": 1.1973, "step": 2086 }, { "epoch": 0.17, "grad_norm": 4.591982689006695, "learning_rate": 9.491051831080695e-06, "loss": 1.2376, "step": 2087 }, { "epoch": 0.17, "grad_norm": 3.410080452072863, "learning_rate": 9.490469785580971e-06, "loss": 0.77, "step": 2088 }, { "epoch": 0.17, "grad_norm": 3.4696812104534214, "learning_rate": 9.489887425318856e-06, "loss": 0.8371, "step": 2089 }, { "epoch": 0.17, "grad_norm": 3.6395549603734527, "learning_rate": 9.489304750335173e-06, "loss": 0.8124, "step": 2090 }, { "epoch": 0.17, "grad_norm": 3.3541758421205494, "learning_rate": 9.488721760670766e-06, "loss": 0.7443, "step": 2091 }, { "epoch": 0.17, "grad_norm": 2.157586521603289, "learning_rate": 9.488138456366497e-06, "loss": 0.681, "step": 2092 }, { "epoch": 0.17, "grad_norm": 3.117454253879517, "learning_rate": 9.487554837463255e-06, "loss": 0.4913, "step": 2093 }, { "epoch": 0.17, "grad_norm": 6.380817613738605, "learning_rate": 9.48697090400195e-06, "loss": 1.092, "step": 2094 }, { "epoch": 0.17, "grad_norm": 2.4747229179837382, "learning_rate": 9.486386656023509e-06, "loss": 0.5649, "step": 2095 }, { "epoch": 0.17, "grad_norm": 3.3807338815393715, "learning_rate": 9.485802093568892e-06, "loss": 0.7537, "step": 2096 }, { "epoch": 0.17, "grad_norm": 3.1771176947496484, "learning_rate": 9.485217216679068e-06, "loss": 0.9096, "step": 2097 }, { "epoch": 0.17, "grad_norm": 4.282633156917091, "learning_rate": 9.484632025395037e-06, "loss": 0.9372, "step": 2098 }, { "epoch": 0.17, "grad_norm": 5.095661517054039, "learning_rate": 9.48404651975782e-06, "loss": 1.414, "step": 2099 }, { "epoch": 0.17, "grad_norm": 5.529039630511247, "learning_rate": 9.483460699808454e-06, "loss": 1.2644, "step": 2100 }, { "epoch": 0.17, "grad_norm": 5.867831198743964, "learning_rate": 9.482874565588008e-06, "loss": 1.2301, "step": 2101 }, { "epoch": 0.17, "grad_norm": 3.908320279178014, "learning_rate": 9.482288117137561e-06, "loss": 0.5719, "step": 2102 }, { "epoch": 0.17, "grad_norm": 4.22276681186549, "learning_rate": 9.481701354498227e-06, "loss": 0.7356, "step": 2103 }, { "epoch": 0.17, "grad_norm": 3.8726519472165974, "learning_rate": 9.48111427771113e-06, "loss": 1.0238, "step": 2104 }, { "epoch": 0.17, "grad_norm": 3.545740432284491, "learning_rate": 9.480526886817425e-06, "loss": 0.5745, "step": 2105 }, { "epoch": 0.17, "grad_norm": 3.525664610056473, "learning_rate": 9.479939181858286e-06, "loss": 0.841, "step": 2106 }, { "epoch": 0.17, "grad_norm": 5.119819181857357, "learning_rate": 9.479351162874905e-06, "loss": 1.0244, "step": 2107 }, { "epoch": 0.17, "grad_norm": 4.480209901923949, "learning_rate": 9.478762829908503e-06, "loss": 1.0946, "step": 2108 }, { "epoch": 0.17, "grad_norm": 4.06730290748614, "learning_rate": 9.478174183000319e-06, "loss": 0.9159, "step": 2109 }, { "epoch": 0.17, "grad_norm": 4.50710093602591, "learning_rate": 9.477585222191612e-06, "loss": 1.0762, "step": 2110 }, { "epoch": 0.17, "grad_norm": 5.821870746791981, "learning_rate": 9.476995947523668e-06, "loss": 1.1686, "step": 2111 }, { "epoch": 0.17, "grad_norm": 3.4036271881923335, "learning_rate": 9.476406359037792e-06, "loss": 0.6, "step": 2112 }, { "epoch": 0.17, "grad_norm": 4.135499123303508, "learning_rate": 9.475816456775313e-06, "loss": 0.8012, "step": 2113 }, { "epoch": 0.17, "grad_norm": 4.674933703337597, "learning_rate": 9.475226240777577e-06, "loss": 1.1691, "step": 2114 }, { "epoch": 0.17, "grad_norm": 6.075280978313644, "learning_rate": 9.47463571108596e-06, "loss": 1.0191, "step": 2115 }, { "epoch": 0.17, "grad_norm": 3.4166952874689573, "learning_rate": 9.474044867741852e-06, "loss": 0.5438, "step": 2116 }, { "epoch": 0.17, "grad_norm": 3.991601282845236, "learning_rate": 9.47345371078667e-06, "loss": 1.1868, "step": 2117 }, { "epoch": 0.17, "grad_norm": 4.387621946592111, "learning_rate": 9.47286224026185e-06, "loss": 0.9359, "step": 2118 }, { "epoch": 0.17, "grad_norm": 3.120682905250976, "learning_rate": 9.472270456208856e-06, "loss": 0.682, "step": 2119 }, { "epoch": 0.17, "grad_norm": 2.9720525101079796, "learning_rate": 9.471678358669164e-06, "loss": 0.6188, "step": 2120 }, { "epoch": 0.17, "grad_norm": 3.7381808096544877, "learning_rate": 9.47108594768428e-06, "loss": 0.8853, "step": 2121 }, { "epoch": 0.17, "grad_norm": 3.8959848975983835, "learning_rate": 9.47049322329573e-06, "loss": 0.9168, "step": 2122 }, { "epoch": 0.17, "grad_norm": 1.2368095730765394, "learning_rate": 9.469900185545061e-06, "loss": 0.1946, "step": 2123 }, { "epoch": 0.17, "grad_norm": 3.286467425215324, "learning_rate": 9.469306834473841e-06, "loss": 0.7813, "step": 2124 }, { "epoch": 0.17, "grad_norm": 2.914048061704354, "learning_rate": 9.468713170123664e-06, "loss": 0.4727, "step": 2125 }, { "epoch": 0.17, "grad_norm": 3.035570216006248, "learning_rate": 9.46811919253614e-06, "loss": 0.4793, "step": 2126 }, { "epoch": 0.17, "grad_norm": 4.383252038597668, "learning_rate": 9.467524901752906e-06, "loss": 1.0153, "step": 2127 }, { "epoch": 0.17, "grad_norm": 3.9088761095690225, "learning_rate": 9.466930297815622e-06, "loss": 1.2265, "step": 2128 }, { "epoch": 0.17, "grad_norm": 3.659464819329504, "learning_rate": 9.46633538076596e-06, "loss": 0.8317, "step": 2129 }, { "epoch": 0.17, "grad_norm": 3.897382976866003, "learning_rate": 9.465740150645629e-06, "loss": 0.6749, "step": 2130 }, { "epoch": 0.17, "grad_norm": 4.286261516991291, "learning_rate": 9.465144607496347e-06, "loss": 0.9856, "step": 2131 }, { "epoch": 0.17, "grad_norm": 3.8718066416881376, "learning_rate": 9.46454875135986e-06, "loss": 0.9202, "step": 2132 }, { "epoch": 0.17, "grad_norm": 3.9295301699594405, "learning_rate": 9.463952582277936e-06, "loss": 0.637, "step": 2133 }, { "epoch": 0.17, "grad_norm": 2.7624787002724838, "learning_rate": 9.463356100292363e-06, "loss": 0.6039, "step": 2134 }, { "epoch": 0.17, "grad_norm": 5.45137275432468, "learning_rate": 9.462759305444951e-06, "loss": 1.5769, "step": 2135 }, { "epoch": 0.17, "grad_norm": 5.043486054594081, "learning_rate": 9.462162197777533e-06, "loss": 0.8245, "step": 2136 }, { "epoch": 0.17, "grad_norm": 3.4508470611822823, "learning_rate": 9.461564777331966e-06, "loss": 0.6273, "step": 2137 }, { "epoch": 0.17, "grad_norm": 3.550464396618127, "learning_rate": 9.460967044150125e-06, "loss": 0.4785, "step": 2138 }, { "epoch": 0.17, "grad_norm": 4.220157500165512, "learning_rate": 9.460368998273908e-06, "loss": 0.8692, "step": 2139 }, { "epoch": 0.17, "grad_norm": 4.451446644164049, "learning_rate": 9.459770639745235e-06, "loss": 0.8494, "step": 2140 }, { "epoch": 0.17, "grad_norm": 4.649836832615487, "learning_rate": 9.459171968606051e-06, "loss": 1.0236, "step": 2141 }, { "epoch": 0.18, "grad_norm": 2.986127517748761, "learning_rate": 9.458572984898318e-06, "loss": 0.5701, "step": 2142 }, { "epoch": 0.18, "grad_norm": 3.979887171050888, "learning_rate": 9.457973688664021e-06, "loss": 0.865, "step": 2143 }, { "epoch": 0.18, "grad_norm": 1.2761285963995945, "learning_rate": 9.45737407994517e-06, "loss": 0.2562, "step": 2144 }, { "epoch": 0.18, "grad_norm": 5.330361544442108, "learning_rate": 9.456774158783795e-06, "loss": 1.2154, "step": 2145 }, { "epoch": 0.18, "grad_norm": 4.612898726359513, "learning_rate": 9.456173925221948e-06, "loss": 0.7982, "step": 2146 }, { "epoch": 0.18, "grad_norm": 5.966569355397138, "learning_rate": 9.455573379301702e-06, "loss": 1.5651, "step": 2147 }, { "epoch": 0.18, "grad_norm": 2.4979505753209525, "learning_rate": 9.45497252106515e-06, "loss": 0.3068, "step": 2148 }, { "epoch": 0.18, "grad_norm": 0.8643227063850487, "learning_rate": 9.454371350554417e-06, "loss": 0.1983, "step": 2149 }, { "epoch": 0.18, "grad_norm": 2.8998501733971542, "learning_rate": 9.453769867811636e-06, "loss": 0.7906, "step": 2150 }, { "epoch": 0.18, "grad_norm": 3.334813471461509, "learning_rate": 9.45316807287897e-06, "loss": 0.5067, "step": 2151 }, { "epoch": 0.18, "grad_norm": 2.6296558987705527, "learning_rate": 9.4525659657986e-06, "loss": 0.4986, "step": 2152 }, { "epoch": 0.18, "grad_norm": 3.9692009490803257, "learning_rate": 9.451963546612737e-06, "loss": 0.9403, "step": 2153 }, { "epoch": 0.18, "grad_norm": 4.384935016423835, "learning_rate": 9.451360815363601e-06, "loss": 0.9806, "step": 2154 }, { "epoch": 0.18, "grad_norm": 3.7487390901287174, "learning_rate": 9.450757772093447e-06, "loss": 0.6419, "step": 2155 }, { "epoch": 0.18, "grad_norm": 3.7973741872239026, "learning_rate": 9.450154416844543e-06, "loss": 0.7477, "step": 2156 }, { "epoch": 0.18, "grad_norm": 4.490270775900813, "learning_rate": 9.44955074965918e-06, "loss": 1.148, "step": 2157 }, { "epoch": 0.18, "grad_norm": 4.100769635829848, "learning_rate": 9.448946770579675e-06, "loss": 0.9803, "step": 2158 }, { "epoch": 0.18, "grad_norm": 4.324000798766789, "learning_rate": 9.448342479648362e-06, "loss": 0.6454, "step": 2159 }, { "epoch": 0.18, "grad_norm": 4.757169125636011, "learning_rate": 9.447737876907602e-06, "loss": 1.2184, "step": 2160 }, { "epoch": 0.18, "grad_norm": 3.7057613162293004, "learning_rate": 9.447132962399772e-06, "loss": 0.8668, "step": 2161 }, { "epoch": 0.18, "grad_norm": 2.352280279037213, "learning_rate": 9.446527736167277e-06, "loss": 0.3339, "step": 2162 }, { "epoch": 0.18, "grad_norm": 3.4131389889547674, "learning_rate": 9.445922198252538e-06, "loss": 0.8509, "step": 2163 }, { "epoch": 0.18, "grad_norm": 3.1302691719793336, "learning_rate": 9.445316348698002e-06, "loss": 0.9048, "step": 2164 }, { "epoch": 0.18, "grad_norm": 5.184514928247789, "learning_rate": 9.444710187546136e-06, "loss": 1.2745, "step": 2165 }, { "epoch": 0.18, "grad_norm": 4.3023991330152445, "learning_rate": 9.444103714839427e-06, "loss": 0.8257, "step": 2166 }, { "epoch": 0.18, "grad_norm": 2.4559356624211106, "learning_rate": 9.443496930620392e-06, "loss": 0.7127, "step": 2167 }, { "epoch": 0.18, "grad_norm": 3.0009784342285326, "learning_rate": 9.442889834931558e-06, "loss": 0.5808, "step": 2168 }, { "epoch": 0.18, "grad_norm": 4.271613359566975, "learning_rate": 9.442282427815483e-06, "loss": 0.8843, "step": 2169 }, { "epoch": 0.18, "grad_norm": 3.768172855722143, "learning_rate": 9.441674709314743e-06, "loss": 1.0149, "step": 2170 }, { "epoch": 0.18, "grad_norm": 2.393967273531713, "learning_rate": 9.441066679471935e-06, "loss": 0.5938, "step": 2171 }, { "epoch": 0.18, "grad_norm": 3.7071192673095257, "learning_rate": 9.440458338329681e-06, "loss": 0.5606, "step": 2172 }, { "epoch": 0.18, "grad_norm": 4.286003794630861, "learning_rate": 9.439849685930623e-06, "loss": 0.9005, "step": 2173 }, { "epoch": 0.18, "grad_norm": 3.8373323976564766, "learning_rate": 9.439240722317423e-06, "loss": 0.4884, "step": 2174 }, { "epoch": 0.18, "grad_norm": 2.8342604024352474, "learning_rate": 9.43863144753277e-06, "loss": 0.6117, "step": 2175 }, { "epoch": 0.18, "grad_norm": 4.5346372794679, "learning_rate": 9.438021861619367e-06, "loss": 0.8834, "step": 2176 }, { "epoch": 0.18, "grad_norm": 3.630996103152386, "learning_rate": 9.437411964619947e-06, "loss": 0.8089, "step": 2177 }, { "epoch": 0.18, "grad_norm": 4.55704600360632, "learning_rate": 9.43680175657726e-06, "loss": 0.8158, "step": 2178 }, { "epoch": 0.18, "grad_norm": 5.547727769767378, "learning_rate": 9.43619123753408e-06, "loss": 1.5137, "step": 2179 }, { "epoch": 0.18, "grad_norm": 3.6197319803485417, "learning_rate": 9.435580407533198e-06, "loss": 0.4518, "step": 2180 }, { "epoch": 0.18, "grad_norm": 5.264578431627168, "learning_rate": 9.434969266617436e-06, "loss": 1.4073, "step": 2181 }, { "epoch": 0.18, "grad_norm": 4.085161116879681, "learning_rate": 9.434357814829627e-06, "loss": 0.7944, "step": 2182 }, { "epoch": 0.18, "grad_norm": 3.8200278738459086, "learning_rate": 9.433746052212636e-06, "loss": 0.8745, "step": 2183 }, { "epoch": 0.18, "grad_norm": 5.073425916795223, "learning_rate": 9.43313397880934e-06, "loss": 0.9598, "step": 2184 }, { "epoch": 0.18, "grad_norm": 3.8512311184225396, "learning_rate": 9.432521594662648e-06, "loss": 0.7672, "step": 2185 }, { "epoch": 0.18, "grad_norm": 3.9337318171462963, "learning_rate": 9.431908899815479e-06, "loss": 0.8608, "step": 2186 }, { "epoch": 0.18, "grad_norm": 1.8885296919716454, "learning_rate": 9.431295894310786e-06, "loss": 0.4857, "step": 2187 }, { "epoch": 0.18, "grad_norm": 2.903393735231428, "learning_rate": 9.430682578191537e-06, "loss": 0.6505, "step": 2188 }, { "epoch": 0.18, "grad_norm": 1.6887859434219867, "learning_rate": 9.43006895150072e-06, "loss": 0.336, "step": 2189 }, { "epoch": 0.18, "grad_norm": 5.153251818217103, "learning_rate": 9.429455014281349e-06, "loss": 1.4917, "step": 2190 }, { "epoch": 0.18, "grad_norm": 3.706061847628713, "learning_rate": 9.428840766576459e-06, "loss": 1.0367, "step": 2191 }, { "epoch": 0.18, "grad_norm": 5.457412294800602, "learning_rate": 9.428226208429106e-06, "loss": 1.1445, "step": 2192 }, { "epoch": 0.18, "grad_norm": 4.555600596305112, "learning_rate": 9.427611339882368e-06, "loss": 1.197, "step": 2193 }, { "epoch": 0.18, "grad_norm": 4.005253708947643, "learning_rate": 9.426996160979342e-06, "loss": 1.0339, "step": 2194 }, { "epoch": 0.18, "grad_norm": 4.977210166365334, "learning_rate": 9.426380671763154e-06, "loss": 1.0448, "step": 2195 }, { "epoch": 0.18, "grad_norm": 3.1991074719897274, "learning_rate": 9.425764872276942e-06, "loss": 0.5701, "step": 2196 }, { "epoch": 0.18, "grad_norm": 3.1006524749382405, "learning_rate": 9.425148762563876e-06, "loss": 0.9353, "step": 2197 }, { "epoch": 0.18, "grad_norm": 3.332035032248812, "learning_rate": 9.424532342667138e-06, "loss": 0.7351, "step": 2198 }, { "epoch": 0.18, "grad_norm": 3.6185299233730506, "learning_rate": 9.42391561262994e-06, "loss": 0.7285, "step": 2199 }, { "epoch": 0.18, "grad_norm": 2.5599192483980033, "learning_rate": 9.42329857249551e-06, "loss": 0.4709, "step": 2200 }, { "epoch": 0.18, "grad_norm": 4.312042537898623, "learning_rate": 9.422681222307099e-06, "loss": 0.9185, "step": 2201 }, { "epoch": 0.18, "grad_norm": 3.6335923426745067, "learning_rate": 9.422063562107984e-06, "loss": 0.8953, "step": 2202 }, { "epoch": 0.18, "grad_norm": 6.034931329079666, "learning_rate": 9.421445591941456e-06, "loss": 1.3278, "step": 2203 }, { "epoch": 0.18, "grad_norm": 2.7279775684316347, "learning_rate": 9.420827311850836e-06, "loss": 0.5268, "step": 2204 }, { "epoch": 0.18, "grad_norm": 3.636644980441379, "learning_rate": 9.42020872187946e-06, "loss": 0.543, "step": 2205 }, { "epoch": 0.18, "grad_norm": 3.8513882635748193, "learning_rate": 9.41958982207069e-06, "loss": 1.1868, "step": 2206 }, { "epoch": 0.18, "grad_norm": 3.599020465113644, "learning_rate": 9.418970612467908e-06, "loss": 0.5247, "step": 2207 }, { "epoch": 0.18, "grad_norm": 2.247518479537959, "learning_rate": 9.418351093114517e-06, "loss": 0.343, "step": 2208 }, { "epoch": 0.18, "grad_norm": 5.2366428056676835, "learning_rate": 9.417731264053942e-06, "loss": 1.409, "step": 2209 }, { "epoch": 0.18, "grad_norm": 3.424511385285495, "learning_rate": 9.417111125329633e-06, "loss": 0.891, "step": 2210 }, { "epoch": 0.18, "grad_norm": 2.854324920321562, "learning_rate": 9.416490676985057e-06, "loss": 0.57, "step": 2211 }, { "epoch": 0.18, "grad_norm": 4.814197414087323, "learning_rate": 9.415869919063705e-06, "loss": 0.957, "step": 2212 }, { "epoch": 0.18, "grad_norm": 3.1892540112475123, "learning_rate": 9.415248851609089e-06, "loss": 0.6602, "step": 2213 }, { "epoch": 0.18, "grad_norm": 6.012278285033315, "learning_rate": 9.414627474664745e-06, "loss": 1.3654, "step": 2214 }, { "epoch": 0.18, "grad_norm": 3.5956857349837175, "learning_rate": 9.414005788274226e-06, "loss": 0.7299, "step": 2215 }, { "epoch": 0.18, "grad_norm": 2.8920306479101443, "learning_rate": 9.413383792481112e-06, "loss": 0.5877, "step": 2216 }, { "epoch": 0.18, "grad_norm": 2.961917764585591, "learning_rate": 9.412761487329e-06, "loss": 0.3996, "step": 2217 }, { "epoch": 0.18, "grad_norm": 3.622972061370353, "learning_rate": 9.412138872861514e-06, "loss": 0.7335, "step": 2218 }, { "epoch": 0.18, "grad_norm": 3.8898421548794957, "learning_rate": 9.411515949122295e-06, "loss": 0.7544, "step": 2219 }, { "epoch": 0.18, "grad_norm": 5.542825331149399, "learning_rate": 9.410892716155006e-06, "loss": 1.1682, "step": 2220 }, { "epoch": 0.18, "grad_norm": 5.240458429700037, "learning_rate": 9.410269174003333e-06, "loss": 1.1765, "step": 2221 }, { "epoch": 0.18, "grad_norm": 4.717643764221059, "learning_rate": 9.409645322710985e-06, "loss": 1.0527, "step": 2222 }, { "epoch": 0.18, "grad_norm": 5.61581799462886, "learning_rate": 9.409021162321692e-06, "loss": 1.2727, "step": 2223 }, { "epoch": 0.18, "grad_norm": 3.902406104492035, "learning_rate": 9.408396692879202e-06, "loss": 1.1395, "step": 2224 }, { "epoch": 0.18, "grad_norm": 4.042305305793964, "learning_rate": 9.40777191442729e-06, "loss": 0.859, "step": 2225 }, { "epoch": 0.18, "grad_norm": 1.0774316749315587, "learning_rate": 9.40714682700975e-06, "loss": 0.165, "step": 2226 }, { "epoch": 0.18, "grad_norm": 3.302788281417149, "learning_rate": 9.406521430670397e-06, "loss": 0.8008, "step": 2227 }, { "epoch": 0.18, "grad_norm": 3.7846222730904553, "learning_rate": 9.405895725453069e-06, "loss": 0.7245, "step": 2228 }, { "epoch": 0.18, "grad_norm": 4.0857113770735145, "learning_rate": 9.405269711401625e-06, "loss": 0.8109, "step": 2229 }, { "epoch": 0.18, "grad_norm": 1.9188680752732636, "learning_rate": 9.404643388559945e-06, "loss": 0.3626, "step": 2230 }, { "epoch": 0.18, "grad_norm": 3.1849555018977895, "learning_rate": 9.404016756971934e-06, "loss": 0.9315, "step": 2231 }, { "epoch": 0.18, "grad_norm": 3.7608332507637434, "learning_rate": 9.403389816681514e-06, "loss": 0.7762, "step": 2232 }, { "epoch": 0.18, "grad_norm": 3.9655397836011237, "learning_rate": 9.402762567732632e-06, "loss": 0.9902, "step": 2233 }, { "epoch": 0.18, "grad_norm": 4.1218309956038155, "learning_rate": 9.402135010169254e-06, "loss": 0.5033, "step": 2234 }, { "epoch": 0.18, "grad_norm": 1.366445790921517, "learning_rate": 9.401507144035371e-06, "loss": 0.222, "step": 2235 }, { "epoch": 0.18, "grad_norm": 3.903752718477064, "learning_rate": 9.400878969374991e-06, "loss": 0.7701, "step": 2236 }, { "epoch": 0.18, "grad_norm": 3.751144292776653, "learning_rate": 9.40025048623215e-06, "loss": 0.7907, "step": 2237 }, { "epoch": 0.18, "grad_norm": 4.825728154383655, "learning_rate": 9.399621694650898e-06, "loss": 1.3445, "step": 2238 }, { "epoch": 0.18, "grad_norm": 4.068955811651021, "learning_rate": 9.398992594675314e-06, "loss": 0.751, "step": 2239 }, { "epoch": 0.18, "grad_norm": 3.658416537459373, "learning_rate": 9.398363186349493e-06, "loss": 1.0305, "step": 2240 }, { "epoch": 0.18, "grad_norm": 4.329549125699289, "learning_rate": 9.397733469717554e-06, "loss": 1.2354, "step": 2241 }, { "epoch": 0.18, "grad_norm": 3.069811292045692, "learning_rate": 9.397103444823638e-06, "loss": 0.5039, "step": 2242 }, { "epoch": 0.18, "grad_norm": 4.574464473522762, "learning_rate": 9.396473111711908e-06, "loss": 1.2865, "step": 2243 }, { "epoch": 0.18, "grad_norm": 3.1815009387994815, "learning_rate": 9.395842470426545e-06, "loss": 0.3662, "step": 2244 }, { "epoch": 0.18, "grad_norm": 4.596516254714554, "learning_rate": 9.395211521011756e-06, "loss": 1.0371, "step": 2245 }, { "epoch": 0.18, "grad_norm": 4.278370333633337, "learning_rate": 9.394580263511765e-06, "loss": 0.9215, "step": 2246 }, { "epoch": 0.18, "grad_norm": 4.5781826380201975, "learning_rate": 9.393948697970827e-06, "loss": 0.6672, "step": 2247 }, { "epoch": 0.18, "grad_norm": 3.692094398353298, "learning_rate": 9.393316824433204e-06, "loss": 0.6842, "step": 2248 }, { "epoch": 0.18, "grad_norm": 3.611932153208114, "learning_rate": 9.392684642943195e-06, "loss": 0.9484, "step": 2249 }, { "epoch": 0.18, "grad_norm": 3.1271003219389075, "learning_rate": 9.392052153545108e-06, "loss": 0.6765, "step": 2250 }, { "epoch": 0.18, "grad_norm": 4.2203921452166915, "learning_rate": 9.391419356283277e-06, "loss": 1.0756, "step": 2251 }, { "epoch": 0.18, "grad_norm": 4.809427884781176, "learning_rate": 9.390786251202064e-06, "loss": 1.1822, "step": 2252 }, { "epoch": 0.18, "grad_norm": 2.7567251671685584, "learning_rate": 9.390152838345842e-06, "loss": 0.6625, "step": 2253 }, { "epoch": 0.18, "grad_norm": 4.694270323963662, "learning_rate": 9.389519117759012e-06, "loss": 1.3287, "step": 2254 }, { "epoch": 0.18, "grad_norm": 4.555233870715096, "learning_rate": 9.388885089485995e-06, "loss": 0.9989, "step": 2255 }, { "epoch": 0.18, "grad_norm": 4.243426813783271, "learning_rate": 9.388250753571235e-06, "loss": 0.8727, "step": 2256 }, { "epoch": 0.18, "grad_norm": 1.1269155707380953, "learning_rate": 9.387616110059194e-06, "loss": 0.1509, "step": 2257 }, { "epoch": 0.18, "grad_norm": 3.1492173789558664, "learning_rate": 9.386981158994359e-06, "loss": 0.8127, "step": 2258 }, { "epoch": 0.18, "grad_norm": 2.778123070227287, "learning_rate": 9.386345900421236e-06, "loss": 0.6753, "step": 2259 }, { "epoch": 0.18, "grad_norm": 3.126349253955989, "learning_rate": 9.385710334384357e-06, "loss": 0.3218, "step": 2260 }, { "epoch": 0.18, "grad_norm": 3.1294988384899938, "learning_rate": 9.385074460928267e-06, "loss": 0.4438, "step": 2261 }, { "epoch": 0.18, "grad_norm": 3.9584109911097376, "learning_rate": 9.384438280097543e-06, "loss": 0.8727, "step": 2262 }, { "epoch": 0.18, "grad_norm": 2.5563394754276163, "learning_rate": 9.383801791936777e-06, "loss": 0.6102, "step": 2263 }, { "epoch": 0.19, "grad_norm": 3.360677635526274, "learning_rate": 9.383164996490583e-06, "loss": 0.4224, "step": 2264 }, { "epoch": 0.19, "grad_norm": 4.006397861225834, "learning_rate": 9.382527893803599e-06, "loss": 1.1627, "step": 2265 }, { "epoch": 0.19, "grad_norm": 3.465562048660857, "learning_rate": 9.381890483920482e-06, "loss": 0.5598, "step": 2266 }, { "epoch": 0.19, "grad_norm": 4.389436228854544, "learning_rate": 9.381252766885914e-06, "loss": 1.3719, "step": 2267 }, { "epoch": 0.19, "grad_norm": 4.650189768224478, "learning_rate": 9.380614742744592e-06, "loss": 1.073, "step": 2268 }, { "epoch": 0.19, "grad_norm": 3.940624357871344, "learning_rate": 9.379976411541241e-06, "loss": 1.0487, "step": 2269 }, { "epoch": 0.19, "grad_norm": 4.0691136895443245, "learning_rate": 9.379337773320604e-06, "loss": 0.9991, "step": 2270 }, { "epoch": 0.19, "grad_norm": 3.671113323218031, "learning_rate": 9.378698828127451e-06, "loss": 0.7108, "step": 2271 }, { "epoch": 0.19, "grad_norm": 3.094792417803656, "learning_rate": 9.378059576006567e-06, "loss": 0.6131, "step": 2272 }, { "epoch": 0.19, "grad_norm": 4.097438813925161, "learning_rate": 9.377420017002756e-06, "loss": 1.0312, "step": 2273 }, { "epoch": 0.19, "grad_norm": 4.892784583139424, "learning_rate": 9.376780151160856e-06, "loss": 1.0126, "step": 2274 }, { "epoch": 0.19, "grad_norm": 3.8083537051345573, "learning_rate": 9.376139978525713e-06, "loss": 0.9419, "step": 2275 }, { "epoch": 0.19, "grad_norm": 2.641312816729417, "learning_rate": 9.375499499142204e-06, "loss": 0.8278, "step": 2276 }, { "epoch": 0.19, "grad_norm": 4.562235260630101, "learning_rate": 9.374858713055221e-06, "loss": 1.0192, "step": 2277 }, { "epoch": 0.19, "grad_norm": 1.2153746964649463, "learning_rate": 9.374217620309684e-06, "loss": 0.2192, "step": 2278 }, { "epoch": 0.19, "grad_norm": 2.6305760677433625, "learning_rate": 9.373576220950527e-06, "loss": 0.452, "step": 2279 }, { "epoch": 0.19, "grad_norm": 4.230674908813215, "learning_rate": 9.37293451502271e-06, "loss": 1.0268, "step": 2280 }, { "epoch": 0.19, "grad_norm": 5.652602389587261, "learning_rate": 9.372292502571217e-06, "loss": 1.3541, "step": 2281 }, { "epoch": 0.19, "grad_norm": 1.5384697420557394, "learning_rate": 9.371650183641046e-06, "loss": 0.2499, "step": 2282 }, { "epoch": 0.19, "grad_norm": 5.051187199744434, "learning_rate": 9.371007558277221e-06, "loss": 1.079, "step": 2283 }, { "epoch": 0.19, "grad_norm": 2.6611226742779315, "learning_rate": 9.370364626524791e-06, "loss": 0.5498, "step": 2284 }, { "epoch": 0.19, "grad_norm": 3.9110461646193837, "learning_rate": 9.36972138842882e-06, "loss": 0.9785, "step": 2285 }, { "epoch": 0.19, "grad_norm": 5.815306567877642, "learning_rate": 9.369077844034398e-06, "loss": 1.5498, "step": 2286 }, { "epoch": 0.19, "grad_norm": 3.8516878045698872, "learning_rate": 9.368433993386632e-06, "loss": 0.7308, "step": 2287 }, { "epoch": 0.19, "grad_norm": 4.22649733569183, "learning_rate": 9.367789836530655e-06, "loss": 1.3369, "step": 2288 }, { "epoch": 0.19, "grad_norm": 6.188081935922202, "learning_rate": 9.36714537351162e-06, "loss": 1.3739, "step": 2289 }, { "epoch": 0.19, "grad_norm": 5.836611550160533, "learning_rate": 9.366500604374699e-06, "loss": 1.4048, "step": 2290 }, { "epoch": 0.19, "grad_norm": 3.68458182019333, "learning_rate": 9.365855529165089e-06, "loss": 0.8023, "step": 2291 }, { "epoch": 0.19, "grad_norm": 2.937118676656062, "learning_rate": 9.365210147928006e-06, "loss": 0.607, "step": 2292 }, { "epoch": 0.19, "grad_norm": 3.0959864170466402, "learning_rate": 9.364564460708689e-06, "loss": 0.5608, "step": 2293 }, { "epoch": 0.19, "grad_norm": 4.279504397544929, "learning_rate": 9.3639184675524e-06, "loss": 0.8709, "step": 2294 }, { "epoch": 0.19, "grad_norm": 2.814079321161493, "learning_rate": 9.363272168504417e-06, "loss": 0.4674, "step": 2295 }, { "epoch": 0.19, "grad_norm": 3.61620991804245, "learning_rate": 9.362625563610044e-06, "loss": 1.1514, "step": 2296 }, { "epoch": 0.19, "grad_norm": 4.758909968343346, "learning_rate": 9.361978652914605e-06, "loss": 0.8233, "step": 2297 }, { "epoch": 0.19, "grad_norm": 3.2385771274175483, "learning_rate": 9.361331436463446e-06, "loss": 0.9019, "step": 2298 }, { "epoch": 0.19, "grad_norm": 4.896629749654269, "learning_rate": 9.360683914301934e-06, "loss": 1.5023, "step": 2299 }, { "epoch": 0.19, "grad_norm": 5.312029356565346, "learning_rate": 9.360036086475457e-06, "loss": 1.4424, "step": 2300 }, { "epoch": 0.19, "grad_norm": 5.150712940690037, "learning_rate": 9.359387953029425e-06, "loss": 0.8739, "step": 2301 }, { "epoch": 0.19, "grad_norm": 3.363421170051736, "learning_rate": 9.358739514009271e-06, "loss": 0.9312, "step": 2302 }, { "epoch": 0.19, "grad_norm": 2.79247579777736, "learning_rate": 9.358090769460446e-06, "loss": 0.598, "step": 2303 }, { "epoch": 0.19, "grad_norm": 3.185708394371197, "learning_rate": 9.357441719428423e-06, "loss": 0.5437, "step": 2304 }, { "epoch": 0.19, "grad_norm": 4.29604390462049, "learning_rate": 9.3567923639587e-06, "loss": 0.872, "step": 2305 }, { "epoch": 0.19, "grad_norm": 4.621954214424412, "learning_rate": 9.356142703096793e-06, "loss": 1.0147, "step": 2306 }, { "epoch": 0.19, "grad_norm": 3.776210916732992, "learning_rate": 9.355492736888242e-06, "loss": 0.8019, "step": 2307 }, { "epoch": 0.19, "grad_norm": 3.737184950318045, "learning_rate": 9.354842465378604e-06, "loss": 1.0243, "step": 2308 }, { "epoch": 0.19, "grad_norm": 3.7661736156524657, "learning_rate": 9.354191888613462e-06, "loss": 0.9402, "step": 2309 }, { "epoch": 0.19, "grad_norm": 3.3941009941822626, "learning_rate": 9.353541006638417e-06, "loss": 0.7118, "step": 2310 }, { "epoch": 0.19, "grad_norm": 5.008294593308233, "learning_rate": 9.352889819499096e-06, "loss": 1.2041, "step": 2311 }, { "epoch": 0.19, "grad_norm": 4.497801080060148, "learning_rate": 9.35223832724114e-06, "loss": 1.1584, "step": 2312 }, { "epoch": 0.19, "grad_norm": 3.5206679515786554, "learning_rate": 9.35158652991022e-06, "loss": 1.1447, "step": 2313 }, { "epoch": 0.19, "grad_norm": 5.118739250240711, "learning_rate": 9.350934427552023e-06, "loss": 1.476, "step": 2314 }, { "epoch": 0.19, "grad_norm": 3.5796514622825963, "learning_rate": 9.350282020212256e-06, "loss": 0.5229, "step": 2315 }, { "epoch": 0.19, "grad_norm": 5.519285126458806, "learning_rate": 9.349629307936653e-06, "loss": 1.039, "step": 2316 }, { "epoch": 0.19, "grad_norm": 4.409487761550421, "learning_rate": 9.348976290770965e-06, "loss": 1.0798, "step": 2317 }, { "epoch": 0.19, "grad_norm": 4.566681383204464, "learning_rate": 9.348322968760965e-06, "loss": 0.8468, "step": 2318 }, { "epoch": 0.19, "grad_norm": 3.167042571796452, "learning_rate": 9.34766934195245e-06, "loss": 0.7639, "step": 2319 }, { "epoch": 0.19, "grad_norm": 3.0381343904833593, "learning_rate": 9.347015410391235e-06, "loss": 0.3901, "step": 2320 }, { "epoch": 0.19, "grad_norm": 4.5957839378494025, "learning_rate": 9.34636117412316e-06, "loss": 1.4191, "step": 2321 }, { "epoch": 0.19, "grad_norm": 2.3712381446725423, "learning_rate": 9.345706633194078e-06, "loss": 0.6917, "step": 2322 }, { "epoch": 0.19, "grad_norm": 4.525764739475181, "learning_rate": 9.345051787649877e-06, "loss": 0.6715, "step": 2323 }, { "epoch": 0.19, "grad_norm": 3.4365910876302572, "learning_rate": 9.344396637536453e-06, "loss": 0.8326, "step": 2324 }, { "epoch": 0.19, "grad_norm": 4.244381507411723, "learning_rate": 9.343741182899733e-06, "loss": 1.1729, "step": 2325 }, { "epoch": 0.19, "grad_norm": 3.6725681990302252, "learning_rate": 9.34308542378566e-06, "loss": 0.8226, "step": 2326 }, { "epoch": 0.19, "grad_norm": 2.8662307678886147, "learning_rate": 9.3424293602402e-06, "loss": 0.6593, "step": 2327 }, { "epoch": 0.19, "grad_norm": 4.578416090268908, "learning_rate": 9.34177299230934e-06, "loss": 0.7326, "step": 2328 }, { "epoch": 0.19, "grad_norm": 4.1837032657349225, "learning_rate": 9.341116320039088e-06, "loss": 0.9397, "step": 2329 }, { "epoch": 0.19, "grad_norm": 4.3667517386706205, "learning_rate": 9.340459343475475e-06, "loss": 1.2654, "step": 2330 }, { "epoch": 0.19, "grad_norm": 5.3000469622278645, "learning_rate": 9.339802062664553e-06, "loss": 1.3045, "step": 2331 }, { "epoch": 0.19, "grad_norm": 4.2957218695500545, "learning_rate": 9.339144477652391e-06, "loss": 1.0857, "step": 2332 }, { "epoch": 0.19, "grad_norm": 4.68864267751406, "learning_rate": 9.338486588485087e-06, "loss": 1.3811, "step": 2333 }, { "epoch": 0.19, "grad_norm": 3.372649789054265, "learning_rate": 9.337828395208755e-06, "loss": 0.5209, "step": 2334 }, { "epoch": 0.19, "grad_norm": 3.8499680904005222, "learning_rate": 9.337169897869528e-06, "loss": 0.8476, "step": 2335 }, { "epoch": 0.19, "grad_norm": 5.961242137201628, "learning_rate": 9.336511096513568e-06, "loss": 1.528, "step": 2336 }, { "epoch": 0.19, "grad_norm": 5.693661806290446, "learning_rate": 9.335851991187053e-06, "loss": 0.873, "step": 2337 }, { "epoch": 0.19, "grad_norm": 4.508153442359315, "learning_rate": 9.335192581936183e-06, "loss": 1.0223, "step": 2338 }, { "epoch": 0.19, "grad_norm": 4.4103159948747335, "learning_rate": 9.334532868807179e-06, "loss": 1.2319, "step": 2339 }, { "epoch": 0.19, "grad_norm": 5.19185231767955, "learning_rate": 9.333872851846285e-06, "loss": 1.0721, "step": 2340 }, { "epoch": 0.19, "grad_norm": 4.111140255013692, "learning_rate": 9.333212531099767e-06, "loss": 0.9359, "step": 2341 }, { "epoch": 0.19, "grad_norm": 3.5008772177759786, "learning_rate": 9.332551906613908e-06, "loss": 0.4058, "step": 2342 }, { "epoch": 0.19, "grad_norm": 4.1415338749073785, "learning_rate": 9.331890978435014e-06, "loss": 1.0631, "step": 2343 }, { "epoch": 0.19, "grad_norm": 3.80184772378031, "learning_rate": 9.331229746609416e-06, "loss": 0.6319, "step": 2344 }, { "epoch": 0.19, "grad_norm": 2.3313134792011, "learning_rate": 9.330568211183461e-06, "loss": 0.4148, "step": 2345 }, { "epoch": 0.19, "grad_norm": 3.914408637597205, "learning_rate": 9.329906372203523e-06, "loss": 0.4193, "step": 2346 }, { "epoch": 0.19, "grad_norm": 1.765654467492976, "learning_rate": 9.329244229715992e-06, "loss": 0.4101, "step": 2347 }, { "epoch": 0.19, "grad_norm": 3.550335647297338, "learning_rate": 9.328581783767281e-06, "loss": 0.6818, "step": 2348 }, { "epoch": 0.19, "grad_norm": 2.9500036638624088, "learning_rate": 9.327919034403825e-06, "loss": 0.3719, "step": 2349 }, { "epoch": 0.19, "grad_norm": 5.501275834383856, "learning_rate": 9.327255981672082e-06, "loss": 1.395, "step": 2350 }, { "epoch": 0.19, "grad_norm": 4.279179026210534, "learning_rate": 9.326592625618523e-06, "loss": 1.2283, "step": 2351 }, { "epoch": 0.19, "grad_norm": 5.262990927460758, "learning_rate": 9.325928966289652e-06, "loss": 1.0392, "step": 2352 }, { "epoch": 0.19, "grad_norm": 3.8054019782508375, "learning_rate": 9.325265003731988e-06, "loss": 0.5625, "step": 2353 }, { "epoch": 0.19, "grad_norm": 3.065618563471102, "learning_rate": 9.324600737992069e-06, "loss": 0.6746, "step": 2354 }, { "epoch": 0.19, "grad_norm": 2.9175758949154034, "learning_rate": 9.323936169116461e-06, "loss": 0.8539, "step": 2355 }, { "epoch": 0.19, "grad_norm": 4.403739334261655, "learning_rate": 9.323271297151743e-06, "loss": 1.1677, "step": 2356 }, { "epoch": 0.19, "grad_norm": 4.28527399597012, "learning_rate": 9.322606122144524e-06, "loss": 0.8743, "step": 2357 }, { "epoch": 0.19, "grad_norm": 3.158860499691112, "learning_rate": 9.321940644141427e-06, "loss": 0.3042, "step": 2358 }, { "epoch": 0.19, "grad_norm": 4.485617366183805, "learning_rate": 9.3212748631891e-06, "loss": 1.0994, "step": 2359 }, { "epoch": 0.19, "grad_norm": 3.1845245822430632, "learning_rate": 9.320608779334212e-06, "loss": 0.401, "step": 2360 }, { "epoch": 0.19, "grad_norm": 2.408107084724933, "learning_rate": 9.319942392623451e-06, "loss": 0.5151, "step": 2361 }, { "epoch": 0.19, "grad_norm": 3.550807236446376, "learning_rate": 9.319275703103529e-06, "loss": 0.5964, "step": 2362 }, { "epoch": 0.19, "grad_norm": 3.59677790334423, "learning_rate": 9.318608710821179e-06, "loss": 1.0998, "step": 2363 }, { "epoch": 0.19, "grad_norm": 3.2999653007697263, "learning_rate": 9.317941415823151e-06, "loss": 0.8447, "step": 2364 }, { "epoch": 0.19, "grad_norm": 1.0328133919182665, "learning_rate": 9.317273818156223e-06, "loss": 0.1885, "step": 2365 }, { "epoch": 0.19, "grad_norm": 5.439082118673476, "learning_rate": 9.316605917867189e-06, "loss": 1.244, "step": 2366 }, { "epoch": 0.19, "grad_norm": 4.144453026630218, "learning_rate": 9.315937715002865e-06, "loss": 0.8939, "step": 2367 }, { "epoch": 0.19, "grad_norm": 3.661134589171982, "learning_rate": 9.315269209610092e-06, "loss": 0.727, "step": 2368 }, { "epoch": 0.19, "grad_norm": 3.0121522655064643, "learning_rate": 9.314600401735727e-06, "loss": 0.6767, "step": 2369 }, { "epoch": 0.19, "grad_norm": 3.9143730505910006, "learning_rate": 9.31393129142665e-06, "loss": 0.7601, "step": 2370 }, { "epoch": 0.19, "grad_norm": 3.5397444675924405, "learning_rate": 9.313261878729765e-06, "loss": 0.7476, "step": 2371 }, { "epoch": 0.19, "grad_norm": 3.4541352720937573, "learning_rate": 9.312592163691991e-06, "loss": 1.0122, "step": 2372 }, { "epoch": 0.19, "grad_norm": 5.368894813193451, "learning_rate": 9.311922146360276e-06, "loss": 1.2218, "step": 2373 }, { "epoch": 0.19, "grad_norm": 4.196780787673087, "learning_rate": 9.311251826781587e-06, "loss": 0.7043, "step": 2374 }, { "epoch": 0.19, "grad_norm": 3.937401888424667, "learning_rate": 9.310581205002905e-06, "loss": 0.7497, "step": 2375 }, { "epoch": 0.19, "grad_norm": 3.7329755870495376, "learning_rate": 9.30991028107124e-06, "loss": 0.9845, "step": 2376 }, { "epoch": 0.19, "grad_norm": 4.1875849682133115, "learning_rate": 9.309239055033623e-06, "loss": 0.7722, "step": 2377 }, { "epoch": 0.19, "grad_norm": 4.936687169393056, "learning_rate": 9.3085675269371e-06, "loss": 1.018, "step": 2378 }, { "epoch": 0.19, "grad_norm": 4.192478312596038, "learning_rate": 9.307895696828746e-06, "loss": 1.0972, "step": 2379 }, { "epoch": 0.19, "grad_norm": 2.0847921517774552, "learning_rate": 9.307223564755649e-06, "loss": 0.3726, "step": 2380 }, { "epoch": 0.19, "grad_norm": 2.861499554696102, "learning_rate": 9.306551130764929e-06, "loss": 0.6663, "step": 2381 }, { "epoch": 0.19, "grad_norm": 4.132355188229489, "learning_rate": 9.305878394903714e-06, "loss": 0.8424, "step": 2382 }, { "epoch": 0.19, "grad_norm": 3.9622667131259264, "learning_rate": 9.305205357219165e-06, "loss": 0.88, "step": 2383 }, { "epoch": 0.19, "grad_norm": 4.162565828964038, "learning_rate": 9.304532017758454e-06, "loss": 0.8993, "step": 2384 }, { "epoch": 0.19, "grad_norm": 3.720231045728556, "learning_rate": 9.303858376568784e-06, "loss": 0.8852, "step": 2385 }, { "epoch": 0.2, "grad_norm": 3.6436473738437174, "learning_rate": 9.303184433697371e-06, "loss": 0.7395, "step": 2386 }, { "epoch": 0.2, "grad_norm": 3.7860005278747586, "learning_rate": 9.302510189191458e-06, "loss": 0.974, "step": 2387 }, { "epoch": 0.2, "grad_norm": 5.741299005535924, "learning_rate": 9.301835643098305e-06, "loss": 1.4295, "step": 2388 }, { "epoch": 0.2, "grad_norm": 4.393585118933961, "learning_rate": 9.301160795465196e-06, "loss": 1.1355, "step": 2389 }, { "epoch": 0.2, "grad_norm": 3.619503406329702, "learning_rate": 9.300485646339431e-06, "loss": 0.9534, "step": 2390 }, { "epoch": 0.2, "grad_norm": 4.111439895035622, "learning_rate": 9.299810195768341e-06, "loss": 0.8473, "step": 2391 }, { "epoch": 0.2, "grad_norm": 2.223696444269811, "learning_rate": 9.299134443799267e-06, "loss": 0.4557, "step": 2392 }, { "epoch": 0.2, "grad_norm": 5.174862259989276, "learning_rate": 9.298458390479579e-06, "loss": 1.2313, "step": 2393 }, { "epoch": 0.2, "grad_norm": 4.049120303711942, "learning_rate": 9.297782035856667e-06, "loss": 0.9009, "step": 2394 }, { "epoch": 0.2, "grad_norm": 4.607973996729967, "learning_rate": 9.297105379977935e-06, "loss": 1.311, "step": 2395 }, { "epoch": 0.2, "grad_norm": 1.804514523499376, "learning_rate": 9.296428422890817e-06, "loss": 0.5371, "step": 2396 }, { "epoch": 0.2, "grad_norm": 2.5712502914725803, "learning_rate": 9.295751164642767e-06, "loss": 0.3081, "step": 2397 }, { "epoch": 0.2, "grad_norm": 4.9385847868834265, "learning_rate": 9.295073605281255e-06, "loss": 1.3572, "step": 2398 }, { "epoch": 0.2, "grad_norm": 3.953183154224133, "learning_rate": 9.294395744853775e-06, "loss": 0.7451, "step": 2399 }, { "epoch": 0.2, "grad_norm": 4.006366705979045, "learning_rate": 9.293717583407843e-06, "loss": 1.0411, "step": 2400 }, { "epoch": 0.2, "grad_norm": 2.9360113632346465, "learning_rate": 9.293039120990995e-06, "loss": 0.7905, "step": 2401 }, { "epoch": 0.2, "grad_norm": 3.970447326489623, "learning_rate": 9.292360357650785e-06, "loss": 0.9431, "step": 2402 }, { "epoch": 0.2, "grad_norm": 3.742640339977302, "learning_rate": 9.291681293434797e-06, "loss": 0.8987, "step": 2403 }, { "epoch": 0.2, "grad_norm": 3.5528572102404032, "learning_rate": 9.291001928390629e-06, "loss": 0.8615, "step": 2404 }, { "epoch": 0.2, "grad_norm": 5.186330963000171, "learning_rate": 9.290322262565897e-06, "loss": 1.3626, "step": 2405 }, { "epoch": 0.2, "grad_norm": 2.9948835802395184, "learning_rate": 9.289642296008248e-06, "loss": 0.7315, "step": 2406 }, { "epoch": 0.2, "grad_norm": 2.8638274177086402, "learning_rate": 9.288962028765342e-06, "loss": 0.5873, "step": 2407 }, { "epoch": 0.2, "grad_norm": 5.20550751601823, "learning_rate": 9.288281460884864e-06, "loss": 1.2367, "step": 2408 }, { "epoch": 0.2, "grad_norm": 3.6467454307293434, "learning_rate": 9.287600592414517e-06, "loss": 0.6496, "step": 2409 }, { "epoch": 0.2, "grad_norm": 1.0927102373684074, "learning_rate": 9.28691942340203e-06, "loss": 0.2213, "step": 2410 }, { "epoch": 0.2, "grad_norm": 2.9930871543106923, "learning_rate": 9.286237953895148e-06, "loss": 0.5662, "step": 2411 }, { "epoch": 0.2, "grad_norm": 3.3265903378205772, "learning_rate": 9.285556183941637e-06, "loss": 0.9091, "step": 2412 }, { "epoch": 0.2, "grad_norm": 2.955673187066376, "learning_rate": 9.28487411358929e-06, "loss": 0.6372, "step": 2413 }, { "epoch": 0.2, "grad_norm": 4.006607505537565, "learning_rate": 9.284191742885915e-06, "loss": 1.1425, "step": 2414 }, { "epoch": 0.2, "grad_norm": 5.078956744021499, "learning_rate": 9.283509071879344e-06, "loss": 1.2454, "step": 2415 }, { "epoch": 0.2, "grad_norm": 2.392870823843581, "learning_rate": 9.282826100617429e-06, "loss": 0.5376, "step": 2416 }, { "epoch": 0.2, "grad_norm": 4.467695605250697, "learning_rate": 9.282142829148043e-06, "loss": 1.0673, "step": 2417 }, { "epoch": 0.2, "grad_norm": 1.1094007382106783, "learning_rate": 9.28145925751908e-06, "loss": 0.1823, "step": 2418 }, { "epoch": 0.2, "grad_norm": 5.416330381903597, "learning_rate": 9.280775385778458e-06, "loss": 1.1083, "step": 2419 }, { "epoch": 0.2, "grad_norm": 2.8890851270704814, "learning_rate": 9.280091213974109e-06, "loss": 0.974, "step": 2420 }, { "epoch": 0.2, "grad_norm": 2.781892576166949, "learning_rate": 9.279406742153996e-06, "loss": 0.4089, "step": 2421 }, { "epoch": 0.2, "grad_norm": 5.066866788110671, "learning_rate": 9.278721970366092e-06, "loss": 1.3843, "step": 2422 }, { "epoch": 0.2, "grad_norm": 2.9425551802733314, "learning_rate": 9.278036898658401e-06, "loss": 0.7035, "step": 2423 }, { "epoch": 0.2, "grad_norm": 4.68743343050816, "learning_rate": 9.277351527078938e-06, "loss": 1.1653, "step": 2424 }, { "epoch": 0.2, "grad_norm": 4.845281486402816, "learning_rate": 9.276665855675751e-06, "loss": 1.1039, "step": 2425 }, { "epoch": 0.2, "grad_norm": 4.201882004264333, "learning_rate": 9.275979884496898e-06, "loss": 1.107, "step": 2426 }, { "epoch": 0.2, "grad_norm": 4.02109199001339, "learning_rate": 9.275293613590465e-06, "loss": 0.9791, "step": 2427 }, { "epoch": 0.2, "grad_norm": 1.9454430982583224, "learning_rate": 9.274607043004556e-06, "loss": 0.4404, "step": 2428 }, { "epoch": 0.2, "grad_norm": 4.627297027829158, "learning_rate": 9.273920172787297e-06, "loss": 0.7501, "step": 2429 }, { "epoch": 0.2, "grad_norm": 3.4906400608114176, "learning_rate": 9.273233002986833e-06, "loss": 0.929, "step": 2430 }, { "epoch": 0.2, "grad_norm": 3.8919170121454916, "learning_rate": 9.27254553365133e-06, "loss": 1.2215, "step": 2431 }, { "epoch": 0.2, "grad_norm": 4.729171809737526, "learning_rate": 9.271857764828985e-06, "loss": 1.419, "step": 2432 }, { "epoch": 0.2, "grad_norm": 3.387967287590401, "learning_rate": 9.271169696567999e-06, "loss": 0.7166, "step": 2433 }, { "epoch": 0.2, "grad_norm": 2.7446962517953417, "learning_rate": 9.270481328916605e-06, "loss": 0.509, "step": 2434 }, { "epoch": 0.2, "grad_norm": 4.3337688838086725, "learning_rate": 9.269792661923055e-06, "loss": 0.8205, "step": 2435 }, { "epoch": 0.2, "grad_norm": 5.070855586850423, "learning_rate": 9.269103695635622e-06, "loss": 1.2684, "step": 2436 }, { "epoch": 0.2, "grad_norm": 5.072157256597414, "learning_rate": 9.2684144301026e-06, "loss": 1.0799, "step": 2437 }, { "epoch": 0.2, "grad_norm": 3.4439968585400815, "learning_rate": 9.2677248653723e-06, "loss": 0.7584, "step": 2438 }, { "epoch": 0.2, "grad_norm": 3.0997069644786572, "learning_rate": 9.267035001493064e-06, "loss": 0.7358, "step": 2439 }, { "epoch": 0.2, "grad_norm": 4.321923940828505, "learning_rate": 9.266344838513241e-06, "loss": 0.911, "step": 2440 }, { "epoch": 0.2, "grad_norm": 3.797153143640582, "learning_rate": 9.265654376481214e-06, "loss": 0.7008, "step": 2441 }, { "epoch": 0.2, "grad_norm": 3.137346063274294, "learning_rate": 9.264963615445378e-06, "loss": 0.5624, "step": 2442 }, { "epoch": 0.2, "grad_norm": 4.329038814070353, "learning_rate": 9.264272555454156e-06, "loss": 1.1483, "step": 2443 }, { "epoch": 0.2, "grad_norm": 3.846667627346543, "learning_rate": 9.263581196555984e-06, "loss": 1.1069, "step": 2444 }, { "epoch": 0.2, "grad_norm": 3.0885308129338425, "learning_rate": 9.262889538799327e-06, "loss": 0.6528, "step": 2445 }, { "epoch": 0.2, "grad_norm": 3.984212107188813, "learning_rate": 9.262197582232665e-06, "loss": 0.7519, "step": 2446 }, { "epoch": 0.2, "grad_norm": 2.6441126736579625, "learning_rate": 9.2615053269045e-06, "loss": 0.5124, "step": 2447 }, { "epoch": 0.2, "grad_norm": 2.801090724259183, "learning_rate": 9.260812772863362e-06, "loss": 0.5342, "step": 2448 }, { "epoch": 0.2, "grad_norm": 5.0335952212132895, "learning_rate": 9.260119920157786e-06, "loss": 1.2109, "step": 2449 }, { "epoch": 0.2, "grad_norm": 3.3807668935028627, "learning_rate": 9.259426768836347e-06, "loss": 0.7047, "step": 2450 }, { "epoch": 0.2, "grad_norm": 5.588787739876182, "learning_rate": 9.258733318947627e-06, "loss": 1.5709, "step": 2451 }, { "epoch": 0.2, "grad_norm": 3.1994010042190997, "learning_rate": 9.258039570540238e-06, "loss": 0.8593, "step": 2452 }, { "epoch": 0.2, "grad_norm": 3.7504222258312465, "learning_rate": 9.257345523662804e-06, "loss": 0.9261, "step": 2453 }, { "epoch": 0.2, "grad_norm": 4.411878854734679, "learning_rate": 9.256651178363978e-06, "loss": 1.0292, "step": 2454 }, { "epoch": 0.2, "grad_norm": 5.609824834430926, "learning_rate": 9.255956534692428e-06, "loss": 1.5112, "step": 2455 }, { "epoch": 0.2, "grad_norm": 4.657719639064696, "learning_rate": 9.255261592696849e-06, "loss": 1.0546, "step": 2456 }, { "epoch": 0.2, "grad_norm": 3.9287243102542893, "learning_rate": 9.254566352425949e-06, "loss": 1.0827, "step": 2457 }, { "epoch": 0.2, "grad_norm": 4.406667014838754, "learning_rate": 9.253870813928465e-06, "loss": 1.0112, "step": 2458 }, { "epoch": 0.2, "grad_norm": 3.7527747181267754, "learning_rate": 9.25317497725315e-06, "loss": 0.9954, "step": 2459 }, { "epoch": 0.2, "grad_norm": 1.9851742175340266, "learning_rate": 9.252478842448778e-06, "loss": 0.4253, "step": 2460 }, { "epoch": 0.2, "grad_norm": 2.928820661023417, "learning_rate": 9.251782409564146e-06, "loss": 0.4773, "step": 2461 }, { "epoch": 0.2, "grad_norm": 4.542990238697449, "learning_rate": 9.251085678648072e-06, "loss": 0.5688, "step": 2462 }, { "epoch": 0.2, "grad_norm": 2.9259639052018427, "learning_rate": 9.250388649749391e-06, "loss": 0.6107, "step": 2463 }, { "epoch": 0.2, "grad_norm": 5.511230033717203, "learning_rate": 9.249691322916965e-06, "loss": 1.7268, "step": 2464 }, { "epoch": 0.2, "grad_norm": 4.015230401762193, "learning_rate": 9.248993698199672e-06, "loss": 0.9483, "step": 2465 }, { "epoch": 0.2, "grad_norm": 5.568010511496238, "learning_rate": 9.248295775646412e-06, "loss": 0.9769, "step": 2466 }, { "epoch": 0.2, "grad_norm": 3.767556720925712, "learning_rate": 9.247597555306107e-06, "loss": 0.7468, "step": 2467 }, { "epoch": 0.2, "grad_norm": 0.6824470971410319, "learning_rate": 9.246899037227698e-06, "loss": 0.1253, "step": 2468 }, { "epoch": 0.2, "grad_norm": 2.556352405481264, "learning_rate": 9.246200221460148e-06, "loss": 0.4267, "step": 2469 }, { "epoch": 0.2, "grad_norm": 3.5598987471209234, "learning_rate": 9.245501108052447e-06, "loss": 0.654, "step": 2470 }, { "epoch": 0.2, "grad_norm": 2.1694604413817036, "learning_rate": 9.24480169705359e-06, "loss": 0.3556, "step": 2471 }, { "epoch": 0.2, "grad_norm": 3.614876497302162, "learning_rate": 9.244101988512608e-06, "loss": 0.8888, "step": 2472 }, { "epoch": 0.2, "grad_norm": 4.11782195067912, "learning_rate": 9.243401982478548e-06, "loss": 1.0358, "step": 2473 }, { "epoch": 0.2, "grad_norm": 3.0826329130323074, "learning_rate": 9.242701679000477e-06, "loss": 0.4509, "step": 2474 }, { "epoch": 0.2, "grad_norm": 3.449403101466768, "learning_rate": 9.242001078127483e-06, "loss": 0.4125, "step": 2475 }, { "epoch": 0.2, "grad_norm": 2.0237449018883034, "learning_rate": 9.241300179908672e-06, "loss": 0.4052, "step": 2476 }, { "epoch": 0.2, "grad_norm": 5.948681216804102, "learning_rate": 9.240598984393179e-06, "loss": 0.3693, "step": 2477 }, { "epoch": 0.2, "grad_norm": 3.3261854739380206, "learning_rate": 9.239897491630152e-06, "loss": 0.8982, "step": 2478 }, { "epoch": 0.2, "grad_norm": 3.033930031952647, "learning_rate": 9.239195701668762e-06, "loss": 0.6192, "step": 2479 }, { "epoch": 0.2, "grad_norm": 3.718121732200636, "learning_rate": 9.238493614558203e-06, "loss": 0.944, "step": 2480 }, { "epoch": 0.2, "grad_norm": 3.9801540794617747, "learning_rate": 9.237791230347688e-06, "loss": 1.0094, "step": 2481 }, { "epoch": 0.2, "grad_norm": 2.3348847550539866, "learning_rate": 9.237088549086449e-06, "loss": 0.7, "step": 2482 }, { "epoch": 0.2, "grad_norm": 3.9710795346239567, "learning_rate": 9.236385570823746e-06, "loss": 0.5839, "step": 2483 }, { "epoch": 0.2, "grad_norm": 5.174006026845121, "learning_rate": 9.235682295608848e-06, "loss": 1.3029, "step": 2484 }, { "epoch": 0.2, "grad_norm": 4.230100928597258, "learning_rate": 9.234978723491054e-06, "loss": 1.027, "step": 2485 }, { "epoch": 0.2, "grad_norm": 4.648003166348003, "learning_rate": 9.234274854519685e-06, "loss": 1.0663, "step": 2486 }, { "epoch": 0.2, "grad_norm": 3.8057104068829544, "learning_rate": 9.233570688744076e-06, "loss": 0.4592, "step": 2487 }, { "epoch": 0.2, "grad_norm": 2.906143424329132, "learning_rate": 9.232866226213586e-06, "loss": 0.5144, "step": 2488 }, { "epoch": 0.2, "grad_norm": 3.8208585072714936, "learning_rate": 9.232161466977595e-06, "loss": 0.5398, "step": 2489 }, { "epoch": 0.2, "grad_norm": 2.105264891709723, "learning_rate": 9.231456411085502e-06, "loss": 0.3397, "step": 2490 }, { "epoch": 0.2, "grad_norm": 3.9134449975356302, "learning_rate": 9.23075105858673e-06, "loss": 0.9787, "step": 2491 }, { "epoch": 0.2, "grad_norm": 4.549363434492431, "learning_rate": 9.230045409530724e-06, "loss": 1.1104, "step": 2492 }, { "epoch": 0.2, "grad_norm": 2.741785394799542, "learning_rate": 9.229339463966942e-06, "loss": 0.4278, "step": 2493 }, { "epoch": 0.2, "grad_norm": 3.3875035043855193, "learning_rate": 9.228633221944869e-06, "loss": 0.4966, "step": 2494 }, { "epoch": 0.2, "grad_norm": 3.079424147406122, "learning_rate": 9.227926683514012e-06, "loss": 0.4968, "step": 2495 }, { "epoch": 0.2, "grad_norm": 3.319924544614497, "learning_rate": 9.227219848723893e-06, "loss": 0.6642, "step": 2496 }, { "epoch": 0.2, "grad_norm": 4.25907882646003, "learning_rate": 9.226512717624062e-06, "loss": 0.7904, "step": 2497 }, { "epoch": 0.2, "grad_norm": 3.497068125726394, "learning_rate": 9.22580529026408e-06, "loss": 0.7181, "step": 2498 }, { "epoch": 0.2, "grad_norm": 3.802893634393092, "learning_rate": 9.225097566693539e-06, "loss": 0.7675, "step": 2499 }, { "epoch": 0.2, "grad_norm": 1.095308909812506, "learning_rate": 9.224389546962047e-06, "loss": 0.1666, "step": 2500 }, { "epoch": 0.2, "grad_norm": 3.395290691931256, "learning_rate": 9.223681231119232e-06, "loss": 0.6342, "step": 2501 }, { "epoch": 0.2, "grad_norm": 4.544834977506785, "learning_rate": 9.222972619214745e-06, "loss": 1.0556, "step": 2502 }, { "epoch": 0.2, "grad_norm": 5.257132580663881, "learning_rate": 9.222263711298256e-06, "loss": 1.2329, "step": 2503 }, { "epoch": 0.2, "grad_norm": 2.952517170894928, "learning_rate": 9.221554507419455e-06, "loss": 0.4574, "step": 2504 }, { "epoch": 0.2, "grad_norm": 5.894079523365917, "learning_rate": 9.220845007628055e-06, "loss": 0.94, "step": 2505 }, { "epoch": 0.2, "grad_norm": 3.166460250726755, "learning_rate": 9.22013521197379e-06, "loss": 0.8148, "step": 2506 }, { "epoch": 0.2, "grad_norm": 2.795346323399248, "learning_rate": 9.219425120506414e-06, "loss": 0.6581, "step": 2507 }, { "epoch": 0.2, "grad_norm": 3.856522992182047, "learning_rate": 9.218714733275698e-06, "loss": 0.8704, "step": 2508 }, { "epoch": 0.21, "grad_norm": 5.514542570030826, "learning_rate": 9.21800405033144e-06, "loss": 1.3371, "step": 2509 }, { "epoch": 0.21, "grad_norm": 4.5945254947430785, "learning_rate": 9.217293071723455e-06, "loss": 1.136, "step": 2510 }, { "epoch": 0.21, "grad_norm": 3.7277842663523812, "learning_rate": 9.216581797501578e-06, "loss": 0.686, "step": 2511 }, { "epoch": 0.21, "grad_norm": 4.369640029754952, "learning_rate": 9.215870227715669e-06, "loss": 1.2206, "step": 2512 }, { "epoch": 0.21, "grad_norm": 4.127736755897348, "learning_rate": 9.215158362415604e-06, "loss": 0.8052, "step": 2513 }, { "epoch": 0.21, "grad_norm": 4.604555276458949, "learning_rate": 9.21444620165128e-06, "loss": 1.0992, "step": 2514 }, { "epoch": 0.21, "grad_norm": 4.653818385651074, "learning_rate": 9.213733745472623e-06, "loss": 0.9455, "step": 2515 }, { "epoch": 0.21, "grad_norm": 2.9787448891704096, "learning_rate": 9.213020993929566e-06, "loss": 0.6354, "step": 2516 }, { "epoch": 0.21, "grad_norm": 2.8866749246671595, "learning_rate": 9.212307947072074e-06, "loss": 0.541, "step": 2517 }, { "epoch": 0.21, "grad_norm": 6.339381202051915, "learning_rate": 9.211594604950127e-06, "loss": 1.2898, "step": 2518 }, { "epoch": 0.21, "grad_norm": 3.961553529245296, "learning_rate": 9.210880967613724e-06, "loss": 1.0144, "step": 2519 }, { "epoch": 0.21, "grad_norm": 5.234675316545422, "learning_rate": 9.210167035112894e-06, "loss": 1.2231, "step": 2520 }, { "epoch": 0.21, "grad_norm": 5.358216145490436, "learning_rate": 9.209452807497677e-06, "loss": 1.1559, "step": 2521 }, { "epoch": 0.21, "grad_norm": 3.5686683887336494, "learning_rate": 9.208738284818138e-06, "loss": 0.7985, "step": 2522 }, { "epoch": 0.21, "grad_norm": 1.9181533771460326, "learning_rate": 9.20802346712436e-06, "loss": 0.4145, "step": 2523 }, { "epoch": 0.21, "grad_norm": 4.231466180884634, "learning_rate": 9.20730835446645e-06, "loss": 0.9182, "step": 2524 }, { "epoch": 0.21, "grad_norm": 3.7500185297541964, "learning_rate": 9.206592946894538e-06, "loss": 0.6986, "step": 2525 }, { "epoch": 0.21, "grad_norm": 4.414215285507461, "learning_rate": 9.205877244458765e-06, "loss": 0.6744, "step": 2526 }, { "epoch": 0.21, "grad_norm": 2.395880282392656, "learning_rate": 9.205161247209303e-06, "loss": 0.317, "step": 2527 }, { "epoch": 0.21, "grad_norm": 4.427669095406331, "learning_rate": 9.204444955196337e-06, "loss": 0.8707, "step": 2528 }, { "epoch": 0.21, "grad_norm": 3.7214519772571037, "learning_rate": 9.203728368470077e-06, "loss": 0.8583, "step": 2529 }, { "epoch": 0.21, "grad_norm": 4.423959878964401, "learning_rate": 9.203011487080755e-06, "loss": 0.5747, "step": 2530 }, { "epoch": 0.21, "grad_norm": 2.98099678270369, "learning_rate": 9.202294311078618e-06, "loss": 0.7567, "step": 2531 }, { "epoch": 0.21, "grad_norm": 3.4053424148101112, "learning_rate": 9.201576840513939e-06, "loss": 0.737, "step": 2532 }, { "epoch": 0.21, "grad_norm": 2.3427163106192985, "learning_rate": 9.200859075437008e-06, "loss": 0.6799, "step": 2533 }, { "epoch": 0.21, "grad_norm": 4.32925968832063, "learning_rate": 9.200141015898138e-06, "loss": 1.209, "step": 2534 }, { "epoch": 0.21, "grad_norm": 4.265874001661272, "learning_rate": 9.199422661947662e-06, "loss": 0.8002, "step": 2535 }, { "epoch": 0.21, "grad_norm": 3.08560714492616, "learning_rate": 9.198704013635934e-06, "loss": 0.7652, "step": 2536 }, { "epoch": 0.21, "grad_norm": 2.8415414566991486, "learning_rate": 9.197985071013326e-06, "loss": 0.7683, "step": 2537 }, { "epoch": 0.21, "grad_norm": 3.849674167020621, "learning_rate": 9.197265834130235e-06, "loss": 0.8824, "step": 2538 }, { "epoch": 0.21, "grad_norm": 3.2292221455199654, "learning_rate": 9.196546303037077e-06, "loss": 0.9428, "step": 2539 }, { "epoch": 0.21, "grad_norm": 3.3977499377383547, "learning_rate": 9.195826477784286e-06, "loss": 0.8224, "step": 2540 }, { "epoch": 0.21, "grad_norm": 5.407183252962648, "learning_rate": 9.19510635842232e-06, "loss": 1.0697, "step": 2541 }, { "epoch": 0.21, "grad_norm": 4.555713809852212, "learning_rate": 9.194385945001652e-06, "loss": 1.1947, "step": 2542 }, { "epoch": 0.21, "grad_norm": 2.5305475089934677, "learning_rate": 9.193665237572785e-06, "loss": 0.6139, "step": 2543 }, { "epoch": 0.21, "grad_norm": 2.9543541968597733, "learning_rate": 9.192944236186237e-06, "loss": 0.5635, "step": 2544 }, { "epoch": 0.21, "grad_norm": 3.804833370830699, "learning_rate": 9.192222940892543e-06, "loss": 0.7743, "step": 2545 }, { "epoch": 0.21, "grad_norm": 4.051494233217439, "learning_rate": 9.191501351742269e-06, "loss": 0.6582, "step": 2546 }, { "epoch": 0.21, "grad_norm": 4.79343864325062, "learning_rate": 9.19077946878599e-06, "loss": 1.1509, "step": 2547 }, { "epoch": 0.21, "grad_norm": 6.22126397598874, "learning_rate": 9.190057292074308e-06, "loss": 1.5538, "step": 2548 }, { "epoch": 0.21, "grad_norm": 4.317098774130853, "learning_rate": 9.189334821657846e-06, "loss": 0.6657, "step": 2549 }, { "epoch": 0.21, "grad_norm": 2.7015628351357672, "learning_rate": 9.188612057587246e-06, "loss": 0.6748, "step": 2550 }, { "epoch": 0.21, "grad_norm": 2.6132826447826805, "learning_rate": 9.187888999913166e-06, "loss": 0.7137, "step": 2551 }, { "epoch": 0.21, "grad_norm": 3.2849372692578647, "learning_rate": 9.187165648686296e-06, "loss": 0.8883, "step": 2552 }, { "epoch": 0.21, "grad_norm": 1.7770415993893534, "learning_rate": 9.186442003957337e-06, "loss": 0.4326, "step": 2553 }, { "epoch": 0.21, "grad_norm": 4.206890972843402, "learning_rate": 9.185718065777011e-06, "loss": 1.0782, "step": 2554 }, { "epoch": 0.21, "grad_norm": 3.855430118818454, "learning_rate": 9.184993834196065e-06, "loss": 0.9108, "step": 2555 }, { "epoch": 0.21, "grad_norm": 4.224854598296555, "learning_rate": 9.184269309265266e-06, "loss": 0.9526, "step": 2556 }, { "epoch": 0.21, "grad_norm": 3.649110311640722, "learning_rate": 9.183544491035396e-06, "loss": 0.5603, "step": 2557 }, { "epoch": 0.21, "grad_norm": 2.6275348993531513, "learning_rate": 9.182819379557266e-06, "loss": 0.634, "step": 2558 }, { "epoch": 0.21, "grad_norm": 3.656912091187971, "learning_rate": 9.1820939748817e-06, "loss": 0.8573, "step": 2559 }, { "epoch": 0.21, "grad_norm": 3.174479933286031, "learning_rate": 9.181368277059548e-06, "loss": 0.6829, "step": 2560 }, { "epoch": 0.21, "grad_norm": 2.5872657569719784, "learning_rate": 9.180642286141678e-06, "loss": 0.3411, "step": 2561 }, { "epoch": 0.21, "grad_norm": 3.789949702148944, "learning_rate": 9.179916002178976e-06, "loss": 0.6619, "step": 2562 }, { "epoch": 0.21, "grad_norm": 4.534928294722872, "learning_rate": 9.179189425222354e-06, "loss": 0.8082, "step": 2563 }, { "epoch": 0.21, "grad_norm": 4.851233261542717, "learning_rate": 9.178462555322742e-06, "loss": 1.2742, "step": 2564 }, { "epoch": 0.21, "grad_norm": 3.1864289822875618, "learning_rate": 9.177735392531088e-06, "loss": 0.6699, "step": 2565 }, { "epoch": 0.21, "grad_norm": 3.5049285283905367, "learning_rate": 9.177007936898365e-06, "loss": 0.804, "step": 2566 }, { "epoch": 0.21, "grad_norm": 4.372758267623915, "learning_rate": 9.176280188475565e-06, "loss": 0.8771, "step": 2567 }, { "epoch": 0.21, "grad_norm": 3.0264069966642824, "learning_rate": 9.175552147313698e-06, "loss": 0.458, "step": 2568 }, { "epoch": 0.21, "grad_norm": 4.921229189159816, "learning_rate": 9.174823813463799e-06, "loss": 1.0942, "step": 2569 }, { "epoch": 0.21, "grad_norm": 3.393971440135176, "learning_rate": 9.174095186976916e-06, "loss": 0.8405, "step": 2570 }, { "epoch": 0.21, "grad_norm": 5.680965342339284, "learning_rate": 9.17336626790413e-06, "loss": 1.5422, "step": 2571 }, { "epoch": 0.21, "grad_norm": 2.681309457351905, "learning_rate": 9.172637056296529e-06, "loss": 0.7138, "step": 2572 }, { "epoch": 0.21, "grad_norm": 5.449514166310241, "learning_rate": 9.17190755220523e-06, "loss": 1.0039, "step": 2573 }, { "epoch": 0.21, "grad_norm": 5.251873711827825, "learning_rate": 9.17117775568137e-06, "loss": 0.8907, "step": 2574 }, { "epoch": 0.21, "grad_norm": 3.1650591046235372, "learning_rate": 9.170447666776101e-06, "loss": 0.5766, "step": 2575 }, { "epoch": 0.21, "grad_norm": 3.4827679981366515, "learning_rate": 9.1697172855406e-06, "loss": 0.5803, "step": 2576 }, { "epoch": 0.21, "grad_norm": 4.405827340101964, "learning_rate": 9.168986612026063e-06, "loss": 1.0401, "step": 2577 }, { "epoch": 0.21, "grad_norm": 4.108557485440849, "learning_rate": 9.16825564628371e-06, "loss": 0.9864, "step": 2578 }, { "epoch": 0.21, "grad_norm": 4.72691957372029, "learning_rate": 9.167524388364775e-06, "loss": 1.2002, "step": 2579 }, { "epoch": 0.21, "grad_norm": 3.206603025302393, "learning_rate": 9.166792838320517e-06, "loss": 0.6778, "step": 2580 }, { "epoch": 0.21, "grad_norm": 2.857778089354351, "learning_rate": 9.166060996202218e-06, "loss": 0.5272, "step": 2581 }, { "epoch": 0.21, "grad_norm": 3.0259298682451554, "learning_rate": 9.165328862061172e-06, "loss": 0.5766, "step": 2582 }, { "epoch": 0.21, "grad_norm": 3.414197217782002, "learning_rate": 9.164596435948699e-06, "loss": 1.0155, "step": 2583 }, { "epoch": 0.21, "grad_norm": 4.908278810961928, "learning_rate": 9.163863717916142e-06, "loss": 1.0146, "step": 2584 }, { "epoch": 0.21, "grad_norm": 3.712167202066841, "learning_rate": 9.163130708014858e-06, "loss": 0.5501, "step": 2585 }, { "epoch": 0.21, "grad_norm": 3.0649722491996014, "learning_rate": 9.16239740629623e-06, "loss": 0.5382, "step": 2586 }, { "epoch": 0.21, "grad_norm": 2.3574318547925315, "learning_rate": 9.16166381281166e-06, "loss": 0.2507, "step": 2587 }, { "epoch": 0.21, "grad_norm": 4.015733310555574, "learning_rate": 9.160929927612567e-06, "loss": 1.1375, "step": 2588 }, { "epoch": 0.21, "grad_norm": 2.1669045648508427, "learning_rate": 9.160195750750396e-06, "loss": 0.3214, "step": 2589 }, { "epoch": 0.21, "grad_norm": 5.127864108182433, "learning_rate": 9.159461282276605e-06, "loss": 1.0092, "step": 2590 }, { "epoch": 0.21, "grad_norm": 2.848476885058125, "learning_rate": 9.158726522242684e-06, "loss": 0.6853, "step": 2591 }, { "epoch": 0.21, "grad_norm": 2.3804941378319917, "learning_rate": 9.15799147070013e-06, "loss": 0.4897, "step": 2592 }, { "epoch": 0.21, "grad_norm": 4.053977480378836, "learning_rate": 9.157256127700472e-06, "loss": 0.8833, "step": 2593 }, { "epoch": 0.21, "grad_norm": 3.75654008244524, "learning_rate": 9.156520493295249e-06, "loss": 0.7025, "step": 2594 }, { "epoch": 0.21, "grad_norm": 3.2543139923423734, "learning_rate": 9.15578456753603e-06, "loss": 0.8477, "step": 2595 }, { "epoch": 0.21, "grad_norm": 2.4459278163166998, "learning_rate": 9.155048350474398e-06, "loss": 0.4061, "step": 2596 }, { "epoch": 0.21, "grad_norm": 3.4250556753826427, "learning_rate": 9.15431184216196e-06, "loss": 0.559, "step": 2597 }, { "epoch": 0.21, "grad_norm": 2.7293760100346875, "learning_rate": 9.153575042650342e-06, "loss": 0.4459, "step": 2598 }, { "epoch": 0.21, "grad_norm": 3.88752240281919, "learning_rate": 9.15283795199119e-06, "loss": 0.702, "step": 2599 }, { "epoch": 0.21, "grad_norm": 4.850180234109402, "learning_rate": 9.152100570236172e-06, "loss": 1.0422, "step": 2600 }, { "epoch": 0.21, "grad_norm": 3.1384254357693853, "learning_rate": 9.15136289743697e-06, "loss": 1.0971, "step": 2601 }, { "epoch": 0.21, "grad_norm": 3.1876046760275427, "learning_rate": 9.150624933645297e-06, "loss": 0.9115, "step": 2602 }, { "epoch": 0.21, "grad_norm": 3.909756335673938, "learning_rate": 9.149886678912883e-06, "loss": 0.7705, "step": 2603 }, { "epoch": 0.21, "grad_norm": 4.100639887160335, "learning_rate": 9.14914813329147e-06, "loss": 0.8299, "step": 2604 }, { "epoch": 0.21, "grad_norm": 3.270330154313572, "learning_rate": 9.148409296832832e-06, "loss": 0.9597, "step": 2605 }, { "epoch": 0.21, "grad_norm": 4.054076412924293, "learning_rate": 9.147670169588754e-06, "loss": 1.1523, "step": 2606 }, { "epoch": 0.21, "grad_norm": 3.716632979901013, "learning_rate": 9.14693075161105e-06, "loss": 1.0254, "step": 2607 }, { "epoch": 0.21, "grad_norm": 3.6393434225559393, "learning_rate": 9.146191042951546e-06, "loss": 0.7487, "step": 2608 }, { "epoch": 0.21, "grad_norm": 3.467533295509262, "learning_rate": 9.145451043662095e-06, "loss": 0.7654, "step": 2609 }, { "epoch": 0.21, "grad_norm": 2.3948877468965932, "learning_rate": 9.144710753794567e-06, "loss": 0.3971, "step": 2610 }, { "epoch": 0.21, "grad_norm": 2.4801815731732444, "learning_rate": 9.143970173400853e-06, "loss": 0.3545, "step": 2611 }, { "epoch": 0.21, "grad_norm": 4.6644744590683365, "learning_rate": 9.143229302532866e-06, "loss": 1.3814, "step": 2612 }, { "epoch": 0.21, "grad_norm": 4.417698106094416, "learning_rate": 9.142488141242534e-06, "loss": 1.0687, "step": 2613 }, { "epoch": 0.21, "grad_norm": 3.8191416836094914, "learning_rate": 9.141746689581811e-06, "loss": 0.8562, "step": 2614 }, { "epoch": 0.21, "grad_norm": 2.273085690695707, "learning_rate": 9.141004947602672e-06, "loss": 0.5464, "step": 2615 }, { "epoch": 0.21, "grad_norm": 3.001040918183898, "learning_rate": 9.140262915357107e-06, "loss": 0.5869, "step": 2616 }, { "epoch": 0.21, "grad_norm": 4.510086130850804, "learning_rate": 9.139520592897131e-06, "loss": 1.0375, "step": 2617 }, { "epoch": 0.21, "grad_norm": 3.0965008392075584, "learning_rate": 9.138777980274776e-06, "loss": 0.564, "step": 2618 }, { "epoch": 0.21, "grad_norm": 2.2354925236562706, "learning_rate": 9.138035077542096e-06, "loss": 0.5104, "step": 2619 }, { "epoch": 0.21, "grad_norm": 5.003837214181366, "learning_rate": 9.137291884751165e-06, "loss": 1.6036, "step": 2620 }, { "epoch": 0.21, "grad_norm": 4.132618628152957, "learning_rate": 9.13654840195408e-06, "loss": 0.9808, "step": 2621 }, { "epoch": 0.21, "grad_norm": 4.189334795046342, "learning_rate": 9.135804629202955e-06, "loss": 0.7809, "step": 2622 }, { "epoch": 0.21, "grad_norm": 5.956989408432872, "learning_rate": 9.135060566549924e-06, "loss": 1.608, "step": 2623 }, { "epoch": 0.21, "grad_norm": 3.4074585256132326, "learning_rate": 9.134316214047144e-06, "loss": 0.3921, "step": 2624 }, { "epoch": 0.21, "grad_norm": 4.742860665175202, "learning_rate": 9.133571571746786e-06, "loss": 1.0199, "step": 2625 }, { "epoch": 0.21, "grad_norm": 4.452758980350587, "learning_rate": 9.132826639701055e-06, "loss": 0.8877, "step": 2626 }, { "epoch": 0.21, "grad_norm": 3.949992256628996, "learning_rate": 9.13208141796216e-06, "loss": 0.7818, "step": 2627 }, { "epoch": 0.21, "grad_norm": 2.4071015775853146, "learning_rate": 9.13133590658234e-06, "loss": 0.4309, "step": 2628 }, { "epoch": 0.21, "grad_norm": 3.843805103301418, "learning_rate": 9.130590105613854e-06, "loss": 0.7782, "step": 2629 }, { "epoch": 0.21, "grad_norm": 4.859908067407454, "learning_rate": 9.129844015108978e-06, "loss": 1.06, "step": 2630 }, { "epoch": 0.22, "grad_norm": 5.143641660915089, "learning_rate": 9.12909763512001e-06, "loss": 1.1174, "step": 2631 }, { "epoch": 0.22, "grad_norm": 4.0003721523478255, "learning_rate": 9.128350965699267e-06, "loss": 1.1409, "step": 2632 }, { "epoch": 0.22, "grad_norm": 2.3123734082883107, "learning_rate": 9.127604006899088e-06, "loss": 0.6201, "step": 2633 }, { "epoch": 0.22, "grad_norm": 3.8886274054005017, "learning_rate": 9.126856758771832e-06, "loss": 0.9564, "step": 2634 }, { "epoch": 0.22, "grad_norm": 3.363089925833701, "learning_rate": 9.126109221369877e-06, "loss": 0.8048, "step": 2635 }, { "epoch": 0.22, "grad_norm": 2.597459332169226, "learning_rate": 9.125361394745621e-06, "loss": 0.5441, "step": 2636 }, { "epoch": 0.22, "grad_norm": 3.121212674843806, "learning_rate": 9.124613278951486e-06, "loss": 0.6559, "step": 2637 }, { "epoch": 0.22, "grad_norm": 4.34419329799521, "learning_rate": 9.12386487403991e-06, "loss": 0.8502, "step": 2638 }, { "epoch": 0.22, "grad_norm": 2.4451716397751406, "learning_rate": 9.123116180063356e-06, "loss": 0.5108, "step": 2639 }, { "epoch": 0.22, "grad_norm": 4.473863821303688, "learning_rate": 9.1223671970743e-06, "loss": 0.9417, "step": 2640 }, { "epoch": 0.22, "grad_norm": 3.6621722801899974, "learning_rate": 9.121617925125244e-06, "loss": 0.8106, "step": 2641 }, { "epoch": 0.22, "grad_norm": 3.6666378806199424, "learning_rate": 9.12086836426871e-06, "loss": 0.932, "step": 2642 }, { "epoch": 0.22, "grad_norm": 1.4977537563590089, "learning_rate": 9.120118514557235e-06, "loss": 0.2099, "step": 2643 }, { "epoch": 0.22, "grad_norm": 4.113942022339024, "learning_rate": 9.119368376043384e-06, "loss": 0.774, "step": 2644 }, { "epoch": 0.22, "grad_norm": 4.816855066586145, "learning_rate": 9.118617948779738e-06, "loss": 1.1958, "step": 2645 }, { "epoch": 0.22, "grad_norm": 4.335000089257285, "learning_rate": 9.117867232818897e-06, "loss": 1.0147, "step": 2646 }, { "epoch": 0.22, "grad_norm": 4.171377852877847, "learning_rate": 9.117116228213485e-06, "loss": 0.9544, "step": 2647 }, { "epoch": 0.22, "grad_norm": 5.662025483828039, "learning_rate": 9.116364935016144e-06, "loss": 1.0558, "step": 2648 }, { "epoch": 0.22, "grad_norm": 3.5688211631492845, "learning_rate": 9.115613353279533e-06, "loss": 0.6812, "step": 2649 }, { "epoch": 0.22, "grad_norm": 4.267943032312493, "learning_rate": 9.11486148305634e-06, "loss": 1.248, "step": 2650 }, { "epoch": 0.22, "grad_norm": 4.542385422783675, "learning_rate": 9.114109324399263e-06, "loss": 1.3974, "step": 2651 }, { "epoch": 0.22, "grad_norm": 3.4433326699791102, "learning_rate": 9.113356877361027e-06, "loss": 0.4743, "step": 2652 }, { "epoch": 0.22, "grad_norm": 3.6224171332006985, "learning_rate": 9.112604141994376e-06, "loss": 0.7523, "step": 2653 }, { "epoch": 0.22, "grad_norm": 3.3394163890362543, "learning_rate": 9.111851118352074e-06, "loss": 0.6131, "step": 2654 }, { "epoch": 0.22, "grad_norm": 3.7817423764246874, "learning_rate": 9.111097806486901e-06, "loss": 1.0332, "step": 2655 }, { "epoch": 0.22, "grad_norm": 2.691615566474534, "learning_rate": 9.110344206451665e-06, "loss": 0.697, "step": 2656 }, { "epoch": 0.22, "grad_norm": 3.4666547510013435, "learning_rate": 9.109590318299189e-06, "loss": 0.9713, "step": 2657 }, { "epoch": 0.22, "grad_norm": 4.537850282404029, "learning_rate": 9.108836142082316e-06, "loss": 1.2781, "step": 2658 }, { "epoch": 0.22, "grad_norm": 4.533021480613041, "learning_rate": 9.108081677853911e-06, "loss": 1.2023, "step": 2659 }, { "epoch": 0.22, "grad_norm": 0.7972539953327008, "learning_rate": 9.107326925666857e-06, "loss": 0.1528, "step": 2660 }, { "epoch": 0.22, "grad_norm": 3.7799263906140883, "learning_rate": 9.106571885574062e-06, "loss": 0.9498, "step": 2661 }, { "epoch": 0.22, "grad_norm": 4.73423685102641, "learning_rate": 9.105816557628451e-06, "loss": 0.9331, "step": 2662 }, { "epoch": 0.22, "grad_norm": 4.762462442085408, "learning_rate": 9.105060941882966e-06, "loss": 1.0003, "step": 2663 }, { "epoch": 0.22, "grad_norm": 4.012753402102101, "learning_rate": 9.104305038390575e-06, "loss": 0.6783, "step": 2664 }, { "epoch": 0.22, "grad_norm": 4.68841305451672, "learning_rate": 9.103548847204263e-06, "loss": 1.2537, "step": 2665 }, { "epoch": 0.22, "grad_norm": 1.846154686958786, "learning_rate": 9.102792368377036e-06, "loss": 0.3033, "step": 2666 }, { "epoch": 0.22, "grad_norm": 2.785629135154621, "learning_rate": 9.102035601961919e-06, "loss": 0.5163, "step": 2667 }, { "epoch": 0.22, "grad_norm": 3.528580565267492, "learning_rate": 9.101278548011959e-06, "loss": 0.8711, "step": 2668 }, { "epoch": 0.22, "grad_norm": 3.3211863555450694, "learning_rate": 9.100521206580219e-06, "loss": 0.6148, "step": 2669 }, { "epoch": 0.22, "grad_norm": 5.636657691789844, "learning_rate": 9.099763577719788e-06, "loss": 1.4335, "step": 2670 }, { "epoch": 0.22, "grad_norm": 5.589107135519192, "learning_rate": 9.099005661483776e-06, "loss": 1.2736, "step": 2671 }, { "epoch": 0.22, "grad_norm": 4.473990332404505, "learning_rate": 9.098247457925304e-06, "loss": 0.9545, "step": 2672 }, { "epoch": 0.22, "grad_norm": 3.984610233768719, "learning_rate": 9.09748896709752e-06, "loss": 0.5687, "step": 2673 }, { "epoch": 0.22, "grad_norm": 6.398528093033693, "learning_rate": 9.096730189053594e-06, "loss": 1.6897, "step": 2674 }, { "epoch": 0.22, "grad_norm": 3.561569204175784, "learning_rate": 9.09597112384671e-06, "loss": 0.7358, "step": 2675 }, { "epoch": 0.22, "grad_norm": 6.1036865093347785, "learning_rate": 9.095211771530074e-06, "loss": 1.1493, "step": 2676 }, { "epoch": 0.22, "grad_norm": 1.3760146537329225, "learning_rate": 9.094452132156917e-06, "loss": 0.2076, "step": 2677 }, { "epoch": 0.22, "grad_norm": 4.098394907191026, "learning_rate": 9.093692205780485e-06, "loss": 0.8469, "step": 2678 }, { "epoch": 0.22, "grad_norm": 5.467224924945489, "learning_rate": 9.092931992454044e-06, "loss": 1.1177, "step": 2679 }, { "epoch": 0.22, "grad_norm": 4.06386919862057, "learning_rate": 9.092171492230883e-06, "loss": 0.9505, "step": 2680 }, { "epoch": 0.22, "grad_norm": 2.5510675849780027, "learning_rate": 9.091410705164312e-06, "loss": 0.4873, "step": 2681 }, { "epoch": 0.22, "grad_norm": 5.255174006776468, "learning_rate": 9.090649631307653e-06, "loss": 1.1771, "step": 2682 }, { "epoch": 0.22, "grad_norm": 4.599542010746827, "learning_rate": 9.08988827071426e-06, "loss": 0.8635, "step": 2683 }, { "epoch": 0.22, "grad_norm": 5.673085770773519, "learning_rate": 9.089126623437496e-06, "loss": 1.5034, "step": 2684 }, { "epoch": 0.22, "grad_norm": 4.117986430311266, "learning_rate": 9.088364689530753e-06, "loss": 1.0058, "step": 2685 }, { "epoch": 0.22, "grad_norm": 2.5266863984789554, "learning_rate": 9.087602469047438e-06, "loss": 0.4884, "step": 2686 }, { "epoch": 0.22, "grad_norm": 3.577416855872609, "learning_rate": 9.08683996204098e-06, "loss": 0.9433, "step": 2687 }, { "epoch": 0.22, "grad_norm": 4.629953813099042, "learning_rate": 9.086077168564825e-06, "loss": 1.1885, "step": 2688 }, { "epoch": 0.22, "grad_norm": 2.596494599376898, "learning_rate": 9.085314088672443e-06, "loss": 0.797, "step": 2689 }, { "epoch": 0.22, "grad_norm": 5.020907218922748, "learning_rate": 9.084550722417324e-06, "loss": 1.3782, "step": 2690 }, { "epoch": 0.22, "grad_norm": 4.125555556634683, "learning_rate": 9.083787069852976e-06, "loss": 0.8511, "step": 2691 }, { "epoch": 0.22, "grad_norm": 3.513382843467977, "learning_rate": 9.083023131032926e-06, "loss": 0.6633, "step": 2692 }, { "epoch": 0.22, "grad_norm": 2.869475350461378, "learning_rate": 9.082258906010724e-06, "loss": 0.7274, "step": 2693 }, { "epoch": 0.22, "grad_norm": 5.066640667462265, "learning_rate": 9.081494394839937e-06, "loss": 0.8265, "step": 2694 }, { "epoch": 0.22, "grad_norm": 2.9031623988993083, "learning_rate": 9.080729597574159e-06, "loss": 0.7738, "step": 2695 }, { "epoch": 0.22, "grad_norm": 5.210771030915046, "learning_rate": 9.079964514266993e-06, "loss": 1.0799, "step": 2696 }, { "epoch": 0.22, "grad_norm": 3.746004952544171, "learning_rate": 9.079199144972072e-06, "loss": 0.8076, "step": 2697 }, { "epoch": 0.22, "grad_norm": 5.211666453404126, "learning_rate": 9.078433489743044e-06, "loss": 0.8715, "step": 2698 }, { "epoch": 0.22, "grad_norm": 3.725053966461781, "learning_rate": 9.077667548633576e-06, "loss": 0.7972, "step": 2699 }, { "epoch": 0.22, "grad_norm": 2.9799110669409408, "learning_rate": 9.07690132169736e-06, "loss": 0.6411, "step": 2700 }, { "epoch": 0.22, "grad_norm": 4.3679873723537055, "learning_rate": 9.076134808988104e-06, "loss": 1.417, "step": 2701 }, { "epoch": 0.22, "grad_norm": 2.1280222277258822, "learning_rate": 9.075368010559538e-06, "loss": 0.2649, "step": 2702 }, { "epoch": 0.22, "grad_norm": 3.6689600488903578, "learning_rate": 9.07460092646541e-06, "loss": 0.9392, "step": 2703 }, { "epoch": 0.22, "grad_norm": 4.193899358666189, "learning_rate": 9.073833556759489e-06, "loss": 0.801, "step": 2704 }, { "epoch": 0.22, "grad_norm": 5.847355714154215, "learning_rate": 9.073065901495565e-06, "loss": 1.1343, "step": 2705 }, { "epoch": 0.22, "grad_norm": 3.2529862008053017, "learning_rate": 9.072297960727449e-06, "loss": 0.7906, "step": 2706 }, { "epoch": 0.22, "grad_norm": 2.6042894685662863, "learning_rate": 9.071529734508968e-06, "loss": 0.704, "step": 2707 }, { "epoch": 0.22, "grad_norm": 2.9368839985787356, "learning_rate": 9.070761222893972e-06, "loss": 0.5706, "step": 2708 }, { "epoch": 0.22, "grad_norm": 3.7028095547180655, "learning_rate": 9.06999242593633e-06, "loss": 0.8101, "step": 2709 }, { "epoch": 0.22, "grad_norm": 3.8895209253771186, "learning_rate": 9.06922334368993e-06, "loss": 0.9076, "step": 2710 }, { "epoch": 0.22, "grad_norm": 5.513508298678548, "learning_rate": 9.068453976208685e-06, "loss": 1.2225, "step": 2711 }, { "epoch": 0.22, "grad_norm": 2.7300236102942743, "learning_rate": 9.067684323546522e-06, "loss": 0.5809, "step": 2712 }, { "epoch": 0.22, "grad_norm": 2.737520002700896, "learning_rate": 9.066914385757391e-06, "loss": 0.5947, "step": 2713 }, { "epoch": 0.22, "grad_norm": 3.500085553474255, "learning_rate": 9.066144162895259e-06, "loss": 0.5553, "step": 2714 }, { "epoch": 0.22, "grad_norm": 5.173765965653176, "learning_rate": 9.065373655014118e-06, "loss": 0.9426, "step": 2715 }, { "epoch": 0.22, "grad_norm": 3.0139284654348866, "learning_rate": 9.064602862167978e-06, "loss": 0.9988, "step": 2716 }, { "epoch": 0.22, "grad_norm": 1.0631356101532043, "learning_rate": 9.063831784410864e-06, "loss": 0.233, "step": 2717 }, { "epoch": 0.22, "grad_norm": 3.3993921934621922, "learning_rate": 9.06306042179683e-06, "loss": 0.6685, "step": 2718 }, { "epoch": 0.22, "grad_norm": 3.032180623334634, "learning_rate": 9.06228877437994e-06, "loss": 0.5603, "step": 2719 }, { "epoch": 0.22, "grad_norm": 5.139258586810135, "learning_rate": 9.061516842214289e-06, "loss": 1.2063, "step": 2720 }, { "epoch": 0.22, "grad_norm": 4.178060190654128, "learning_rate": 9.060744625353981e-06, "loss": 1.0683, "step": 2721 }, { "epoch": 0.22, "grad_norm": 5.040238592557097, "learning_rate": 9.059972123853147e-06, "loss": 1.5402, "step": 2722 }, { "epoch": 0.22, "grad_norm": 4.555367406515826, "learning_rate": 9.059199337765938e-06, "loss": 1.0049, "step": 2723 }, { "epoch": 0.22, "grad_norm": 3.609256108481837, "learning_rate": 9.05842626714652e-06, "loss": 1.0136, "step": 2724 }, { "epoch": 0.22, "grad_norm": 2.6825562606320705, "learning_rate": 9.057652912049084e-06, "loss": 0.443, "step": 2725 }, { "epoch": 0.22, "grad_norm": 6.359237350151824, "learning_rate": 9.056879272527837e-06, "loss": 1.1107, "step": 2726 }, { "epoch": 0.22, "grad_norm": 3.4778726856530744, "learning_rate": 9.05610534863701e-06, "loss": 0.9013, "step": 2727 }, { "epoch": 0.22, "grad_norm": 4.147177221249099, "learning_rate": 9.05533114043085e-06, "loss": 1.0685, "step": 2728 }, { "epoch": 0.22, "grad_norm": 3.9314829273281657, "learning_rate": 9.054556647963624e-06, "loss": 1.0849, "step": 2729 }, { "epoch": 0.22, "grad_norm": 3.4745032042371378, "learning_rate": 9.053781871289624e-06, "loss": 0.7362, "step": 2730 }, { "epoch": 0.22, "grad_norm": 5.777530136657484, "learning_rate": 9.053006810463156e-06, "loss": 1.4634, "step": 2731 }, { "epoch": 0.22, "grad_norm": 4.414922717015416, "learning_rate": 9.05223146553855e-06, "loss": 1.0374, "step": 2732 }, { "epoch": 0.22, "grad_norm": 4.492283894983976, "learning_rate": 9.051455836570154e-06, "loss": 1.0562, "step": 2733 }, { "epoch": 0.22, "grad_norm": 1.8660740691181457, "learning_rate": 9.050679923612334e-06, "loss": 0.3383, "step": 2734 }, { "epoch": 0.22, "grad_norm": 4.992589365919628, "learning_rate": 9.049903726719482e-06, "loss": 1.4697, "step": 2735 }, { "epoch": 0.22, "grad_norm": 3.2639832225852814, "learning_rate": 9.049127245946003e-06, "loss": 0.5715, "step": 2736 }, { "epoch": 0.22, "grad_norm": 4.503803510304516, "learning_rate": 9.048350481346326e-06, "loss": 0.793, "step": 2737 }, { "epoch": 0.22, "grad_norm": 3.2856682173269554, "learning_rate": 9.047573432974898e-06, "loss": 0.9469, "step": 2738 }, { "epoch": 0.22, "grad_norm": 2.985965444532201, "learning_rate": 9.04679610088619e-06, "loss": 0.3888, "step": 2739 }, { "epoch": 0.22, "grad_norm": 4.433476556382269, "learning_rate": 9.046018485134684e-06, "loss": 1.0396, "step": 2740 }, { "epoch": 0.22, "grad_norm": 2.1370696305822245, "learning_rate": 9.045240585774893e-06, "loss": 0.5518, "step": 2741 }, { "epoch": 0.22, "grad_norm": 4.337055499474134, "learning_rate": 9.04446240286134e-06, "loss": 0.7675, "step": 2742 }, { "epoch": 0.22, "grad_norm": 2.000323836971652, "learning_rate": 9.043683936448576e-06, "loss": 0.4838, "step": 2743 }, { "epoch": 0.22, "grad_norm": 5.88456891213328, "learning_rate": 9.042905186591165e-06, "loss": 1.1856, "step": 2744 }, { "epoch": 0.22, "grad_norm": 3.4318484148804034, "learning_rate": 9.042126153343696e-06, "loss": 0.6447, "step": 2745 }, { "epoch": 0.22, "grad_norm": 3.4363596168029864, "learning_rate": 9.041346836760774e-06, "loss": 0.6539, "step": 2746 }, { "epoch": 0.22, "grad_norm": 2.8489378324494146, "learning_rate": 9.040567236897027e-06, "loss": 0.6987, "step": 2747 }, { "epoch": 0.22, "grad_norm": 3.333163031387533, "learning_rate": 9.039787353807101e-06, "loss": 0.8948, "step": 2748 }, { "epoch": 0.22, "grad_norm": 4.691612764942628, "learning_rate": 9.039007187545663e-06, "loss": 1.2254, "step": 2749 }, { "epoch": 0.22, "grad_norm": 4.395036206555651, "learning_rate": 9.0382267381674e-06, "loss": 1.1411, "step": 2750 }, { "epoch": 0.22, "grad_norm": 4.264749249109578, "learning_rate": 9.037446005727015e-06, "loss": 0.8549, "step": 2751 }, { "epoch": 0.22, "grad_norm": 2.3635051911212206, "learning_rate": 9.036664990279238e-06, "loss": 0.6115, "step": 2752 }, { "epoch": 0.23, "grad_norm": 2.539616537868484, "learning_rate": 9.035883691878811e-06, "loss": 0.627, "step": 2753 }, { "epoch": 0.23, "grad_norm": 3.6299450090948415, "learning_rate": 9.035102110580503e-06, "loss": 0.5744, "step": 2754 }, { "epoch": 0.23, "grad_norm": 2.660952668616124, "learning_rate": 9.034320246439099e-06, "loss": 0.5867, "step": 2755 }, { "epoch": 0.23, "grad_norm": 3.7375965940589637, "learning_rate": 9.0335380995094e-06, "loss": 0.7727, "step": 2756 }, { "epoch": 0.23, "grad_norm": 5.536191405126242, "learning_rate": 9.032755669846236e-06, "loss": 1.3356, "step": 2757 }, { "epoch": 0.23, "grad_norm": 5.17031370829871, "learning_rate": 9.03197295750445e-06, "loss": 1.2032, "step": 2758 }, { "epoch": 0.23, "grad_norm": 3.026485263691201, "learning_rate": 9.031189962538906e-06, "loss": 0.837, "step": 2759 }, { "epoch": 0.23, "grad_norm": 4.423354422955292, "learning_rate": 9.030406685004491e-06, "loss": 0.9258, "step": 2760 }, { "epoch": 0.23, "grad_norm": 3.055087678512803, "learning_rate": 9.029623124956107e-06, "loss": 0.6416, "step": 2761 }, { "epoch": 0.23, "grad_norm": 4.190891435470464, "learning_rate": 9.028839282448678e-06, "loss": 0.5773, "step": 2762 }, { "epoch": 0.23, "grad_norm": 2.819205835772208, "learning_rate": 9.02805515753715e-06, "loss": 0.6063, "step": 2763 }, { "epoch": 0.23, "grad_norm": 4.024035874945257, "learning_rate": 9.027270750276486e-06, "loss": 0.5309, "step": 2764 }, { "epoch": 0.23, "grad_norm": 2.9569835244011022, "learning_rate": 9.026486060721668e-06, "loss": 0.521, "step": 2765 }, { "epoch": 0.23, "grad_norm": 5.359550607675502, "learning_rate": 9.0257010889277e-06, "loss": 1.5101, "step": 2766 }, { "epoch": 0.23, "grad_norm": 3.8025381643603797, "learning_rate": 9.024915834949607e-06, "loss": 1.0569, "step": 2767 }, { "epoch": 0.23, "grad_norm": 4.5427223532062015, "learning_rate": 9.02413029884243e-06, "loss": 1.044, "step": 2768 }, { "epoch": 0.23, "grad_norm": 3.606930909658853, "learning_rate": 9.023344480661234e-06, "loss": 0.8584, "step": 2769 }, { "epoch": 0.23, "grad_norm": 5.4269758171149345, "learning_rate": 9.022558380461097e-06, "loss": 1.2617, "step": 2770 }, { "epoch": 0.23, "grad_norm": 3.947679373528407, "learning_rate": 9.021771998297124e-06, "loss": 0.7401, "step": 2771 }, { "epoch": 0.23, "grad_norm": 3.6036071767627855, "learning_rate": 9.020985334224437e-06, "loss": 0.987, "step": 2772 }, { "epoch": 0.23, "grad_norm": 4.958509943303449, "learning_rate": 9.020198388298179e-06, "loss": 1.4065, "step": 2773 }, { "epoch": 0.23, "grad_norm": 3.673660488009326, "learning_rate": 9.019411160573508e-06, "loss": 0.7963, "step": 2774 }, { "epoch": 0.23, "grad_norm": 3.650622270596607, "learning_rate": 9.018623651105607e-06, "loss": 0.5745, "step": 2775 }, { "epoch": 0.23, "grad_norm": 4.216886938088694, "learning_rate": 9.017835859949677e-06, "loss": 0.8929, "step": 2776 }, { "epoch": 0.23, "grad_norm": 1.8007191532303202, "learning_rate": 9.01704778716094e-06, "loss": 0.3384, "step": 2777 }, { "epoch": 0.23, "grad_norm": 3.657644897341523, "learning_rate": 9.016259432794637e-06, "loss": 0.811, "step": 2778 }, { "epoch": 0.23, "grad_norm": 3.512963732012924, "learning_rate": 9.015470796906024e-06, "loss": 1.1201, "step": 2779 }, { "epoch": 0.23, "grad_norm": 5.010832411307142, "learning_rate": 9.014681879550385e-06, "loss": 0.7525, "step": 2780 }, { "epoch": 0.23, "grad_norm": 5.486459322409384, "learning_rate": 9.013892680783016e-06, "loss": 1.0253, "step": 2781 }, { "epoch": 0.23, "grad_norm": 3.2106168850862566, "learning_rate": 9.01310320065924e-06, "loss": 0.8443, "step": 2782 }, { "epoch": 0.23, "grad_norm": 2.4301667912614193, "learning_rate": 9.012313439234395e-06, "loss": 0.637, "step": 2783 }, { "epoch": 0.23, "grad_norm": 4.212204675584937, "learning_rate": 9.011523396563839e-06, "loss": 0.8935, "step": 2784 }, { "epoch": 0.23, "grad_norm": 5.044338879716831, "learning_rate": 9.010733072702952e-06, "loss": 1.2117, "step": 2785 }, { "epoch": 0.23, "grad_norm": 2.859085867712923, "learning_rate": 9.009942467707131e-06, "loss": 0.5685, "step": 2786 }, { "epoch": 0.23, "grad_norm": 3.7258987707406455, "learning_rate": 9.009151581631795e-06, "loss": 0.8138, "step": 2787 }, { "epoch": 0.23, "grad_norm": 3.3186899853149128, "learning_rate": 9.00836041453238e-06, "loss": 0.5666, "step": 2788 }, { "epoch": 0.23, "grad_norm": 2.6209517043383737, "learning_rate": 9.007568966464345e-06, "loss": 0.6105, "step": 2789 }, { "epoch": 0.23, "grad_norm": 3.3931208874085, "learning_rate": 9.006777237483165e-06, "loss": 0.4942, "step": 2790 }, { "epoch": 0.23, "grad_norm": 4.544807705087439, "learning_rate": 9.00598522764434e-06, "loss": 0.7926, "step": 2791 }, { "epoch": 0.23, "grad_norm": 4.005000496520486, "learning_rate": 9.005192937003384e-06, "loss": 0.7464, "step": 2792 }, { "epoch": 0.23, "grad_norm": 4.872935081002712, "learning_rate": 9.004400365615834e-06, "loss": 1.3192, "step": 2793 }, { "epoch": 0.23, "grad_norm": 3.7811303859401684, "learning_rate": 9.003607513537245e-06, "loss": 0.7219, "step": 2794 }, { "epoch": 0.23, "grad_norm": 4.354983640897277, "learning_rate": 9.002814380823192e-06, "loss": 0.757, "step": 2795 }, { "epoch": 0.23, "grad_norm": 4.254025619919019, "learning_rate": 9.002020967529272e-06, "loss": 0.7183, "step": 2796 }, { "epoch": 0.23, "grad_norm": 4.057649153469898, "learning_rate": 9.0012272737111e-06, "loss": 1.0508, "step": 2797 }, { "epoch": 0.23, "grad_norm": 3.561540618546092, "learning_rate": 9.000433299424308e-06, "loss": 0.5899, "step": 2798 }, { "epoch": 0.23, "grad_norm": 3.940995656792812, "learning_rate": 8.999639044724555e-06, "loss": 0.9293, "step": 2799 }, { "epoch": 0.23, "grad_norm": 2.6829296806974505, "learning_rate": 8.998844509667508e-06, "loss": 0.6437, "step": 2800 }, { "epoch": 0.23, "grad_norm": 4.433040218782608, "learning_rate": 8.998049694308866e-06, "loss": 0.6815, "step": 2801 }, { "epoch": 0.23, "grad_norm": 3.4827796949727086, "learning_rate": 8.99725459870434e-06, "loss": 0.8859, "step": 2802 }, { "epoch": 0.23, "grad_norm": 4.9475617906007505, "learning_rate": 8.996459222909662e-06, "loss": 1.2561, "step": 2803 }, { "epoch": 0.23, "grad_norm": 2.340861492907865, "learning_rate": 8.995663566980583e-06, "loss": 0.4739, "step": 2804 }, { "epoch": 0.23, "grad_norm": 4.983847979765691, "learning_rate": 8.994867630972878e-06, "loss": 0.9867, "step": 2805 }, { "epoch": 0.23, "grad_norm": 3.4039614230101574, "learning_rate": 8.99407141494234e-06, "loss": 0.7996, "step": 2806 }, { "epoch": 0.23, "grad_norm": 5.665702582646373, "learning_rate": 8.993274918944777e-06, "loss": 1.0746, "step": 2807 }, { "epoch": 0.23, "grad_norm": 1.010574700798193, "learning_rate": 8.992478143036022e-06, "loss": 0.1999, "step": 2808 }, { "epoch": 0.23, "grad_norm": 4.811328637907539, "learning_rate": 8.991681087271922e-06, "loss": 0.9865, "step": 2809 }, { "epoch": 0.23, "grad_norm": 3.8568309546794937, "learning_rate": 8.990883751708353e-06, "loss": 0.812, "step": 2810 }, { "epoch": 0.23, "grad_norm": 4.1161826007469795, "learning_rate": 8.990086136401199e-06, "loss": 0.6223, "step": 2811 }, { "epoch": 0.23, "grad_norm": 4.456885999014682, "learning_rate": 8.989288241406371e-06, "loss": 0.8013, "step": 2812 }, { "epoch": 0.23, "grad_norm": 2.374121412700305, "learning_rate": 8.9884900667798e-06, "loss": 0.4702, "step": 2813 }, { "epoch": 0.23, "grad_norm": 3.5488691936882972, "learning_rate": 8.987691612577433e-06, "loss": 0.9326, "step": 2814 }, { "epoch": 0.23, "grad_norm": 4.828785058948604, "learning_rate": 8.986892878855238e-06, "loss": 1.2953, "step": 2815 }, { "epoch": 0.23, "grad_norm": 5.465452757305021, "learning_rate": 8.986093865669205e-06, "loss": 0.8868, "step": 2816 }, { "epoch": 0.23, "grad_norm": 4.891282308557172, "learning_rate": 8.985294573075338e-06, "loss": 1.0112, "step": 2817 }, { "epoch": 0.23, "grad_norm": 3.890800750650286, "learning_rate": 8.984495001129667e-06, "loss": 0.9078, "step": 2818 }, { "epoch": 0.23, "grad_norm": 6.05677774070146, "learning_rate": 8.983695149888234e-06, "loss": 1.4547, "step": 2819 }, { "epoch": 0.23, "grad_norm": 5.250995322790718, "learning_rate": 8.982895019407112e-06, "loss": 1.038, "step": 2820 }, { "epoch": 0.23, "grad_norm": 3.0724656980668823, "learning_rate": 8.98209460974238e-06, "loss": 0.7388, "step": 2821 }, { "epoch": 0.23, "grad_norm": 3.340758261958266, "learning_rate": 8.981293920950147e-06, "loss": 1.0972, "step": 2822 }, { "epoch": 0.23, "grad_norm": 2.462736754135804, "learning_rate": 8.980492953086535e-06, "loss": 0.5185, "step": 2823 }, { "epoch": 0.23, "grad_norm": 3.2096517402684297, "learning_rate": 8.979691706207692e-06, "loss": 0.6595, "step": 2824 }, { "epoch": 0.23, "grad_norm": 4.346508184568805, "learning_rate": 8.97889018036978e-06, "loss": 0.9841, "step": 2825 }, { "epoch": 0.23, "grad_norm": 3.848452650091313, "learning_rate": 8.978088375628983e-06, "loss": 0.7972, "step": 2826 }, { "epoch": 0.23, "grad_norm": 4.526629528918161, "learning_rate": 8.977286292041503e-06, "loss": 0.9327, "step": 2827 }, { "epoch": 0.23, "grad_norm": 2.9604609578722085, "learning_rate": 8.976483929663562e-06, "loss": 0.5696, "step": 2828 }, { "epoch": 0.23, "grad_norm": 4.259698795083026, "learning_rate": 8.975681288551405e-06, "loss": 0.8955, "step": 2829 }, { "epoch": 0.23, "grad_norm": 1.713873183280796, "learning_rate": 8.97487836876129e-06, "loss": 0.3443, "step": 2830 }, { "epoch": 0.23, "grad_norm": 2.466880037688497, "learning_rate": 8.974075170349502e-06, "loss": 0.6895, "step": 2831 }, { "epoch": 0.23, "grad_norm": 2.5967277234419393, "learning_rate": 8.973271693372338e-06, "loss": 0.411, "step": 2832 }, { "epoch": 0.23, "grad_norm": 4.029576229429126, "learning_rate": 8.972467937886122e-06, "loss": 0.733, "step": 2833 }, { "epoch": 0.23, "grad_norm": 1.9498422274837677, "learning_rate": 8.971663903947191e-06, "loss": 0.3991, "step": 2834 }, { "epoch": 0.23, "grad_norm": 2.3772068633274634, "learning_rate": 8.970859591611904e-06, "loss": 0.5426, "step": 2835 }, { "epoch": 0.23, "grad_norm": 3.5590228128457717, "learning_rate": 8.970055000936643e-06, "loss": 0.9203, "step": 2836 }, { "epoch": 0.23, "grad_norm": 3.6475066147276056, "learning_rate": 8.969250131977803e-06, "loss": 0.9788, "step": 2837 }, { "epoch": 0.23, "grad_norm": 4.114364170867909, "learning_rate": 8.968444984791801e-06, "loss": 0.7102, "step": 2838 }, { "epoch": 0.23, "grad_norm": 3.201567434821648, "learning_rate": 8.967639559435079e-06, "loss": 0.7126, "step": 2839 }, { "epoch": 0.23, "grad_norm": 3.8625761671129175, "learning_rate": 8.96683385596409e-06, "loss": 1.3315, "step": 2840 }, { "epoch": 0.23, "grad_norm": 5.57336965684408, "learning_rate": 8.966027874435313e-06, "loss": 1.3707, "step": 2841 }, { "epoch": 0.23, "grad_norm": 2.4775308239244853, "learning_rate": 8.965221614905241e-06, "loss": 0.6265, "step": 2842 }, { "epoch": 0.23, "grad_norm": 3.850705463406801, "learning_rate": 8.964415077430391e-06, "loss": 0.6261, "step": 2843 }, { "epoch": 0.23, "grad_norm": 3.021140145526666, "learning_rate": 8.963608262067296e-06, "loss": 0.4525, "step": 2844 }, { "epoch": 0.23, "grad_norm": 4.147160395871126, "learning_rate": 8.962801168872513e-06, "loss": 0.9202, "step": 2845 }, { "epoch": 0.23, "grad_norm": 1.6818190008786265, "learning_rate": 8.961993797902613e-06, "loss": 0.2499, "step": 2846 }, { "epoch": 0.23, "grad_norm": 3.281047026662951, "learning_rate": 8.961186149214191e-06, "loss": 0.8977, "step": 2847 }, { "epoch": 0.23, "grad_norm": 3.101860188112433, "learning_rate": 8.96037822286386e-06, "loss": 0.6484, "step": 2848 }, { "epoch": 0.23, "grad_norm": 2.623832576872076, "learning_rate": 8.959570018908248e-06, "loss": 0.6869, "step": 2849 }, { "epoch": 0.23, "grad_norm": 4.0541378484556665, "learning_rate": 8.958761537404012e-06, "loss": 0.7, "step": 2850 }, { "epoch": 0.23, "grad_norm": 2.779380648621428, "learning_rate": 8.957952778407822e-06, "loss": 0.6621, "step": 2851 }, { "epoch": 0.23, "grad_norm": 4.634388934331967, "learning_rate": 8.957143741976366e-06, "loss": 1.0359, "step": 2852 }, { "epoch": 0.23, "grad_norm": 3.745980430150055, "learning_rate": 8.956334428166355e-06, "loss": 1.0663, "step": 2853 }, { "epoch": 0.23, "grad_norm": 1.0527854010053523, "learning_rate": 8.95552483703452e-06, "loss": 0.1956, "step": 2854 }, { "epoch": 0.23, "grad_norm": 3.2624856816156536, "learning_rate": 8.954714968637606e-06, "loss": 0.8258, "step": 2855 }, { "epoch": 0.23, "grad_norm": 3.8966623314578803, "learning_rate": 8.953904823032384e-06, "loss": 0.9205, "step": 2856 }, { "epoch": 0.23, "grad_norm": 4.640423997502576, "learning_rate": 8.953094400275644e-06, "loss": 1.499, "step": 2857 }, { "epoch": 0.23, "grad_norm": 3.986840018375436, "learning_rate": 8.952283700424188e-06, "loss": 1.0383, "step": 2858 }, { "epoch": 0.23, "grad_norm": 4.606151699801707, "learning_rate": 8.951472723534846e-06, "loss": 1.1475, "step": 2859 }, { "epoch": 0.23, "grad_norm": 3.788270601765476, "learning_rate": 8.950661469664462e-06, "loss": 1.0722, "step": 2860 }, { "epoch": 0.23, "grad_norm": 4.836919142856838, "learning_rate": 8.949849938869904e-06, "loss": 0.9889, "step": 2861 }, { "epoch": 0.23, "grad_norm": 4.563519527237157, "learning_rate": 8.949038131208054e-06, "loss": 0.8877, "step": 2862 }, { "epoch": 0.23, "grad_norm": 4.721883252612204, "learning_rate": 8.948226046735817e-06, "loss": 0.5332, "step": 2863 }, { "epoch": 0.23, "grad_norm": 3.448798099577989, "learning_rate": 8.947413685510118e-06, "loss": 1.032, "step": 2864 }, { "epoch": 0.23, "grad_norm": 3.160199095042135, "learning_rate": 8.946601047587898e-06, "loss": 0.5859, "step": 2865 }, { "epoch": 0.23, "grad_norm": 5.415316077485991, "learning_rate": 8.94578813302612e-06, "loss": 1.6669, "step": 2866 }, { "epoch": 0.23, "grad_norm": 4.28215722241667, "learning_rate": 8.944974941881766e-06, "loss": 0.883, "step": 2867 }, { "epoch": 0.23, "grad_norm": 4.2851793502192725, "learning_rate": 8.94416147421184e-06, "loss": 0.9386, "step": 2868 }, { "epoch": 0.23, "grad_norm": 1.7798034305693526, "learning_rate": 8.943347730073355e-06, "loss": 0.3629, "step": 2869 }, { "epoch": 0.23, "grad_norm": 4.029526509580939, "learning_rate": 8.942533709523358e-06, "loss": 1.2032, "step": 2870 }, { "epoch": 0.23, "grad_norm": 1.715867193032858, "learning_rate": 8.941719412618905e-06, "loss": 0.2987, "step": 2871 }, { "epoch": 0.23, "grad_norm": 4.280487822475332, "learning_rate": 8.940904839417078e-06, "loss": 0.7824, "step": 2872 }, { "epoch": 0.23, "grad_norm": 2.987478844312189, "learning_rate": 8.94008998997497e-06, "loss": 0.7334, "step": 2873 }, { "epoch": 0.23, "grad_norm": 3.791659519165454, "learning_rate": 8.939274864349702e-06, "loss": 0.9929, "step": 2874 }, { "epoch": 0.23, "grad_norm": 7.081963403891954, "learning_rate": 8.93845946259841e-06, "loss": 1.318, "step": 2875 }, { "epoch": 0.24, "grad_norm": 3.7244126800865307, "learning_rate": 8.93764378477825e-06, "loss": 0.9154, "step": 2876 }, { "epoch": 0.24, "grad_norm": 3.4485265914698013, "learning_rate": 8.936827830946397e-06, "loss": 0.7187, "step": 2877 }, { "epoch": 0.24, "grad_norm": 3.302352810307197, "learning_rate": 8.936011601160046e-06, "loss": 0.7165, "step": 2878 }, { "epoch": 0.24, "grad_norm": 4.303497388254325, "learning_rate": 8.935195095476412e-06, "loss": 0.9351, "step": 2879 }, { "epoch": 0.24, "grad_norm": 3.9582810507637536, "learning_rate": 8.934378313952727e-06, "loss": 0.8128, "step": 2880 }, { "epoch": 0.24, "grad_norm": 2.63455467974814, "learning_rate": 8.933561256646247e-06, "loss": 0.599, "step": 2881 }, { "epoch": 0.24, "grad_norm": 3.817216225830816, "learning_rate": 8.932743923614237e-06, "loss": 0.8345, "step": 2882 }, { "epoch": 0.24, "grad_norm": 2.2788123295098903, "learning_rate": 8.931926314913998e-06, "loss": 0.495, "step": 2883 }, { "epoch": 0.24, "grad_norm": 1.9451263961915763, "learning_rate": 8.931108430602834e-06, "loss": 0.3557, "step": 2884 }, { "epoch": 0.24, "grad_norm": 3.674895514486894, "learning_rate": 8.930290270738079e-06, "loss": 0.8228, "step": 2885 }, { "epoch": 0.24, "grad_norm": 2.7542288766120713, "learning_rate": 8.929471835377078e-06, "loss": 0.4755, "step": 2886 }, { "epoch": 0.24, "grad_norm": 6.057165314865615, "learning_rate": 8.928653124577204e-06, "loss": 1.569, "step": 2887 }, { "epoch": 0.24, "grad_norm": 2.9347074343546335, "learning_rate": 8.927834138395843e-06, "loss": 0.6106, "step": 2888 }, { "epoch": 0.24, "grad_norm": 4.248265115238616, "learning_rate": 8.927014876890402e-06, "loss": 1.0259, "step": 2889 }, { "epoch": 0.24, "grad_norm": 2.6666573587433353, "learning_rate": 8.926195340118312e-06, "loss": 0.4648, "step": 2890 }, { "epoch": 0.24, "grad_norm": 4.867031223985313, "learning_rate": 8.925375528137012e-06, "loss": 1.0118, "step": 2891 }, { "epoch": 0.24, "grad_norm": 4.6026790307902745, "learning_rate": 8.924555441003973e-06, "loss": 0.8052, "step": 2892 }, { "epoch": 0.24, "grad_norm": 4.857068725782363, "learning_rate": 8.923735078776676e-06, "loss": 1.1488, "step": 2893 }, { "epoch": 0.24, "grad_norm": 2.9381131716176174, "learning_rate": 8.922914441512626e-06, "loss": 0.3417, "step": 2894 }, { "epoch": 0.24, "grad_norm": 1.4132180157242291, "learning_rate": 8.922093529269347e-06, "loss": 0.1973, "step": 2895 }, { "epoch": 0.24, "grad_norm": 3.9565676205158287, "learning_rate": 8.921272342104382e-06, "loss": 1.0563, "step": 2896 }, { "epoch": 0.24, "grad_norm": 2.9575448163406683, "learning_rate": 8.92045088007529e-06, "loss": 1.0864, "step": 2897 }, { "epoch": 0.24, "grad_norm": 3.0103830571098324, "learning_rate": 8.919629143239652e-06, "loss": 0.8211, "step": 2898 }, { "epoch": 0.24, "grad_norm": 3.6310097637805145, "learning_rate": 8.918807131655069e-06, "loss": 0.8162, "step": 2899 }, { "epoch": 0.24, "grad_norm": 3.6579277723192414, "learning_rate": 8.917984845379162e-06, "loss": 0.765, "step": 2900 }, { "epoch": 0.24, "grad_norm": 4.447466899662024, "learning_rate": 8.917162284469569e-06, "loss": 0.8084, "step": 2901 }, { "epoch": 0.24, "grad_norm": 4.345413652998398, "learning_rate": 8.916339448983945e-06, "loss": 1.2272, "step": 2902 }, { "epoch": 0.24, "grad_norm": 3.388461703716796, "learning_rate": 8.915516338979973e-06, "loss": 0.6305, "step": 2903 }, { "epoch": 0.24, "grad_norm": 2.687060236470035, "learning_rate": 8.914692954515344e-06, "loss": 0.4097, "step": 2904 }, { "epoch": 0.24, "grad_norm": 3.6100909371539154, "learning_rate": 8.913869295647777e-06, "loss": 0.7575, "step": 2905 }, { "epoch": 0.24, "grad_norm": 3.97719742428522, "learning_rate": 8.913045362435004e-06, "loss": 0.938, "step": 2906 }, { "epoch": 0.24, "grad_norm": 3.1183558522117183, "learning_rate": 8.91222115493478e-06, "loss": 0.7863, "step": 2907 }, { "epoch": 0.24, "grad_norm": 2.9833914186492243, "learning_rate": 8.911396673204881e-06, "loss": 0.5393, "step": 2908 }, { "epoch": 0.24, "grad_norm": 3.16799800442453, "learning_rate": 8.910571917303098e-06, "loss": 0.7233, "step": 2909 }, { "epoch": 0.24, "grad_norm": 3.8362192782466056, "learning_rate": 8.90974688728724e-06, "loss": 0.9734, "step": 2910 }, { "epoch": 0.24, "grad_norm": 4.6294280620231945, "learning_rate": 8.908921583215143e-06, "loss": 1.0333, "step": 2911 }, { "epoch": 0.24, "grad_norm": 3.8164965009412533, "learning_rate": 8.908096005144654e-06, "loss": 0.7351, "step": 2912 }, { "epoch": 0.24, "grad_norm": 4.405646643100329, "learning_rate": 8.907270153133643e-06, "loss": 1.1334, "step": 2913 }, { "epoch": 0.24, "grad_norm": 5.044455880720274, "learning_rate": 8.906444027239999e-06, "loss": 1.2822, "step": 2914 }, { "epoch": 0.24, "grad_norm": 3.3033739123756076, "learning_rate": 8.90561762752163e-06, "loss": 0.5727, "step": 2915 }, { "epoch": 0.24, "grad_norm": 4.408990250089634, "learning_rate": 8.90479095403646e-06, "loss": 0.7527, "step": 2916 }, { "epoch": 0.24, "grad_norm": 4.810162697001605, "learning_rate": 8.90396400684244e-06, "loss": 1.1846, "step": 2917 }, { "epoch": 0.24, "grad_norm": 5.771676233576919, "learning_rate": 8.903136785997533e-06, "loss": 1.3764, "step": 2918 }, { "epoch": 0.24, "grad_norm": 4.133198302514898, "learning_rate": 8.902309291559724e-06, "loss": 1.001, "step": 2919 }, { "epoch": 0.24, "grad_norm": 4.1451373989928895, "learning_rate": 8.901481523587017e-06, "loss": 0.9517, "step": 2920 }, { "epoch": 0.24, "grad_norm": 3.9171169131457897, "learning_rate": 8.900653482137434e-06, "loss": 0.7557, "step": 2921 }, { "epoch": 0.24, "grad_norm": 4.120715791842282, "learning_rate": 8.899825167269016e-06, "loss": 0.5538, "step": 2922 }, { "epoch": 0.24, "grad_norm": 3.0165744064583526, "learning_rate": 8.898996579039829e-06, "loss": 0.783, "step": 2923 }, { "epoch": 0.24, "grad_norm": 2.9787819731271474, "learning_rate": 8.898167717507949e-06, "loss": 0.3642, "step": 2924 }, { "epoch": 0.24, "grad_norm": 3.9979649908032515, "learning_rate": 8.897338582731476e-06, "loss": 0.694, "step": 2925 }, { "epoch": 0.24, "grad_norm": 5.465345306276673, "learning_rate": 8.896509174768528e-06, "loss": 1.2069, "step": 2926 }, { "epoch": 0.24, "grad_norm": 3.001986170825165, "learning_rate": 8.895679493677247e-06, "loss": 0.5428, "step": 2927 }, { "epoch": 0.24, "grad_norm": 2.970549933403323, "learning_rate": 8.894849539515788e-06, "loss": 0.5073, "step": 2928 }, { "epoch": 0.24, "grad_norm": 2.581485075523803, "learning_rate": 8.894019312342325e-06, "loss": 0.5156, "step": 2929 }, { "epoch": 0.24, "grad_norm": 6.758865803490577, "learning_rate": 8.893188812215057e-06, "loss": 1.1096, "step": 2930 }, { "epoch": 0.24, "grad_norm": 4.242292637061147, "learning_rate": 8.892358039192196e-06, "loss": 0.9125, "step": 2931 }, { "epoch": 0.24, "grad_norm": 2.851855379448961, "learning_rate": 8.891526993331974e-06, "loss": 0.658, "step": 2932 }, { "epoch": 0.24, "grad_norm": 2.7730340149059307, "learning_rate": 8.890695674692648e-06, "loss": 0.839, "step": 2933 }, { "epoch": 0.24, "grad_norm": 4.593971328906433, "learning_rate": 8.889864083332486e-06, "loss": 0.6572, "step": 2934 }, { "epoch": 0.24, "grad_norm": 1.509712803796424, "learning_rate": 8.889032219309781e-06, "loss": 0.2217, "step": 2935 }, { "epoch": 0.24, "grad_norm": 4.189866905519617, "learning_rate": 8.888200082682842e-06, "loss": 1.0506, "step": 2936 }, { "epoch": 0.24, "grad_norm": 5.721479794619482, "learning_rate": 8.887367673510002e-06, "loss": 1.4691, "step": 2937 }, { "epoch": 0.24, "grad_norm": 4.901042227359208, "learning_rate": 8.886534991849603e-06, "loss": 1.2325, "step": 2938 }, { "epoch": 0.24, "grad_norm": 1.6920310456009662, "learning_rate": 8.885702037760016e-06, "loss": 0.2909, "step": 2939 }, { "epoch": 0.24, "grad_norm": 3.1842414345062067, "learning_rate": 8.884868811299627e-06, "loss": 0.858, "step": 2940 }, { "epoch": 0.24, "grad_norm": 3.217988813255178, "learning_rate": 8.88403531252684e-06, "loss": 0.7523, "step": 2941 }, { "epoch": 0.24, "grad_norm": 4.585574617111233, "learning_rate": 8.883201541500082e-06, "loss": 1.0042, "step": 2942 }, { "epoch": 0.24, "grad_norm": 5.334108757662184, "learning_rate": 8.882367498277795e-06, "loss": 1.034, "step": 2943 }, { "epoch": 0.24, "grad_norm": 3.8751675188151795, "learning_rate": 8.881533182918444e-06, "loss": 0.7488, "step": 2944 }, { "epoch": 0.24, "grad_norm": 2.5142236100060154, "learning_rate": 8.880698595480509e-06, "loss": 0.6338, "step": 2945 }, { "epoch": 0.24, "grad_norm": 5.212057409312506, "learning_rate": 8.87986373602249e-06, "loss": 1.2324, "step": 2946 }, { "epoch": 0.24, "grad_norm": 4.402148710685035, "learning_rate": 8.879028604602908e-06, "loss": 1.2733, "step": 2947 }, { "epoch": 0.24, "grad_norm": 3.195936383889245, "learning_rate": 8.878193201280305e-06, "loss": 0.8655, "step": 2948 }, { "epoch": 0.24, "grad_norm": 2.733486040762455, "learning_rate": 8.877357526113234e-06, "loss": 0.5531, "step": 2949 }, { "epoch": 0.24, "grad_norm": 2.142029788504823, "learning_rate": 8.876521579160275e-06, "loss": 0.4354, "step": 2950 }, { "epoch": 0.24, "grad_norm": 4.48630056663649, "learning_rate": 8.875685360480027e-06, "loss": 1.3183, "step": 2951 }, { "epoch": 0.24, "grad_norm": 5.994076738007516, "learning_rate": 8.874848870131098e-06, "loss": 1.7964, "step": 2952 }, { "epoch": 0.24, "grad_norm": 3.7150255903944416, "learning_rate": 8.87401210817213e-06, "loss": 0.5186, "step": 2953 }, { "epoch": 0.24, "grad_norm": 2.9136979849392772, "learning_rate": 8.87317507466177e-06, "loss": 0.4953, "step": 2954 }, { "epoch": 0.24, "grad_norm": 3.3771356342933405, "learning_rate": 8.872337769658696e-06, "loss": 0.991, "step": 2955 }, { "epoch": 0.24, "grad_norm": 1.8083950829827165, "learning_rate": 8.871500193221596e-06, "loss": 0.3512, "step": 2956 }, { "epoch": 0.24, "grad_norm": 3.5684383495880447, "learning_rate": 8.87066234540918e-06, "loss": 0.7557, "step": 2957 }, { "epoch": 0.24, "grad_norm": 3.452996042800122, "learning_rate": 8.86982422628018e-06, "loss": 0.9597, "step": 2958 }, { "epoch": 0.24, "grad_norm": 2.9979832047022694, "learning_rate": 8.868985835893344e-06, "loss": 0.6404, "step": 2959 }, { "epoch": 0.24, "grad_norm": 4.537443193679681, "learning_rate": 8.868147174307438e-06, "loss": 1.2665, "step": 2960 }, { "epoch": 0.24, "grad_norm": 3.3265675796338345, "learning_rate": 8.86730824158125e-06, "loss": 0.648, "step": 2961 }, { "epoch": 0.24, "grad_norm": 5.830979842557535, "learning_rate": 8.866469037773582e-06, "loss": 1.4799, "step": 2962 }, { "epoch": 0.24, "grad_norm": 4.010278861649628, "learning_rate": 8.865629562943265e-06, "loss": 0.3216, "step": 2963 }, { "epoch": 0.24, "grad_norm": 3.616138757300956, "learning_rate": 8.864789817149137e-06, "loss": 0.7531, "step": 2964 }, { "epoch": 0.24, "grad_norm": 5.790458959821874, "learning_rate": 8.863949800450063e-06, "loss": 1.4917, "step": 2965 }, { "epoch": 0.24, "grad_norm": 2.9789882643523837, "learning_rate": 8.863109512904924e-06, "loss": 0.7569, "step": 2966 }, { "epoch": 0.24, "grad_norm": 3.5722174289876714, "learning_rate": 8.862268954572618e-06, "loss": 0.6274, "step": 2967 }, { "epoch": 0.24, "grad_norm": 3.0627508713733618, "learning_rate": 8.861428125512071e-06, "loss": 0.6932, "step": 2968 }, { "epoch": 0.24, "grad_norm": 2.5744792073861236, "learning_rate": 8.860587025782215e-06, "loss": 0.4992, "step": 2969 }, { "epoch": 0.24, "grad_norm": 2.43717337203305, "learning_rate": 8.859745655442012e-06, "loss": 0.503, "step": 2970 }, { "epoch": 0.24, "grad_norm": 5.287648098546942, "learning_rate": 8.858904014550434e-06, "loss": 1.156, "step": 2971 }, { "epoch": 0.24, "grad_norm": 3.6511168741579314, "learning_rate": 8.858062103166479e-06, "loss": 0.817, "step": 2972 }, { "epoch": 0.24, "grad_norm": 3.5830778351070243, "learning_rate": 8.85721992134916e-06, "loss": 1.1603, "step": 2973 }, { "epoch": 0.24, "grad_norm": 2.135773229473065, "learning_rate": 8.856377469157513e-06, "loss": 0.3204, "step": 2974 }, { "epoch": 0.24, "grad_norm": 2.242353578298476, "learning_rate": 8.855534746650586e-06, "loss": 0.4531, "step": 2975 }, { "epoch": 0.24, "grad_norm": 4.21442194749558, "learning_rate": 8.854691753887455e-06, "loss": 0.9405, "step": 2976 }, { "epoch": 0.24, "grad_norm": 3.836501269058079, "learning_rate": 8.853848490927207e-06, "loss": 0.8754, "step": 2977 }, { "epoch": 0.24, "grad_norm": 3.5125849385426475, "learning_rate": 8.853004957828952e-06, "loss": 0.5242, "step": 2978 }, { "epoch": 0.24, "grad_norm": 2.7242216451581576, "learning_rate": 8.852161154651817e-06, "loss": 0.4244, "step": 2979 }, { "epoch": 0.24, "grad_norm": 3.802380953813256, "learning_rate": 8.851317081454951e-06, "loss": 0.9277, "step": 2980 }, { "epoch": 0.24, "grad_norm": 3.049767752733464, "learning_rate": 8.850472738297518e-06, "loss": 0.6299, "step": 2981 }, { "epoch": 0.24, "grad_norm": 3.7215201474495654, "learning_rate": 8.849628125238703e-06, "loss": 0.7794, "step": 2982 }, { "epoch": 0.24, "grad_norm": 4.735496235244078, "learning_rate": 8.848783242337711e-06, "loss": 1.0313, "step": 2983 }, { "epoch": 0.24, "grad_norm": 3.8805762816472638, "learning_rate": 8.847938089653763e-06, "loss": 0.5554, "step": 2984 }, { "epoch": 0.24, "grad_norm": 1.2182610153442475, "learning_rate": 8.847092667246101e-06, "loss": 0.1831, "step": 2985 }, { "epoch": 0.24, "grad_norm": 5.886218186327842, "learning_rate": 8.846246975173985e-06, "loss": 1.7733, "step": 2986 }, { "epoch": 0.24, "grad_norm": 3.01633070745938, "learning_rate": 8.845401013496697e-06, "loss": 0.5916, "step": 2987 }, { "epoch": 0.24, "grad_norm": 4.820860752067593, "learning_rate": 8.84455478227353e-06, "loss": 1.1315, "step": 2988 }, { "epoch": 0.24, "grad_norm": 5.0352922168496095, "learning_rate": 8.843708281563808e-06, "loss": 1.539, "step": 2989 }, { "epoch": 0.24, "grad_norm": 4.562737207595621, "learning_rate": 8.842861511426862e-06, "loss": 0.8672, "step": 2990 }, { "epoch": 0.24, "grad_norm": 4.0376963816397025, "learning_rate": 8.842014471922046e-06, "loss": 1.1109, "step": 2991 }, { "epoch": 0.24, "grad_norm": 5.132946137148141, "learning_rate": 8.84116716310874e-06, "loss": 1.2181, "step": 2992 }, { "epoch": 0.24, "grad_norm": 1.607972489288798, "learning_rate": 8.84031958504633e-06, "loss": 0.3004, "step": 2993 }, { "epoch": 0.24, "grad_norm": 2.1621884343551834, "learning_rate": 8.839471737794232e-06, "loss": 0.5174, "step": 2994 }, { "epoch": 0.24, "grad_norm": 2.320221171032136, "learning_rate": 8.838623621411871e-06, "loss": 0.5463, "step": 2995 }, { "epoch": 0.24, "grad_norm": 2.7088125465197876, "learning_rate": 8.837775235958704e-06, "loss": 0.5495, "step": 2996 }, { "epoch": 0.24, "grad_norm": 3.8556123789911116, "learning_rate": 8.836926581494191e-06, "loss": 0.8726, "step": 2997 }, { "epoch": 0.25, "grad_norm": 5.258349986279272, "learning_rate": 8.836077658077825e-06, "loss": 1.0929, "step": 2998 }, { "epoch": 0.25, "grad_norm": 4.886977236391661, "learning_rate": 8.835228465769113e-06, "loss": 1.1653, "step": 2999 }, { "epoch": 0.25, "grad_norm": 3.1490792321452905, "learning_rate": 8.834379004627572e-06, "loss": 0.5612, "step": 3000 }, { "epoch": 0.25, "grad_norm": 3.537597675669336, "learning_rate": 8.833529274712751e-06, "loss": 0.8386, "step": 3001 }, { "epoch": 0.25, "grad_norm": 3.7338484716835243, "learning_rate": 8.832679276084213e-06, "loss": 0.5664, "step": 3002 }, { "epoch": 0.25, "grad_norm": 3.834281332933508, "learning_rate": 8.831829008801536e-06, "loss": 0.8881, "step": 3003 }, { "epoch": 0.25, "grad_norm": 3.8698804980984405, "learning_rate": 8.830978472924323e-06, "loss": 0.8899, "step": 3004 }, { "epoch": 0.25, "grad_norm": 3.6275615582211356, "learning_rate": 8.830127668512191e-06, "loss": 0.8097, "step": 3005 }, { "epoch": 0.25, "grad_norm": 3.176712118230684, "learning_rate": 8.829276595624778e-06, "loss": 0.7396, "step": 3006 }, { "epoch": 0.25, "grad_norm": 4.450354431030108, "learning_rate": 8.828425254321742e-06, "loss": 0.8398, "step": 3007 }, { "epoch": 0.25, "grad_norm": 3.842755616061428, "learning_rate": 8.827573644662756e-06, "loss": 0.8975, "step": 3008 }, { "epoch": 0.25, "grad_norm": 5.422657524771481, "learning_rate": 8.826721766707514e-06, "loss": 1.2179, "step": 3009 }, { "epoch": 0.25, "grad_norm": 2.9833365920739645, "learning_rate": 8.825869620515732e-06, "loss": 0.5358, "step": 3010 }, { "epoch": 0.25, "grad_norm": 3.879167488092526, "learning_rate": 8.825017206147139e-06, "loss": 0.7018, "step": 3011 }, { "epoch": 0.25, "grad_norm": 3.355405824595915, "learning_rate": 8.824164523661485e-06, "loss": 0.4947, "step": 3012 }, { "epoch": 0.25, "grad_norm": 4.352312149931795, "learning_rate": 8.823311573118543e-06, "loss": 1.0323, "step": 3013 }, { "epoch": 0.25, "grad_norm": 2.7924492631796953, "learning_rate": 8.822458354578098e-06, "loss": 0.8462, "step": 3014 }, { "epoch": 0.25, "grad_norm": 4.319453911353385, "learning_rate": 8.821604868099957e-06, "loss": 1.1634, "step": 3015 }, { "epoch": 0.25, "grad_norm": 2.4177468723900803, "learning_rate": 8.820751113743948e-06, "loss": 0.4041, "step": 3016 }, { "epoch": 0.25, "grad_norm": 3.6565659236706436, "learning_rate": 8.819897091569911e-06, "loss": 0.8552, "step": 3017 }, { "epoch": 0.25, "grad_norm": 2.6235116887935432, "learning_rate": 8.819042801637715e-06, "loss": 0.5085, "step": 3018 }, { "epoch": 0.25, "grad_norm": 3.061247292842075, "learning_rate": 8.81818824400724e-06, "loss": 0.8747, "step": 3019 }, { "epoch": 0.25, "grad_norm": 5.807535007879584, "learning_rate": 8.817333418738382e-06, "loss": 1.2014, "step": 3020 }, { "epoch": 0.25, "grad_norm": 4.0249120389592665, "learning_rate": 8.816478325891067e-06, "loss": 0.7394, "step": 3021 }, { "epoch": 0.25, "grad_norm": 3.8172430367905124, "learning_rate": 8.815622965525231e-06, "loss": 0.6459, "step": 3022 }, { "epoch": 0.25, "grad_norm": 4.6977222583715434, "learning_rate": 8.814767337700829e-06, "loss": 1.14, "step": 3023 }, { "epoch": 0.25, "grad_norm": 2.945164214725052, "learning_rate": 8.81391144247784e-06, "loss": 0.6226, "step": 3024 }, { "epoch": 0.25, "grad_norm": 3.2322993706947263, "learning_rate": 8.813055279916258e-06, "loss": 0.5395, "step": 3025 }, { "epoch": 0.25, "grad_norm": 4.352276850776771, "learning_rate": 8.812198850076092e-06, "loss": 1.2339, "step": 3026 }, { "epoch": 0.25, "grad_norm": 2.269106853685705, "learning_rate": 8.81134215301738e-06, "loss": 0.3062, "step": 3027 }, { "epoch": 0.25, "grad_norm": 4.073530103864496, "learning_rate": 8.81048518880017e-06, "loss": 1.0182, "step": 3028 }, { "epoch": 0.25, "grad_norm": 4.73778326088764, "learning_rate": 8.80962795748453e-06, "loss": 0.8068, "step": 3029 }, { "epoch": 0.25, "grad_norm": 3.8308118516350893, "learning_rate": 8.80877045913055e-06, "loss": 0.8938, "step": 3030 }, { "epoch": 0.25, "grad_norm": 2.9154835262683707, "learning_rate": 8.80791269379834e-06, "loss": 0.5688, "step": 3031 }, { "epoch": 0.25, "grad_norm": 4.591077187198804, "learning_rate": 8.807054661548019e-06, "loss": 1.3252, "step": 3032 }, { "epoch": 0.25, "grad_norm": 3.6776801143872526, "learning_rate": 8.806196362439734e-06, "loss": 1.0447, "step": 3033 }, { "epoch": 0.25, "grad_norm": 4.2212908361446635, "learning_rate": 8.80533779653365e-06, "loss": 0.7886, "step": 3034 }, { "epoch": 0.25, "grad_norm": 3.1280264324817724, "learning_rate": 8.804478963889948e-06, "loss": 0.7596, "step": 3035 }, { "epoch": 0.25, "grad_norm": 3.078134161585271, "learning_rate": 8.803619864568827e-06, "loss": 0.5037, "step": 3036 }, { "epoch": 0.25, "grad_norm": 3.325914291845112, "learning_rate": 8.802760498630507e-06, "loss": 0.921, "step": 3037 }, { "epoch": 0.25, "grad_norm": 3.079137764634465, "learning_rate": 8.801900866135225e-06, "loss": 0.8713, "step": 3038 }, { "epoch": 0.25, "grad_norm": 3.2472378005541467, "learning_rate": 8.80104096714324e-06, "loss": 0.7648, "step": 3039 }, { "epoch": 0.25, "grad_norm": 4.168245278168702, "learning_rate": 8.800180801714824e-06, "loss": 1.0513, "step": 3040 }, { "epoch": 0.25, "grad_norm": 5.625803081651678, "learning_rate": 8.799320369910273e-06, "loss": 1.5099, "step": 3041 }, { "epoch": 0.25, "grad_norm": 5.16058040833108, "learning_rate": 8.798459671789898e-06, "loss": 0.9635, "step": 3042 }, { "epoch": 0.25, "grad_norm": 4.308757760875945, "learning_rate": 8.79759870741403e-06, "loss": 0.9304, "step": 3043 }, { "epoch": 0.25, "grad_norm": 3.187234819766285, "learning_rate": 8.796737476843023e-06, "loss": 0.4738, "step": 3044 }, { "epoch": 0.25, "grad_norm": 5.226740566813347, "learning_rate": 8.795875980137238e-06, "loss": 0.8863, "step": 3045 }, { "epoch": 0.25, "grad_norm": 5.51213011831428, "learning_rate": 8.79501421735707e-06, "loss": 1.4908, "step": 3046 }, { "epoch": 0.25, "grad_norm": 2.458799182603262, "learning_rate": 8.79415218856292e-06, "loss": 0.4516, "step": 3047 }, { "epoch": 0.25, "grad_norm": 2.033543389356406, "learning_rate": 8.793289893815213e-06, "loss": 0.2532, "step": 3048 }, { "epoch": 0.25, "grad_norm": 3.1917699404856252, "learning_rate": 8.792427333174395e-06, "loss": 0.7104, "step": 3049 }, { "epoch": 0.25, "grad_norm": 2.856959628270824, "learning_rate": 8.791564506700925e-06, "loss": 0.6704, "step": 3050 }, { "epoch": 0.25, "grad_norm": 5.470122110245426, "learning_rate": 8.790701414455283e-06, "loss": 0.9739, "step": 3051 }, { "epoch": 0.25, "grad_norm": 4.3464298401882315, "learning_rate": 8.789838056497969e-06, "loss": 0.8312, "step": 3052 }, { "epoch": 0.25, "grad_norm": 5.007427352617898, "learning_rate": 8.7889744328895e-06, "loss": 1.0857, "step": 3053 }, { "epoch": 0.25, "grad_norm": 5.173821069060673, "learning_rate": 8.788110543690415e-06, "loss": 0.8516, "step": 3054 }, { "epoch": 0.25, "grad_norm": 3.9007425602595327, "learning_rate": 8.787246388961265e-06, "loss": 1.0306, "step": 3055 }, { "epoch": 0.25, "grad_norm": 2.8175766762413605, "learning_rate": 8.786381968762628e-06, "loss": 0.344, "step": 3056 }, { "epoch": 0.25, "grad_norm": 4.714141728356981, "learning_rate": 8.785517283155092e-06, "loss": 1.0833, "step": 3057 }, { "epoch": 0.25, "grad_norm": 3.4920510483411547, "learning_rate": 8.784652332199269e-06, "loss": 0.9534, "step": 3058 }, { "epoch": 0.25, "grad_norm": 3.201057136856587, "learning_rate": 8.783787115955787e-06, "loss": 0.6107, "step": 3059 }, { "epoch": 0.25, "grad_norm": 2.96564825037283, "learning_rate": 8.782921634485297e-06, "loss": 0.5547, "step": 3060 }, { "epoch": 0.25, "grad_norm": 3.1921970486894624, "learning_rate": 8.782055887848462e-06, "loss": 0.6019, "step": 3061 }, { "epoch": 0.25, "grad_norm": 3.4492343149468145, "learning_rate": 8.78118987610597e-06, "loss": 0.8935, "step": 3062 }, { "epoch": 0.25, "grad_norm": 3.7783810161720743, "learning_rate": 8.780323599318524e-06, "loss": 0.8023, "step": 3063 }, { "epoch": 0.25, "grad_norm": 3.9544689623425415, "learning_rate": 8.779457057546844e-06, "loss": 0.7754, "step": 3064 }, { "epoch": 0.25, "grad_norm": 5.225298130462849, "learning_rate": 8.778590250851674e-06, "loss": 1.0747, "step": 3065 }, { "epoch": 0.25, "grad_norm": 1.2969341365306017, "learning_rate": 8.777723179293772e-06, "loss": 0.2045, "step": 3066 }, { "epoch": 0.25, "grad_norm": 5.418704482740044, "learning_rate": 8.776855842933915e-06, "loss": 0.6294, "step": 3067 }, { "epoch": 0.25, "grad_norm": 2.8206797529615666, "learning_rate": 8.7759882418329e-06, "loss": 0.6849, "step": 3068 }, { "epoch": 0.25, "grad_norm": 2.607499970750639, "learning_rate": 8.775120376051544e-06, "loss": 0.5084, "step": 3069 }, { "epoch": 0.25, "grad_norm": 6.946479721945572, "learning_rate": 8.774252245650678e-06, "loss": 1.2328, "step": 3070 }, { "epoch": 0.25, "grad_norm": 5.305464030242199, "learning_rate": 8.773383850691155e-06, "loss": 1.336, "step": 3071 }, { "epoch": 0.25, "grad_norm": 2.521782900215057, "learning_rate": 8.772515191233846e-06, "loss": 0.3972, "step": 3072 }, { "epoch": 0.25, "grad_norm": 4.130677228668136, "learning_rate": 8.771646267339641e-06, "loss": 0.7908, "step": 3073 }, { "epoch": 0.25, "grad_norm": 3.3666015604730046, "learning_rate": 8.770777079069446e-06, "loss": 0.9013, "step": 3074 }, { "epoch": 0.25, "grad_norm": 3.1417450167135086, "learning_rate": 8.769907626484189e-06, "loss": 0.8646, "step": 3075 }, { "epoch": 0.25, "grad_norm": 1.1039939450121492, "learning_rate": 8.769037909644813e-06, "loss": 0.1571, "step": 3076 }, { "epoch": 0.25, "grad_norm": 4.381687631869932, "learning_rate": 8.768167928612283e-06, "loss": 0.714, "step": 3077 }, { "epoch": 0.25, "grad_norm": 4.028985577124985, "learning_rate": 8.76729768344758e-06, "loss": 1.0472, "step": 3078 }, { "epoch": 0.25, "grad_norm": 2.4696662022938765, "learning_rate": 8.766427174211704e-06, "loss": 0.4696, "step": 3079 }, { "epoch": 0.25, "grad_norm": 4.270044585126113, "learning_rate": 8.765556400965677e-06, "loss": 0.8139, "step": 3080 }, { "epoch": 0.25, "grad_norm": 3.621035925749316, "learning_rate": 8.764685363770534e-06, "loss": 1.0925, "step": 3081 }, { "epoch": 0.25, "grad_norm": 2.025670248824383, "learning_rate": 8.763814062687329e-06, "loss": 0.4951, "step": 3082 }, { "epoch": 0.25, "grad_norm": 3.154694676655994, "learning_rate": 8.762942497777138e-06, "loss": 0.6662, "step": 3083 }, { "epoch": 0.25, "grad_norm": 4.17545478000675, "learning_rate": 8.762070669101054e-06, "loss": 0.9638, "step": 3084 }, { "epoch": 0.25, "grad_norm": 2.0777362421774552, "learning_rate": 8.761198576720191e-06, "loss": 0.3896, "step": 3085 }, { "epoch": 0.25, "grad_norm": 2.8485534506189727, "learning_rate": 8.760326220695677e-06, "loss": 0.5518, "step": 3086 }, { "epoch": 0.25, "grad_norm": 5.4570503971498185, "learning_rate": 8.759453601088658e-06, "loss": 0.9435, "step": 3087 }, { "epoch": 0.25, "grad_norm": 2.762459736007317, "learning_rate": 8.758580717960303e-06, "loss": 0.6803, "step": 3088 }, { "epoch": 0.25, "grad_norm": 4.072988444518039, "learning_rate": 8.757707571371795e-06, "loss": 1.2577, "step": 3089 }, { "epoch": 0.25, "grad_norm": 3.0319532056540286, "learning_rate": 8.756834161384344e-06, "loss": 0.6772, "step": 3090 }, { "epoch": 0.25, "grad_norm": 2.0531323444970426, "learning_rate": 8.755960488059167e-06, "loss": 0.3901, "step": 3091 }, { "epoch": 0.25, "grad_norm": 3.8435794741136617, "learning_rate": 8.755086551457504e-06, "loss": 1.0897, "step": 3092 }, { "epoch": 0.25, "grad_norm": 6.1029982378400565, "learning_rate": 8.754212351640618e-06, "loss": 0.9717, "step": 3093 }, { "epoch": 0.25, "grad_norm": 4.467093227327127, "learning_rate": 8.753337888669783e-06, "loss": 0.7102, "step": 3094 }, { "epoch": 0.25, "grad_norm": 4.026197241645423, "learning_rate": 8.752463162606297e-06, "loss": 0.6473, "step": 3095 }, { "epoch": 0.25, "grad_norm": 4.915327712366391, "learning_rate": 8.751588173511474e-06, "loss": 1.1337, "step": 3096 }, { "epoch": 0.25, "grad_norm": 4.9382618749318565, "learning_rate": 8.750712921446647e-06, "loss": 1.0953, "step": 3097 }, { "epoch": 0.25, "grad_norm": 3.2741886506684112, "learning_rate": 8.749837406473168e-06, "loss": 0.7295, "step": 3098 }, { "epoch": 0.25, "grad_norm": 3.701676756960232, "learning_rate": 8.748961628652406e-06, "loss": 0.7643, "step": 3099 }, { "epoch": 0.25, "grad_norm": 4.241600773286563, "learning_rate": 8.74808558804575e-06, "loss": 1.0252, "step": 3100 }, { "epoch": 0.25, "grad_norm": 3.367775399977547, "learning_rate": 8.747209284714604e-06, "loss": 0.6707, "step": 3101 }, { "epoch": 0.25, "grad_norm": 4.5932625088710335, "learning_rate": 8.746332718720395e-06, "loss": 1.2799, "step": 3102 }, { "epoch": 0.25, "grad_norm": 3.2022569176912743, "learning_rate": 8.745455890124567e-06, "loss": 0.803, "step": 3103 }, { "epoch": 0.25, "grad_norm": 3.9448983739541603, "learning_rate": 8.744578798988584e-06, "loss": 0.7613, "step": 3104 }, { "epoch": 0.25, "grad_norm": 4.885468151504446, "learning_rate": 8.743701445373922e-06, "loss": 1.1849, "step": 3105 }, { "epoch": 0.25, "grad_norm": 5.584814906714775, "learning_rate": 8.74282382934208e-06, "loss": 1.3323, "step": 3106 }, { "epoch": 0.25, "grad_norm": 2.7150379669348883, "learning_rate": 8.741945950954577e-06, "loss": 0.6995, "step": 3107 }, { "epoch": 0.25, "grad_norm": 3.3623560606552503, "learning_rate": 8.741067810272949e-06, "loss": 0.4057, "step": 3108 }, { "epoch": 0.25, "grad_norm": 3.7438001086287556, "learning_rate": 8.740189407358747e-06, "loss": 1.0674, "step": 3109 }, { "epoch": 0.25, "grad_norm": 2.386810913733393, "learning_rate": 8.739310742273546e-06, "loss": 0.4845, "step": 3110 }, { "epoch": 0.25, "grad_norm": 3.908178175809527, "learning_rate": 8.738431815078937e-06, "loss": 1.0802, "step": 3111 }, { "epoch": 0.25, "grad_norm": 2.133124078726872, "learning_rate": 8.737552625836525e-06, "loss": 0.3782, "step": 3112 }, { "epoch": 0.25, "grad_norm": 3.1146653144289034, "learning_rate": 8.73667317460794e-06, "loss": 0.7028, "step": 3113 }, { "epoch": 0.25, "grad_norm": 4.8189764064404965, "learning_rate": 8.735793461454828e-06, "loss": 1.0162, "step": 3114 }, { "epoch": 0.25, "grad_norm": 6.21556316598591, "learning_rate": 8.734913486438854e-06, "loss": 1.6151, "step": 3115 }, { "epoch": 0.25, "grad_norm": 4.610434382709641, "learning_rate": 8.734033249621695e-06, "loss": 0.6186, "step": 3116 }, { "epoch": 0.25, "grad_norm": 3.1136830203208943, "learning_rate": 8.73315275106506e-06, "loss": 0.4801, "step": 3117 }, { "epoch": 0.25, "grad_norm": 2.8810962077461704, "learning_rate": 8.732271990830663e-06, "loss": 0.6271, "step": 3118 }, { "epoch": 0.25, "grad_norm": 2.8009657466197377, "learning_rate": 8.731390968980242e-06, "loss": 0.5574, "step": 3119 }, { "epoch": 0.26, "grad_norm": 3.5106276997584525, "learning_rate": 8.730509685575552e-06, "loss": 0.8657, "step": 3120 }, { "epoch": 0.26, "grad_norm": 4.70797696118993, "learning_rate": 8.72962814067837e-06, "loss": 0.9346, "step": 3121 }, { "epoch": 0.26, "grad_norm": 4.1023948752405675, "learning_rate": 8.728746334350483e-06, "loss": 1.2921, "step": 3122 }, { "epoch": 0.26, "grad_norm": 2.1629856076308487, "learning_rate": 8.72786426665371e-06, "loss": 0.2938, "step": 3123 }, { "epoch": 0.26, "grad_norm": 4.098831316671046, "learning_rate": 8.726981937649875e-06, "loss": 1.134, "step": 3124 }, { "epoch": 0.26, "grad_norm": 3.927581384352747, "learning_rate": 8.726099347400824e-06, "loss": 0.6837, "step": 3125 }, { "epoch": 0.26, "grad_norm": 4.745472576702596, "learning_rate": 8.725216495968426e-06, "loss": 1.5312, "step": 3126 }, { "epoch": 0.26, "grad_norm": 3.4741765877770887, "learning_rate": 8.724333383414563e-06, "loss": 0.7448, "step": 3127 }, { "epoch": 0.26, "grad_norm": 3.3200229506989993, "learning_rate": 8.723450009801139e-06, "loss": 0.5416, "step": 3128 }, { "epoch": 0.26, "grad_norm": 4.1467770194927835, "learning_rate": 8.722566375190073e-06, "loss": 0.9865, "step": 3129 }, { "epoch": 0.26, "grad_norm": 3.1764658181726544, "learning_rate": 8.721682479643307e-06, "loss": 0.679, "step": 3130 }, { "epoch": 0.26, "grad_norm": 5.045637342760714, "learning_rate": 8.720798323222795e-06, "loss": 1.0718, "step": 3131 }, { "epoch": 0.26, "grad_norm": 6.108791828817261, "learning_rate": 8.719913905990511e-06, "loss": 1.3495, "step": 3132 }, { "epoch": 0.26, "grad_norm": 4.581562699895055, "learning_rate": 8.719029228008454e-06, "loss": 0.9609, "step": 3133 }, { "epoch": 0.26, "grad_norm": 4.905464031884023, "learning_rate": 8.718144289338632e-06, "loss": 1.1061, "step": 3134 }, { "epoch": 0.26, "grad_norm": 5.038760696296255, "learning_rate": 8.717259090043078e-06, "loss": 1.6582, "step": 3135 }, { "epoch": 0.26, "grad_norm": 2.8090502903294605, "learning_rate": 8.716373630183839e-06, "loss": 0.5046, "step": 3136 }, { "epoch": 0.26, "grad_norm": 3.019665012589466, "learning_rate": 8.715487909822982e-06, "loss": 0.7241, "step": 3137 }, { "epoch": 0.26, "grad_norm": 4.342658470523938, "learning_rate": 8.714601929022591e-06, "loss": 0.7319, "step": 3138 }, { "epoch": 0.26, "grad_norm": 3.2930299554632527, "learning_rate": 8.713715687844772e-06, "loss": 0.6454, "step": 3139 }, { "epoch": 0.26, "grad_norm": 3.7485307418029983, "learning_rate": 8.712829186351646e-06, "loss": 0.8893, "step": 3140 }, { "epoch": 0.26, "grad_norm": 3.609508442458764, "learning_rate": 8.711942424605352e-06, "loss": 0.903, "step": 3141 }, { "epoch": 0.26, "grad_norm": 3.4121316309959697, "learning_rate": 8.711055402668049e-06, "loss": 0.8174, "step": 3142 }, { "epoch": 0.26, "grad_norm": 3.6279918762098293, "learning_rate": 8.710168120601912e-06, "loss": 0.6543, "step": 3143 }, { "epoch": 0.26, "grad_norm": 4.403460609793017, "learning_rate": 8.709280578469135e-06, "loss": 1.2444, "step": 3144 }, { "epoch": 0.26, "grad_norm": 2.7833886357786386, "learning_rate": 8.708392776331935e-06, "loss": 0.5886, "step": 3145 }, { "epoch": 0.26, "grad_norm": 3.3529510963178852, "learning_rate": 8.707504714252539e-06, "loss": 0.8862, "step": 3146 }, { "epoch": 0.26, "grad_norm": 1.0977778285785627, "learning_rate": 8.706616392293199e-06, "loss": 0.1751, "step": 3147 }, { "epoch": 0.26, "grad_norm": 6.1552747267107994, "learning_rate": 8.705727810516179e-06, "loss": 1.0112, "step": 3148 }, { "epoch": 0.26, "grad_norm": 5.988284675168184, "learning_rate": 8.70483896898377e-06, "loss": 1.4558, "step": 3149 }, { "epoch": 0.26, "grad_norm": 1.146370997195073, "learning_rate": 8.703949867758269e-06, "loss": 0.1939, "step": 3150 }, { "epoch": 0.26, "grad_norm": 4.518287166379679, "learning_rate": 8.703060506902004e-06, "loss": 0.7558, "step": 3151 }, { "epoch": 0.26, "grad_norm": 4.253671816177628, "learning_rate": 8.702170886477312e-06, "loss": 1.1437, "step": 3152 }, { "epoch": 0.26, "grad_norm": 3.8170806354964486, "learning_rate": 8.701281006546554e-06, "loss": 0.9219, "step": 3153 }, { "epoch": 0.26, "grad_norm": 5.031382707502849, "learning_rate": 8.700390867172104e-06, "loss": 1.4201, "step": 3154 }, { "epoch": 0.26, "grad_norm": 4.142443824282733, "learning_rate": 8.699500468416359e-06, "loss": 0.5597, "step": 3155 }, { "epoch": 0.26, "grad_norm": 5.268972095381594, "learning_rate": 8.698609810341733e-06, "loss": 1.1795, "step": 3156 }, { "epoch": 0.26, "grad_norm": 4.245915832669328, "learning_rate": 8.697718893010654e-06, "loss": 1.0289, "step": 3157 }, { "epoch": 0.26, "grad_norm": 3.3672476490029912, "learning_rate": 8.696827716485575e-06, "loss": 0.6437, "step": 3158 }, { "epoch": 0.26, "grad_norm": 3.32873091167265, "learning_rate": 8.69593628082896e-06, "loss": 0.7401, "step": 3159 }, { "epoch": 0.26, "grad_norm": 4.05422206738384, "learning_rate": 8.695044586103297e-06, "loss": 0.7442, "step": 3160 }, { "epoch": 0.26, "grad_norm": 3.8301976158364446, "learning_rate": 8.69415263237109e-06, "loss": 0.8611, "step": 3161 }, { "epoch": 0.26, "grad_norm": 2.4771740486546956, "learning_rate": 8.693260419694858e-06, "loss": 0.4881, "step": 3162 }, { "epoch": 0.26, "grad_norm": 4.354569593164627, "learning_rate": 8.692367948137146e-06, "loss": 0.8609, "step": 3163 }, { "epoch": 0.26, "grad_norm": 3.3948040475678503, "learning_rate": 8.69147521776051e-06, "loss": 0.7847, "step": 3164 }, { "epoch": 0.26, "grad_norm": 3.952371981072322, "learning_rate": 8.690582228627525e-06, "loss": 1.2305, "step": 3165 }, { "epoch": 0.26, "grad_norm": 2.9741205651636946, "learning_rate": 8.68968898080079e-06, "loss": 0.4751, "step": 3166 }, { "epoch": 0.26, "grad_norm": 2.558786018301694, "learning_rate": 8.688795474342913e-06, "loss": 0.3973, "step": 3167 }, { "epoch": 0.26, "grad_norm": 5.3154057654178395, "learning_rate": 8.687901709316526e-06, "loss": 1.5187, "step": 3168 }, { "epoch": 0.26, "grad_norm": 1.86788167062856, "learning_rate": 8.68700768578428e-06, "loss": 0.3617, "step": 3169 }, { "epoch": 0.26, "grad_norm": 7.8911866823110985, "learning_rate": 8.686113403808843e-06, "loss": 0.997, "step": 3170 }, { "epoch": 0.26, "grad_norm": 4.869882483356167, "learning_rate": 8.685218863452896e-06, "loss": 1.2136, "step": 3171 }, { "epoch": 0.26, "grad_norm": 2.496630214403798, "learning_rate": 8.684324064779147e-06, "loss": 0.5059, "step": 3172 }, { "epoch": 0.26, "grad_norm": 3.0616387089416395, "learning_rate": 8.683429007850313e-06, "loss": 0.6251, "step": 3173 }, { "epoch": 0.26, "grad_norm": 3.2089421498861, "learning_rate": 8.682533692729137e-06, "loss": 0.7262, "step": 3174 }, { "epoch": 0.26, "grad_norm": 3.707763156712534, "learning_rate": 8.681638119478375e-06, "loss": 0.8375, "step": 3175 }, { "epoch": 0.26, "grad_norm": 2.622146077316868, "learning_rate": 8.680742288160803e-06, "loss": 0.5689, "step": 3176 }, { "epoch": 0.26, "grad_norm": 4.846788125256505, "learning_rate": 8.679846198839216e-06, "loss": 1.0026, "step": 3177 }, { "epoch": 0.26, "grad_norm": 3.110639668818378, "learning_rate": 8.678949851576425e-06, "loss": 0.5267, "step": 3178 }, { "epoch": 0.26, "grad_norm": 1.5945107075834242, "learning_rate": 8.678053246435261e-06, "loss": 0.3912, "step": 3179 }, { "epoch": 0.26, "grad_norm": 2.0778832980751147, "learning_rate": 8.677156383478571e-06, "loss": 0.5534, "step": 3180 }, { "epoch": 0.26, "grad_norm": 4.170325352138206, "learning_rate": 8.676259262769222e-06, "loss": 1.2933, "step": 3181 }, { "epoch": 0.26, "grad_norm": 2.6725061220038397, "learning_rate": 8.675361884370097e-06, "loss": 0.5551, "step": 3182 }, { "epoch": 0.26, "grad_norm": 3.1924954050152907, "learning_rate": 8.6744642483441e-06, "loss": 0.7139, "step": 3183 }, { "epoch": 0.26, "grad_norm": 2.9889298140945484, "learning_rate": 8.67356635475415e-06, "loss": 0.4657, "step": 3184 }, { "epoch": 0.26, "grad_norm": 3.434549748705281, "learning_rate": 8.672668203663185e-06, "loss": 0.6748, "step": 3185 }, { "epoch": 0.26, "grad_norm": 4.549624402623971, "learning_rate": 8.671769795134164e-06, "loss": 1.0978, "step": 3186 }, { "epoch": 0.26, "grad_norm": 2.962116235820115, "learning_rate": 8.67087112923006e-06, "loss": 0.6544, "step": 3187 }, { "epoch": 0.26, "grad_norm": 4.522077343959333, "learning_rate": 8.669972206013864e-06, "loss": 1.3273, "step": 3188 }, { "epoch": 0.26, "grad_norm": 3.2457425570795433, "learning_rate": 8.669073025548588e-06, "loss": 0.7193, "step": 3189 }, { "epoch": 0.26, "grad_norm": 3.7196750818422535, "learning_rate": 8.668173587897261e-06, "loss": 0.7571, "step": 3190 }, { "epoch": 0.26, "grad_norm": 4.3977177249844335, "learning_rate": 8.667273893122932e-06, "loss": 0.9859, "step": 3191 }, { "epoch": 0.26, "grad_norm": 3.8157013770108463, "learning_rate": 8.666373941288661e-06, "loss": 1.2364, "step": 3192 }, { "epoch": 0.26, "grad_norm": 3.493289730627912, "learning_rate": 8.665473732457533e-06, "loss": 0.9903, "step": 3193 }, { "epoch": 0.26, "grad_norm": 3.98117903788595, "learning_rate": 8.664573266692648e-06, "loss": 1.0363, "step": 3194 }, { "epoch": 0.26, "grad_norm": 4.7274376036047885, "learning_rate": 8.663672544057126e-06, "loss": 0.9828, "step": 3195 }, { "epoch": 0.26, "grad_norm": 3.84076112404563, "learning_rate": 8.662771564614102e-06, "loss": 0.6248, "step": 3196 }, { "epoch": 0.26, "grad_norm": 2.890432405279825, "learning_rate": 8.661870328426734e-06, "loss": 0.7854, "step": 3197 }, { "epoch": 0.26, "grad_norm": 3.554546599450026, "learning_rate": 8.66096883555819e-06, "loss": 0.5484, "step": 3198 }, { "epoch": 0.26, "grad_norm": 4.383581001219526, "learning_rate": 8.660067086071665e-06, "loss": 1.0188, "step": 3199 }, { "epoch": 0.26, "grad_norm": 3.5078927578736643, "learning_rate": 8.659165080030366e-06, "loss": 0.6404, "step": 3200 }, { "epoch": 0.26, "grad_norm": 2.906240123006569, "learning_rate": 8.658262817497517e-06, "loss": 0.6517, "step": 3201 }, { "epoch": 0.26, "grad_norm": 3.2438586396263625, "learning_rate": 8.657360298536368e-06, "loss": 0.3485, "step": 3202 }, { "epoch": 0.26, "grad_norm": 3.6901225925717074, "learning_rate": 8.656457523210178e-06, "loss": 0.7426, "step": 3203 }, { "epoch": 0.26, "grad_norm": 5.135785463653785, "learning_rate": 8.65555449158223e-06, "loss": 1.2519, "step": 3204 }, { "epoch": 0.26, "grad_norm": 4.197218607509568, "learning_rate": 8.65465120371582e-06, "loss": 0.9433, "step": 3205 }, { "epoch": 0.26, "grad_norm": 4.414861530515555, "learning_rate": 8.653747659674265e-06, "loss": 1.0128, "step": 3206 }, { "epoch": 0.26, "grad_norm": 4.7227671150350234, "learning_rate": 8.6528438595209e-06, "loss": 0.9727, "step": 3207 }, { "epoch": 0.26, "grad_norm": 4.859963917459991, "learning_rate": 8.651939803319079e-06, "loss": 0.8443, "step": 3208 }, { "epoch": 0.26, "grad_norm": 3.026595312703798, "learning_rate": 8.651035491132171e-06, "loss": 0.6055, "step": 3209 }, { "epoch": 0.26, "grad_norm": 4.250457816338681, "learning_rate": 8.650130923023564e-06, "loss": 0.8802, "step": 3210 }, { "epoch": 0.26, "grad_norm": 3.4885457304896335, "learning_rate": 8.649226099056664e-06, "loss": 0.8172, "step": 3211 }, { "epoch": 0.26, "grad_norm": 2.726001747173857, "learning_rate": 8.648321019294896e-06, "loss": 0.5996, "step": 3212 }, { "epoch": 0.26, "grad_norm": 4.501357604415449, "learning_rate": 8.647415683801703e-06, "loss": 0.8746, "step": 3213 }, { "epoch": 0.26, "grad_norm": 3.668083160728214, "learning_rate": 8.646510092640546e-06, "loss": 0.7768, "step": 3214 }, { "epoch": 0.26, "grad_norm": 5.0515484249074465, "learning_rate": 8.645604245874898e-06, "loss": 1.1065, "step": 3215 }, { "epoch": 0.26, "grad_norm": 4.044574041841505, "learning_rate": 8.64469814356826e-06, "loss": 1.0122, "step": 3216 }, { "epoch": 0.26, "grad_norm": 5.206242620906617, "learning_rate": 8.643791785784143e-06, "loss": 1.2983, "step": 3217 }, { "epoch": 0.26, "grad_norm": 3.788662775672696, "learning_rate": 8.642885172586079e-06, "loss": 0.6405, "step": 3218 }, { "epoch": 0.26, "grad_norm": 2.771380323773392, "learning_rate": 8.641978304037619e-06, "loss": 0.502, "step": 3219 }, { "epoch": 0.26, "grad_norm": 3.196793597191887, "learning_rate": 8.64107118020233e-06, "loss": 0.4887, "step": 3220 }, { "epoch": 0.26, "grad_norm": 4.197875163689073, "learning_rate": 8.640163801143797e-06, "loss": 0.8519, "step": 3221 }, { "epoch": 0.26, "grad_norm": 2.267636049624269, "learning_rate": 8.639256166925623e-06, "loss": 0.4885, "step": 3222 }, { "epoch": 0.26, "grad_norm": 4.219769417655235, "learning_rate": 8.638348277611431e-06, "loss": 1.0352, "step": 3223 }, { "epoch": 0.26, "grad_norm": 3.300249351364667, "learning_rate": 8.637440133264858e-06, "loss": 0.8545, "step": 3224 }, { "epoch": 0.26, "grad_norm": 4.122445723263637, "learning_rate": 8.636531733949562e-06, "loss": 0.9217, "step": 3225 }, { "epoch": 0.26, "grad_norm": 3.6354846730557955, "learning_rate": 8.635623079729218e-06, "loss": 0.53, "step": 3226 }, { "epoch": 0.26, "grad_norm": 3.037083101704012, "learning_rate": 8.63471417066752e-06, "loss": 0.5501, "step": 3227 }, { "epoch": 0.26, "grad_norm": 4.815458380227235, "learning_rate": 8.633805006828175e-06, "loss": 0.7468, "step": 3228 }, { "epoch": 0.26, "grad_norm": 3.5727780826689126, "learning_rate": 8.632895588274913e-06, "loss": 0.7856, "step": 3229 }, { "epoch": 0.26, "grad_norm": 3.2251162220840035, "learning_rate": 8.63198591507148e-06, "loss": 0.6999, "step": 3230 }, { "epoch": 0.26, "grad_norm": 3.1361439942367513, "learning_rate": 8.631075987281645e-06, "loss": 0.7396, "step": 3231 }, { "epoch": 0.26, "grad_norm": 3.4658354764042856, "learning_rate": 8.630165804969181e-06, "loss": 1.0458, "step": 3232 }, { "epoch": 0.26, "grad_norm": 3.45617644112384, "learning_rate": 8.629255368197894e-06, "loss": 0.8709, "step": 3233 }, { "epoch": 0.26, "grad_norm": 4.986012130449948, "learning_rate": 8.628344677031602e-06, "loss": 1.3019, "step": 3234 }, { "epoch": 0.26, "grad_norm": 4.327627012636433, "learning_rate": 8.627433731534138e-06, "loss": 1.1655, "step": 3235 }, { "epoch": 0.26, "grad_norm": 2.5669462906154594, "learning_rate": 8.626522531769356e-06, "loss": 0.571, "step": 3236 }, { "epoch": 0.26, "grad_norm": 3.7131241591546362, "learning_rate": 8.625611077801127e-06, "loss": 0.7743, "step": 3237 }, { "epoch": 0.26, "grad_norm": 3.0749190601802012, "learning_rate": 8.624699369693338e-06, "loss": 1.0538, "step": 3238 }, { "epoch": 0.26, "grad_norm": 3.836550605964662, "learning_rate": 8.623787407509902e-06, "loss": 0.7836, "step": 3239 }, { "epoch": 0.26, "grad_norm": 4.759929928706973, "learning_rate": 8.622875191314737e-06, "loss": 0.6143, "step": 3240 }, { "epoch": 0.26, "grad_norm": 2.6827245495757204, "learning_rate": 8.621962721171789e-06, "loss": 0.6333, "step": 3241 }, { "epoch": 0.26, "grad_norm": 2.017636572954006, "learning_rate": 8.621049997145016e-06, "loss": 0.4061, "step": 3242 }, { "epoch": 0.27, "grad_norm": 4.046091184963527, "learning_rate": 8.620137019298397e-06, "loss": 1.274, "step": 3243 }, { "epoch": 0.27, "grad_norm": 3.630981230839866, "learning_rate": 8.61922378769593e-06, "loss": 0.8632, "step": 3244 }, { "epoch": 0.27, "grad_norm": 3.857283660257242, "learning_rate": 8.618310302401624e-06, "loss": 0.8118, "step": 3245 }, { "epoch": 0.27, "grad_norm": 3.8626741829376483, "learning_rate": 8.617396563479512e-06, "loss": 0.7905, "step": 3246 }, { "epoch": 0.27, "grad_norm": 4.421926641303325, "learning_rate": 8.616482570993648e-06, "loss": 1.2504, "step": 3247 }, { "epoch": 0.27, "grad_norm": 4.546897594665816, "learning_rate": 8.61556832500809e-06, "loss": 1.514, "step": 3248 }, { "epoch": 0.27, "grad_norm": 5.060131968138134, "learning_rate": 8.614653825586929e-06, "loss": 1.1402, "step": 3249 }, { "epoch": 0.27, "grad_norm": 3.1715649425333967, "learning_rate": 8.613739072794268e-06, "loss": 0.6246, "step": 3250 }, { "epoch": 0.27, "grad_norm": 5.404068495336512, "learning_rate": 8.612824066694223e-06, "loss": 1.0936, "step": 3251 }, { "epoch": 0.27, "grad_norm": 4.484444207292898, "learning_rate": 8.611908807350934e-06, "loss": 0.9757, "step": 3252 }, { "epoch": 0.27, "grad_norm": 3.423023227701489, "learning_rate": 8.610993294828557e-06, "loss": 0.4086, "step": 3253 }, { "epoch": 0.27, "grad_norm": 3.349545026797728, "learning_rate": 8.610077529191265e-06, "loss": 0.9609, "step": 3254 }, { "epoch": 0.27, "grad_norm": 4.389377079746561, "learning_rate": 8.60916151050325e-06, "loss": 1.0769, "step": 3255 }, { "epoch": 0.27, "grad_norm": 3.202524400456061, "learning_rate": 8.60824523882872e-06, "loss": 0.7872, "step": 3256 }, { "epoch": 0.27, "grad_norm": 3.505244005208422, "learning_rate": 8.607328714231901e-06, "loss": 0.7738, "step": 3257 }, { "epoch": 0.27, "grad_norm": 1.492210496875677, "learning_rate": 8.60641193677704e-06, "loss": 0.1749, "step": 3258 }, { "epoch": 0.27, "grad_norm": 3.211838371020348, "learning_rate": 8.605494906528395e-06, "loss": 0.9311, "step": 3259 }, { "epoch": 0.27, "grad_norm": 4.423420650706527, "learning_rate": 8.604577623550249e-06, "loss": 0.9224, "step": 3260 }, { "epoch": 0.27, "grad_norm": 4.158714351825901, "learning_rate": 8.603660087906901e-06, "loss": 1.0484, "step": 3261 }, { "epoch": 0.27, "grad_norm": 5.102169056933126, "learning_rate": 8.602742299662662e-06, "loss": 1.0258, "step": 3262 }, { "epoch": 0.27, "grad_norm": 4.499002455252406, "learning_rate": 8.601824258881868e-06, "loss": 1.3883, "step": 3263 }, { "epoch": 0.27, "grad_norm": 2.4907076006441042, "learning_rate": 8.600905965628867e-06, "loss": 0.5417, "step": 3264 }, { "epoch": 0.27, "grad_norm": 2.8531022936766153, "learning_rate": 8.59998741996803e-06, "loss": 0.877, "step": 3265 }, { "epoch": 0.27, "grad_norm": 4.370751097548014, "learning_rate": 8.599068621963741e-06, "loss": 1.0476, "step": 3266 }, { "epoch": 0.27, "grad_norm": 4.508339914520264, "learning_rate": 8.598149571680407e-06, "loss": 1.4139, "step": 3267 }, { "epoch": 0.27, "grad_norm": 3.3080389351523425, "learning_rate": 8.597230269182446e-06, "loss": 0.7074, "step": 3268 }, { "epoch": 0.27, "grad_norm": 3.562432608615904, "learning_rate": 8.596310714534299e-06, "loss": 0.4951, "step": 3269 }, { "epoch": 0.27, "grad_norm": 4.437605479178292, "learning_rate": 8.59539090780042e-06, "loss": 1.112, "step": 3270 }, { "epoch": 0.27, "grad_norm": 2.508607042973389, "learning_rate": 8.59447084904529e-06, "loss": 0.4823, "step": 3271 }, { "epoch": 0.27, "grad_norm": 4.228500440504984, "learning_rate": 8.593550538333392e-06, "loss": 1.4294, "step": 3272 }, { "epoch": 0.27, "grad_norm": 4.767059649852298, "learning_rate": 8.592629975729242e-06, "loss": 0.9167, "step": 3273 }, { "epoch": 0.27, "grad_norm": 3.269612391619527, "learning_rate": 8.591709161297366e-06, "loss": 0.6488, "step": 3274 }, { "epoch": 0.27, "grad_norm": 4.846307992233512, "learning_rate": 8.59078809510231e-06, "loss": 0.9216, "step": 3275 }, { "epoch": 0.27, "grad_norm": 5.826302142300191, "learning_rate": 8.589866777208632e-06, "loss": 1.4859, "step": 3276 }, { "epoch": 0.27, "grad_norm": 2.9845686316737927, "learning_rate": 8.588945207680922e-06, "loss": 0.5955, "step": 3277 }, { "epoch": 0.27, "grad_norm": 6.516434459764189, "learning_rate": 8.588023386583766e-06, "loss": 1.227, "step": 3278 }, { "epoch": 0.27, "grad_norm": 4.000628683865046, "learning_rate": 8.587101313981789e-06, "loss": 0.429, "step": 3279 }, { "epoch": 0.27, "grad_norm": 1.149387629155255, "learning_rate": 8.586178989939622e-06, "loss": 0.2166, "step": 3280 }, { "epoch": 0.27, "grad_norm": 5.674756287794339, "learning_rate": 8.585256414521912e-06, "loss": 1.4346, "step": 3281 }, { "epoch": 0.27, "grad_norm": 3.814894563094977, "learning_rate": 8.584333587793334e-06, "loss": 1.093, "step": 3282 }, { "epoch": 0.27, "grad_norm": 3.2099645602466165, "learning_rate": 8.583410509818567e-06, "loss": 0.8124, "step": 3283 }, { "epoch": 0.27, "grad_norm": 3.7785344671897114, "learning_rate": 8.58248718066232e-06, "loss": 0.9876, "step": 3284 }, { "epoch": 0.27, "grad_norm": 4.827799117265657, "learning_rate": 8.581563600389313e-06, "loss": 1.1371, "step": 3285 }, { "epoch": 0.27, "grad_norm": 5.027555913927014, "learning_rate": 8.580639769064283e-06, "loss": 0.9466, "step": 3286 }, { "epoch": 0.27, "grad_norm": 3.4193502984889474, "learning_rate": 8.57971568675199e-06, "loss": 0.5155, "step": 3287 }, { "epoch": 0.27, "grad_norm": 4.811872675998102, "learning_rate": 8.578791353517205e-06, "loss": 0.6685, "step": 3288 }, { "epoch": 0.27, "grad_norm": 4.0447413505751895, "learning_rate": 8.577866769424722e-06, "loss": 0.7002, "step": 3289 }, { "epoch": 0.27, "grad_norm": 4.67709329214413, "learning_rate": 8.57694193453935e-06, "loss": 1.059, "step": 3290 }, { "epoch": 0.27, "grad_norm": 3.9639687943958597, "learning_rate": 8.576016848925914e-06, "loss": 0.9254, "step": 3291 }, { "epoch": 0.27, "grad_norm": 3.756176769055125, "learning_rate": 8.57509151264926e-06, "loss": 0.689, "step": 3292 }, { "epoch": 0.27, "grad_norm": 3.9754738372845124, "learning_rate": 8.574165925774249e-06, "loss": 0.8303, "step": 3293 }, { "epoch": 0.27, "grad_norm": 2.7067266196734256, "learning_rate": 8.573240088365764e-06, "loss": 0.5972, "step": 3294 }, { "epoch": 0.27, "grad_norm": 4.318813164893715, "learning_rate": 8.572314000488697e-06, "loss": 0.9459, "step": 3295 }, { "epoch": 0.27, "grad_norm": 4.176265666423131, "learning_rate": 8.571387662207966e-06, "loss": 1.0976, "step": 3296 }, { "epoch": 0.27, "grad_norm": 5.027574899271077, "learning_rate": 8.570461073588503e-06, "loss": 1.4062, "step": 3297 }, { "epoch": 0.27, "grad_norm": 3.5759335168554047, "learning_rate": 8.569534234695258e-06, "loss": 0.7524, "step": 3298 }, { "epoch": 0.27, "grad_norm": 2.98753427562043, "learning_rate": 8.568607145593197e-06, "loss": 0.5578, "step": 3299 }, { "epoch": 0.27, "grad_norm": 4.824454307674025, "learning_rate": 8.567679806347307e-06, "loss": 1.1906, "step": 3300 }, { "epoch": 0.27, "grad_norm": 3.8811060287963155, "learning_rate": 8.566752217022587e-06, "loss": 0.9038, "step": 3301 }, { "epoch": 0.27, "grad_norm": 3.1295118533835895, "learning_rate": 8.56582437768406e-06, "loss": 0.5656, "step": 3302 }, { "epoch": 0.27, "grad_norm": 4.189873115925194, "learning_rate": 8.564896288396762e-06, "loss": 0.7928, "step": 3303 }, { "epoch": 0.27, "grad_norm": 3.9550707861543692, "learning_rate": 8.56396794922575e-06, "loss": 0.9126, "step": 3304 }, { "epoch": 0.27, "grad_norm": 4.734076565039832, "learning_rate": 8.563039360236097e-06, "loss": 1.3105, "step": 3305 }, { "epoch": 0.27, "grad_norm": 3.7954476971871696, "learning_rate": 8.562110521492888e-06, "loss": 0.5605, "step": 3306 }, { "epoch": 0.27, "grad_norm": 4.913998182189051, "learning_rate": 8.561181433061234e-06, "loss": 1.4244, "step": 3307 }, { "epoch": 0.27, "grad_norm": 4.009470227880041, "learning_rate": 8.56025209500626e-06, "loss": 0.7917, "step": 3308 }, { "epoch": 0.27, "grad_norm": 2.665922735380563, "learning_rate": 8.55932250739311e-06, "loss": 0.3222, "step": 3309 }, { "epoch": 0.27, "grad_norm": 4.7198380743455095, "learning_rate": 8.558392670286942e-06, "loss": 1.1366, "step": 3310 }, { "epoch": 0.27, "grad_norm": 3.178748702987763, "learning_rate": 8.557462583752934e-06, "loss": 0.7711, "step": 3311 }, { "epoch": 0.27, "grad_norm": 2.438568163220989, "learning_rate": 8.55653224785628e-06, "loss": 0.4922, "step": 3312 }, { "epoch": 0.27, "grad_norm": 6.330476916402365, "learning_rate": 8.555601662662194e-06, "loss": 1.2982, "step": 3313 }, { "epoch": 0.27, "grad_norm": 3.6678253271213106, "learning_rate": 8.554670828235905e-06, "loss": 0.5943, "step": 3314 }, { "epoch": 0.27, "grad_norm": 2.1007843639528807, "learning_rate": 8.553739744642662e-06, "loss": 0.3446, "step": 3315 }, { "epoch": 0.27, "grad_norm": 0.5255229994819723, "learning_rate": 8.552808411947727e-06, "loss": 0.0892, "step": 3316 }, { "epoch": 0.27, "grad_norm": 1.245052826510659, "learning_rate": 8.551876830216385e-06, "loss": 0.2156, "step": 3317 }, { "epoch": 0.27, "grad_norm": 3.4013826162449154, "learning_rate": 8.550944999513936e-06, "loss": 0.8355, "step": 3318 }, { "epoch": 0.27, "grad_norm": 1.9915229079069694, "learning_rate": 8.550012919905696e-06, "loss": 0.3716, "step": 3319 }, { "epoch": 0.27, "grad_norm": 4.177043562454798, "learning_rate": 8.549080591457e-06, "loss": 0.8958, "step": 3320 }, { "epoch": 0.27, "grad_norm": 2.4423048906076823, "learning_rate": 8.5481480142332e-06, "loss": 0.4812, "step": 3321 }, { "epoch": 0.27, "grad_norm": 3.207681663736577, "learning_rate": 8.547215188299664e-06, "loss": 0.5496, "step": 3322 }, { "epoch": 0.27, "grad_norm": 1.3400518150108705, "learning_rate": 8.546282113721785e-06, "loss": 0.2233, "step": 3323 }, { "epoch": 0.27, "grad_norm": 3.521282854097466, "learning_rate": 8.54534879056496e-06, "loss": 0.7035, "step": 3324 }, { "epoch": 0.27, "grad_norm": 3.027767335477696, "learning_rate": 8.544415218894615e-06, "loss": 0.6919, "step": 3325 }, { "epoch": 0.27, "grad_norm": 3.016276645524086, "learning_rate": 8.543481398776188e-06, "loss": 0.6924, "step": 3326 }, { "epoch": 0.27, "grad_norm": 4.264797794444411, "learning_rate": 8.542547330275138e-06, "loss": 0.9471, "step": 3327 }, { "epoch": 0.27, "grad_norm": 3.2930852371271047, "learning_rate": 8.541613013456935e-06, "loss": 0.8721, "step": 3328 }, { "epoch": 0.27, "grad_norm": 3.3653326087802973, "learning_rate": 8.540678448387075e-06, "loss": 0.7226, "step": 3329 }, { "epoch": 0.27, "grad_norm": 5.672402910371198, "learning_rate": 8.539743635131064e-06, "loss": 1.0184, "step": 3330 }, { "epoch": 0.27, "grad_norm": 5.302459581678297, "learning_rate": 8.538808573754428e-06, "loss": 1.2515, "step": 3331 }, { "epoch": 0.27, "grad_norm": 3.9777333277068743, "learning_rate": 8.537873264322714e-06, "loss": 0.8424, "step": 3332 }, { "epoch": 0.27, "grad_norm": 3.2118263151992004, "learning_rate": 8.53693770690148e-06, "loss": 0.736, "step": 3333 }, { "epoch": 0.27, "grad_norm": 3.505969676427309, "learning_rate": 8.536001901556306e-06, "loss": 0.6689, "step": 3334 }, { "epoch": 0.27, "grad_norm": 3.2270756392293136, "learning_rate": 8.535065848352785e-06, "loss": 0.6879, "step": 3335 }, { "epoch": 0.27, "grad_norm": 3.698836210287015, "learning_rate": 8.534129547356536e-06, "loss": 0.6287, "step": 3336 }, { "epoch": 0.27, "grad_norm": 4.961390143155534, "learning_rate": 8.533192998633184e-06, "loss": 0.9411, "step": 3337 }, { "epoch": 0.27, "grad_norm": 4.4226150856526845, "learning_rate": 8.532256202248382e-06, "loss": 1.0278, "step": 3338 }, { "epoch": 0.27, "grad_norm": 1.1314777414437935, "learning_rate": 8.53131915826779e-06, "loss": 0.3731, "step": 3339 }, { "epoch": 0.27, "grad_norm": 3.7013183433744246, "learning_rate": 8.530381866757096e-06, "loss": 0.6113, "step": 3340 }, { "epoch": 0.27, "grad_norm": 4.1012435037945, "learning_rate": 8.529444327781995e-06, "loss": 1.0921, "step": 3341 }, { "epoch": 0.27, "grad_norm": 2.828225586633404, "learning_rate": 8.528506541408208e-06, "loss": 0.471, "step": 3342 }, { "epoch": 0.27, "grad_norm": 4.270768950776447, "learning_rate": 8.527568507701467e-06, "loss": 0.6118, "step": 3343 }, { "epoch": 0.27, "grad_norm": 4.133735752099045, "learning_rate": 8.526630226727528e-06, "loss": 0.8534, "step": 3344 }, { "epoch": 0.27, "grad_norm": 6.2286250088537605, "learning_rate": 8.525691698552157e-06, "loss": 1.2391, "step": 3345 }, { "epoch": 0.27, "grad_norm": 3.674507236090259, "learning_rate": 8.52475292324114e-06, "loss": 0.9262, "step": 3346 }, { "epoch": 0.27, "grad_norm": 3.251410586765895, "learning_rate": 8.523813900860285e-06, "loss": 0.7037, "step": 3347 }, { "epoch": 0.27, "grad_norm": 4.489785459479057, "learning_rate": 8.52287463147541e-06, "loss": 0.8188, "step": 3348 }, { "epoch": 0.27, "grad_norm": 4.275572233721359, "learning_rate": 8.521935115152357e-06, "loss": 0.8396, "step": 3349 }, { "epoch": 0.27, "grad_norm": 2.0843978814099873, "learning_rate": 8.520995351956977e-06, "loss": 0.416, "step": 3350 }, { "epoch": 0.27, "grad_norm": 5.3651970008548, "learning_rate": 8.52005534195515e-06, "loss": 1.806, "step": 3351 }, { "epoch": 0.27, "grad_norm": 2.6903373871188485, "learning_rate": 8.51911508521276e-06, "loss": 0.562, "step": 3352 }, { "epoch": 0.27, "grad_norm": 3.201406982156682, "learning_rate": 8.518174581795718e-06, "loss": 0.6777, "step": 3353 }, { "epoch": 0.27, "grad_norm": 2.8101727232527187, "learning_rate": 8.517233831769951e-06, "loss": 0.641, "step": 3354 }, { "epoch": 0.27, "grad_norm": 3.201730798911782, "learning_rate": 8.516292835201396e-06, "loss": 0.7662, "step": 3355 }, { "epoch": 0.27, "grad_norm": 4.016548229239503, "learning_rate": 8.51535159215602e-06, "loss": 0.9928, "step": 3356 }, { "epoch": 0.27, "grad_norm": 2.594045982399574, "learning_rate": 8.514410102699794e-06, "loss": 0.3828, "step": 3357 }, { "epoch": 0.27, "grad_norm": 5.063304614421123, "learning_rate": 8.513468366898714e-06, "loss": 0.965, "step": 3358 }, { "epoch": 0.27, "grad_norm": 5.613244946794997, "learning_rate": 8.512526384818794e-06, "loss": 0.9638, "step": 3359 }, { "epoch": 0.27, "grad_norm": 4.133040387545648, "learning_rate": 8.511584156526059e-06, "loss": 0.8531, "step": 3360 }, { "epoch": 0.27, "grad_norm": 3.5220936512990617, "learning_rate": 8.510641682086557e-06, "loss": 0.782, "step": 3361 }, { "epoch": 0.27, "grad_norm": 3.2675450795257666, "learning_rate": 8.509698961566356e-06, "loss": 0.6078, "step": 3362 }, { "epoch": 0.27, "grad_norm": 5.7808428333412145, "learning_rate": 8.508755995031527e-06, "loss": 0.9535, "step": 3363 }, { "epoch": 0.27, "grad_norm": 5.202309335801023, "learning_rate": 8.507812782548174e-06, "loss": 1.0453, "step": 3364 }, { "epoch": 0.28, "grad_norm": 4.494824079311042, "learning_rate": 8.506869324182411e-06, "loss": 1.143, "step": 3365 }, { "epoch": 0.28, "grad_norm": 3.022960989901094, "learning_rate": 8.505925620000373e-06, "loss": 0.625, "step": 3366 }, { "epoch": 0.28, "grad_norm": 5.417571085131394, "learning_rate": 8.504981670068204e-06, "loss": 1.2577, "step": 3367 }, { "epoch": 0.28, "grad_norm": 5.54414024670515, "learning_rate": 8.504037474452073e-06, "loss": 1.224, "step": 3368 }, { "epoch": 0.28, "grad_norm": 3.328681112964479, "learning_rate": 8.503093033218168e-06, "loss": 0.7479, "step": 3369 }, { "epoch": 0.28, "grad_norm": 2.581582710005775, "learning_rate": 8.502148346432683e-06, "loss": 0.6942, "step": 3370 }, { "epoch": 0.28, "grad_norm": 3.630141365785073, "learning_rate": 8.501203414161844e-06, "loss": 0.8597, "step": 3371 }, { "epoch": 0.28, "grad_norm": 2.4991407245673773, "learning_rate": 8.50025823647188e-06, "loss": 0.5979, "step": 3372 }, { "epoch": 0.28, "grad_norm": 3.313086390452513, "learning_rate": 8.499312813429047e-06, "loss": 0.8321, "step": 3373 }, { "epoch": 0.28, "grad_norm": 2.098169768904149, "learning_rate": 8.498367145099618e-06, "loss": 0.3456, "step": 3374 }, { "epoch": 0.28, "grad_norm": 4.49992191921518, "learning_rate": 8.497421231549873e-06, "loss": 1.3791, "step": 3375 }, { "epoch": 0.28, "grad_norm": 3.2400728584587615, "learning_rate": 8.496475072846125e-06, "loss": 0.5226, "step": 3376 }, { "epoch": 0.28, "grad_norm": 3.5645603139882116, "learning_rate": 8.495528669054688e-06, "loss": 0.579, "step": 3377 }, { "epoch": 0.28, "grad_norm": 4.016893323985373, "learning_rate": 8.494582020241905e-06, "loss": 1.2116, "step": 3378 }, { "epoch": 0.28, "grad_norm": 3.0975535418531996, "learning_rate": 8.493635126474128e-06, "loss": 0.8675, "step": 3379 }, { "epoch": 0.28, "grad_norm": 4.967808603430733, "learning_rate": 8.492687987817736e-06, "loss": 1.4063, "step": 3380 }, { "epoch": 0.28, "grad_norm": 3.069151077502174, "learning_rate": 8.491740604339115e-06, "loss": 0.5528, "step": 3381 }, { "epoch": 0.28, "grad_norm": 4.29931499894164, "learning_rate": 8.490792976104676e-06, "loss": 0.8807, "step": 3382 }, { "epoch": 0.28, "grad_norm": 4.600143238803147, "learning_rate": 8.48984510318084e-06, "loss": 0.886, "step": 3383 }, { "epoch": 0.28, "grad_norm": 3.7937467755769947, "learning_rate": 8.48889698563405e-06, "loss": 1.1959, "step": 3384 }, { "epoch": 0.28, "grad_norm": 2.2159369615732913, "learning_rate": 8.487948623530765e-06, "loss": 0.3218, "step": 3385 }, { "epoch": 0.28, "grad_norm": 3.567853307581026, "learning_rate": 8.48700001693746e-06, "loss": 0.6045, "step": 3386 }, { "epoch": 0.28, "grad_norm": 5.204283266026797, "learning_rate": 8.48605116592063e-06, "loss": 0.8285, "step": 3387 }, { "epoch": 0.28, "grad_norm": 4.522280887836711, "learning_rate": 8.485102070546786e-06, "loss": 0.9568, "step": 3388 }, { "epoch": 0.28, "grad_norm": 3.688467153997024, "learning_rate": 8.484152730882453e-06, "loss": 0.5945, "step": 3389 }, { "epoch": 0.28, "grad_norm": 3.046191178978568, "learning_rate": 8.483203146994174e-06, "loss": 0.4337, "step": 3390 }, { "epoch": 0.28, "grad_norm": 4.301048854694765, "learning_rate": 8.482253318948516e-06, "loss": 1.2326, "step": 3391 }, { "epoch": 0.28, "grad_norm": 3.0677386148310584, "learning_rate": 8.481303246812056e-06, "loss": 0.6228, "step": 3392 }, { "epoch": 0.28, "grad_norm": 3.9716547402830846, "learning_rate": 8.480352930651387e-06, "loss": 0.9784, "step": 3393 }, { "epoch": 0.28, "grad_norm": 4.63926742491477, "learning_rate": 8.479402370533127e-06, "loss": 0.9952, "step": 3394 }, { "epoch": 0.28, "grad_norm": 3.2588996439244484, "learning_rate": 8.478451566523902e-06, "loss": 0.7426, "step": 3395 }, { "epoch": 0.28, "grad_norm": 2.91189519136294, "learning_rate": 8.47750051869036e-06, "loss": 0.6852, "step": 3396 }, { "epoch": 0.28, "grad_norm": 4.392249566403095, "learning_rate": 8.476549227099164e-06, "loss": 0.7342, "step": 3397 }, { "epoch": 0.28, "grad_norm": 2.808611635862418, "learning_rate": 8.475597691817e-06, "loss": 0.7754, "step": 3398 }, { "epoch": 0.28, "grad_norm": 3.1065486891336858, "learning_rate": 8.474645912910562e-06, "loss": 0.7521, "step": 3399 }, { "epoch": 0.28, "grad_norm": 4.844818704294859, "learning_rate": 8.473693890446568e-06, "loss": 1.1064, "step": 3400 }, { "epoch": 0.28, "grad_norm": 3.4964248348112372, "learning_rate": 8.472741624491749e-06, "loss": 0.829, "step": 3401 }, { "epoch": 0.28, "grad_norm": 5.813746484461821, "learning_rate": 8.471789115112857e-06, "loss": 1.6135, "step": 3402 }, { "epoch": 0.28, "grad_norm": 6.04943564553509, "learning_rate": 8.470836362376657e-06, "loss": 1.5515, "step": 3403 }, { "epoch": 0.28, "grad_norm": 2.974791909097457, "learning_rate": 8.469883366349932e-06, "loss": 0.6906, "step": 3404 }, { "epoch": 0.28, "grad_norm": 4.369584392055278, "learning_rate": 8.468930127099486e-06, "loss": 1.2062, "step": 3405 }, { "epoch": 0.28, "grad_norm": 3.0886044700598396, "learning_rate": 8.467976644692131e-06, "loss": 0.7033, "step": 3406 }, { "epoch": 0.28, "grad_norm": 4.00103892834991, "learning_rate": 8.46702291919471e-06, "loss": 0.9827, "step": 3407 }, { "epoch": 0.28, "grad_norm": 5.134494477289156, "learning_rate": 8.466068950674068e-06, "loss": 1.0653, "step": 3408 }, { "epoch": 0.28, "grad_norm": 4.323040388604154, "learning_rate": 8.465114739197079e-06, "loss": 1.0076, "step": 3409 }, { "epoch": 0.28, "grad_norm": 4.5968184604234805, "learning_rate": 8.464160284830627e-06, "loss": 1.2016, "step": 3410 }, { "epoch": 0.28, "grad_norm": 2.7621565259260055, "learning_rate": 8.463205587641614e-06, "loss": 0.2877, "step": 3411 }, { "epoch": 0.28, "grad_norm": 4.938771720970346, "learning_rate": 8.462250647696962e-06, "loss": 1.5023, "step": 3412 }, { "epoch": 0.28, "grad_norm": 2.9458559677127023, "learning_rate": 8.461295465063605e-06, "loss": 0.6964, "step": 3413 }, { "epoch": 0.28, "grad_norm": 3.4891976793370416, "learning_rate": 8.460340039808504e-06, "loss": 1.0164, "step": 3414 }, { "epoch": 0.28, "grad_norm": 3.8104414866522105, "learning_rate": 8.459384371998622e-06, "loss": 0.7668, "step": 3415 }, { "epoch": 0.28, "grad_norm": 3.732488335406169, "learning_rate": 8.458428461700951e-06, "loss": 0.841, "step": 3416 }, { "epoch": 0.28, "grad_norm": 2.7949169717721754, "learning_rate": 8.457472308982498e-06, "loss": 0.6091, "step": 3417 }, { "epoch": 0.28, "grad_norm": 2.3442232976274724, "learning_rate": 8.456515913910282e-06, "loss": 0.4215, "step": 3418 }, { "epoch": 0.28, "grad_norm": 2.660297546667241, "learning_rate": 8.455559276551343e-06, "loss": 0.3241, "step": 3419 }, { "epoch": 0.28, "grad_norm": 3.1989636832644086, "learning_rate": 8.454602396972737e-06, "loss": 0.6127, "step": 3420 }, { "epoch": 0.28, "grad_norm": 2.3692484321829754, "learning_rate": 8.453645275241538e-06, "loss": 0.4041, "step": 3421 }, { "epoch": 0.28, "grad_norm": 3.5031828824437254, "learning_rate": 8.452687911424836e-06, "loss": 0.8936, "step": 3422 }, { "epoch": 0.28, "grad_norm": 3.276633330790473, "learning_rate": 8.451730305589737e-06, "loss": 0.5023, "step": 3423 }, { "epoch": 0.28, "grad_norm": 4.379893635875519, "learning_rate": 8.450772457803365e-06, "loss": 0.5751, "step": 3424 }, { "epoch": 0.28, "grad_norm": 5.221177909343238, "learning_rate": 8.449814368132862e-06, "loss": 1.0798, "step": 3425 }, { "epoch": 0.28, "grad_norm": 4.1564530399167605, "learning_rate": 8.448856036645386e-06, "loss": 1.1126, "step": 3426 }, { "epoch": 0.28, "grad_norm": 5.136210138891469, "learning_rate": 8.447897463408113e-06, "loss": 1.3091, "step": 3427 }, { "epoch": 0.28, "grad_norm": 4.802072054536583, "learning_rate": 8.44693864848823e-06, "loss": 0.9722, "step": 3428 }, { "epoch": 0.28, "grad_norm": 5.438403174790669, "learning_rate": 8.44597959195295e-06, "loss": 1.4541, "step": 3429 }, { "epoch": 0.28, "grad_norm": 3.858668859920182, "learning_rate": 8.445020293869497e-06, "loss": 0.7488, "step": 3430 }, { "epoch": 0.28, "grad_norm": 3.384226705250362, "learning_rate": 8.444060754305115e-06, "loss": 0.6044, "step": 3431 }, { "epoch": 0.28, "grad_norm": 4.210435131363074, "learning_rate": 8.443100973327063e-06, "loss": 0.8752, "step": 3432 }, { "epoch": 0.28, "grad_norm": 4.611685456780522, "learning_rate": 8.442140951002616e-06, "loss": 0.7336, "step": 3433 }, { "epoch": 0.28, "grad_norm": 1.968448930951749, "learning_rate": 8.441180687399068e-06, "loss": 0.3585, "step": 3434 }, { "epoch": 0.28, "grad_norm": 2.1026752437935223, "learning_rate": 8.440220182583731e-06, "loss": 0.4757, "step": 3435 }, { "epoch": 0.28, "grad_norm": 3.6772016795236184, "learning_rate": 8.439259436623933e-06, "loss": 0.8092, "step": 3436 }, { "epoch": 0.28, "grad_norm": 2.929545885760317, "learning_rate": 8.438298449587014e-06, "loss": 0.4462, "step": 3437 }, { "epoch": 0.28, "grad_norm": 2.8387804784920716, "learning_rate": 8.437337221540337e-06, "loss": 0.638, "step": 3438 }, { "epoch": 0.28, "grad_norm": 6.15289257004559, "learning_rate": 8.436375752551282e-06, "loss": 1.1346, "step": 3439 }, { "epoch": 0.28, "grad_norm": 3.3656847914305503, "learning_rate": 8.43541404268724e-06, "loss": 0.7933, "step": 3440 }, { "epoch": 0.28, "grad_norm": 2.8082542811817923, "learning_rate": 8.434452092015624e-06, "loss": 0.6105, "step": 3441 }, { "epoch": 0.28, "grad_norm": 4.434561121019493, "learning_rate": 8.433489900603866e-06, "loss": 1.2958, "step": 3442 }, { "epoch": 0.28, "grad_norm": 5.036759914134969, "learning_rate": 8.432527468519405e-06, "loss": 1.3316, "step": 3443 }, { "epoch": 0.28, "grad_norm": 3.488323094554122, "learning_rate": 8.43156479582971e-06, "loss": 0.746, "step": 3444 }, { "epoch": 0.28, "grad_norm": 3.2517439549549003, "learning_rate": 8.430601882602256e-06, "loss": 0.6266, "step": 3445 }, { "epoch": 0.28, "grad_norm": 4.502813425517545, "learning_rate": 8.429638728904538e-06, "loss": 1.0791, "step": 3446 }, { "epoch": 0.28, "grad_norm": 5.22014969866628, "learning_rate": 8.428675334804073e-06, "loss": 1.3465, "step": 3447 }, { "epoch": 0.28, "grad_norm": 3.552856686401315, "learning_rate": 8.42771170036839e-06, "loss": 0.4467, "step": 3448 }, { "epoch": 0.28, "grad_norm": 3.423996933534739, "learning_rate": 8.426747825665032e-06, "loss": 0.6019, "step": 3449 }, { "epoch": 0.28, "grad_norm": 3.81325929155305, "learning_rate": 8.425783710761565e-06, "loss": 0.8259, "step": 3450 }, { "epoch": 0.28, "grad_norm": 5.4597466616695405, "learning_rate": 8.42481935572557e-06, "loss": 1.0784, "step": 3451 }, { "epoch": 0.28, "grad_norm": 5.190375688125212, "learning_rate": 8.423854760624641e-06, "loss": 1.4251, "step": 3452 }, { "epoch": 0.28, "grad_norm": 4.350206830956461, "learning_rate": 8.422889925526396e-06, "loss": 1.0581, "step": 3453 }, { "epoch": 0.28, "grad_norm": 2.675596277735707, "learning_rate": 8.421924850498464e-06, "loss": 0.318, "step": 3454 }, { "epoch": 0.28, "grad_norm": 4.567263639604266, "learning_rate": 8.420959535608491e-06, "loss": 0.8752, "step": 3455 }, { "epoch": 0.28, "grad_norm": 4.188249554694329, "learning_rate": 8.419993980924141e-06, "loss": 1.2736, "step": 3456 }, { "epoch": 0.28, "grad_norm": 4.804489030767843, "learning_rate": 8.4190281865131e-06, "loss": 1.0392, "step": 3457 }, { "epoch": 0.28, "grad_norm": 3.910357957576581, "learning_rate": 8.41806215244306e-06, "loss": 0.9776, "step": 3458 }, { "epoch": 0.28, "grad_norm": 4.0388462868255, "learning_rate": 8.417095878781742e-06, "loss": 1.0865, "step": 3459 }, { "epoch": 0.28, "grad_norm": 3.290045246104023, "learning_rate": 8.41612936559687e-06, "loss": 0.4962, "step": 3460 }, { "epoch": 0.28, "grad_norm": 4.627539575503532, "learning_rate": 8.4151626129562e-06, "loss": 0.6425, "step": 3461 }, { "epoch": 0.28, "grad_norm": 4.649946228294108, "learning_rate": 8.414195620927491e-06, "loss": 1.0886, "step": 3462 }, { "epoch": 0.28, "grad_norm": 4.51517747858038, "learning_rate": 8.41322838957853e-06, "loss": 0.8784, "step": 3463 }, { "epoch": 0.28, "grad_norm": 4.330803877844067, "learning_rate": 8.412260918977112e-06, "loss": 0.8254, "step": 3464 }, { "epoch": 0.28, "grad_norm": 1.3595172213056332, "learning_rate": 8.411293209191054e-06, "loss": 0.1939, "step": 3465 }, { "epoch": 0.28, "grad_norm": 3.892873810491101, "learning_rate": 8.410325260288188e-06, "loss": 1.0629, "step": 3466 }, { "epoch": 0.28, "grad_norm": 3.569474704149108, "learning_rate": 8.409357072336363e-06, "loss": 0.5773, "step": 3467 }, { "epoch": 0.28, "grad_norm": 3.545847930059436, "learning_rate": 8.408388645403445e-06, "loss": 0.6537, "step": 3468 }, { "epoch": 0.28, "grad_norm": 5.18436426463238, "learning_rate": 8.407419979557317e-06, "loss": 1.4713, "step": 3469 }, { "epoch": 0.28, "grad_norm": 4.442562603742148, "learning_rate": 8.406451074865875e-06, "loss": 1.0713, "step": 3470 }, { "epoch": 0.28, "grad_norm": 3.6746294802722166, "learning_rate": 8.405481931397042e-06, "loss": 0.8966, "step": 3471 }, { "epoch": 0.28, "grad_norm": 3.414364511955644, "learning_rate": 8.404512549218741e-06, "loss": 0.6344, "step": 3472 }, { "epoch": 0.28, "grad_norm": 2.5411981015933045, "learning_rate": 8.40354292839893e-06, "loss": 0.5917, "step": 3473 }, { "epoch": 0.28, "grad_norm": 2.4104669592059573, "learning_rate": 8.402573069005573e-06, "loss": 0.3866, "step": 3474 }, { "epoch": 0.28, "grad_norm": 2.8931161075128085, "learning_rate": 8.40160297110665e-06, "loss": 0.559, "step": 3475 }, { "epoch": 0.28, "grad_norm": 2.6508457374642984, "learning_rate": 8.400632634770163e-06, "loss": 0.5797, "step": 3476 }, { "epoch": 0.28, "grad_norm": 2.405419805992831, "learning_rate": 8.399662060064126e-06, "loss": 0.4704, "step": 3477 }, { "epoch": 0.28, "grad_norm": 3.5565941443329865, "learning_rate": 8.398691247056577e-06, "loss": 0.8638, "step": 3478 }, { "epoch": 0.28, "grad_norm": 4.3152786905128035, "learning_rate": 8.397720195815561e-06, "loss": 0.9719, "step": 3479 }, { "epoch": 0.28, "grad_norm": 4.155128845995398, "learning_rate": 8.396748906409147e-06, "loss": 1.1606, "step": 3480 }, { "epoch": 0.28, "grad_norm": 2.602097103846587, "learning_rate": 8.395777378905417e-06, "loss": 0.6643, "step": 3481 }, { "epoch": 0.28, "grad_norm": 3.734829924575268, "learning_rate": 8.394805613372471e-06, "loss": 0.6499, "step": 3482 }, { "epoch": 0.28, "grad_norm": 3.0622166922148346, "learning_rate": 8.393833609878426e-06, "loss": 0.5855, "step": 3483 }, { "epoch": 0.28, "grad_norm": 3.236309198802574, "learning_rate": 8.392861368491415e-06, "loss": 0.7262, "step": 3484 }, { "epoch": 0.28, "grad_norm": 3.706892030666024, "learning_rate": 8.391888889279589e-06, "loss": 0.8424, "step": 3485 }, { "epoch": 0.28, "grad_norm": 4.003666146621652, "learning_rate": 8.390916172311113e-06, "loss": 0.8174, "step": 3486 }, { "epoch": 0.29, "grad_norm": 2.7056523946650533, "learning_rate": 8.389943217654169e-06, "loss": 0.687, "step": 3487 }, { "epoch": 0.29, "grad_norm": 4.099632712182386, "learning_rate": 8.38897002537696e-06, "loss": 1.0527, "step": 3488 }, { "epoch": 0.29, "grad_norm": 3.0084346188837983, "learning_rate": 8.387996595547699e-06, "loss": 0.7832, "step": 3489 }, { "epoch": 0.29, "grad_norm": 3.1426806013722053, "learning_rate": 8.387022928234623e-06, "loss": 0.7562, "step": 3490 }, { "epoch": 0.29, "grad_norm": 3.9478691188997646, "learning_rate": 8.38604902350598e-06, "loss": 0.7023, "step": 3491 }, { "epoch": 0.29, "grad_norm": 3.9094567058010594, "learning_rate": 8.385074881430036e-06, "loss": 0.8844, "step": 3492 }, { "epoch": 0.29, "grad_norm": 4.2910983396938285, "learning_rate": 8.384100502075076e-06, "loss": 1.015, "step": 3493 }, { "epoch": 0.29, "grad_norm": 4.628523203688381, "learning_rate": 8.383125885509398e-06, "loss": 1.1924, "step": 3494 }, { "epoch": 0.29, "grad_norm": 3.288493469772743, "learning_rate": 8.382151031801318e-06, "loss": 0.5543, "step": 3495 }, { "epoch": 0.29, "grad_norm": 5.178578709877618, "learning_rate": 8.381175941019171e-06, "loss": 1.2796, "step": 3496 }, { "epoch": 0.29, "grad_norm": 3.1525444590197744, "learning_rate": 8.380200613231305e-06, "loss": 0.6884, "step": 3497 }, { "epoch": 0.29, "grad_norm": 2.9848281841139404, "learning_rate": 8.379225048506085e-06, "loss": 0.6297, "step": 3498 }, { "epoch": 0.29, "grad_norm": 2.5167439822359685, "learning_rate": 8.378249246911898e-06, "loss": 0.7095, "step": 3499 }, { "epoch": 0.29, "grad_norm": 3.7118260838690333, "learning_rate": 8.377273208517138e-06, "loss": 0.9257, "step": 3500 }, { "epoch": 0.29, "grad_norm": 3.303460637133216, "learning_rate": 8.376296933390227e-06, "loss": 0.9383, "step": 3501 }, { "epoch": 0.29, "grad_norm": 3.786574910586223, "learning_rate": 8.375320421599595e-06, "loss": 0.3783, "step": 3502 }, { "epoch": 0.29, "grad_norm": 3.643732629168323, "learning_rate": 8.374343673213689e-06, "loss": 0.8174, "step": 3503 }, { "epoch": 0.29, "grad_norm": 2.9863507062348122, "learning_rate": 8.373366688300978e-06, "loss": 0.7188, "step": 3504 }, { "epoch": 0.29, "grad_norm": 3.426477659359341, "learning_rate": 8.37238946692994e-06, "loss": 0.8369, "step": 3505 }, { "epoch": 0.29, "grad_norm": 2.820979127414353, "learning_rate": 8.37141200916908e-06, "loss": 0.7249, "step": 3506 }, { "epoch": 0.29, "grad_norm": 4.560419011175925, "learning_rate": 8.37043431508691e-06, "loss": 0.9008, "step": 3507 }, { "epoch": 0.29, "grad_norm": 2.813495080021344, "learning_rate": 8.369456384751963e-06, "loss": 0.5069, "step": 3508 }, { "epoch": 0.29, "grad_norm": 1.9553754956034446, "learning_rate": 8.368478218232787e-06, "loss": 0.3191, "step": 3509 }, { "epoch": 0.29, "grad_norm": 3.3542405637871937, "learning_rate": 8.367499815597947e-06, "loss": 0.6272, "step": 3510 }, { "epoch": 0.29, "grad_norm": 4.501836087508141, "learning_rate": 8.366521176916027e-06, "loss": 1.3518, "step": 3511 }, { "epoch": 0.29, "grad_norm": 3.610952413356651, "learning_rate": 8.365542302255623e-06, "loss": 0.5242, "step": 3512 }, { "epoch": 0.29, "grad_norm": 3.8167889077572505, "learning_rate": 8.364563191685348e-06, "loss": 0.9545, "step": 3513 }, { "epoch": 0.29, "grad_norm": 5.0670028406576355, "learning_rate": 8.363583845273839e-06, "loss": 1.4594, "step": 3514 }, { "epoch": 0.29, "grad_norm": 3.519355454572228, "learning_rate": 8.362604263089739e-06, "loss": 0.5279, "step": 3515 }, { "epoch": 0.29, "grad_norm": 2.5810225460305403, "learning_rate": 8.361624445201715e-06, "loss": 0.5254, "step": 3516 }, { "epoch": 0.29, "grad_norm": 1.9515789420431564, "learning_rate": 8.360644391678448e-06, "loss": 0.4145, "step": 3517 }, { "epoch": 0.29, "grad_norm": 4.796650732563646, "learning_rate": 8.359664102588633e-06, "loss": 1.2567, "step": 3518 }, { "epoch": 0.29, "grad_norm": 2.5323364328106757, "learning_rate": 8.358683578000987e-06, "loss": 0.7193, "step": 3519 }, { "epoch": 0.29, "grad_norm": 3.897656459189634, "learning_rate": 8.357702817984239e-06, "loss": 0.6481, "step": 3520 }, { "epoch": 0.29, "grad_norm": 3.38233537413345, "learning_rate": 8.356721822607136e-06, "loss": 0.8116, "step": 3521 }, { "epoch": 0.29, "grad_norm": 4.32681111980777, "learning_rate": 8.355740591938445e-06, "loss": 0.8616, "step": 3522 }, { "epoch": 0.29, "grad_norm": 2.2162808086280132, "learning_rate": 8.354759126046937e-06, "loss": 0.5248, "step": 3523 }, { "epoch": 0.29, "grad_norm": 4.434389364827244, "learning_rate": 8.353777425001417e-06, "loss": 1.2069, "step": 3524 }, { "epoch": 0.29, "grad_norm": 2.824519616989892, "learning_rate": 8.352795488870695e-06, "loss": 0.6385, "step": 3525 }, { "epoch": 0.29, "grad_norm": 2.8981735074257484, "learning_rate": 8.351813317723601e-06, "loss": 0.6773, "step": 3526 }, { "epoch": 0.29, "grad_norm": 5.326576867153255, "learning_rate": 8.35083091162898e-06, "loss": 1.1598, "step": 3527 }, { "epoch": 0.29, "grad_norm": 4.421449357052687, "learning_rate": 8.349848270655696e-06, "loss": 0.7413, "step": 3528 }, { "epoch": 0.29, "grad_norm": 2.4048118640068776, "learning_rate": 8.348865394872625e-06, "loss": 0.6517, "step": 3529 }, { "epoch": 0.29, "grad_norm": 3.8012794215788777, "learning_rate": 8.347882284348665e-06, "loss": 1.1257, "step": 3530 }, { "epoch": 0.29, "grad_norm": 4.094074924068292, "learning_rate": 8.346898939152728e-06, "loss": 0.7916, "step": 3531 }, { "epoch": 0.29, "grad_norm": 2.981656102798751, "learning_rate": 8.34591535935374e-06, "loss": 0.8597, "step": 3532 }, { "epoch": 0.29, "grad_norm": 5.362189708212367, "learning_rate": 8.344931545020646e-06, "loss": 1.3468, "step": 3533 }, { "epoch": 0.29, "grad_norm": 3.011845342498781, "learning_rate": 8.343947496222409e-06, "loss": 0.6193, "step": 3534 }, { "epoch": 0.29, "grad_norm": 4.339480083175476, "learning_rate": 8.342963213028005e-06, "loss": 1.1736, "step": 3535 }, { "epoch": 0.29, "grad_norm": 3.9558061142349534, "learning_rate": 8.34197869550643e-06, "loss": 0.7738, "step": 3536 }, { "epoch": 0.29, "grad_norm": 5.154641996932149, "learning_rate": 8.340993943726692e-06, "loss": 1.3008, "step": 3537 }, { "epoch": 0.29, "grad_norm": 4.497783457109824, "learning_rate": 8.340008957757817e-06, "loss": 0.9407, "step": 3538 }, { "epoch": 0.29, "grad_norm": 4.138359581887209, "learning_rate": 8.339023737668851e-06, "loss": 1.0942, "step": 3539 }, { "epoch": 0.29, "grad_norm": 3.806807753455482, "learning_rate": 8.338038283528852e-06, "loss": 0.9245, "step": 3540 }, { "epoch": 0.29, "grad_norm": 4.201461202068064, "learning_rate": 8.337052595406896e-06, "loss": 0.8242, "step": 3541 }, { "epoch": 0.29, "grad_norm": 3.7793788421250945, "learning_rate": 8.336066673372079e-06, "loss": 0.8631, "step": 3542 }, { "epoch": 0.29, "grad_norm": 4.354344052829089, "learning_rate": 8.335080517493503e-06, "loss": 1.5768, "step": 3543 }, { "epoch": 0.29, "grad_norm": 3.216675886871792, "learning_rate": 8.3340941278403e-06, "loss": 0.7338, "step": 3544 }, { "epoch": 0.29, "grad_norm": 3.86311833127007, "learning_rate": 8.333107504481606e-06, "loss": 1.0233, "step": 3545 }, { "epoch": 0.29, "grad_norm": 1.9121852510210247, "learning_rate": 8.332120647486583e-06, "loss": 0.402, "step": 3546 }, { "epoch": 0.29, "grad_norm": 1.7083309623026617, "learning_rate": 8.331133556924404e-06, "loss": 0.466, "step": 3547 }, { "epoch": 0.29, "grad_norm": 4.756683675731017, "learning_rate": 8.33014623286426e-06, "loss": 1.0057, "step": 3548 }, { "epoch": 0.29, "grad_norm": 4.173372658028368, "learning_rate": 8.32915867537536e-06, "loss": 0.7373, "step": 3549 }, { "epoch": 0.29, "grad_norm": 5.611139422628014, "learning_rate": 8.32817088452692e-06, "loss": 1.2505, "step": 3550 }, { "epoch": 0.29, "grad_norm": 2.8996910690245694, "learning_rate": 8.32718286038819e-06, "loss": 0.8201, "step": 3551 }, { "epoch": 0.29, "grad_norm": 4.909677954824557, "learning_rate": 8.326194603028419e-06, "loss": 0.8776, "step": 3552 }, { "epoch": 0.29, "grad_norm": 5.2155694907097425, "learning_rate": 8.325206112516883e-06, "loss": 1.5689, "step": 3553 }, { "epoch": 0.29, "grad_norm": 3.877852476122359, "learning_rate": 8.324217388922871e-06, "loss": 0.8479, "step": 3554 }, { "epoch": 0.29, "grad_norm": 3.7553956611936457, "learning_rate": 8.323228432315684e-06, "loss": 0.8556, "step": 3555 }, { "epoch": 0.29, "grad_norm": 3.6617576423421254, "learning_rate": 8.32223924276465e-06, "loss": 1.0142, "step": 3556 }, { "epoch": 0.29, "grad_norm": 2.0526872916953827, "learning_rate": 8.321249820339102e-06, "loss": 0.3802, "step": 3557 }, { "epoch": 0.29, "grad_norm": 2.872143189106044, "learning_rate": 8.320260165108395e-06, "loss": 0.6965, "step": 3558 }, { "epoch": 0.29, "grad_norm": 1.806996264415691, "learning_rate": 8.3192702771419e-06, "loss": 0.287, "step": 3559 }, { "epoch": 0.29, "grad_norm": 2.699748900635862, "learning_rate": 8.318280156509007e-06, "loss": 0.5149, "step": 3560 }, { "epoch": 0.29, "grad_norm": 3.74006251254508, "learning_rate": 8.317289803279112e-06, "loss": 0.615, "step": 3561 }, { "epoch": 0.29, "grad_norm": 4.9784451434125705, "learning_rate": 8.316299217521641e-06, "loss": 1.4028, "step": 3562 }, { "epoch": 0.29, "grad_norm": 4.017270239393142, "learning_rate": 8.315308399306027e-06, "loss": 0.9775, "step": 3563 }, { "epoch": 0.29, "grad_norm": 3.42080669867183, "learning_rate": 8.314317348701724e-06, "loss": 0.7133, "step": 3564 }, { "epoch": 0.29, "grad_norm": 4.893028050718947, "learning_rate": 8.313326065778198e-06, "loss": 1.2777, "step": 3565 }, { "epoch": 0.29, "grad_norm": 4.55452023460235, "learning_rate": 8.312334550604934e-06, "loss": 1.2704, "step": 3566 }, { "epoch": 0.29, "grad_norm": 4.1051621769345426, "learning_rate": 8.311342803251431e-06, "loss": 1.1103, "step": 3567 }, { "epoch": 0.29, "grad_norm": 4.133309248809495, "learning_rate": 8.310350823787213e-06, "loss": 0.9868, "step": 3568 }, { "epoch": 0.29, "grad_norm": 3.6100634341728, "learning_rate": 8.309358612281805e-06, "loss": 0.905, "step": 3569 }, { "epoch": 0.29, "grad_norm": 2.6992294908098087, "learning_rate": 8.308366168804764e-06, "loss": 0.5854, "step": 3570 }, { "epoch": 0.29, "grad_norm": 2.193632411771907, "learning_rate": 8.30737349342565e-06, "loss": 0.6192, "step": 3571 }, { "epoch": 0.29, "grad_norm": 2.956331830855571, "learning_rate": 8.30638058621405e-06, "loss": 0.8116, "step": 3572 }, { "epoch": 0.29, "grad_norm": 3.4776215746659096, "learning_rate": 8.305387447239559e-06, "loss": 0.5024, "step": 3573 }, { "epoch": 0.29, "grad_norm": 3.9348804554264616, "learning_rate": 8.304394076571794e-06, "loss": 0.8389, "step": 3574 }, { "epoch": 0.29, "grad_norm": 2.9240920418295615, "learning_rate": 8.303400474280384e-06, "loss": 0.8246, "step": 3575 }, { "epoch": 0.29, "grad_norm": 5.604727604154122, "learning_rate": 8.302406640434978e-06, "loss": 1.1539, "step": 3576 }, { "epoch": 0.29, "grad_norm": 4.3571536012994, "learning_rate": 8.301412575105238e-06, "loss": 1.0719, "step": 3577 }, { "epoch": 0.29, "grad_norm": 3.614080600263977, "learning_rate": 8.300418278360844e-06, "loss": 0.4737, "step": 3578 }, { "epoch": 0.29, "grad_norm": 3.5897488641657365, "learning_rate": 8.299423750271493e-06, "loss": 0.9158, "step": 3579 }, { "epoch": 0.29, "grad_norm": 3.6018999721838405, "learning_rate": 8.298428990906896e-06, "loss": 0.7084, "step": 3580 }, { "epoch": 0.29, "grad_norm": 0.9521900164481667, "learning_rate": 8.297434000336781e-06, "loss": 0.1663, "step": 3581 }, { "epoch": 0.29, "grad_norm": 3.416897765599965, "learning_rate": 8.296438778630893e-06, "loss": 0.7562, "step": 3582 }, { "epoch": 0.29, "grad_norm": 3.0830365100554555, "learning_rate": 8.295443325858994e-06, "loss": 0.9708, "step": 3583 }, { "epoch": 0.29, "grad_norm": 1.5648254742585337, "learning_rate": 8.294447642090857e-06, "loss": 0.2603, "step": 3584 }, { "epoch": 0.29, "grad_norm": 3.5890801278182125, "learning_rate": 8.293451727396283e-06, "loss": 0.84, "step": 3585 }, { "epoch": 0.29, "grad_norm": 4.596402155220595, "learning_rate": 8.29245558184507e-06, "loss": 0.5958, "step": 3586 }, { "epoch": 0.29, "grad_norm": 4.645888567946884, "learning_rate": 8.291459205507054e-06, "loss": 1.1768, "step": 3587 }, { "epoch": 0.29, "grad_norm": 2.7688250555200846, "learning_rate": 8.29046259845207e-06, "loss": 0.6303, "step": 3588 }, { "epoch": 0.29, "grad_norm": 5.681310621502814, "learning_rate": 8.289465760749977e-06, "loss": 1.7021, "step": 3589 }, { "epoch": 0.29, "grad_norm": 4.613193942592307, "learning_rate": 8.288468692470652e-06, "loss": 1.0909, "step": 3590 }, { "epoch": 0.29, "grad_norm": 3.8875252487754874, "learning_rate": 8.287471393683984e-06, "loss": 1.1162, "step": 3591 }, { "epoch": 0.29, "grad_norm": 2.9841346177304655, "learning_rate": 8.286473864459876e-06, "loss": 0.7506, "step": 3592 }, { "epoch": 0.29, "grad_norm": 5.508869631913947, "learning_rate": 8.285476104868252e-06, "loss": 1.1238, "step": 3593 }, { "epoch": 0.29, "grad_norm": 5.837820743640624, "learning_rate": 8.284478114979056e-06, "loss": 0.7888, "step": 3594 }, { "epoch": 0.29, "grad_norm": 2.836434081449681, "learning_rate": 8.283479894862233e-06, "loss": 0.5704, "step": 3595 }, { "epoch": 0.29, "grad_norm": 3.2250188976590355, "learning_rate": 8.282481444587764e-06, "loss": 0.6061, "step": 3596 }, { "epoch": 0.29, "grad_norm": 3.100264445911385, "learning_rate": 8.281482764225628e-06, "loss": 0.5391, "step": 3597 }, { "epoch": 0.29, "grad_norm": 4.284825992082469, "learning_rate": 8.280483853845831e-06, "loss": 1.4993, "step": 3598 }, { "epoch": 0.29, "grad_norm": 3.683705715157677, "learning_rate": 8.279484713518395e-06, "loss": 1.1575, "step": 3599 }, { "epoch": 0.29, "grad_norm": 3.422930359517672, "learning_rate": 8.27848534331335e-06, "loss": 0.5458, "step": 3600 }, { "epoch": 0.29, "grad_norm": 4.337143182155289, "learning_rate": 8.277485743300753e-06, "loss": 0.6768, "step": 3601 }, { "epoch": 0.29, "grad_norm": 5.951202647514589, "learning_rate": 8.27648591355067e-06, "loss": 1.0886, "step": 3602 }, { "epoch": 0.29, "grad_norm": 3.1163177763316767, "learning_rate": 8.27548585413318e-06, "loss": 0.4608, "step": 3603 }, { "epoch": 0.29, "grad_norm": 4.473268162565548, "learning_rate": 8.274485565118389e-06, "loss": 0.9527, "step": 3604 }, { "epoch": 0.29, "grad_norm": 4.2134953665323875, "learning_rate": 8.27348504657641e-06, "loss": 1.02, "step": 3605 }, { "epoch": 0.29, "grad_norm": 4.865807389021216, "learning_rate": 8.272484298577375e-06, "loss": 1.2196, "step": 3606 }, { "epoch": 0.29, "grad_norm": 4.4482018269506804, "learning_rate": 8.271483321191433e-06, "loss": 1.0805, "step": 3607 }, { "epoch": 0.29, "grad_norm": 3.789927696800828, "learning_rate": 8.270482114488748e-06, "loss": 0.7778, "step": 3608 }, { "epoch": 0.29, "grad_norm": 4.3632231589887605, "learning_rate": 8.269480678539498e-06, "loss": 0.8847, "step": 3609 }, { "epoch": 0.3, "grad_norm": 3.1027736871674576, "learning_rate": 8.268479013413883e-06, "loss": 0.679, "step": 3610 }, { "epoch": 0.3, "grad_norm": 1.9596146365430251, "learning_rate": 8.267477119182115e-06, "loss": 0.3198, "step": 3611 }, { "epoch": 0.3, "grad_norm": 1.3201503867779574, "learning_rate": 8.266474995914419e-06, "loss": 0.1946, "step": 3612 }, { "epoch": 0.3, "grad_norm": 2.4227795740721945, "learning_rate": 8.265472643681042e-06, "loss": 0.5886, "step": 3613 }, { "epoch": 0.3, "grad_norm": 1.9999825595731449, "learning_rate": 8.264470062552246e-06, "loss": 0.501, "step": 3614 }, { "epoch": 0.3, "grad_norm": 2.466292481798756, "learning_rate": 8.263467252598303e-06, "loss": 0.3048, "step": 3615 }, { "epoch": 0.3, "grad_norm": 3.3983750894590083, "learning_rate": 8.26246421388951e-06, "loss": 0.8234, "step": 3616 }, { "epoch": 0.3, "grad_norm": 2.9845882181511003, "learning_rate": 8.261460946496172e-06, "loss": 0.7159, "step": 3617 }, { "epoch": 0.3, "grad_norm": 2.963902341128361, "learning_rate": 8.260457450488617e-06, "loss": 0.5597, "step": 3618 }, { "epoch": 0.3, "grad_norm": 4.58947166693942, "learning_rate": 8.259453725937184e-06, "loss": 0.9475, "step": 3619 }, { "epoch": 0.3, "grad_norm": 1.8608228824499164, "learning_rate": 8.25844977291223e-06, "loss": 0.4321, "step": 3620 }, { "epoch": 0.3, "grad_norm": 3.1629799771517217, "learning_rate": 8.257445591484128e-06, "loss": 0.7059, "step": 3621 }, { "epoch": 0.3, "grad_norm": 3.4020188663153315, "learning_rate": 8.256441181723265e-06, "loss": 0.5953, "step": 3622 }, { "epoch": 0.3, "grad_norm": 4.750190553423603, "learning_rate": 8.255436543700048e-06, "loss": 0.9565, "step": 3623 }, { "epoch": 0.3, "grad_norm": 3.420792324678391, "learning_rate": 8.254431677484898e-06, "loss": 0.9565, "step": 3624 }, { "epoch": 0.3, "grad_norm": 3.610989213133846, "learning_rate": 8.25342658314825e-06, "loss": 1.0059, "step": 3625 }, { "epoch": 0.3, "grad_norm": 3.5987006785906437, "learning_rate": 8.252421260760558e-06, "loss": 0.9466, "step": 3626 }, { "epoch": 0.3, "grad_norm": 4.658086668796395, "learning_rate": 8.25141571039229e-06, "loss": 1.2601, "step": 3627 }, { "epoch": 0.3, "grad_norm": 4.424797321212776, "learning_rate": 8.25040993211393e-06, "loss": 0.8074, "step": 3628 }, { "epoch": 0.3, "grad_norm": 2.945071389194256, "learning_rate": 8.24940392599598e-06, "loss": 0.6433, "step": 3629 }, { "epoch": 0.3, "grad_norm": 4.658612357174698, "learning_rate": 8.248397692108957e-06, "loss": 1.1627, "step": 3630 }, { "epoch": 0.3, "grad_norm": 5.488384567872834, "learning_rate": 8.247391230523393e-06, "loss": 1.3089, "step": 3631 }, { "epoch": 0.3, "grad_norm": 2.272377230796433, "learning_rate": 8.246384541309835e-06, "loss": 0.4514, "step": 3632 }, { "epoch": 0.3, "grad_norm": 4.105369688695962, "learning_rate": 8.24537762453885e-06, "loss": 0.8004, "step": 3633 }, { "epoch": 0.3, "grad_norm": 4.537957023672104, "learning_rate": 8.244370480281018e-06, "loss": 0.6889, "step": 3634 }, { "epoch": 0.3, "grad_norm": 4.014372334228167, "learning_rate": 8.243363108606934e-06, "loss": 1.0708, "step": 3635 }, { "epoch": 0.3, "grad_norm": 3.644010528781357, "learning_rate": 8.24235550958721e-06, "loss": 0.6836, "step": 3636 }, { "epoch": 0.3, "grad_norm": 3.8196684083357106, "learning_rate": 8.241347683292478e-06, "loss": 0.9738, "step": 3637 }, { "epoch": 0.3, "grad_norm": 5.07292046837355, "learning_rate": 8.240339629793379e-06, "loss": 1.3078, "step": 3638 }, { "epoch": 0.3, "grad_norm": 4.013052169089353, "learning_rate": 8.239331349160573e-06, "loss": 0.5815, "step": 3639 }, { "epoch": 0.3, "grad_norm": 1.653944549068703, "learning_rate": 8.238322841464738e-06, "loss": 0.3109, "step": 3640 }, { "epoch": 0.3, "grad_norm": 3.115651595817726, "learning_rate": 8.237314106776563e-06, "loss": 0.8189, "step": 3641 }, { "epoch": 0.3, "grad_norm": 3.3410805462475524, "learning_rate": 8.236305145166761e-06, "loss": 0.808, "step": 3642 }, { "epoch": 0.3, "grad_norm": 2.9670019774770364, "learning_rate": 8.23529595670605e-06, "loss": 0.6854, "step": 3643 }, { "epoch": 0.3, "grad_norm": 2.958379765225836, "learning_rate": 8.234286541465175e-06, "loss": 0.534, "step": 3644 }, { "epoch": 0.3, "grad_norm": 3.855724854233526, "learning_rate": 8.233276899514887e-06, "loss": 1.0768, "step": 3645 }, { "epoch": 0.3, "grad_norm": 4.519939370491934, "learning_rate": 8.232267030925963e-06, "loss": 1.2004, "step": 3646 }, { "epoch": 0.3, "grad_norm": 4.796719534626249, "learning_rate": 8.231256935769183e-06, "loss": 1.1672, "step": 3647 }, { "epoch": 0.3, "grad_norm": 4.58716913978193, "learning_rate": 8.230246614115357e-06, "loss": 1.191, "step": 3648 }, { "epoch": 0.3, "grad_norm": 2.5302816001180433, "learning_rate": 8.2292360660353e-06, "loss": 0.461, "step": 3649 }, { "epoch": 0.3, "grad_norm": 2.9954865922410283, "learning_rate": 8.228225291599849e-06, "loss": 0.8287, "step": 3650 }, { "epoch": 0.3, "grad_norm": 4.078826919973614, "learning_rate": 8.227214290879855e-06, "loss": 0.6676, "step": 3651 }, { "epoch": 0.3, "grad_norm": 3.6359386984785216, "learning_rate": 8.226203063946184e-06, "loss": 0.5838, "step": 3652 }, { "epoch": 0.3, "grad_norm": 4.600655242064704, "learning_rate": 8.225191610869717e-06, "loss": 0.8791, "step": 3653 }, { "epoch": 0.3, "grad_norm": 3.1916381293937146, "learning_rate": 8.224179931721354e-06, "loss": 0.5714, "step": 3654 }, { "epoch": 0.3, "grad_norm": 3.793635684996372, "learning_rate": 8.22316802657201e-06, "loss": 0.542, "step": 3655 }, { "epoch": 0.3, "grad_norm": 3.6517913737323298, "learning_rate": 8.222155895492616e-06, "loss": 0.8904, "step": 3656 }, { "epoch": 0.3, "grad_norm": 4.309634828087789, "learning_rate": 8.221143538554116e-06, "loss": 0.9436, "step": 3657 }, { "epoch": 0.3, "grad_norm": 5.59453233275118, "learning_rate": 8.220130955827472e-06, "loss": 1.6515, "step": 3658 }, { "epoch": 0.3, "grad_norm": 4.0694475479979255, "learning_rate": 8.219118147383663e-06, "loss": 1.2232, "step": 3659 }, { "epoch": 0.3, "grad_norm": 3.7378378994927486, "learning_rate": 8.218105113293681e-06, "loss": 1.0414, "step": 3660 }, { "epoch": 0.3, "grad_norm": 2.278096243544778, "learning_rate": 8.217091853628535e-06, "loss": 0.4086, "step": 3661 }, { "epoch": 0.3, "grad_norm": 4.204213682458238, "learning_rate": 8.216078368459253e-06, "loss": 0.7492, "step": 3662 }, { "epoch": 0.3, "grad_norm": 5.626511416048154, "learning_rate": 8.21506465785687e-06, "loss": 1.181, "step": 3663 }, { "epoch": 0.3, "grad_norm": 4.263531538490547, "learning_rate": 8.21405072189245e-06, "loss": 0.8094, "step": 3664 }, { "epoch": 0.3, "grad_norm": 3.842607134945213, "learning_rate": 8.213036560637062e-06, "loss": 0.9559, "step": 3665 }, { "epoch": 0.3, "grad_norm": 2.6295866100852026, "learning_rate": 8.21202217416179e-06, "loss": 0.6185, "step": 3666 }, { "epoch": 0.3, "grad_norm": 2.613191946253642, "learning_rate": 8.211007562537747e-06, "loss": 0.4489, "step": 3667 }, { "epoch": 0.3, "grad_norm": 1.9761898696424298, "learning_rate": 8.209992725836047e-06, "loss": 0.4938, "step": 3668 }, { "epoch": 0.3, "grad_norm": 3.3974288514895035, "learning_rate": 8.208977664127827e-06, "loss": 1.1194, "step": 3669 }, { "epoch": 0.3, "grad_norm": 2.378116796132323, "learning_rate": 8.207962377484237e-06, "loss": 0.4233, "step": 3670 }, { "epoch": 0.3, "grad_norm": 4.093985355862017, "learning_rate": 8.206946865976446e-06, "loss": 1.0518, "step": 3671 }, { "epoch": 0.3, "grad_norm": 3.9070968664524504, "learning_rate": 8.205931129675637e-06, "loss": 0.7833, "step": 3672 }, { "epoch": 0.3, "grad_norm": 3.719407576076978, "learning_rate": 8.204915168653007e-06, "loss": 0.6622, "step": 3673 }, { "epoch": 0.3, "grad_norm": 2.7822855101196815, "learning_rate": 8.203898982979773e-06, "loss": 0.6128, "step": 3674 }, { "epoch": 0.3, "grad_norm": 3.4931324684841822, "learning_rate": 8.202882572727161e-06, "loss": 0.5935, "step": 3675 }, { "epoch": 0.3, "grad_norm": 4.591226514045569, "learning_rate": 8.201865937966423e-06, "loss": 1.1802, "step": 3676 }, { "epoch": 0.3, "grad_norm": 4.043022550682876, "learning_rate": 8.200849078768816e-06, "loss": 0.6867, "step": 3677 }, { "epoch": 0.3, "grad_norm": 3.6300983589675506, "learning_rate": 8.199831995205619e-06, "loss": 0.9301, "step": 3678 }, { "epoch": 0.3, "grad_norm": 3.276523837771925, "learning_rate": 8.198814687348123e-06, "loss": 0.5072, "step": 3679 }, { "epoch": 0.3, "grad_norm": 3.306168790115253, "learning_rate": 8.19779715526764e-06, "loss": 1.0553, "step": 3680 }, { "epoch": 0.3, "grad_norm": 3.0588477007410275, "learning_rate": 8.196779399035492e-06, "loss": 0.5845, "step": 3681 }, { "epoch": 0.3, "grad_norm": 2.9399757382701606, "learning_rate": 8.195761418723023e-06, "loss": 0.6021, "step": 3682 }, { "epoch": 0.3, "grad_norm": 2.9964090262739225, "learning_rate": 8.194743214401587e-06, "loss": 0.7194, "step": 3683 }, { "epoch": 0.3, "grad_norm": 3.500776219734207, "learning_rate": 8.193724786142552e-06, "loss": 0.9013, "step": 3684 }, { "epoch": 0.3, "grad_norm": 3.1845474908110916, "learning_rate": 8.192706134017312e-06, "loss": 1.0675, "step": 3685 }, { "epoch": 0.3, "grad_norm": 4.336093403098695, "learning_rate": 8.191687258097264e-06, "loss": 1.2922, "step": 3686 }, { "epoch": 0.3, "grad_norm": 5.091688178982156, "learning_rate": 8.19066815845383e-06, "loss": 1.1675, "step": 3687 }, { "epoch": 0.3, "grad_norm": 4.095007486460994, "learning_rate": 8.189648835158445e-06, "loss": 0.9352, "step": 3688 }, { "epoch": 0.3, "grad_norm": 4.983663848468049, "learning_rate": 8.188629288282557e-06, "loss": 0.9149, "step": 3689 }, { "epoch": 0.3, "grad_norm": 4.755782459984909, "learning_rate": 8.187609517897634e-06, "loss": 1.4777, "step": 3690 }, { "epoch": 0.3, "grad_norm": 3.4191866976219076, "learning_rate": 8.186589524075155e-06, "loss": 1.014, "step": 3691 }, { "epoch": 0.3, "grad_norm": 3.8368571027938874, "learning_rate": 8.18556930688662e-06, "loss": 1.0577, "step": 3692 }, { "epoch": 0.3, "grad_norm": 3.6395547430488384, "learning_rate": 8.18454886640354e-06, "loss": 0.9151, "step": 3693 }, { "epoch": 0.3, "grad_norm": 5.425943955850182, "learning_rate": 8.183528202697441e-06, "loss": 1.344, "step": 3694 }, { "epoch": 0.3, "grad_norm": 2.573148618469356, "learning_rate": 8.18250731583987e-06, "loss": 0.2741, "step": 3695 }, { "epoch": 0.3, "grad_norm": 4.766066715900613, "learning_rate": 8.181486205902391e-06, "loss": 0.8259, "step": 3696 }, { "epoch": 0.3, "grad_norm": 1.4938051351655979, "learning_rate": 8.180464872956572e-06, "loss": 0.2153, "step": 3697 }, { "epoch": 0.3, "grad_norm": 4.04645686085355, "learning_rate": 8.179443317074008e-06, "loss": 0.897, "step": 3698 }, { "epoch": 0.3, "grad_norm": 4.281302380409822, "learning_rate": 8.178421538326303e-06, "loss": 0.8353, "step": 3699 }, { "epoch": 0.3, "grad_norm": 4.739685727324987, "learning_rate": 8.17739953678508e-06, "loss": 1.5031, "step": 3700 }, { "epoch": 0.3, "grad_norm": 4.517623643422315, "learning_rate": 8.17637731252198e-06, "loss": 1.1821, "step": 3701 }, { "epoch": 0.3, "grad_norm": 3.058673056478612, "learning_rate": 8.175354865608651e-06, "loss": 0.5239, "step": 3702 }, { "epoch": 0.3, "grad_norm": 4.307110932872007, "learning_rate": 8.174332196116767e-06, "loss": 1.2173, "step": 3703 }, { "epoch": 0.3, "grad_norm": 4.667206926341169, "learning_rate": 8.173309304118011e-06, "loss": 0.812, "step": 3704 }, { "epoch": 0.3, "grad_norm": 2.5122682057515293, "learning_rate": 8.172286189684082e-06, "loss": 0.5349, "step": 3705 }, { "epoch": 0.3, "grad_norm": 3.206192713863934, "learning_rate": 8.171262852886697e-06, "loss": 0.8859, "step": 3706 }, { "epoch": 0.3, "grad_norm": 3.745896340372273, "learning_rate": 8.170239293797588e-06, "loss": 0.6659, "step": 3707 }, { "epoch": 0.3, "grad_norm": 3.472030233684138, "learning_rate": 8.169215512488502e-06, "loss": 0.937, "step": 3708 }, { "epoch": 0.3, "grad_norm": 2.4137168251087555, "learning_rate": 8.1681915090312e-06, "loss": 0.5604, "step": 3709 }, { "epoch": 0.3, "grad_norm": 3.9059515725854372, "learning_rate": 8.167167283497462e-06, "loss": 1.3055, "step": 3710 }, { "epoch": 0.3, "grad_norm": 3.6243245126837236, "learning_rate": 8.16614283595908e-06, "loss": 1.3093, "step": 3711 }, { "epoch": 0.3, "grad_norm": 2.7052226321091295, "learning_rate": 8.165118166487866e-06, "loss": 0.563, "step": 3712 }, { "epoch": 0.3, "grad_norm": 2.6276544526695704, "learning_rate": 8.164093275155642e-06, "loss": 0.4162, "step": 3713 }, { "epoch": 0.3, "grad_norm": 3.414009897639559, "learning_rate": 8.16306816203425e-06, "loss": 0.5708, "step": 3714 }, { "epoch": 0.3, "grad_norm": 4.191779412183785, "learning_rate": 8.162042827195545e-06, "loss": 0.7869, "step": 3715 }, { "epoch": 0.3, "grad_norm": 3.966228928181594, "learning_rate": 8.1610172707114e-06, "loss": 1.0857, "step": 3716 }, { "epoch": 0.3, "grad_norm": 2.416053641244488, "learning_rate": 8.1599914926537e-06, "loss": 0.3816, "step": 3717 }, { "epoch": 0.3, "grad_norm": 3.4520616146089678, "learning_rate": 8.158965493094349e-06, "loss": 0.8468, "step": 3718 }, { "epoch": 0.3, "grad_norm": 2.866070693077448, "learning_rate": 8.157939272105265e-06, "loss": 0.3885, "step": 3719 }, { "epoch": 0.3, "grad_norm": 2.3887081665511363, "learning_rate": 8.15691282975838e-06, "loss": 0.5784, "step": 3720 }, { "epoch": 0.3, "grad_norm": 2.593655781114713, "learning_rate": 8.155886166125647e-06, "loss": 0.381, "step": 3721 }, { "epoch": 0.3, "grad_norm": 1.1882794822789533, "learning_rate": 8.154859281279028e-06, "loss": 0.2174, "step": 3722 }, { "epoch": 0.3, "grad_norm": 3.8400830066215126, "learning_rate": 8.153832175290503e-06, "loss": 0.8367, "step": 3723 }, { "epoch": 0.3, "grad_norm": 3.357996923956879, "learning_rate": 8.152804848232067e-06, "loss": 0.7642, "step": 3724 }, { "epoch": 0.3, "grad_norm": 2.0239486314492354, "learning_rate": 8.151777300175733e-06, "loss": 0.3608, "step": 3725 }, { "epoch": 0.3, "grad_norm": 3.4587234054487888, "learning_rate": 8.150749531193527e-06, "loss": 0.8073, "step": 3726 }, { "epoch": 0.3, "grad_norm": 3.8268776728496308, "learning_rate": 8.14972154135749e-06, "loss": 0.8313, "step": 3727 }, { "epoch": 0.3, "grad_norm": 2.592494371196147, "learning_rate": 8.14869333073968e-06, "loss": 0.527, "step": 3728 }, { "epoch": 0.3, "grad_norm": 4.116864773442245, "learning_rate": 8.147664899412174e-06, "loss": 0.7039, "step": 3729 }, { "epoch": 0.3, "grad_norm": 3.763498367287853, "learning_rate": 8.146636247447053e-06, "loss": 0.766, "step": 3730 }, { "epoch": 0.3, "grad_norm": 3.474528023608184, "learning_rate": 8.145607374916428e-06, "loss": 1.0169, "step": 3731 }, { "epoch": 0.31, "grad_norm": 4.734071201766395, "learning_rate": 8.144578281892414e-06, "loss": 0.9553, "step": 3732 }, { "epoch": 0.31, "grad_norm": 4.092806982413093, "learning_rate": 8.14354896844715e-06, "loss": 1.055, "step": 3733 }, { "epoch": 0.31, "grad_norm": 5.42314577570228, "learning_rate": 8.142519434652782e-06, "loss": 0.8633, "step": 3734 }, { "epoch": 0.31, "grad_norm": 4.467308023579122, "learning_rate": 8.141489680581481e-06, "loss": 1.3204, "step": 3735 }, { "epoch": 0.31, "grad_norm": 2.970938266610763, "learning_rate": 8.140459706305422e-06, "loss": 0.6799, "step": 3736 }, { "epoch": 0.31, "grad_norm": 4.45068394944959, "learning_rate": 8.139429511896806e-06, "loss": 1.2216, "step": 3737 }, { "epoch": 0.31, "grad_norm": 3.082640105344123, "learning_rate": 8.138399097427843e-06, "loss": 0.5311, "step": 3738 }, { "epoch": 0.31, "grad_norm": 4.065166150068355, "learning_rate": 8.137368462970762e-06, "loss": 0.995, "step": 3739 }, { "epoch": 0.31, "grad_norm": 1.3988745842381092, "learning_rate": 8.136337608597807e-06, "loss": 0.1561, "step": 3740 }, { "epoch": 0.31, "grad_norm": 3.16548411325666, "learning_rate": 8.135306534381233e-06, "loss": 0.4723, "step": 3741 }, { "epoch": 0.31, "grad_norm": 4.435847542644207, "learning_rate": 8.134275240393318e-06, "loss": 1.2246, "step": 3742 }, { "epoch": 0.31, "grad_norm": 4.052208610377437, "learning_rate": 8.133243726706348e-06, "loss": 1.0259, "step": 3743 }, { "epoch": 0.31, "grad_norm": 3.9003020589030672, "learning_rate": 8.132211993392629e-06, "loss": 1.0583, "step": 3744 }, { "epoch": 0.31, "grad_norm": 5.149256679099094, "learning_rate": 8.131180040524482e-06, "loss": 1.1742, "step": 3745 }, { "epoch": 0.31, "grad_norm": 1.8161975702048256, "learning_rate": 8.130147868174238e-06, "loss": 0.354, "step": 3746 }, { "epoch": 0.31, "grad_norm": 3.830405910264299, "learning_rate": 8.129115476414253e-06, "loss": 0.9095, "step": 3747 }, { "epoch": 0.31, "grad_norm": 3.274315406313386, "learning_rate": 8.12808286531689e-06, "loss": 1.2045, "step": 3748 }, { "epoch": 0.31, "grad_norm": 5.008066219732132, "learning_rate": 8.127050034954533e-06, "loss": 1.0741, "step": 3749 }, { "epoch": 0.31, "grad_norm": 4.177893875464701, "learning_rate": 8.126016985399576e-06, "loss": 0.7071, "step": 3750 }, { "epoch": 0.31, "grad_norm": 4.260607866286649, "learning_rate": 8.124983716724434e-06, "loss": 1.4082, "step": 3751 }, { "epoch": 0.31, "grad_norm": 5.137610931060111, "learning_rate": 8.123950229001533e-06, "loss": 1.4752, "step": 3752 }, { "epoch": 0.31, "grad_norm": 1.9728338746060647, "learning_rate": 8.122916522303314e-06, "loss": 0.3583, "step": 3753 }, { "epoch": 0.31, "grad_norm": 3.642077452337778, "learning_rate": 8.121882596702241e-06, "loss": 0.7528, "step": 3754 }, { "epoch": 0.31, "grad_norm": 4.488596401108742, "learning_rate": 8.120848452270784e-06, "loss": 1.0414, "step": 3755 }, { "epoch": 0.31, "grad_norm": 4.429872932359214, "learning_rate": 8.119814089081433e-06, "loss": 0.9902, "step": 3756 }, { "epoch": 0.31, "grad_norm": 4.132381624984531, "learning_rate": 8.118779507206692e-06, "loss": 1.2691, "step": 3757 }, { "epoch": 0.31, "grad_norm": 4.051652764278989, "learning_rate": 8.117744706719077e-06, "loss": 0.7505, "step": 3758 }, { "epoch": 0.31, "grad_norm": 3.7541486157549886, "learning_rate": 8.11670968769113e-06, "loss": 0.8725, "step": 3759 }, { "epoch": 0.31, "grad_norm": 2.2188128012707553, "learning_rate": 8.115674450195397e-06, "loss": 0.4125, "step": 3760 }, { "epoch": 0.31, "grad_norm": 3.8775421294704433, "learning_rate": 8.114638994304442e-06, "loss": 0.9549, "step": 3761 }, { "epoch": 0.31, "grad_norm": 2.24804227327714, "learning_rate": 8.113603320090852e-06, "loss": 0.5652, "step": 3762 }, { "epoch": 0.31, "grad_norm": 2.378678797966362, "learning_rate": 8.112567427627218e-06, "loss": 0.5553, "step": 3763 }, { "epoch": 0.31, "grad_norm": 1.9313481307698672, "learning_rate": 8.111531316986155e-06, "loss": 0.3617, "step": 3764 }, { "epoch": 0.31, "grad_norm": 3.841068469149828, "learning_rate": 8.110494988240287e-06, "loss": 0.8996, "step": 3765 }, { "epoch": 0.31, "grad_norm": 2.5605387433865445, "learning_rate": 8.109458441462257e-06, "loss": 0.6492, "step": 3766 }, { "epoch": 0.31, "grad_norm": 4.577317967150967, "learning_rate": 8.108421676724721e-06, "loss": 0.9732, "step": 3767 }, { "epoch": 0.31, "grad_norm": 4.808841322327526, "learning_rate": 8.107384694100355e-06, "loss": 0.9966, "step": 3768 }, { "epoch": 0.31, "grad_norm": 4.424400232186057, "learning_rate": 8.106347493661846e-06, "loss": 0.9183, "step": 3769 }, { "epoch": 0.31, "grad_norm": 3.6727410221654617, "learning_rate": 8.105310075481895e-06, "loss": 0.7357, "step": 3770 }, { "epoch": 0.31, "grad_norm": 4.092483526607276, "learning_rate": 8.104272439633225e-06, "loss": 1.1123, "step": 3771 }, { "epoch": 0.31, "grad_norm": 4.640259166857718, "learning_rate": 8.103234586188564e-06, "loss": 1.1403, "step": 3772 }, { "epoch": 0.31, "grad_norm": 3.7035333161123165, "learning_rate": 8.102196515220664e-06, "loss": 0.6131, "step": 3773 }, { "epoch": 0.31, "grad_norm": 3.32058535500473, "learning_rate": 8.10115822680229e-06, "loss": 0.6775, "step": 3774 }, { "epoch": 0.31, "grad_norm": 3.5768608057991385, "learning_rate": 8.100119721006221e-06, "loss": 0.7513, "step": 3775 }, { "epoch": 0.31, "grad_norm": 4.461663425433065, "learning_rate": 8.09908099790525e-06, "loss": 1.1056, "step": 3776 }, { "epoch": 0.31, "grad_norm": 4.728957538433035, "learning_rate": 8.098042057572188e-06, "loss": 1.4042, "step": 3777 }, { "epoch": 0.31, "grad_norm": 5.687046450091034, "learning_rate": 8.097002900079862e-06, "loss": 1.29, "step": 3778 }, { "epoch": 0.31, "grad_norm": 4.511627277637122, "learning_rate": 8.095963525501111e-06, "loss": 1.322, "step": 3779 }, { "epoch": 0.31, "grad_norm": 2.8622060976057337, "learning_rate": 8.094923933908789e-06, "loss": 0.5612, "step": 3780 }, { "epoch": 0.31, "grad_norm": 3.995257314579237, "learning_rate": 8.093884125375769e-06, "loss": 0.9435, "step": 3781 }, { "epoch": 0.31, "grad_norm": 3.2454494244850616, "learning_rate": 8.092844099974936e-06, "loss": 0.3857, "step": 3782 }, { "epoch": 0.31, "grad_norm": 3.781446530884619, "learning_rate": 8.091803857779194e-06, "loss": 0.8944, "step": 3783 }, { "epoch": 0.31, "grad_norm": 2.7753792270276065, "learning_rate": 8.090763398861455e-06, "loss": 0.5534, "step": 3784 }, { "epoch": 0.31, "grad_norm": 2.6585854485006717, "learning_rate": 8.089722723294654e-06, "loss": 0.5455, "step": 3785 }, { "epoch": 0.31, "grad_norm": 3.2715132195590995, "learning_rate": 8.088681831151737e-06, "loss": 0.9291, "step": 3786 }, { "epoch": 0.31, "grad_norm": 3.6559595986554685, "learning_rate": 8.087640722505663e-06, "loss": 0.7922, "step": 3787 }, { "epoch": 0.31, "grad_norm": 2.9979353328556613, "learning_rate": 8.086599397429413e-06, "loss": 0.6418, "step": 3788 }, { "epoch": 0.31, "grad_norm": 2.749128142979223, "learning_rate": 8.085557855995979e-06, "loss": 0.5899, "step": 3789 }, { "epoch": 0.31, "grad_norm": 4.23753924224674, "learning_rate": 8.084516098278367e-06, "loss": 1.0294, "step": 3790 }, { "epoch": 0.31, "grad_norm": 5.2067365626434885, "learning_rate": 8.083474124349602e-06, "loss": 1.2521, "step": 3791 }, { "epoch": 0.31, "grad_norm": 3.053171316466954, "learning_rate": 8.082431934282717e-06, "loss": 0.6479, "step": 3792 }, { "epoch": 0.31, "grad_norm": 5.178116731766731, "learning_rate": 8.081389528150772e-06, "loss": 1.078, "step": 3793 }, { "epoch": 0.31, "grad_norm": 3.8930725769871146, "learning_rate": 8.080346906026827e-06, "loss": 0.9376, "step": 3794 }, { "epoch": 0.31, "grad_norm": 3.874489840565244, "learning_rate": 8.079304067983972e-06, "loss": 0.5448, "step": 3795 }, { "epoch": 0.31, "grad_norm": 3.0668608875262424, "learning_rate": 8.078261014095303e-06, "loss": 0.8477, "step": 3796 }, { "epoch": 0.31, "grad_norm": 4.441895416564226, "learning_rate": 8.077217744433934e-06, "loss": 1.211, "step": 3797 }, { "epoch": 0.31, "grad_norm": 2.2771698264465536, "learning_rate": 8.076174259072994e-06, "loss": 0.5761, "step": 3798 }, { "epoch": 0.31, "grad_norm": 3.614567455901114, "learning_rate": 8.075130558085625e-06, "loss": 1.0329, "step": 3799 }, { "epoch": 0.31, "grad_norm": 3.913152393460195, "learning_rate": 8.074086641544985e-06, "loss": 0.7162, "step": 3800 }, { "epoch": 0.31, "grad_norm": 2.877290036991181, "learning_rate": 8.073042509524252e-06, "loss": 0.6069, "step": 3801 }, { "epoch": 0.31, "grad_norm": 3.6871844033861536, "learning_rate": 8.071998162096613e-06, "loss": 0.6831, "step": 3802 }, { "epoch": 0.31, "grad_norm": 3.592996585961057, "learning_rate": 8.070953599335271e-06, "loss": 1.1962, "step": 3803 }, { "epoch": 0.31, "grad_norm": 3.986885576237931, "learning_rate": 8.069908821313446e-06, "loss": 0.8866, "step": 3804 }, { "epoch": 0.31, "grad_norm": 2.7512065263487377, "learning_rate": 8.068863828104377e-06, "loss": 0.6925, "step": 3805 }, { "epoch": 0.31, "grad_norm": 3.0276423770928598, "learning_rate": 8.067818619781304e-06, "loss": 0.6365, "step": 3806 }, { "epoch": 0.31, "grad_norm": 2.167495623338151, "learning_rate": 8.0667731964175e-06, "loss": 0.6901, "step": 3807 }, { "epoch": 0.31, "grad_norm": 2.766886335698619, "learning_rate": 8.065727558086241e-06, "loss": 0.4542, "step": 3808 }, { "epoch": 0.31, "grad_norm": 4.171617751108701, "learning_rate": 8.064681704860824e-06, "loss": 0.8883, "step": 3809 }, { "epoch": 0.31, "grad_norm": 2.7588444811396613, "learning_rate": 8.063635636814555e-06, "loss": 0.7518, "step": 3810 }, { "epoch": 0.31, "grad_norm": 4.447026050799633, "learning_rate": 8.06258935402076e-06, "loss": 1.1046, "step": 3811 }, { "epoch": 0.31, "grad_norm": 3.0596379802357623, "learning_rate": 8.061542856552782e-06, "loss": 0.6165, "step": 3812 }, { "epoch": 0.31, "grad_norm": 4.767255884519906, "learning_rate": 8.06049614448397e-06, "loss": 1.2185, "step": 3813 }, { "epoch": 0.31, "grad_norm": 2.873121207979868, "learning_rate": 8.059449217887703e-06, "loss": 0.7159, "step": 3814 }, { "epoch": 0.31, "grad_norm": 2.283891610711034, "learning_rate": 8.058402076837357e-06, "loss": 0.5596, "step": 3815 }, { "epoch": 0.31, "grad_norm": 4.062392687744952, "learning_rate": 8.057354721406336e-06, "loss": 0.8439, "step": 3816 }, { "epoch": 0.31, "grad_norm": 4.557955428306261, "learning_rate": 8.056307151668056e-06, "loss": 1.1224, "step": 3817 }, { "epoch": 0.31, "grad_norm": 4.138074034895307, "learning_rate": 8.055259367695944e-06, "loss": 1.0132, "step": 3818 }, { "epoch": 0.31, "grad_norm": 2.846901873436069, "learning_rate": 8.054211369563448e-06, "loss": 0.7708, "step": 3819 }, { "epoch": 0.31, "grad_norm": 4.566826926447175, "learning_rate": 8.053163157344025e-06, "loss": 1.1192, "step": 3820 }, { "epoch": 0.31, "grad_norm": 5.438548471422388, "learning_rate": 8.052114731111154e-06, "loss": 1.3369, "step": 3821 }, { "epoch": 0.31, "grad_norm": 3.2034914657844618, "learning_rate": 8.051066090938325e-06, "loss": 0.6045, "step": 3822 }, { "epoch": 0.31, "grad_norm": 4.354183372517673, "learning_rate": 8.050017236899038e-06, "loss": 0.6668, "step": 3823 }, { "epoch": 0.31, "grad_norm": 5.856461759994505, "learning_rate": 8.048968169066817e-06, "loss": 1.0358, "step": 3824 }, { "epoch": 0.31, "grad_norm": 3.1394448375714346, "learning_rate": 8.047918887515199e-06, "loss": 0.6115, "step": 3825 }, { "epoch": 0.31, "grad_norm": 3.307289435541749, "learning_rate": 8.046869392317729e-06, "loss": 0.4689, "step": 3826 }, { "epoch": 0.31, "grad_norm": 3.4991103377960795, "learning_rate": 8.045819683547976e-06, "loss": 0.6016, "step": 3827 }, { "epoch": 0.31, "grad_norm": 2.902513823352388, "learning_rate": 8.044769761279516e-06, "loss": 0.8296, "step": 3828 }, { "epoch": 0.31, "grad_norm": 1.7915524409926955, "learning_rate": 8.04371962558595e-06, "loss": 0.4232, "step": 3829 }, { "epoch": 0.31, "grad_norm": 3.357581688423257, "learning_rate": 8.042669276540882e-06, "loss": 0.7239, "step": 3830 }, { "epoch": 0.31, "grad_norm": 5.034115483169221, "learning_rate": 8.041618714217941e-06, "loss": 1.0947, "step": 3831 }, { "epoch": 0.31, "grad_norm": 3.4102344394295265, "learning_rate": 8.040567938690764e-06, "loss": 0.7779, "step": 3832 }, { "epoch": 0.31, "grad_norm": 2.9394698900528162, "learning_rate": 8.039516950033008e-06, "loss": 0.7769, "step": 3833 }, { "epoch": 0.31, "grad_norm": 3.7683839440198894, "learning_rate": 8.038465748318342e-06, "loss": 0.7318, "step": 3834 }, { "epoch": 0.31, "grad_norm": 3.805228173708322, "learning_rate": 8.03741433362045e-06, "loss": 0.9185, "step": 3835 }, { "epoch": 0.31, "grad_norm": 4.114508005145592, "learning_rate": 8.036362706013033e-06, "loss": 1.2705, "step": 3836 }, { "epoch": 0.31, "grad_norm": 2.760045632096391, "learning_rate": 8.035310865569806e-06, "loss": 0.7342, "step": 3837 }, { "epoch": 0.31, "grad_norm": 4.133031716360296, "learning_rate": 8.034258812364492e-06, "loss": 0.6147, "step": 3838 }, { "epoch": 0.31, "grad_norm": 2.8580255170415314, "learning_rate": 8.033206546470846e-06, "loss": 0.6132, "step": 3839 }, { "epoch": 0.31, "grad_norm": 3.789984994703072, "learning_rate": 8.032154067962617e-06, "loss": 0.9953, "step": 3840 }, { "epoch": 0.31, "grad_norm": 4.252557561048338, "learning_rate": 8.031101376913586e-06, "loss": 0.693, "step": 3841 }, { "epoch": 0.31, "grad_norm": 3.403266683476786, "learning_rate": 8.03004847339754e-06, "loss": 1.0276, "step": 3842 }, { "epoch": 0.31, "grad_norm": 1.288651909984002, "learning_rate": 8.028995357488284e-06, "loss": 0.1872, "step": 3843 }, { "epoch": 0.31, "grad_norm": 3.199625220667626, "learning_rate": 8.027942029259633e-06, "loss": 0.5979, "step": 3844 }, { "epoch": 0.31, "grad_norm": 5.0408324902372055, "learning_rate": 8.026888488785426e-06, "loss": 0.9398, "step": 3845 }, { "epoch": 0.31, "grad_norm": 4.251594099668823, "learning_rate": 8.025834736139506e-06, "loss": 0.8767, "step": 3846 }, { "epoch": 0.31, "grad_norm": 2.90119502716278, "learning_rate": 8.024780771395741e-06, "loss": 0.7032, "step": 3847 }, { "epoch": 0.31, "grad_norm": 5.3691113441484335, "learning_rate": 8.023726594628008e-06, "loss": 1.0865, "step": 3848 }, { "epoch": 0.31, "grad_norm": 3.272557560423606, "learning_rate": 8.0226722059102e-06, "loss": 0.9431, "step": 3849 }, { "epoch": 0.31, "grad_norm": 5.1018996779982, "learning_rate": 8.021617605316225e-06, "loss": 1.1198, "step": 3850 }, { "epoch": 0.31, "grad_norm": 4.34270314985889, "learning_rate": 8.020562792920007e-06, "loss": 1.4252, "step": 3851 }, { "epoch": 0.31, "grad_norm": 3.6576903851374674, "learning_rate": 8.019507768795481e-06, "loss": 0.5995, "step": 3852 }, { "epoch": 0.31, "grad_norm": 2.676769230408065, "learning_rate": 8.018452533016604e-06, "loss": 0.6054, "step": 3853 }, { "epoch": 0.32, "grad_norm": 5.304160513568824, "learning_rate": 8.017397085657337e-06, "loss": 1.1721, "step": 3854 }, { "epoch": 0.32, "grad_norm": 3.834167523884977, "learning_rate": 8.016341426791669e-06, "loss": 1.001, "step": 3855 }, { "epoch": 0.32, "grad_norm": 3.332024439659001, "learning_rate": 8.015285556493592e-06, "loss": 0.4388, "step": 3856 }, { "epoch": 0.32, "grad_norm": 4.011369652265117, "learning_rate": 8.014229474837122e-06, "loss": 1.1744, "step": 3857 }, { "epoch": 0.32, "grad_norm": 3.196051480762647, "learning_rate": 8.013173181896283e-06, "loss": 0.4594, "step": 3858 }, { "epoch": 0.32, "grad_norm": 3.8879096058266005, "learning_rate": 8.012116677745116e-06, "loss": 1.116, "step": 3859 }, { "epoch": 0.32, "grad_norm": 3.1754368357717064, "learning_rate": 8.011059962457681e-06, "loss": 0.605, "step": 3860 }, { "epoch": 0.32, "grad_norm": 2.411006122417595, "learning_rate": 8.010003036108045e-06, "loss": 0.402, "step": 3861 }, { "epoch": 0.32, "grad_norm": 3.7769095221665934, "learning_rate": 8.008945898770298e-06, "loss": 0.5581, "step": 3862 }, { "epoch": 0.32, "grad_norm": 3.4745966802453316, "learning_rate": 8.007888550518536e-06, "loss": 0.5202, "step": 3863 }, { "epoch": 0.32, "grad_norm": 3.0437840671956957, "learning_rate": 8.006830991426879e-06, "loss": 0.8083, "step": 3864 }, { "epoch": 0.32, "grad_norm": 3.4753333783047267, "learning_rate": 8.005773221569453e-06, "loss": 0.8619, "step": 3865 }, { "epoch": 0.32, "grad_norm": 4.218536691525492, "learning_rate": 8.004715241020407e-06, "loss": 1.2444, "step": 3866 }, { "epoch": 0.32, "grad_norm": 4.707391376778872, "learning_rate": 8.0036570498539e-06, "loss": 1.2269, "step": 3867 }, { "epoch": 0.32, "grad_norm": 3.269846290227814, "learning_rate": 8.002598648144104e-06, "loss": 0.7239, "step": 3868 }, { "epoch": 0.32, "grad_norm": 4.416247878295309, "learning_rate": 8.00154003596521e-06, "loss": 0.8834, "step": 3869 }, { "epoch": 0.32, "grad_norm": 2.61950119442241, "learning_rate": 8.000481213391422e-06, "loss": 0.5118, "step": 3870 }, { "epoch": 0.32, "grad_norm": 4.452753986663066, "learning_rate": 7.999422180496961e-06, "loss": 1.2589, "step": 3871 }, { "epoch": 0.32, "grad_norm": 3.1217098336646676, "learning_rate": 7.998362937356057e-06, "loss": 0.7483, "step": 3872 }, { "epoch": 0.32, "grad_norm": 2.665134235943946, "learning_rate": 7.99730348404296e-06, "loss": 0.57, "step": 3873 }, { "epoch": 0.32, "grad_norm": 4.284385585767888, "learning_rate": 7.996243820631936e-06, "loss": 1.1521, "step": 3874 }, { "epoch": 0.32, "grad_norm": 3.167941665748867, "learning_rate": 7.995183947197256e-06, "loss": 0.8497, "step": 3875 }, { "epoch": 0.32, "grad_norm": 4.1403467646995615, "learning_rate": 7.994123863813217e-06, "loss": 0.8655, "step": 3876 }, { "epoch": 0.32, "grad_norm": 3.602304879232952, "learning_rate": 7.993063570554128e-06, "loss": 0.6588, "step": 3877 }, { "epoch": 0.32, "grad_norm": 3.783080663293601, "learning_rate": 7.992003067494307e-06, "loss": 0.8908, "step": 3878 }, { "epoch": 0.32, "grad_norm": 2.8033073441225937, "learning_rate": 7.990942354708093e-06, "loss": 0.6443, "step": 3879 }, { "epoch": 0.32, "grad_norm": 3.077322224246064, "learning_rate": 7.989881432269835e-06, "loss": 0.8065, "step": 3880 }, { "epoch": 0.32, "grad_norm": 3.7208368668094294, "learning_rate": 7.988820300253902e-06, "loss": 0.755, "step": 3881 }, { "epoch": 0.32, "grad_norm": 3.6569711380052623, "learning_rate": 7.987758958734672e-06, "loss": 1.0911, "step": 3882 }, { "epoch": 0.32, "grad_norm": 3.7112812737009873, "learning_rate": 7.986697407786543e-06, "loss": 1.043, "step": 3883 }, { "epoch": 0.32, "grad_norm": 3.831832105716206, "learning_rate": 7.985635647483922e-06, "loss": 1.0517, "step": 3884 }, { "epoch": 0.32, "grad_norm": 3.195462499961567, "learning_rate": 7.984573677901238e-06, "loss": 0.6196, "step": 3885 }, { "epoch": 0.32, "grad_norm": 3.0927161199014352, "learning_rate": 7.983511499112927e-06, "loss": 0.747, "step": 3886 }, { "epoch": 0.32, "grad_norm": 3.1604091144922686, "learning_rate": 7.982449111193445e-06, "loss": 0.7857, "step": 3887 }, { "epoch": 0.32, "grad_norm": 4.184526063278354, "learning_rate": 7.98138651421726e-06, "loss": 0.7998, "step": 3888 }, { "epoch": 0.32, "grad_norm": 5.414539531775396, "learning_rate": 7.980323708258854e-06, "loss": 1.2143, "step": 3889 }, { "epoch": 0.32, "grad_norm": 5.0841779869278705, "learning_rate": 7.979260693392726e-06, "loss": 1.0425, "step": 3890 }, { "epoch": 0.32, "grad_norm": 3.97584223991958, "learning_rate": 7.978197469693392e-06, "loss": 0.7008, "step": 3891 }, { "epoch": 0.32, "grad_norm": 3.040826383131776, "learning_rate": 7.977134037235375e-06, "loss": 0.5522, "step": 3892 }, { "epoch": 0.32, "grad_norm": 3.171089779923319, "learning_rate": 7.976070396093217e-06, "loss": 0.5813, "step": 3893 }, { "epoch": 0.32, "grad_norm": 2.3970568499228593, "learning_rate": 7.975006546341478e-06, "loss": 0.366, "step": 3894 }, { "epoch": 0.32, "grad_norm": 3.1551192192534327, "learning_rate": 7.973942488054726e-06, "loss": 0.6433, "step": 3895 }, { "epoch": 0.32, "grad_norm": 2.4197719603725343, "learning_rate": 7.97287822130755e-06, "loss": 0.5926, "step": 3896 }, { "epoch": 0.32, "grad_norm": 3.6648438074557177, "learning_rate": 7.971813746174548e-06, "loss": 0.5191, "step": 3897 }, { "epoch": 0.32, "grad_norm": 4.228504648321421, "learning_rate": 7.970749062730336e-06, "loss": 0.8926, "step": 3898 }, { "epoch": 0.32, "grad_norm": 4.388096166572964, "learning_rate": 7.969684171049543e-06, "loss": 0.7807, "step": 3899 }, { "epoch": 0.32, "grad_norm": 5.4729634968726595, "learning_rate": 7.968619071206813e-06, "loss": 1.2072, "step": 3900 }, { "epoch": 0.32, "grad_norm": 4.345484389252216, "learning_rate": 7.967553763276808e-06, "loss": 0.8457, "step": 3901 }, { "epoch": 0.32, "grad_norm": 3.7935286469224163, "learning_rate": 7.966488247334196e-06, "loss": 0.8782, "step": 3902 }, { "epoch": 0.32, "grad_norm": 4.29533451863232, "learning_rate": 7.96542252345367e-06, "loss": 0.58, "step": 3903 }, { "epoch": 0.32, "grad_norm": 5.365676149628274, "learning_rate": 7.96435659170993e-06, "loss": 1.2744, "step": 3904 }, { "epoch": 0.32, "grad_norm": 3.6534210181216107, "learning_rate": 7.963290452177695e-06, "loss": 0.9971, "step": 3905 }, { "epoch": 0.32, "grad_norm": 1.6606709479755615, "learning_rate": 7.962224104931692e-06, "loss": 0.2075, "step": 3906 }, { "epoch": 0.32, "grad_norm": 3.692509435384667, "learning_rate": 7.961157550046674e-06, "loss": 0.8298, "step": 3907 }, { "epoch": 0.32, "grad_norm": 3.3064776406188825, "learning_rate": 7.9600907875974e-06, "loss": 0.6335, "step": 3908 }, { "epoch": 0.32, "grad_norm": 3.2577188657508187, "learning_rate": 7.959023817658642e-06, "loss": 0.9199, "step": 3909 }, { "epoch": 0.32, "grad_norm": 2.065028697756866, "learning_rate": 7.957956640305192e-06, "loss": 0.4054, "step": 3910 }, { "epoch": 0.32, "grad_norm": 1.275405899307535, "learning_rate": 7.956889255611855e-06, "loss": 0.1956, "step": 3911 }, { "epoch": 0.32, "grad_norm": 2.874605605122154, "learning_rate": 7.955821663653448e-06, "loss": 0.5685, "step": 3912 }, { "epoch": 0.32, "grad_norm": 3.4400953132304206, "learning_rate": 7.95475386450481e-06, "loss": 0.8722, "step": 3913 }, { "epoch": 0.32, "grad_norm": 4.918856789489256, "learning_rate": 7.953685858240783e-06, "loss": 1.1118, "step": 3914 }, { "epoch": 0.32, "grad_norm": 5.0210892735943915, "learning_rate": 7.952617644936231e-06, "loss": 1.0947, "step": 3915 }, { "epoch": 0.32, "grad_norm": 4.330205720746106, "learning_rate": 7.951549224666034e-06, "loss": 1.2001, "step": 3916 }, { "epoch": 0.32, "grad_norm": 3.272606217468375, "learning_rate": 7.950480597505079e-06, "loss": 0.834, "step": 3917 }, { "epoch": 0.32, "grad_norm": 3.6360691348276872, "learning_rate": 7.949411763528276e-06, "loss": 0.7878, "step": 3918 }, { "epoch": 0.32, "grad_norm": 3.357258381230591, "learning_rate": 7.948342722810544e-06, "loss": 0.6788, "step": 3919 }, { "epoch": 0.32, "grad_norm": 4.790471200518028, "learning_rate": 7.94727347542682e-06, "loss": 1.0941, "step": 3920 }, { "epoch": 0.32, "grad_norm": 2.880762080944756, "learning_rate": 7.946204021452049e-06, "loss": 0.5526, "step": 3921 }, { "epoch": 0.32, "grad_norm": 4.906717414163718, "learning_rate": 7.9451343609612e-06, "loss": 1.049, "step": 3922 }, { "epoch": 0.32, "grad_norm": 3.4014886526948636, "learning_rate": 7.94406449402925e-06, "loss": 0.7849, "step": 3923 }, { "epoch": 0.32, "grad_norm": 2.5413370736232705, "learning_rate": 7.942994420731189e-06, "loss": 0.402, "step": 3924 }, { "epoch": 0.32, "grad_norm": 3.1905916450375775, "learning_rate": 7.94192414114203e-06, "loss": 0.9411, "step": 3925 }, { "epoch": 0.32, "grad_norm": 2.654706974498102, "learning_rate": 7.940853655336791e-06, "loss": 0.5907, "step": 3926 }, { "epoch": 0.32, "grad_norm": 3.672345696377473, "learning_rate": 7.93978296339051e-06, "loss": 0.7108, "step": 3927 }, { "epoch": 0.32, "grad_norm": 3.6847837849040816, "learning_rate": 7.938712065378235e-06, "loss": 0.9222, "step": 3928 }, { "epoch": 0.32, "grad_norm": 4.046999168227711, "learning_rate": 7.937640961375036e-06, "loss": 0.8271, "step": 3929 }, { "epoch": 0.32, "grad_norm": 3.5800151195645653, "learning_rate": 7.936569651455987e-06, "loss": 0.8926, "step": 3930 }, { "epoch": 0.32, "grad_norm": 3.6666933200400034, "learning_rate": 7.93549813569619e-06, "loss": 0.6696, "step": 3931 }, { "epoch": 0.32, "grad_norm": 4.425401258509315, "learning_rate": 7.934426414170747e-06, "loss": 1.005, "step": 3932 }, { "epoch": 0.32, "grad_norm": 3.191697810533967, "learning_rate": 7.933354486954781e-06, "loss": 0.9103, "step": 3933 }, { "epoch": 0.32, "grad_norm": 2.755872248436024, "learning_rate": 7.932282354123433e-06, "loss": 0.4972, "step": 3934 }, { "epoch": 0.32, "grad_norm": 5.279827015704212, "learning_rate": 7.931210015751854e-06, "loss": 0.9444, "step": 3935 }, { "epoch": 0.32, "grad_norm": 1.8322074800850854, "learning_rate": 7.93013747191521e-06, "loss": 0.3787, "step": 3936 }, { "epoch": 0.32, "grad_norm": 4.503534226337361, "learning_rate": 7.92906472268868e-06, "loss": 1.0117, "step": 3937 }, { "epoch": 0.32, "grad_norm": 3.9678540703978524, "learning_rate": 7.92799176814746e-06, "loss": 0.7385, "step": 3938 }, { "epoch": 0.32, "grad_norm": 3.4197151347721753, "learning_rate": 7.92691860836676e-06, "loss": 0.9999, "step": 3939 }, { "epoch": 0.32, "grad_norm": 4.05089929572889, "learning_rate": 7.925845243421803e-06, "loss": 1.1175, "step": 3940 }, { "epoch": 0.32, "grad_norm": 3.487274292627951, "learning_rate": 7.924771673387828e-06, "loss": 0.7277, "step": 3941 }, { "epoch": 0.32, "grad_norm": 3.203089793147763, "learning_rate": 7.92369789834009e-06, "loss": 0.6635, "step": 3942 }, { "epoch": 0.32, "grad_norm": 4.680738296993968, "learning_rate": 7.92262391835385e-06, "loss": 1.0936, "step": 3943 }, { "epoch": 0.32, "grad_norm": 4.252418062665237, "learning_rate": 7.921549733504394e-06, "loss": 1.4216, "step": 3944 }, { "epoch": 0.32, "grad_norm": 3.721036769804235, "learning_rate": 7.920475343867016e-06, "loss": 0.8752, "step": 3945 }, { "epoch": 0.32, "grad_norm": 4.060450737205802, "learning_rate": 7.919400749517026e-06, "loss": 0.9217, "step": 3946 }, { "epoch": 0.32, "grad_norm": 3.1952284512374827, "learning_rate": 7.91832595052975e-06, "loss": 0.6315, "step": 3947 }, { "epoch": 0.32, "grad_norm": 2.6845820229727635, "learning_rate": 7.917250946980525e-06, "loss": 0.6788, "step": 3948 }, { "epoch": 0.32, "grad_norm": 3.2111912846058237, "learning_rate": 7.916175738944703e-06, "loss": 0.962, "step": 3949 }, { "epoch": 0.32, "grad_norm": 3.310775447454021, "learning_rate": 7.915100326497655e-06, "loss": 0.5742, "step": 3950 }, { "epoch": 0.32, "grad_norm": 4.454874359101468, "learning_rate": 7.91402470971476e-06, "loss": 1.0579, "step": 3951 }, { "epoch": 0.32, "grad_norm": 3.777140529355507, "learning_rate": 7.912948888671415e-06, "loss": 0.848, "step": 3952 }, { "epoch": 0.32, "grad_norm": 4.917375205195893, "learning_rate": 7.911872863443028e-06, "loss": 1.0811, "step": 3953 }, { "epoch": 0.32, "grad_norm": 4.20008656594547, "learning_rate": 7.910796634105029e-06, "loss": 0.8757, "step": 3954 }, { "epoch": 0.32, "grad_norm": 3.3461871368791574, "learning_rate": 7.90972020073285e-06, "loss": 0.6789, "step": 3955 }, { "epoch": 0.32, "grad_norm": 4.726416038016359, "learning_rate": 7.90864356340195e-06, "loss": 0.8677, "step": 3956 }, { "epoch": 0.32, "grad_norm": 3.5161194158107385, "learning_rate": 7.907566722187796e-06, "loss": 0.8401, "step": 3957 }, { "epoch": 0.32, "grad_norm": 4.44494707113555, "learning_rate": 7.906489677165868e-06, "loss": 0.765, "step": 3958 }, { "epoch": 0.32, "grad_norm": 3.571669862485657, "learning_rate": 7.905412428411663e-06, "loss": 0.9774, "step": 3959 }, { "epoch": 0.32, "grad_norm": 3.7726222805080134, "learning_rate": 7.90433497600069e-06, "loss": 0.8009, "step": 3960 }, { "epoch": 0.32, "grad_norm": 2.904481349413059, "learning_rate": 7.903257320008475e-06, "loss": 0.6383, "step": 3961 }, { "epoch": 0.32, "grad_norm": 4.329707161356198, "learning_rate": 7.902179460510558e-06, "loss": 1.1996, "step": 3962 }, { "epoch": 0.32, "grad_norm": 4.031137913745396, "learning_rate": 7.901101397582489e-06, "loss": 1.0229, "step": 3963 }, { "epoch": 0.32, "grad_norm": 5.233129840323534, "learning_rate": 7.90002313129984e-06, "loss": 1.5545, "step": 3964 }, { "epoch": 0.32, "grad_norm": 3.5120920452991, "learning_rate": 7.898944661738188e-06, "loss": 0.8688, "step": 3965 }, { "epoch": 0.32, "grad_norm": 3.8309770680435657, "learning_rate": 7.897865988973133e-06, "loss": 0.6724, "step": 3966 }, { "epoch": 0.32, "grad_norm": 2.735828399323141, "learning_rate": 7.896787113080284e-06, "loss": 0.6444, "step": 3967 }, { "epoch": 0.32, "grad_norm": 4.959729189806863, "learning_rate": 7.895708034135265e-06, "loss": 1.0035, "step": 3968 }, { "epoch": 0.32, "grad_norm": 4.190551313321349, "learning_rate": 7.894628752213715e-06, "loss": 0.6192, "step": 3969 }, { "epoch": 0.32, "grad_norm": 3.8311685842134064, "learning_rate": 7.893549267391287e-06, "loss": 0.8306, "step": 3970 }, { "epoch": 0.32, "grad_norm": 4.819702650845054, "learning_rate": 7.892469579743647e-06, "loss": 0.7833, "step": 3971 }, { "epoch": 0.32, "grad_norm": 2.5640523641815736, "learning_rate": 7.891389689346479e-06, "loss": 0.3882, "step": 3972 }, { "epoch": 0.32, "grad_norm": 2.3126765364576305, "learning_rate": 7.890309596275476e-06, "loss": 0.5957, "step": 3973 }, { "epoch": 0.32, "grad_norm": 2.89337526452459, "learning_rate": 7.88922930060635e-06, "loss": 0.6068, "step": 3974 }, { "epoch": 0.32, "grad_norm": 3.134076773870964, "learning_rate": 7.888148802414824e-06, "loss": 0.8467, "step": 3975 }, { "epoch": 0.32, "grad_norm": 4.898634541280175, "learning_rate": 7.887068101776633e-06, "loss": 0.9021, "step": 3976 }, { "epoch": 0.33, "grad_norm": 2.921800857438866, "learning_rate": 7.885987198767538e-06, "loss": 0.4761, "step": 3977 }, { "epoch": 0.33, "grad_norm": 3.193341180831775, "learning_rate": 7.884906093463295e-06, "loss": 0.585, "step": 3978 }, { "epoch": 0.33, "grad_norm": 2.8833964995321897, "learning_rate": 7.883824785939694e-06, "loss": 0.4738, "step": 3979 }, { "epoch": 0.33, "grad_norm": 3.5041822053988834, "learning_rate": 7.882743276272524e-06, "loss": 0.5757, "step": 3980 }, { "epoch": 0.33, "grad_norm": 4.479653639530403, "learning_rate": 7.881661564537598e-06, "loss": 1.0564, "step": 3981 }, { "epoch": 0.33, "grad_norm": 2.7477370824532636, "learning_rate": 7.880579650810736e-06, "loss": 0.7131, "step": 3982 }, { "epoch": 0.33, "grad_norm": 2.8493434819124897, "learning_rate": 7.879497535167776e-06, "loss": 0.5089, "step": 3983 }, { "epoch": 0.33, "grad_norm": 4.462570167528221, "learning_rate": 7.87841521768457e-06, "loss": 0.9516, "step": 3984 }, { "epoch": 0.33, "grad_norm": 2.7688333887424483, "learning_rate": 7.877332698436986e-06, "loss": 0.4017, "step": 3985 }, { "epoch": 0.33, "grad_norm": 1.9950498856678862, "learning_rate": 7.876249977500902e-06, "loss": 0.3658, "step": 3986 }, { "epoch": 0.33, "grad_norm": 4.095597838558024, "learning_rate": 7.875167054952211e-06, "loss": 1.2446, "step": 3987 }, { "epoch": 0.33, "grad_norm": 4.749989836918884, "learning_rate": 7.874083930866822e-06, "loss": 1.0899, "step": 3988 }, { "epoch": 0.33, "grad_norm": 2.9102664158338305, "learning_rate": 7.873000605320658e-06, "loss": 0.6445, "step": 3989 }, { "epoch": 0.33, "grad_norm": 4.269179065194626, "learning_rate": 7.871917078389655e-06, "loss": 1.2587, "step": 3990 }, { "epoch": 0.33, "grad_norm": 4.166224022227581, "learning_rate": 7.870833350149764e-06, "loss": 0.941, "step": 3991 }, { "epoch": 0.33, "grad_norm": 4.300253791870946, "learning_rate": 7.869749420676949e-06, "loss": 0.9979, "step": 3992 }, { "epoch": 0.33, "grad_norm": 3.4175409176355664, "learning_rate": 7.868665290047189e-06, "loss": 0.7883, "step": 3993 }, { "epoch": 0.33, "grad_norm": 4.183480038731687, "learning_rate": 7.867580958336474e-06, "loss": 1.0306, "step": 3994 }, { "epoch": 0.33, "grad_norm": 3.974127330270877, "learning_rate": 7.866496425620816e-06, "loss": 0.9258, "step": 3995 }, { "epoch": 0.33, "grad_norm": 5.185511981526038, "learning_rate": 7.865411691976232e-06, "loss": 1.3501, "step": 3996 }, { "epoch": 0.33, "grad_norm": 1.2284723128580735, "learning_rate": 7.864326757478762e-06, "loss": 0.1758, "step": 3997 }, { "epoch": 0.33, "grad_norm": 3.218909079487965, "learning_rate": 7.86324162220445e-06, "loss": 0.8576, "step": 3998 }, { "epoch": 0.33, "grad_norm": 5.174994398303952, "learning_rate": 7.86215628622936e-06, "loss": 1.5497, "step": 3999 }, { "epoch": 0.33, "grad_norm": 2.383769075116413, "learning_rate": 7.86107074962957e-06, "loss": 0.6589, "step": 4000 }, { "epoch": 0.33, "grad_norm": 3.153776451524846, "learning_rate": 7.859985012481174e-06, "loss": 0.5676, "step": 4001 }, { "epoch": 0.33, "grad_norm": 3.212803702193593, "learning_rate": 7.858899074860276e-06, "loss": 0.6885, "step": 4002 }, { "epoch": 0.33, "grad_norm": 5.179651835380731, "learning_rate": 7.857812936842992e-06, "loss": 1.0661, "step": 4003 }, { "epoch": 0.33, "grad_norm": 2.44607333850051, "learning_rate": 7.856726598505459e-06, "loss": 0.5944, "step": 4004 }, { "epoch": 0.33, "grad_norm": 4.3703513487093195, "learning_rate": 7.855640059923826e-06, "loss": 0.9042, "step": 4005 }, { "epoch": 0.33, "grad_norm": 4.3136300965208285, "learning_rate": 7.85455332117425e-06, "loss": 1.0606, "step": 4006 }, { "epoch": 0.33, "grad_norm": 2.5904783749373816, "learning_rate": 7.85346638233291e-06, "loss": 0.2983, "step": 4007 }, { "epoch": 0.33, "grad_norm": 3.986458941824462, "learning_rate": 7.852379243475998e-06, "loss": 1.0047, "step": 4008 }, { "epoch": 0.33, "grad_norm": 3.4523716432618428, "learning_rate": 7.851291904679712e-06, "loss": 0.8155, "step": 4009 }, { "epoch": 0.33, "grad_norm": 2.525587156791062, "learning_rate": 7.850204366020271e-06, "loss": 0.4656, "step": 4010 }, { "epoch": 0.33, "grad_norm": 3.1498400625389915, "learning_rate": 7.84911662757391e-06, "loss": 0.9032, "step": 4011 }, { "epoch": 0.33, "grad_norm": 4.740206562939246, "learning_rate": 7.848028689416872e-06, "loss": 1.2643, "step": 4012 }, { "epoch": 0.33, "grad_norm": 3.0050724670801054, "learning_rate": 7.846940551625417e-06, "loss": 0.8215, "step": 4013 }, { "epoch": 0.33, "grad_norm": 4.3537970585947905, "learning_rate": 7.845852214275821e-06, "loss": 1.0797, "step": 4014 }, { "epoch": 0.33, "grad_norm": 3.0278943481498746, "learning_rate": 7.84476367744437e-06, "loss": 0.69, "step": 4015 }, { "epoch": 0.33, "grad_norm": 3.741402674292105, "learning_rate": 7.843674941207364e-06, "loss": 0.9737, "step": 4016 }, { "epoch": 0.33, "grad_norm": 4.492878113334946, "learning_rate": 7.842586005641123e-06, "loss": 0.8798, "step": 4017 }, { "epoch": 0.33, "grad_norm": 3.533598324291426, "learning_rate": 7.84149687082197e-06, "loss": 0.7147, "step": 4018 }, { "epoch": 0.33, "grad_norm": 4.769338538009378, "learning_rate": 7.840407536826256e-06, "loss": 0.8957, "step": 4019 }, { "epoch": 0.33, "grad_norm": 4.860169527682215, "learning_rate": 7.839318003730332e-06, "loss": 0.9229, "step": 4020 }, { "epoch": 0.33, "grad_norm": 3.5413965574010757, "learning_rate": 7.838228271610575e-06, "loss": 0.3215, "step": 4021 }, { "epoch": 0.33, "grad_norm": 3.664172854252663, "learning_rate": 7.837138340543368e-06, "loss": 0.9405, "step": 4022 }, { "epoch": 0.33, "grad_norm": 4.704049865569673, "learning_rate": 7.836048210605109e-06, "loss": 1.0005, "step": 4023 }, { "epoch": 0.33, "grad_norm": 3.0229149140174143, "learning_rate": 7.834957881872215e-06, "loss": 0.5615, "step": 4024 }, { "epoch": 0.33, "grad_norm": 3.293901840144184, "learning_rate": 7.83386735442111e-06, "loss": 0.8158, "step": 4025 }, { "epoch": 0.33, "grad_norm": 2.9068154726580633, "learning_rate": 7.832776628328237e-06, "loss": 0.6568, "step": 4026 }, { "epoch": 0.33, "grad_norm": 4.3477590814031055, "learning_rate": 7.831685703670052e-06, "loss": 0.8672, "step": 4027 }, { "epoch": 0.33, "grad_norm": 2.9376357638709742, "learning_rate": 7.83059458052302e-06, "loss": 0.6062, "step": 4028 }, { "epoch": 0.33, "grad_norm": 5.25054189403912, "learning_rate": 7.829503258963629e-06, "loss": 1.1562, "step": 4029 }, { "epoch": 0.33, "grad_norm": 5.477460824502643, "learning_rate": 7.82841173906837e-06, "loss": 1.5191, "step": 4030 }, { "epoch": 0.33, "grad_norm": 5.789155502998915, "learning_rate": 7.827320020913762e-06, "loss": 1.2872, "step": 4031 }, { "epoch": 0.33, "grad_norm": 4.208558129868596, "learning_rate": 7.826228104576324e-06, "loss": 0.9987, "step": 4032 }, { "epoch": 0.33, "grad_norm": 4.821842829169687, "learning_rate": 7.825135990132592e-06, "loss": 1.5145, "step": 4033 }, { "epoch": 0.33, "grad_norm": 3.6215811257063426, "learning_rate": 7.824043677659127e-06, "loss": 1.0486, "step": 4034 }, { "epoch": 0.33, "grad_norm": 3.879403051650573, "learning_rate": 7.822951167232488e-06, "loss": 1.1389, "step": 4035 }, { "epoch": 0.33, "grad_norm": 3.3982667736288654, "learning_rate": 7.821858458929256e-06, "loss": 0.6997, "step": 4036 }, { "epoch": 0.33, "grad_norm": 5.189642795215071, "learning_rate": 7.82076555282603e-06, "loss": 1.3593, "step": 4037 }, { "epoch": 0.33, "grad_norm": 4.261351402183224, "learning_rate": 7.819672448999413e-06, "loss": 1.0783, "step": 4038 }, { "epoch": 0.33, "grad_norm": 3.0582346994452942, "learning_rate": 7.81857914752603e-06, "loss": 0.8212, "step": 4039 }, { "epoch": 0.33, "grad_norm": 5.128775511789336, "learning_rate": 7.817485648482514e-06, "loss": 1.4036, "step": 4040 }, { "epoch": 0.33, "grad_norm": 3.1862480949901624, "learning_rate": 7.816391951945517e-06, "loss": 0.5946, "step": 4041 }, { "epoch": 0.33, "grad_norm": 3.5393890511012724, "learning_rate": 7.8152980579917e-06, "loss": 0.5955, "step": 4042 }, { "epoch": 0.33, "grad_norm": 4.291111423854695, "learning_rate": 7.814203966697742e-06, "loss": 1.3313, "step": 4043 }, { "epoch": 0.33, "grad_norm": 3.198816817119858, "learning_rate": 7.813109678140333e-06, "loss": 0.6167, "step": 4044 }, { "epoch": 0.33, "grad_norm": 5.178105362579311, "learning_rate": 7.812015192396178e-06, "loss": 1.0517, "step": 4045 }, { "epoch": 0.33, "grad_norm": 2.2557654770972406, "learning_rate": 7.810920509541997e-06, "loss": 0.6081, "step": 4046 }, { "epoch": 0.33, "grad_norm": 4.158509861411965, "learning_rate": 7.80982562965452e-06, "loss": 1.0021, "step": 4047 }, { "epoch": 0.33, "grad_norm": 3.6075015660901797, "learning_rate": 7.808730552810496e-06, "loss": 0.5844, "step": 4048 }, { "epoch": 0.33, "grad_norm": 2.118174912930761, "learning_rate": 7.807635279086682e-06, "loss": 0.5398, "step": 4049 }, { "epoch": 0.33, "grad_norm": 4.087999613701567, "learning_rate": 7.806539808559856e-06, "loss": 0.7191, "step": 4050 }, { "epoch": 0.33, "grad_norm": 1.2024083038375055, "learning_rate": 7.805444141306804e-06, "loss": 0.2323, "step": 4051 }, { "epoch": 0.33, "grad_norm": 4.287541535163385, "learning_rate": 7.804348277404324e-06, "loss": 0.802, "step": 4052 }, { "epoch": 0.33, "grad_norm": 3.3546151266802453, "learning_rate": 7.803252216929236e-06, "loss": 0.7602, "step": 4053 }, { "epoch": 0.33, "grad_norm": 5.237050962673722, "learning_rate": 7.802155959958368e-06, "loss": 0.6012, "step": 4054 }, { "epoch": 0.33, "grad_norm": 3.6069184684330327, "learning_rate": 7.80105950656856e-06, "loss": 0.8598, "step": 4055 }, { "epoch": 0.33, "grad_norm": 2.9434142461101187, "learning_rate": 7.799962856836674e-06, "loss": 0.2683, "step": 4056 }, { "epoch": 0.33, "grad_norm": 3.135090704148495, "learning_rate": 7.798866010839577e-06, "loss": 0.4931, "step": 4057 }, { "epoch": 0.33, "grad_norm": 2.8851883209140317, "learning_rate": 7.79776896865415e-06, "loss": 0.4785, "step": 4058 }, { "epoch": 0.33, "grad_norm": 4.578701701147169, "learning_rate": 7.796671730357296e-06, "loss": 1.5124, "step": 4059 }, { "epoch": 0.33, "grad_norm": 4.839867870120802, "learning_rate": 7.795574296025925e-06, "loss": 1.0868, "step": 4060 }, { "epoch": 0.33, "grad_norm": 4.327365478228468, "learning_rate": 7.794476665736964e-06, "loss": 1.2185, "step": 4061 }, { "epoch": 0.33, "grad_norm": 3.7570060148375393, "learning_rate": 7.793378839567348e-06, "loss": 0.7382, "step": 4062 }, { "epoch": 0.33, "grad_norm": 2.8834551320663824, "learning_rate": 7.792280817594033e-06, "loss": 0.775, "step": 4063 }, { "epoch": 0.33, "grad_norm": 3.0132087833361663, "learning_rate": 7.791182599893985e-06, "loss": 0.5297, "step": 4064 }, { "epoch": 0.33, "grad_norm": 6.041447540688191, "learning_rate": 7.790084186544183e-06, "loss": 1.5578, "step": 4065 }, { "epoch": 0.33, "grad_norm": 1.3766809671719165, "learning_rate": 7.788985577621623e-06, "loss": 0.2116, "step": 4066 }, { "epoch": 0.33, "grad_norm": 3.3972620375857447, "learning_rate": 7.78788677320331e-06, "loss": 0.7537, "step": 4067 }, { "epoch": 0.33, "grad_norm": 2.9631226229157055, "learning_rate": 7.78678777336627e-06, "loss": 1.0203, "step": 4068 }, { "epoch": 0.33, "grad_norm": 2.6928746766462877, "learning_rate": 7.785688578187534e-06, "loss": 0.6483, "step": 4069 }, { "epoch": 0.33, "grad_norm": 2.1103911778195075, "learning_rate": 7.784589187744151e-06, "loss": 0.2568, "step": 4070 }, { "epoch": 0.33, "grad_norm": 5.080635885354421, "learning_rate": 7.783489602113185e-06, "loss": 0.7317, "step": 4071 }, { "epoch": 0.33, "grad_norm": 2.883177504009893, "learning_rate": 7.782389821371712e-06, "loss": 0.5972, "step": 4072 }, { "epoch": 0.33, "grad_norm": 4.327020316159079, "learning_rate": 7.781289845596821e-06, "loss": 0.9701, "step": 4073 }, { "epoch": 0.33, "grad_norm": 2.7278052835176005, "learning_rate": 7.780189674865617e-06, "loss": 0.5983, "step": 4074 }, { "epoch": 0.33, "grad_norm": 4.160715692148022, "learning_rate": 7.779089309255217e-06, "loss": 0.7912, "step": 4075 }, { "epoch": 0.33, "grad_norm": 4.248414112566387, "learning_rate": 7.777988748842748e-06, "loss": 1.1924, "step": 4076 }, { "epoch": 0.33, "grad_norm": 4.836771315743967, "learning_rate": 7.77688799370536e-06, "loss": 1.2628, "step": 4077 }, { "epoch": 0.33, "grad_norm": 4.348685058036905, "learning_rate": 7.775787043920209e-06, "loss": 0.9582, "step": 4078 }, { "epoch": 0.33, "grad_norm": 4.85058506058486, "learning_rate": 7.774685899564465e-06, "loss": 0.8381, "step": 4079 }, { "epoch": 0.33, "grad_norm": 3.9380355388535526, "learning_rate": 7.773584560715315e-06, "loss": 0.8012, "step": 4080 }, { "epoch": 0.33, "grad_norm": 3.2877638211599227, "learning_rate": 7.772483027449957e-06, "loss": 0.7378, "step": 4081 }, { "epoch": 0.33, "grad_norm": 3.58337541740198, "learning_rate": 7.771381299845607e-06, "loss": 0.7549, "step": 4082 }, { "epoch": 0.33, "grad_norm": 2.431569297930181, "learning_rate": 7.770279377979488e-06, "loss": 0.349, "step": 4083 }, { "epoch": 0.33, "grad_norm": 5.677928014296555, "learning_rate": 7.76917726192884e-06, "loss": 1.2377, "step": 4084 }, { "epoch": 0.33, "grad_norm": 5.046569984878376, "learning_rate": 7.768074951770919e-06, "loss": 1.0108, "step": 4085 }, { "epoch": 0.33, "grad_norm": 4.008142292994773, "learning_rate": 7.76697244758299e-06, "loss": 0.5098, "step": 4086 }, { "epoch": 0.33, "grad_norm": 3.4447675454103566, "learning_rate": 7.765869749442334e-06, "loss": 0.7362, "step": 4087 }, { "epoch": 0.33, "grad_norm": 5.213593544623172, "learning_rate": 7.764766857426247e-06, "loss": 1.1057, "step": 4088 }, { "epoch": 0.33, "grad_norm": 3.7927402365139145, "learning_rate": 7.763663771612033e-06, "loss": 0.7684, "step": 4089 }, { "epoch": 0.33, "grad_norm": 4.515848562656643, "learning_rate": 7.762560492077018e-06, "loss": 0.8077, "step": 4090 }, { "epoch": 0.33, "grad_norm": 2.687411542813131, "learning_rate": 7.761457018898536e-06, "loss": 0.7698, "step": 4091 }, { "epoch": 0.33, "grad_norm": 4.173316022459569, "learning_rate": 7.760353352153933e-06, "loss": 0.8834, "step": 4092 }, { "epoch": 0.33, "grad_norm": 3.5145776395485453, "learning_rate": 7.759249491920573e-06, "loss": 0.7676, "step": 4093 }, { "epoch": 0.33, "grad_norm": 5.379993757189861, "learning_rate": 7.758145438275834e-06, "loss": 0.9946, "step": 4094 }, { "epoch": 0.33, "grad_norm": 5.071372107617467, "learning_rate": 7.757041191297102e-06, "loss": 0.9479, "step": 4095 }, { "epoch": 0.33, "grad_norm": 3.354744362202982, "learning_rate": 7.755936751061781e-06, "loss": 0.6599, "step": 4096 }, { "epoch": 0.33, "grad_norm": 3.615773819555657, "learning_rate": 7.75483211764729e-06, "loss": 0.7168, "step": 4097 }, { "epoch": 0.33, "grad_norm": 4.520254715236886, "learning_rate": 7.753727291131052e-06, "loss": 0.8517, "step": 4098 }, { "epoch": 0.34, "grad_norm": 4.185083001280689, "learning_rate": 7.75262227159052e-06, "loss": 0.9201, "step": 4099 }, { "epoch": 0.34, "grad_norm": 3.808745561079665, "learning_rate": 7.751517059103142e-06, "loss": 0.9437, "step": 4100 }, { "epoch": 0.34, "grad_norm": 1.7571468364421476, "learning_rate": 7.750411653746395e-06, "loss": 0.4474, "step": 4101 }, { "epoch": 0.34, "grad_norm": 4.549902143678165, "learning_rate": 7.74930605559776e-06, "loss": 0.8425, "step": 4102 }, { "epoch": 0.34, "grad_norm": 4.1746358152158685, "learning_rate": 7.748200264734735e-06, "loss": 1.2278, "step": 4103 }, { "epoch": 0.34, "grad_norm": 3.987321493415824, "learning_rate": 7.747094281234833e-06, "loss": 0.4285, "step": 4104 }, { "epoch": 0.34, "grad_norm": 2.0173645566645195, "learning_rate": 7.745988105175577e-06, "loss": 0.4033, "step": 4105 }, { "epoch": 0.34, "grad_norm": 4.354547097155099, "learning_rate": 7.744881736634506e-06, "loss": 1.3625, "step": 4106 }, { "epoch": 0.34, "grad_norm": 3.332371613972039, "learning_rate": 7.74377517568917e-06, "loss": 0.7414, "step": 4107 }, { "epoch": 0.34, "grad_norm": 2.1920844245645, "learning_rate": 7.742668422417137e-06, "loss": 0.3366, "step": 4108 }, { "epoch": 0.34, "grad_norm": 3.4605536417478153, "learning_rate": 7.74156147689598e-06, "loss": 0.7064, "step": 4109 }, { "epoch": 0.34, "grad_norm": 3.4351461374391827, "learning_rate": 7.740454339203298e-06, "loss": 0.6305, "step": 4110 }, { "epoch": 0.34, "grad_norm": 4.341168273700383, "learning_rate": 7.739347009416693e-06, "loss": 0.9239, "step": 4111 }, { "epoch": 0.34, "grad_norm": 3.1896550569997317, "learning_rate": 7.738239487613783e-06, "loss": 0.4106, "step": 4112 }, { "epoch": 0.34, "grad_norm": 2.772165868649677, "learning_rate": 7.737131773872202e-06, "loss": 0.7461, "step": 4113 }, { "epoch": 0.34, "grad_norm": 3.172701606361016, "learning_rate": 7.736023868269598e-06, "loss": 0.572, "step": 4114 }, { "epoch": 0.34, "grad_norm": 3.2955721559098285, "learning_rate": 7.734915770883624e-06, "loss": 0.6784, "step": 4115 }, { "epoch": 0.34, "grad_norm": 5.013100997652645, "learning_rate": 7.733807481791959e-06, "loss": 1.4583, "step": 4116 }, { "epoch": 0.34, "grad_norm": 3.8655354170040597, "learning_rate": 7.732699001072288e-06, "loss": 1.1896, "step": 4117 }, { "epoch": 0.34, "grad_norm": 1.9395350071445177, "learning_rate": 7.73159032880231e-06, "loss": 0.4108, "step": 4118 }, { "epoch": 0.34, "grad_norm": 3.7818826869175437, "learning_rate": 7.730481465059736e-06, "loss": 0.7055, "step": 4119 }, { "epoch": 0.34, "grad_norm": 5.15552039543157, "learning_rate": 7.729372409922295e-06, "loss": 1.4362, "step": 4120 }, { "epoch": 0.34, "grad_norm": 3.2196597029324914, "learning_rate": 7.728263163467727e-06, "loss": 0.6513, "step": 4121 }, { "epoch": 0.34, "grad_norm": 4.255860561163699, "learning_rate": 7.727153725773783e-06, "loss": 0.8975, "step": 4122 }, { "epoch": 0.34, "grad_norm": 3.043423662983045, "learning_rate": 7.726044096918232e-06, "loss": 0.6755, "step": 4123 }, { "epoch": 0.34, "grad_norm": 4.680203207964453, "learning_rate": 7.724934276978855e-06, "loss": 1.0033, "step": 4124 }, { "epoch": 0.34, "grad_norm": 2.746901777325029, "learning_rate": 7.723824266033444e-06, "loss": 0.5562, "step": 4125 }, { "epoch": 0.34, "grad_norm": 2.272360035766652, "learning_rate": 7.722714064159805e-06, "loss": 0.3457, "step": 4126 }, { "epoch": 0.34, "grad_norm": 4.7240643115152325, "learning_rate": 7.72160367143576e-06, "loss": 0.7799, "step": 4127 }, { "epoch": 0.34, "grad_norm": 4.591078813080817, "learning_rate": 7.720493087939143e-06, "loss": 1.2883, "step": 4128 }, { "epoch": 0.34, "grad_norm": 4.210409977210689, "learning_rate": 7.719382313747799e-06, "loss": 0.7096, "step": 4129 }, { "epoch": 0.34, "grad_norm": 3.6817128508281134, "learning_rate": 7.71827134893959e-06, "loss": 0.6766, "step": 4130 }, { "epoch": 0.34, "grad_norm": 1.7634823470854484, "learning_rate": 7.717160193592387e-06, "loss": 0.4359, "step": 4131 }, { "epoch": 0.34, "grad_norm": 2.9420876984077293, "learning_rate": 7.716048847784082e-06, "loss": 0.7967, "step": 4132 }, { "epoch": 0.34, "grad_norm": 1.148358437503598, "learning_rate": 7.714937311592573e-06, "loss": 0.1711, "step": 4133 }, { "epoch": 0.34, "grad_norm": 4.250724443509678, "learning_rate": 7.713825585095775e-06, "loss": 1.2042, "step": 4134 }, { "epoch": 0.34, "grad_norm": 4.758507214850353, "learning_rate": 7.712713668371612e-06, "loss": 1.0143, "step": 4135 }, { "epoch": 0.34, "grad_norm": 2.7488317621633676, "learning_rate": 7.711601561498027e-06, "loss": 0.5658, "step": 4136 }, { "epoch": 0.34, "grad_norm": 3.397262375524663, "learning_rate": 7.710489264552974e-06, "loss": 0.8973, "step": 4137 }, { "epoch": 0.34, "grad_norm": 2.83739358311308, "learning_rate": 7.709376777614418e-06, "loss": 0.5373, "step": 4138 }, { "epoch": 0.34, "grad_norm": 2.5305706042930973, "learning_rate": 7.708264100760343e-06, "loss": 0.4427, "step": 4139 }, { "epoch": 0.34, "grad_norm": 3.6652675405330037, "learning_rate": 7.707151234068741e-06, "loss": 0.735, "step": 4140 }, { "epoch": 0.34, "grad_norm": 3.988081527589675, "learning_rate": 7.706038177617618e-06, "loss": 0.7667, "step": 4141 }, { "epoch": 0.34, "grad_norm": 3.477405865454765, "learning_rate": 7.704924931484997e-06, "loss": 0.613, "step": 4142 }, { "epoch": 0.34, "grad_norm": 4.063093742168863, "learning_rate": 7.703811495748908e-06, "loss": 1.087, "step": 4143 }, { "epoch": 0.34, "grad_norm": 2.503004822250554, "learning_rate": 7.702697870487401e-06, "loss": 0.4957, "step": 4144 }, { "epoch": 0.34, "grad_norm": 5.030254929001747, "learning_rate": 7.701584055778536e-06, "loss": 1.126, "step": 4145 }, { "epoch": 0.34, "grad_norm": 4.257522661426769, "learning_rate": 7.700470051700385e-06, "loss": 0.8968, "step": 4146 }, { "epoch": 0.34, "grad_norm": 3.9125424198868415, "learning_rate": 7.699355858331038e-06, "loss": 0.5521, "step": 4147 }, { "epoch": 0.34, "grad_norm": 3.844248185026583, "learning_rate": 7.698241475748592e-06, "loss": 0.6196, "step": 4148 }, { "epoch": 0.34, "grad_norm": 3.9157845125845605, "learning_rate": 7.69712690403116e-06, "loss": 0.7622, "step": 4149 }, { "epoch": 0.34, "grad_norm": 1.053419733377103, "learning_rate": 7.696012143256873e-06, "loss": 0.1924, "step": 4150 }, { "epoch": 0.34, "grad_norm": 3.6960704618009794, "learning_rate": 7.694897193503865e-06, "loss": 1.0025, "step": 4151 }, { "epoch": 0.34, "grad_norm": 3.4272805195111458, "learning_rate": 7.693782054850293e-06, "loss": 0.8893, "step": 4152 }, { "epoch": 0.34, "grad_norm": 2.636670957115418, "learning_rate": 7.692666727374321e-06, "loss": 0.7311, "step": 4153 }, { "epoch": 0.34, "grad_norm": 3.4288545667039547, "learning_rate": 7.691551211154132e-06, "loss": 0.7569, "step": 4154 }, { "epoch": 0.34, "grad_norm": 3.996161453088842, "learning_rate": 7.690435506267916e-06, "loss": 1.0075, "step": 4155 }, { "epoch": 0.34, "grad_norm": 4.473680046059575, "learning_rate": 7.689319612793878e-06, "loss": 0.964, "step": 4156 }, { "epoch": 0.34, "grad_norm": 5.043930889076822, "learning_rate": 7.68820353081024e-06, "loss": 1.343, "step": 4157 }, { "epoch": 0.34, "grad_norm": 3.8010199690411755, "learning_rate": 7.687087260395237e-06, "loss": 0.962, "step": 4158 }, { "epoch": 0.34, "grad_norm": 4.846913545191014, "learning_rate": 7.685970801627108e-06, "loss": 0.9527, "step": 4159 }, { "epoch": 0.34, "grad_norm": 2.7844959560021967, "learning_rate": 7.684854154584117e-06, "loss": 0.5735, "step": 4160 }, { "epoch": 0.34, "grad_norm": 4.320667753043801, "learning_rate": 7.68373731934453e-06, "loss": 1.2572, "step": 4161 }, { "epoch": 0.34, "grad_norm": 3.6840322336012488, "learning_rate": 7.682620295986642e-06, "loss": 0.7284, "step": 4162 }, { "epoch": 0.34, "grad_norm": 2.9959981512529237, "learning_rate": 7.681503084588743e-06, "loss": 0.673, "step": 4163 }, { "epoch": 0.34, "grad_norm": 2.1346317197548474, "learning_rate": 7.680385685229148e-06, "loss": 0.389, "step": 4164 }, { "epoch": 0.34, "grad_norm": 5.405414090668094, "learning_rate": 7.679268097986183e-06, "loss": 1.4987, "step": 4165 }, { "epoch": 0.34, "grad_norm": 3.157420352861571, "learning_rate": 7.678150322938183e-06, "loss": 0.5769, "step": 4166 }, { "epoch": 0.34, "grad_norm": 3.3666285293683225, "learning_rate": 7.6770323601635e-06, "loss": 0.6687, "step": 4167 }, { "epoch": 0.34, "grad_norm": 4.277406369736445, "learning_rate": 7.675914209740503e-06, "loss": 0.7282, "step": 4168 }, { "epoch": 0.34, "grad_norm": 1.9396861983972804, "learning_rate": 7.67479587174756e-06, "loss": 0.2985, "step": 4169 }, { "epoch": 0.34, "grad_norm": 4.241210008332523, "learning_rate": 7.67367734626307e-06, "loss": 1.2679, "step": 4170 }, { "epoch": 0.34, "grad_norm": 3.780035706850577, "learning_rate": 7.672558633365434e-06, "loss": 0.8957, "step": 4171 }, { "epoch": 0.34, "grad_norm": 4.875053635380888, "learning_rate": 7.67143973313307e-06, "loss": 1.4268, "step": 4172 }, { "epoch": 0.34, "grad_norm": 2.7887924259311805, "learning_rate": 7.670320645644404e-06, "loss": 0.6755, "step": 4173 }, { "epoch": 0.34, "grad_norm": 4.04293308169156, "learning_rate": 7.669201370977885e-06, "loss": 0.9438, "step": 4174 }, { "epoch": 0.34, "grad_norm": 4.2374797347176925, "learning_rate": 7.668081909211964e-06, "loss": 0.8673, "step": 4175 }, { "epoch": 0.34, "grad_norm": 3.8677793397185134, "learning_rate": 7.666962260425113e-06, "loss": 0.903, "step": 4176 }, { "epoch": 0.34, "grad_norm": 4.348804643728993, "learning_rate": 7.665842424695815e-06, "loss": 0.9382, "step": 4177 }, { "epoch": 0.34, "grad_norm": 2.9757059499309753, "learning_rate": 7.664722402102564e-06, "loss": 0.5758, "step": 4178 }, { "epoch": 0.34, "grad_norm": 2.5385242534408654, "learning_rate": 7.663602192723871e-06, "loss": 0.6308, "step": 4179 }, { "epoch": 0.34, "grad_norm": 4.389637679225512, "learning_rate": 7.662481796638254e-06, "loss": 1.2565, "step": 4180 }, { "epoch": 0.34, "grad_norm": 4.035115559574408, "learning_rate": 7.661361213924252e-06, "loss": 0.8521, "step": 4181 }, { "epoch": 0.34, "grad_norm": 3.824584879682278, "learning_rate": 7.660240444660411e-06, "loss": 0.8049, "step": 4182 }, { "epoch": 0.34, "grad_norm": 4.586953981275565, "learning_rate": 7.659119488925292e-06, "loss": 1.082, "step": 4183 }, { "epoch": 0.34, "grad_norm": 2.9302524382227393, "learning_rate": 7.65799834679747e-06, "loss": 0.7177, "step": 4184 }, { "epoch": 0.34, "grad_norm": 4.019542010284757, "learning_rate": 7.656877018355533e-06, "loss": 0.9594, "step": 4185 }, { "epoch": 0.34, "grad_norm": 2.68219286689825, "learning_rate": 7.655755503678075e-06, "loss": 0.4558, "step": 4186 }, { "epoch": 0.34, "grad_norm": 4.0411454882488655, "learning_rate": 7.654633802843718e-06, "loss": 0.9238, "step": 4187 }, { "epoch": 0.34, "grad_norm": 3.1291071195040256, "learning_rate": 7.653511915931082e-06, "loss": 0.485, "step": 4188 }, { "epoch": 0.34, "grad_norm": 3.2706829407485616, "learning_rate": 7.65238984301881e-06, "loss": 0.9747, "step": 4189 }, { "epoch": 0.34, "grad_norm": 4.027229745479082, "learning_rate": 7.651267584185554e-06, "loss": 1.2054, "step": 4190 }, { "epoch": 0.34, "grad_norm": 3.778073601555418, "learning_rate": 7.650145139509978e-06, "loss": 0.7593, "step": 4191 }, { "epoch": 0.34, "grad_norm": 4.897854125935847, "learning_rate": 7.649022509070761e-06, "loss": 0.6984, "step": 4192 }, { "epoch": 0.34, "grad_norm": 2.233075336679899, "learning_rate": 7.647899692946594e-06, "loss": 0.4863, "step": 4193 }, { "epoch": 0.34, "grad_norm": 3.915603508098223, "learning_rate": 7.646776691216185e-06, "loss": 0.681, "step": 4194 }, { "epoch": 0.34, "grad_norm": 4.258355781356614, "learning_rate": 7.645653503958246e-06, "loss": 0.7473, "step": 4195 }, { "epoch": 0.34, "grad_norm": 1.7927823875566085, "learning_rate": 7.64453013125151e-06, "loss": 0.2872, "step": 4196 }, { "epoch": 0.34, "grad_norm": 3.508724132851226, "learning_rate": 7.643406573174724e-06, "loss": 0.7924, "step": 4197 }, { "epoch": 0.34, "grad_norm": 5.050262457545431, "learning_rate": 7.642282829806639e-06, "loss": 1.0736, "step": 4198 }, { "epoch": 0.34, "grad_norm": 4.029013736020635, "learning_rate": 7.641158901226026e-06, "loss": 0.8475, "step": 4199 }, { "epoch": 0.34, "grad_norm": 3.696466610682216, "learning_rate": 7.64003478751167e-06, "loss": 0.6701, "step": 4200 }, { "epoch": 0.34, "grad_norm": 3.143479928983292, "learning_rate": 7.638910488742364e-06, "loss": 0.729, "step": 4201 }, { "epoch": 0.34, "grad_norm": 4.727763069861252, "learning_rate": 7.637786004996918e-06, "loss": 1.209, "step": 4202 }, { "epoch": 0.34, "grad_norm": 4.851506905628155, "learning_rate": 7.636661336354152e-06, "loss": 1.0456, "step": 4203 }, { "epoch": 0.34, "grad_norm": 2.9358162237417544, "learning_rate": 7.635536482892902e-06, "loss": 0.6617, "step": 4204 }, { "epoch": 0.34, "grad_norm": 5.672038616038138, "learning_rate": 7.634411444692014e-06, "loss": 1.3754, "step": 4205 }, { "epoch": 0.34, "grad_norm": 2.3060864557234773, "learning_rate": 7.63328622183035e-06, "loss": 0.5077, "step": 4206 }, { "epoch": 0.34, "grad_norm": 5.1549988023402165, "learning_rate": 7.63216081438678e-06, "loss": 1.3093, "step": 4207 }, { "epoch": 0.34, "grad_norm": 2.4609699622097643, "learning_rate": 7.631035222440192e-06, "loss": 0.3794, "step": 4208 }, { "epoch": 0.34, "grad_norm": 3.602474189020254, "learning_rate": 7.629909446069487e-06, "loss": 0.7048, "step": 4209 }, { "epoch": 0.34, "grad_norm": 3.785531242848229, "learning_rate": 7.628783485353573e-06, "loss": 0.8278, "step": 4210 }, { "epoch": 0.34, "grad_norm": 3.5722162724562665, "learning_rate": 7.6276573403713796e-06, "loss": 0.8092, "step": 4211 }, { "epoch": 0.34, "grad_norm": 3.458727519165083, "learning_rate": 7.626531011201841e-06, "loss": 0.9704, "step": 4212 }, { "epoch": 0.34, "grad_norm": 3.8927187042148437, "learning_rate": 7.625404497923909e-06, "loss": 1.0258, "step": 4213 }, { "epoch": 0.34, "grad_norm": 3.848434988336024, "learning_rate": 7.6242778006165484e-06, "loss": 0.7578, "step": 4214 }, { "epoch": 0.34, "grad_norm": 1.755430091464229, "learning_rate": 7.623150919358734e-06, "loss": 0.4261, "step": 4215 }, { "epoch": 0.34, "grad_norm": 4.131447734058058, "learning_rate": 7.6220238542294565e-06, "loss": 1.1711, "step": 4216 }, { "epoch": 0.34, "grad_norm": 2.4337178438962437, "learning_rate": 7.620896605307717e-06, "loss": 0.6572, "step": 4217 }, { "epoch": 0.34, "grad_norm": 4.2111164733993345, "learning_rate": 7.619769172672533e-06, "loss": 0.9089, "step": 4218 }, { "epoch": 0.34, "grad_norm": 5.3934535906184164, "learning_rate": 7.61864155640293e-06, "loss": 1.6321, "step": 4219 }, { "epoch": 0.34, "grad_norm": 4.380431114335264, "learning_rate": 7.6175137565779524e-06, "loss": 0.9619, "step": 4220 }, { "epoch": 0.35, "grad_norm": 3.5949226198374613, "learning_rate": 7.616385773276651e-06, "loss": 0.707, "step": 4221 }, { "epoch": 0.35, "grad_norm": 4.375986072874407, "learning_rate": 7.615257606578093e-06, "loss": 0.9955, "step": 4222 }, { "epoch": 0.35, "grad_norm": 3.0945232970753622, "learning_rate": 7.6141292565613574e-06, "loss": 0.6822, "step": 4223 }, { "epoch": 0.35, "grad_norm": 3.121369107003071, "learning_rate": 7.613000723305539e-06, "loss": 0.7806, "step": 4224 }, { "epoch": 0.35, "grad_norm": 3.842882239042579, "learning_rate": 7.611872006889741e-06, "loss": 1.1877, "step": 4225 }, { "epoch": 0.35, "grad_norm": 3.235155562415778, "learning_rate": 7.610743107393083e-06, "loss": 0.68, "step": 4226 }, { "epoch": 0.35, "grad_norm": 5.197630275967312, "learning_rate": 7.609614024894694e-06, "loss": 0.9361, "step": 4227 }, { "epoch": 0.35, "grad_norm": 1.8510247652966472, "learning_rate": 7.60848475947372e-06, "loss": 0.3807, "step": 4228 }, { "epoch": 0.35, "grad_norm": 1.3552331105985607, "learning_rate": 7.607355311209317e-06, "loss": 0.1851, "step": 4229 }, { "epoch": 0.35, "grad_norm": 2.2471461785944435, "learning_rate": 7.606225680180652e-06, "loss": 0.3754, "step": 4230 }, { "epoch": 0.35, "grad_norm": 3.5961883792714393, "learning_rate": 7.605095866466912e-06, "loss": 0.6708, "step": 4231 }, { "epoch": 0.35, "grad_norm": 3.8820743215669724, "learning_rate": 7.603965870147285e-06, "loss": 0.8477, "step": 4232 }, { "epoch": 0.35, "grad_norm": 2.420318348105564, "learning_rate": 7.602835691300986e-06, "loss": 0.2926, "step": 4233 }, { "epoch": 0.35, "grad_norm": 3.7725992633943646, "learning_rate": 7.601705330007232e-06, "loss": 0.8095, "step": 4234 }, { "epoch": 0.35, "grad_norm": 2.561808281233337, "learning_rate": 7.600574786345257e-06, "loss": 0.5723, "step": 4235 }, { "epoch": 0.35, "grad_norm": 5.3420721342527875, "learning_rate": 7.599444060394308e-06, "loss": 1.0933, "step": 4236 }, { "epoch": 0.35, "grad_norm": 2.303114674088338, "learning_rate": 7.598313152233643e-06, "loss": 0.5405, "step": 4237 }, { "epoch": 0.35, "grad_norm": 5.639902615736915, "learning_rate": 7.597182061942533e-06, "loss": 1.2133, "step": 4238 }, { "epoch": 0.35, "grad_norm": 4.119971811972484, "learning_rate": 7.5960507896002636e-06, "loss": 1.0088, "step": 4239 }, { "epoch": 0.35, "grad_norm": 2.9814954955260253, "learning_rate": 7.594919335286133e-06, "loss": 0.8402, "step": 4240 }, { "epoch": 0.35, "grad_norm": 4.235203233182246, "learning_rate": 7.593787699079449e-06, "loss": 0.7345, "step": 4241 }, { "epoch": 0.35, "grad_norm": 2.8363860198831965, "learning_rate": 7.592655881059536e-06, "loss": 0.6156, "step": 4242 }, { "epoch": 0.35, "grad_norm": 4.137444392466568, "learning_rate": 7.591523881305728e-06, "loss": 1.2916, "step": 4243 }, { "epoch": 0.35, "grad_norm": 3.376223279509348, "learning_rate": 7.5903916998973745e-06, "loss": 0.7987, "step": 4244 }, { "epoch": 0.35, "grad_norm": 2.541874168567518, "learning_rate": 7.589259336913839e-06, "loss": 0.552, "step": 4245 }, { "epoch": 0.35, "grad_norm": 3.0811400669956592, "learning_rate": 7.588126792434489e-06, "loss": 0.777, "step": 4246 }, { "epoch": 0.35, "grad_norm": 3.6244134270135215, "learning_rate": 7.586994066538715e-06, "loss": 1.0474, "step": 4247 }, { "epoch": 0.35, "grad_norm": 3.294865922920346, "learning_rate": 7.585861159305917e-06, "loss": 0.6377, "step": 4248 }, { "epoch": 0.35, "grad_norm": 5.027424272181276, "learning_rate": 7.584728070815504e-06, "loss": 1.2547, "step": 4249 }, { "epoch": 0.35, "grad_norm": 4.073784678794884, "learning_rate": 7.583594801146903e-06, "loss": 0.9806, "step": 4250 }, { "epoch": 0.35, "grad_norm": 3.7825420370232674, "learning_rate": 7.582461350379551e-06, "loss": 0.8304, "step": 4251 }, { "epoch": 0.35, "grad_norm": 3.0319950587374103, "learning_rate": 7.581327718592896e-06, "loss": 0.778, "step": 4252 }, { "epoch": 0.35, "grad_norm": 2.0521053776449354, "learning_rate": 7.580193905866402e-06, "loss": 0.415, "step": 4253 }, { "epoch": 0.35, "grad_norm": 2.885777423711224, "learning_rate": 7.579059912279545e-06, "loss": 0.78, "step": 4254 }, { "epoch": 0.35, "grad_norm": 4.715809617328955, "learning_rate": 7.577925737911811e-06, "loss": 1.4035, "step": 4255 }, { "epoch": 0.35, "grad_norm": 2.577394588141058, "learning_rate": 7.576791382842702e-06, "loss": 0.5588, "step": 4256 }, { "epoch": 0.35, "grad_norm": 3.162114084015856, "learning_rate": 7.575656847151732e-06, "loss": 0.7035, "step": 4257 }, { "epoch": 0.35, "grad_norm": 3.5025398505743817, "learning_rate": 7.574522130918426e-06, "loss": 0.9669, "step": 4258 }, { "epoch": 0.35, "grad_norm": 4.689264485842858, "learning_rate": 7.5733872342223235e-06, "loss": 0.985, "step": 4259 }, { "epoch": 0.35, "grad_norm": 3.1512518368625324, "learning_rate": 7.572252157142976e-06, "loss": 0.5427, "step": 4260 }, { "epoch": 0.35, "grad_norm": 3.136962873078472, "learning_rate": 7.571116899759945e-06, "loss": 0.5495, "step": 4261 }, { "epoch": 0.35, "grad_norm": 3.657164795694336, "learning_rate": 7.56998146215281e-06, "loss": 0.7432, "step": 4262 }, { "epoch": 0.35, "grad_norm": 3.5593972705527133, "learning_rate": 7.568845844401158e-06, "loss": 0.9293, "step": 4263 }, { "epoch": 0.35, "grad_norm": 4.104600859719557, "learning_rate": 7.567710046584593e-06, "loss": 0.937, "step": 4264 }, { "epoch": 0.35, "grad_norm": 3.3107309830162897, "learning_rate": 7.566574068782728e-06, "loss": 0.8032, "step": 4265 }, { "epoch": 0.35, "grad_norm": 3.597493941109041, "learning_rate": 7.565437911075191e-06, "loss": 1.1043, "step": 4266 }, { "epoch": 0.35, "grad_norm": 2.8071170485716794, "learning_rate": 7.564301573541621e-06, "loss": 0.8017, "step": 4267 }, { "epoch": 0.35, "grad_norm": 1.9680658468583032, "learning_rate": 7.563165056261671e-06, "loss": 0.4377, "step": 4268 }, { "epoch": 0.35, "grad_norm": 4.4083963853423915, "learning_rate": 7.562028359315005e-06, "loss": 0.9813, "step": 4269 }, { "epoch": 0.35, "grad_norm": 2.065016615189242, "learning_rate": 7.560891482781301e-06, "loss": 0.3513, "step": 4270 }, { "epoch": 0.35, "grad_norm": 4.22180599717921, "learning_rate": 7.559754426740249e-06, "loss": 0.9598, "step": 4271 }, { "epoch": 0.35, "grad_norm": 5.177975680038039, "learning_rate": 7.558617191271551e-06, "loss": 1.0541, "step": 4272 }, { "epoch": 0.35, "grad_norm": 4.206282755330498, "learning_rate": 7.557479776454923e-06, "loss": 0.9705, "step": 4273 }, { "epoch": 0.35, "grad_norm": 3.3772672282559864, "learning_rate": 7.556342182370095e-06, "loss": 0.7269, "step": 4274 }, { "epoch": 0.35, "grad_norm": 4.815793144389638, "learning_rate": 7.5552044090968035e-06, "loss": 1.1305, "step": 4275 }, { "epoch": 0.35, "grad_norm": 4.397789333338332, "learning_rate": 7.554066456714804e-06, "loss": 0.7765, "step": 4276 }, { "epoch": 0.35, "grad_norm": 2.169891416483646, "learning_rate": 7.552928325303861e-06, "loss": 0.3282, "step": 4277 }, { "epoch": 0.35, "grad_norm": 2.905886108680131, "learning_rate": 7.551790014943752e-06, "loss": 0.6804, "step": 4278 }, { "epoch": 0.35, "grad_norm": 3.8025974684265913, "learning_rate": 7.55065152571427e-06, "loss": 0.7483, "step": 4279 }, { "epoch": 0.35, "grad_norm": 3.7342712449571205, "learning_rate": 7.549512857695216e-06, "loss": 0.7249, "step": 4280 }, { "epoch": 0.35, "grad_norm": 3.2459565813784925, "learning_rate": 7.5483740109664036e-06, "loss": 0.5911, "step": 4281 }, { "epoch": 0.35, "grad_norm": 2.5258338374757656, "learning_rate": 7.547234985607668e-06, "loss": 0.5009, "step": 4282 }, { "epoch": 0.35, "grad_norm": 5.486670744801288, "learning_rate": 7.546095781698842e-06, "loss": 1.0581, "step": 4283 }, { "epoch": 0.35, "grad_norm": 4.9298991349988945, "learning_rate": 7.544956399319785e-06, "loss": 1.068, "step": 4284 }, { "epoch": 0.35, "grad_norm": 4.172582064142233, "learning_rate": 7.543816838550359e-06, "loss": 1.1742, "step": 4285 }, { "epoch": 0.35, "grad_norm": 4.201667663325446, "learning_rate": 7.542677099470441e-06, "loss": 0.8472, "step": 4286 }, { "epoch": 0.35, "grad_norm": 3.1995176189199728, "learning_rate": 7.541537182159926e-06, "loss": 0.7892, "step": 4287 }, { "epoch": 0.35, "grad_norm": 2.328526607337528, "learning_rate": 7.540397086698716e-06, "loss": 0.7107, "step": 4288 }, { "epoch": 0.35, "grad_norm": 4.353118807139896, "learning_rate": 7.5392568131667265e-06, "loss": 0.8643, "step": 4289 }, { "epoch": 0.35, "grad_norm": 3.1174315933767485, "learning_rate": 7.538116361643883e-06, "loss": 0.5618, "step": 4290 }, { "epoch": 0.35, "grad_norm": 7.299874282257006, "learning_rate": 7.53697573221013e-06, "loss": 1.828, "step": 4291 }, { "epoch": 0.35, "grad_norm": 2.045071910752177, "learning_rate": 7.53583492494542e-06, "loss": 0.3556, "step": 4292 }, { "epoch": 0.35, "grad_norm": 4.386906087901318, "learning_rate": 7.534693939929716e-06, "loss": 0.9769, "step": 4293 }, { "epoch": 0.35, "grad_norm": 4.1939267691785265, "learning_rate": 7.533552777243e-06, "loss": 1.0888, "step": 4294 }, { "epoch": 0.35, "grad_norm": 2.7922405769680005, "learning_rate": 7.532411436965258e-06, "loss": 0.7094, "step": 4295 }, { "epoch": 0.35, "grad_norm": 3.0949378799917553, "learning_rate": 7.531269919176496e-06, "loss": 0.7065, "step": 4296 }, { "epoch": 0.35, "grad_norm": 3.3404622426864967, "learning_rate": 7.530128223956729e-06, "loss": 0.5209, "step": 4297 }, { "epoch": 0.35, "grad_norm": 3.6828821634625255, "learning_rate": 7.528986351385985e-06, "loss": 1.0418, "step": 4298 }, { "epoch": 0.35, "grad_norm": 2.3181216173240107, "learning_rate": 7.527844301544304e-06, "loss": 0.5293, "step": 4299 }, { "epoch": 0.35, "grad_norm": 4.96720934548505, "learning_rate": 7.526702074511738e-06, "loss": 1.2176, "step": 4300 }, { "epoch": 0.35, "grad_norm": 3.8152740806384444, "learning_rate": 7.5255596703683535e-06, "loss": 0.6621, "step": 4301 }, { "epoch": 0.35, "grad_norm": 3.3656367937880436, "learning_rate": 7.524417089194227e-06, "loss": 0.77, "step": 4302 }, { "epoch": 0.35, "grad_norm": 1.0161219249370117, "learning_rate": 7.523274331069449e-06, "loss": 0.1678, "step": 4303 }, { "epoch": 0.35, "grad_norm": 4.962457911789098, "learning_rate": 7.522131396074122e-06, "loss": 1.2677, "step": 4304 }, { "epoch": 0.35, "grad_norm": 3.7386987611806384, "learning_rate": 7.52098828428836e-06, "loss": 0.7596, "step": 4305 }, { "epoch": 0.35, "grad_norm": 3.930736277089549, "learning_rate": 7.5198449957922895e-06, "loss": 0.955, "step": 4306 }, { "epoch": 0.35, "grad_norm": 4.174764922991015, "learning_rate": 7.5187015306660524e-06, "loss": 0.7774, "step": 4307 }, { "epoch": 0.35, "grad_norm": 3.139106980850027, "learning_rate": 7.5175578889898016e-06, "loss": 0.7638, "step": 4308 }, { "epoch": 0.35, "grad_norm": 4.277844030429609, "learning_rate": 7.516414070843696e-06, "loss": 0.9127, "step": 4309 }, { "epoch": 0.35, "grad_norm": 3.377312607254998, "learning_rate": 7.515270076307917e-06, "loss": 0.88, "step": 4310 }, { "epoch": 0.35, "grad_norm": 4.753351823698591, "learning_rate": 7.514125905462651e-06, "loss": 1.3045, "step": 4311 }, { "epoch": 0.35, "grad_norm": 4.380965467013008, "learning_rate": 7.512981558388101e-06, "loss": 0.7882, "step": 4312 }, { "epoch": 0.35, "grad_norm": 4.472155010468368, "learning_rate": 7.51183703516448e-06, "loss": 0.7921, "step": 4313 }, { "epoch": 0.35, "grad_norm": 4.192913037162404, "learning_rate": 7.510692335872016e-06, "loss": 1.0474, "step": 4314 }, { "epoch": 0.35, "grad_norm": 3.2134565473107704, "learning_rate": 7.509547460590945e-06, "loss": 0.5618, "step": 4315 }, { "epoch": 0.35, "grad_norm": 4.918952899825416, "learning_rate": 7.508402409401519e-06, "loss": 1.2301, "step": 4316 }, { "epoch": 0.35, "grad_norm": 4.286764533220852, "learning_rate": 7.507257182384e-06, "loss": 0.6964, "step": 4317 }, { "epoch": 0.35, "grad_norm": 3.4183487003326625, "learning_rate": 7.506111779618663e-06, "loss": 0.8308, "step": 4318 }, { "epoch": 0.35, "grad_norm": 4.622961051435708, "learning_rate": 7.504966201185798e-06, "loss": 0.9083, "step": 4319 }, { "epoch": 0.35, "grad_norm": 3.51977662181751, "learning_rate": 7.503820447165705e-06, "loss": 0.6034, "step": 4320 }, { "epoch": 0.35, "grad_norm": 4.032466768649381, "learning_rate": 7.502674517638694e-06, "loss": 0.7514, "step": 4321 }, { "epoch": 0.35, "grad_norm": 3.2389907475732223, "learning_rate": 7.5015284126850915e-06, "loss": 0.6963, "step": 4322 }, { "epoch": 0.35, "grad_norm": 4.987600811240381, "learning_rate": 7.500382132385234e-06, "loss": 1.0207, "step": 4323 }, { "epoch": 0.35, "grad_norm": 4.066603624193605, "learning_rate": 7.499235676819471e-06, "loss": 0.8864, "step": 4324 }, { "epoch": 0.35, "grad_norm": 3.769839249998782, "learning_rate": 7.498089046068163e-06, "loss": 0.5207, "step": 4325 }, { "epoch": 0.35, "grad_norm": 3.4436628569051204, "learning_rate": 7.496942240211685e-06, "loss": 1.22, "step": 4326 }, { "epoch": 0.35, "grad_norm": 2.0388291037490998, "learning_rate": 7.495795259330422e-06, "loss": 0.6388, "step": 4327 }, { "epoch": 0.35, "grad_norm": 3.42893247686087, "learning_rate": 7.494648103504774e-06, "loss": 0.8951, "step": 4328 }, { "epoch": 0.35, "grad_norm": 2.7612822548289877, "learning_rate": 7.49350077281515e-06, "loss": 0.5806, "step": 4329 }, { "epoch": 0.35, "grad_norm": 4.706151510063128, "learning_rate": 7.492353267341974e-06, "loss": 0.8698, "step": 4330 }, { "epoch": 0.35, "grad_norm": 4.6983960971622984, "learning_rate": 7.491205587165682e-06, "loss": 1.1328, "step": 4331 }, { "epoch": 0.35, "grad_norm": 3.319249197795334, "learning_rate": 7.490057732366718e-06, "loss": 0.9426, "step": 4332 }, { "epoch": 0.35, "grad_norm": 3.5025170873982514, "learning_rate": 7.4889097030255445e-06, "loss": 0.5951, "step": 4333 }, { "epoch": 0.35, "grad_norm": 3.4219007121333407, "learning_rate": 7.487761499222632e-06, "loss": 0.339, "step": 4334 }, { "epoch": 0.35, "grad_norm": 4.367668946465433, "learning_rate": 7.486613121038466e-06, "loss": 1.0931, "step": 4335 }, { "epoch": 0.35, "grad_norm": 2.9355104713653724, "learning_rate": 7.485464568553541e-06, "loss": 0.5703, "step": 4336 }, { "epoch": 0.35, "grad_norm": 4.17807422897983, "learning_rate": 7.484315841848368e-06, "loss": 0.9853, "step": 4337 }, { "epoch": 0.35, "grad_norm": 5.043315819974853, "learning_rate": 7.483166941003466e-06, "loss": 1.1789, "step": 4338 }, { "epoch": 0.35, "grad_norm": 3.530950037640052, "learning_rate": 7.482017866099367e-06, "loss": 0.8051, "step": 4339 }, { "epoch": 0.35, "grad_norm": 2.8380019274677557, "learning_rate": 7.480868617216619e-06, "loss": 0.3348, "step": 4340 }, { "epoch": 0.35, "grad_norm": 3.6038676133727834, "learning_rate": 7.479719194435776e-06, "loss": 0.8044, "step": 4341 }, { "epoch": 0.35, "grad_norm": 4.0925376126716975, "learning_rate": 7.478569597837411e-06, "loss": 0.9227, "step": 4342 }, { "epoch": 0.35, "grad_norm": 3.696213275373554, "learning_rate": 7.4774198275021014e-06, "loss": 0.8679, "step": 4343 }, { "epoch": 0.36, "grad_norm": 3.7817964848829764, "learning_rate": 7.476269883510445e-06, "loss": 0.935, "step": 4344 }, { "epoch": 0.36, "grad_norm": 3.406957888921831, "learning_rate": 7.475119765943049e-06, "loss": 0.9647, "step": 4345 }, { "epoch": 0.36, "grad_norm": 4.5175991949471355, "learning_rate": 7.473969474880527e-06, "loss": 1.0671, "step": 4346 }, { "epoch": 0.36, "grad_norm": 4.442572440305692, "learning_rate": 7.47281901040351e-06, "loss": 1.2769, "step": 4347 }, { "epoch": 0.36, "grad_norm": 3.039003798061688, "learning_rate": 7.471668372592644e-06, "loss": 0.7816, "step": 4348 }, { "epoch": 0.36, "grad_norm": 3.356063338687145, "learning_rate": 7.470517561528582e-06, "loss": 0.8141, "step": 4349 }, { "epoch": 0.36, "grad_norm": 3.627272893200747, "learning_rate": 7.46936657729199e-06, "loss": 0.8789, "step": 4350 }, { "epoch": 0.36, "grad_norm": 3.165112986350278, "learning_rate": 7.4682154199635475e-06, "loss": 0.9576, "step": 4351 }, { "epoch": 0.36, "grad_norm": 2.786959812117681, "learning_rate": 7.467064089623945e-06, "loss": 0.676, "step": 4352 }, { "epoch": 0.36, "grad_norm": 4.344544708423713, "learning_rate": 7.465912586353888e-06, "loss": 1.0466, "step": 4353 }, { "epoch": 0.36, "grad_norm": 5.141295381304714, "learning_rate": 7.464760910234091e-06, "loss": 1.1653, "step": 4354 }, { "epoch": 0.36, "grad_norm": 5.106223819266664, "learning_rate": 7.463609061345279e-06, "loss": 1.2457, "step": 4355 }, { "epoch": 0.36, "grad_norm": 5.190753808346917, "learning_rate": 7.462457039768194e-06, "loss": 1.1127, "step": 4356 }, { "epoch": 0.36, "grad_norm": 2.536070663602797, "learning_rate": 7.461304845583588e-06, "loss": 0.5238, "step": 4357 }, { "epoch": 0.36, "grad_norm": 3.067446549558834, "learning_rate": 7.460152478872224e-06, "loss": 0.7506, "step": 4358 }, { "epoch": 0.36, "grad_norm": 4.767927207227846, "learning_rate": 7.458999939714876e-06, "loss": 0.849, "step": 4359 }, { "epoch": 0.36, "grad_norm": 3.1358705863995686, "learning_rate": 7.4578472281923356e-06, "loss": 0.3897, "step": 4360 }, { "epoch": 0.36, "grad_norm": 4.149045683865217, "learning_rate": 7.456694344385401e-06, "loss": 1.053, "step": 4361 }, { "epoch": 0.36, "grad_norm": 3.297878415611068, "learning_rate": 7.455541288374885e-06, "loss": 0.5006, "step": 4362 }, { "epoch": 0.36, "grad_norm": 4.493178783840786, "learning_rate": 7.45438806024161e-06, "loss": 0.8101, "step": 4363 }, { "epoch": 0.36, "grad_norm": 3.92095481738485, "learning_rate": 7.453234660066413e-06, "loss": 0.9906, "step": 4364 }, { "epoch": 0.36, "grad_norm": 2.954136088552313, "learning_rate": 7.452081087930143e-06, "loss": 0.81, "step": 4365 }, { "epoch": 0.36, "grad_norm": 2.68244091356062, "learning_rate": 7.450927343913661e-06, "loss": 0.541, "step": 4366 }, { "epoch": 0.36, "grad_norm": 4.627231427015078, "learning_rate": 7.449773428097838e-06, "loss": 0.9492, "step": 4367 }, { "epoch": 0.36, "grad_norm": 5.8169653497783305, "learning_rate": 7.44861934056356e-06, "loss": 0.6861, "step": 4368 }, { "epoch": 0.36, "grad_norm": 4.8882242598826835, "learning_rate": 7.447465081391722e-06, "loss": 1.1398, "step": 4369 }, { "epoch": 0.36, "grad_norm": 4.067463057219432, "learning_rate": 7.446310650663234e-06, "loss": 0.7872, "step": 4370 }, { "epoch": 0.36, "grad_norm": 2.0416188880316226, "learning_rate": 7.445156048459016e-06, "loss": 0.4258, "step": 4371 }, { "epoch": 0.36, "grad_norm": 3.8461591247025497, "learning_rate": 7.444001274859999e-06, "loss": 1.0479, "step": 4372 }, { "epoch": 0.36, "grad_norm": 2.6675086056877957, "learning_rate": 7.4428463299471285e-06, "loss": 0.4093, "step": 4373 }, { "epoch": 0.36, "grad_norm": 2.8086978839082044, "learning_rate": 7.441691213801363e-06, "loss": 0.7284, "step": 4374 }, { "epoch": 0.36, "grad_norm": 3.849463434709465, "learning_rate": 7.440535926503669e-06, "loss": 0.4524, "step": 4375 }, { "epoch": 0.36, "grad_norm": 3.878760788811572, "learning_rate": 7.439380468135029e-06, "loss": 0.7494, "step": 4376 }, { "epoch": 0.36, "grad_norm": 4.46426717186742, "learning_rate": 7.4382248387764335e-06, "loss": 0.8812, "step": 4377 }, { "epoch": 0.36, "grad_norm": 3.307437375750391, "learning_rate": 7.437069038508888e-06, "loss": 0.6284, "step": 4378 }, { "epoch": 0.36, "grad_norm": 5.498403905595587, "learning_rate": 7.435913067413409e-06, "loss": 1.0276, "step": 4379 }, { "epoch": 0.36, "grad_norm": 2.338473989667431, "learning_rate": 7.4347569255710254e-06, "loss": 0.4613, "step": 4380 }, { "epoch": 0.36, "grad_norm": 4.099509016808251, "learning_rate": 7.433600613062777e-06, "loss": 0.9861, "step": 4381 }, { "epoch": 0.36, "grad_norm": 4.289778871357129, "learning_rate": 7.432444129969717e-06, "loss": 0.823, "step": 4382 }, { "epoch": 0.36, "grad_norm": 4.002809802943864, "learning_rate": 7.431287476372909e-06, "loss": 0.9164, "step": 4383 }, { "epoch": 0.36, "grad_norm": 3.526139381340818, "learning_rate": 7.43013065235343e-06, "loss": 0.7189, "step": 4384 }, { "epoch": 0.36, "grad_norm": 3.7695362431841057, "learning_rate": 7.4289736579923685e-06, "loss": 0.7519, "step": 4385 }, { "epoch": 0.36, "grad_norm": 3.6250804019629017, "learning_rate": 7.427816493370825e-06, "loss": 0.8639, "step": 4386 }, { "epoch": 0.36, "grad_norm": 4.744309483105921, "learning_rate": 7.426659158569911e-06, "loss": 0.9766, "step": 4387 }, { "epoch": 0.36, "grad_norm": 4.399109856726638, "learning_rate": 7.425501653670751e-06, "loss": 1.0083, "step": 4388 }, { "epoch": 0.36, "grad_norm": 4.538105144605974, "learning_rate": 7.4243439787544805e-06, "loss": 1.0412, "step": 4389 }, { "epoch": 0.36, "grad_norm": 4.200788192853599, "learning_rate": 7.423186133902247e-06, "loss": 1.1422, "step": 4390 }, { "epoch": 0.36, "grad_norm": 3.5083238263558525, "learning_rate": 7.422028119195213e-06, "loss": 0.6474, "step": 4391 }, { "epoch": 0.36, "grad_norm": 2.88018038458345, "learning_rate": 7.420869934714548e-06, "loss": 0.6862, "step": 4392 }, { "epoch": 0.36, "grad_norm": 4.0792825826252725, "learning_rate": 7.419711580541436e-06, "loss": 1.0469, "step": 4393 }, { "epoch": 0.36, "grad_norm": 1.1807818355369477, "learning_rate": 7.418553056757072e-06, "loss": 0.1271, "step": 4394 }, { "epoch": 0.36, "grad_norm": 2.50969090143382, "learning_rate": 7.417394363442665e-06, "loss": 0.5851, "step": 4395 }, { "epoch": 0.36, "grad_norm": 2.4359477881273057, "learning_rate": 7.416235500679433e-06, "loss": 0.5199, "step": 4396 }, { "epoch": 0.36, "grad_norm": 3.6176376746670478, "learning_rate": 7.41507646854861e-06, "loss": 0.7529, "step": 4397 }, { "epoch": 0.36, "grad_norm": 2.4386064681048185, "learning_rate": 7.4139172671314344e-06, "loss": 0.3244, "step": 4398 }, { "epoch": 0.36, "grad_norm": 3.6355116762246937, "learning_rate": 7.412757896509164e-06, "loss": 0.9841, "step": 4399 }, { "epoch": 0.36, "grad_norm": 4.632425199290458, "learning_rate": 7.411598356763068e-06, "loss": 1.095, "step": 4400 }, { "epoch": 0.36, "grad_norm": 2.1940908728026596, "learning_rate": 7.41043864797442e-06, "loss": 0.568, "step": 4401 }, { "epoch": 0.36, "grad_norm": 4.46000689781777, "learning_rate": 7.409278770224515e-06, "loss": 1.1823, "step": 4402 }, { "epoch": 0.36, "grad_norm": 3.912980192437, "learning_rate": 7.4081187235946515e-06, "loss": 0.8146, "step": 4403 }, { "epoch": 0.36, "grad_norm": 3.9160335596719977, "learning_rate": 7.406958508166147e-06, "loss": 0.8216, "step": 4404 }, { "epoch": 0.36, "grad_norm": 4.55346932823823, "learning_rate": 7.405798124020326e-06, "loss": 0.9, "step": 4405 }, { "epoch": 0.36, "grad_norm": 2.8839523637298212, "learning_rate": 7.4046375712385256e-06, "loss": 0.6489, "step": 4406 }, { "epoch": 0.36, "grad_norm": 2.26364676153241, "learning_rate": 7.403476849902096e-06, "loss": 0.5996, "step": 4407 }, { "epoch": 0.36, "grad_norm": 5.13212097104461, "learning_rate": 7.402315960092401e-06, "loss": 1.1889, "step": 4408 }, { "epoch": 0.36, "grad_norm": 4.462942185439612, "learning_rate": 7.401154901890812e-06, "loss": 0.8067, "step": 4409 }, { "epoch": 0.36, "grad_norm": 3.815555272242939, "learning_rate": 7.399993675378714e-06, "loss": 0.7739, "step": 4410 }, { "epoch": 0.36, "grad_norm": 5.445708292478904, "learning_rate": 7.398832280637504e-06, "loss": 1.3827, "step": 4411 }, { "epoch": 0.36, "grad_norm": 3.00619628143818, "learning_rate": 7.397670717748591e-06, "loss": 0.4527, "step": 4412 }, { "epoch": 0.36, "grad_norm": 3.454879259467784, "learning_rate": 7.396508986793393e-06, "loss": 0.7629, "step": 4413 }, { "epoch": 0.36, "grad_norm": 3.8921905481022745, "learning_rate": 7.395347087853349e-06, "loss": 0.7617, "step": 4414 }, { "epoch": 0.36, "grad_norm": 3.457760151179242, "learning_rate": 7.394185021009895e-06, "loss": 0.8917, "step": 4415 }, { "epoch": 0.36, "grad_norm": 2.1954274713742685, "learning_rate": 7.393022786344492e-06, "loss": 0.4957, "step": 4416 }, { "epoch": 0.36, "grad_norm": 4.4738524987507375, "learning_rate": 7.391860383938607e-06, "loss": 0.8411, "step": 4417 }, { "epoch": 0.36, "grad_norm": 3.476168980159589, "learning_rate": 7.390697813873718e-06, "loss": 0.6591, "step": 4418 }, { "epoch": 0.36, "grad_norm": 4.836743690795182, "learning_rate": 7.389535076231315e-06, "loss": 1.1051, "step": 4419 }, { "epoch": 0.36, "grad_norm": 3.70651116649257, "learning_rate": 7.3883721710929045e-06, "loss": 1.0457, "step": 4420 }, { "epoch": 0.36, "grad_norm": 1.7965196368932832, "learning_rate": 7.387209098539998e-06, "loss": 0.2635, "step": 4421 }, { "epoch": 0.36, "grad_norm": 4.633654664167891, "learning_rate": 7.386045858654123e-06, "loss": 1.1022, "step": 4422 }, { "epoch": 0.36, "grad_norm": 3.2827126308298826, "learning_rate": 7.384882451516817e-06, "loss": 0.7331, "step": 4423 }, { "epoch": 0.36, "grad_norm": 4.420810694777215, "learning_rate": 7.383718877209631e-06, "loss": 1.0322, "step": 4424 }, { "epoch": 0.36, "grad_norm": 2.4038382834630596, "learning_rate": 7.382555135814126e-06, "loss": 0.4581, "step": 4425 }, { "epoch": 0.36, "grad_norm": 6.085944848338009, "learning_rate": 7.381391227411875e-06, "loss": 1.3082, "step": 4426 }, { "epoch": 0.36, "grad_norm": 3.6484173790981114, "learning_rate": 7.380227152084461e-06, "loss": 0.6663, "step": 4427 }, { "epoch": 0.36, "grad_norm": 2.625304518689477, "learning_rate": 7.379062909913484e-06, "loss": 0.3384, "step": 4428 }, { "epoch": 0.36, "grad_norm": 5.806506871861837, "learning_rate": 7.37789850098055e-06, "loss": 1.1255, "step": 4429 }, { "epoch": 0.36, "grad_norm": 2.0405473109459544, "learning_rate": 7.37673392536728e-06, "loss": 0.5441, "step": 4430 }, { "epoch": 0.36, "grad_norm": 3.217603664239242, "learning_rate": 7.375569183155306e-06, "loss": 0.6702, "step": 4431 }, { "epoch": 0.36, "grad_norm": 3.465887043027505, "learning_rate": 7.37440427442627e-06, "loss": 0.8486, "step": 4432 }, { "epoch": 0.36, "grad_norm": 4.5703859822195945, "learning_rate": 7.373239199261828e-06, "loss": 0.9924, "step": 4433 }, { "epoch": 0.36, "grad_norm": 3.86403338734041, "learning_rate": 7.372073957743646e-06, "loss": 1.0849, "step": 4434 }, { "epoch": 0.36, "grad_norm": 4.8492373869585315, "learning_rate": 7.370908549953404e-06, "loss": 0.9398, "step": 4435 }, { "epoch": 0.36, "grad_norm": 4.111972033867977, "learning_rate": 7.369742975972789e-06, "loss": 0.7484, "step": 4436 }, { "epoch": 0.36, "grad_norm": 3.7084509760487485, "learning_rate": 7.368577235883508e-06, "loss": 0.7273, "step": 4437 }, { "epoch": 0.36, "grad_norm": 4.40040600697199, "learning_rate": 7.367411329767267e-06, "loss": 0.8338, "step": 4438 }, { "epoch": 0.36, "grad_norm": 4.070106174285915, "learning_rate": 7.366245257705798e-06, "loss": 0.5525, "step": 4439 }, { "epoch": 0.36, "grad_norm": 2.343881665537808, "learning_rate": 7.365079019780832e-06, "loss": 0.5944, "step": 4440 }, { "epoch": 0.36, "grad_norm": 4.12321303109076, "learning_rate": 7.36391261607412e-06, "loss": 1.0556, "step": 4441 }, { "epoch": 0.36, "grad_norm": 3.297429956371091, "learning_rate": 7.3627460466674215e-06, "loss": 0.8856, "step": 4442 }, { "epoch": 0.36, "grad_norm": 4.004084573771542, "learning_rate": 7.361579311642508e-06, "loss": 0.8428, "step": 4443 }, { "epoch": 0.36, "grad_norm": 4.137291119020999, "learning_rate": 7.360412411081163e-06, "loss": 0.8561, "step": 4444 }, { "epoch": 0.36, "grad_norm": 4.425871984712271, "learning_rate": 7.35924534506518e-06, "loss": 1.0632, "step": 4445 }, { "epoch": 0.36, "grad_norm": 4.263514033800204, "learning_rate": 7.3580781136763656e-06, "loss": 0.9843, "step": 4446 }, { "epoch": 0.36, "grad_norm": 1.3638618591766134, "learning_rate": 7.356910716996538e-06, "loss": 0.2298, "step": 4447 }, { "epoch": 0.36, "grad_norm": 2.188339227415263, "learning_rate": 7.355743155107526e-06, "loss": 0.3663, "step": 4448 }, { "epoch": 0.36, "grad_norm": 3.7041795126120665, "learning_rate": 7.354575428091172e-06, "loss": 0.7665, "step": 4449 }, { "epoch": 0.36, "grad_norm": 4.401883725066224, "learning_rate": 7.353407536029327e-06, "loss": 0.7469, "step": 4450 }, { "epoch": 0.36, "grad_norm": 3.841773200375602, "learning_rate": 7.352239479003857e-06, "loss": 0.9938, "step": 4451 }, { "epoch": 0.36, "grad_norm": 4.715651076061036, "learning_rate": 7.351071257096634e-06, "loss": 1.2306, "step": 4452 }, { "epoch": 0.36, "grad_norm": 5.563175322043322, "learning_rate": 7.349902870389549e-06, "loss": 1.028, "step": 4453 }, { "epoch": 0.36, "grad_norm": 2.465074526425466, "learning_rate": 7.3487343189645e-06, "loss": 0.3103, "step": 4454 }, { "epoch": 0.36, "grad_norm": 3.055709724006543, "learning_rate": 7.347565602903397e-06, "loss": 0.6235, "step": 4455 }, { "epoch": 0.36, "grad_norm": 3.656519516509362, "learning_rate": 7.346396722288162e-06, "loss": 0.679, "step": 4456 }, { "epoch": 0.36, "grad_norm": 3.9727904646221956, "learning_rate": 7.345227677200728e-06, "loss": 0.5582, "step": 4457 }, { "epoch": 0.36, "grad_norm": 2.824112013113073, "learning_rate": 7.34405846772304e-06, "loss": 0.798, "step": 4458 }, { "epoch": 0.36, "grad_norm": 1.1629289610763556, "learning_rate": 7.3428890939370545e-06, "loss": 0.1277, "step": 4459 }, { "epoch": 0.36, "grad_norm": 4.477409798493978, "learning_rate": 7.341719555924741e-06, "loss": 0.7387, "step": 4460 }, { "epoch": 0.36, "grad_norm": 3.1155639227406398, "learning_rate": 7.3405498537680765e-06, "loss": 0.5929, "step": 4461 }, { "epoch": 0.36, "grad_norm": 3.3179173250112637, "learning_rate": 7.339379987549054e-06, "loss": 0.5323, "step": 4462 }, { "epoch": 0.36, "grad_norm": 3.758447610320383, "learning_rate": 7.338209957349677e-06, "loss": 1.1001, "step": 4463 }, { "epoch": 0.36, "grad_norm": 2.9869551230787956, "learning_rate": 7.337039763251956e-06, "loss": 0.5098, "step": 4464 }, { "epoch": 0.36, "grad_norm": 4.228207987999737, "learning_rate": 7.335869405337919e-06, "loss": 1.0302, "step": 4465 }, { "epoch": 0.37, "grad_norm": 5.227773063389464, "learning_rate": 7.334698883689601e-06, "loss": 1.0155, "step": 4466 }, { "epoch": 0.37, "grad_norm": 1.9999135138495103, "learning_rate": 7.333528198389053e-06, "loss": 0.3648, "step": 4467 }, { "epoch": 0.37, "grad_norm": 4.16060025922408, "learning_rate": 7.332357349518334e-06, "loss": 0.6752, "step": 4468 }, { "epoch": 0.37, "grad_norm": 4.04076814093737, "learning_rate": 7.331186337159515e-06, "loss": 1.1512, "step": 4469 }, { "epoch": 0.37, "grad_norm": 4.613831500731791, "learning_rate": 7.3300151613946805e-06, "loss": 0.9678, "step": 4470 }, { "epoch": 0.37, "grad_norm": 4.1838442374567535, "learning_rate": 7.328843822305922e-06, "loss": 0.7081, "step": 4471 }, { "epoch": 0.37, "grad_norm": 3.4458421972312783, "learning_rate": 7.327672319975348e-06, "loss": 0.8107, "step": 4472 }, { "epoch": 0.37, "grad_norm": 2.2583030157807977, "learning_rate": 7.326500654485071e-06, "loss": 0.5454, "step": 4473 }, { "epoch": 0.37, "grad_norm": 4.715040107956233, "learning_rate": 7.325328825917226e-06, "loss": 1.1163, "step": 4474 }, { "epoch": 0.37, "grad_norm": 2.0790611672845545, "learning_rate": 7.3241568343539505e-06, "loss": 0.5023, "step": 4475 }, { "epoch": 0.37, "grad_norm": 2.4918898891044106, "learning_rate": 7.322984679877394e-06, "loss": 0.4306, "step": 4476 }, { "epoch": 0.37, "grad_norm": 1.8286276573382796, "learning_rate": 7.3218123625697225e-06, "loss": 0.4407, "step": 4477 }, { "epoch": 0.37, "grad_norm": 1.8752323357289453, "learning_rate": 7.320639882513108e-06, "loss": 0.4156, "step": 4478 }, { "epoch": 0.37, "grad_norm": 3.1699152957715624, "learning_rate": 7.319467239789738e-06, "loss": 0.7109, "step": 4479 }, { "epoch": 0.37, "grad_norm": 4.585118381742184, "learning_rate": 7.318294434481808e-06, "loss": 1.0924, "step": 4480 }, { "epoch": 0.37, "grad_norm": 3.345711180050005, "learning_rate": 7.317121466671528e-06, "loss": 0.827, "step": 4481 }, { "epoch": 0.37, "grad_norm": 2.9439381191619036, "learning_rate": 7.3159483364411175e-06, "loss": 0.6872, "step": 4482 }, { "epoch": 0.37, "grad_norm": 2.0173118862391557, "learning_rate": 7.314775043872807e-06, "loss": 0.4207, "step": 4483 }, { "epoch": 0.37, "grad_norm": 2.989630248205145, "learning_rate": 7.31360158904884e-06, "loss": 0.7898, "step": 4484 }, { "epoch": 0.37, "grad_norm": 4.960866870194185, "learning_rate": 7.3124279720514715e-06, "loss": 0.8696, "step": 4485 }, { "epoch": 0.37, "grad_norm": 3.2405716519452077, "learning_rate": 7.311254192962964e-06, "loss": 0.716, "step": 4486 }, { "epoch": 0.37, "grad_norm": 3.4706557443470807, "learning_rate": 7.3100802518655975e-06, "loss": 0.8487, "step": 4487 }, { "epoch": 0.37, "grad_norm": 4.719720262071688, "learning_rate": 7.308906148841659e-06, "loss": 1.1216, "step": 4488 }, { "epoch": 0.37, "grad_norm": 4.172493649847887, "learning_rate": 7.307731883973447e-06, "loss": 0.6986, "step": 4489 }, { "epoch": 0.37, "grad_norm": 2.934165336710921, "learning_rate": 7.306557457343273e-06, "loss": 0.554, "step": 4490 }, { "epoch": 0.37, "grad_norm": 4.820002123608001, "learning_rate": 7.30538286903346e-06, "loss": 0.6866, "step": 4491 }, { "epoch": 0.37, "grad_norm": 4.646670289136569, "learning_rate": 7.3042081191263415e-06, "loss": 1.1311, "step": 4492 }, { "epoch": 0.37, "grad_norm": 2.7813068574299105, "learning_rate": 7.30303320770426e-06, "loss": 0.4627, "step": 4493 }, { "epoch": 0.37, "grad_norm": 1.7240575636139102, "learning_rate": 7.301858134849575e-06, "loss": 0.2563, "step": 4494 }, { "epoch": 0.37, "grad_norm": 4.305120771122525, "learning_rate": 7.3006829006446535e-06, "loss": 0.8042, "step": 4495 }, { "epoch": 0.37, "grad_norm": 5.261801668048653, "learning_rate": 7.299507505171871e-06, "loss": 1.3942, "step": 4496 }, { "epoch": 0.37, "grad_norm": 4.204142139929167, "learning_rate": 7.298331948513622e-06, "loss": 1.1927, "step": 4497 }, { "epoch": 0.37, "grad_norm": 4.044871850015931, "learning_rate": 7.297156230752303e-06, "loss": 1.2654, "step": 4498 }, { "epoch": 0.37, "grad_norm": 5.337850294767246, "learning_rate": 7.295980351970331e-06, "loss": 0.908, "step": 4499 }, { "epoch": 0.37, "grad_norm": 3.053857052514148, "learning_rate": 7.29480431225013e-06, "loss": 0.6775, "step": 4500 }, { "epoch": 0.37, "grad_norm": 4.949137459079822, "learning_rate": 7.2936281116741314e-06, "loss": 0.9686, "step": 4501 }, { "epoch": 0.37, "grad_norm": 4.037736967540822, "learning_rate": 7.292451750324785e-06, "loss": 0.7931, "step": 4502 }, { "epoch": 0.37, "grad_norm": 4.3779809612700555, "learning_rate": 7.291275228284549e-06, "loss": 0.9467, "step": 4503 }, { "epoch": 0.37, "grad_norm": 3.418346099332919, "learning_rate": 7.290098545635889e-06, "loss": 0.7067, "step": 4504 }, { "epoch": 0.37, "grad_norm": 2.232539189776654, "learning_rate": 7.288921702461289e-06, "loss": 0.3651, "step": 4505 }, { "epoch": 0.37, "grad_norm": 3.5749360072395358, "learning_rate": 7.287744698843237e-06, "loss": 0.7257, "step": 4506 }, { "epoch": 0.37, "grad_norm": 2.0033083083553005, "learning_rate": 7.28656753486424e-06, "loss": 0.3874, "step": 4507 }, { "epoch": 0.37, "grad_norm": 5.740670590431987, "learning_rate": 7.285390210606809e-06, "loss": 1.3633, "step": 4508 }, { "epoch": 0.37, "grad_norm": 3.5485675390190594, "learning_rate": 7.28421272615347e-06, "loss": 0.8501, "step": 4509 }, { "epoch": 0.37, "grad_norm": 3.9958369179935214, "learning_rate": 7.283035081586761e-06, "loss": 0.9872, "step": 4510 }, { "epoch": 0.37, "grad_norm": 4.60533465076516, "learning_rate": 7.281857276989228e-06, "loss": 1.1645, "step": 4511 }, { "epoch": 0.37, "grad_norm": 3.431379925508597, "learning_rate": 7.28067931244343e-06, "loss": 0.8068, "step": 4512 }, { "epoch": 0.37, "grad_norm": 4.452343209098598, "learning_rate": 7.279501188031939e-06, "loss": 1.1694, "step": 4513 }, { "epoch": 0.37, "grad_norm": 3.6836445048120234, "learning_rate": 7.278322903837334e-06, "loss": 0.7585, "step": 4514 }, { "epoch": 0.37, "grad_norm": 2.9059582544576688, "learning_rate": 7.2771444599422096e-06, "loss": 0.6519, "step": 4515 }, { "epoch": 0.37, "grad_norm": 4.883169272294483, "learning_rate": 7.275965856429167e-06, "loss": 0.7735, "step": 4516 }, { "epoch": 0.37, "grad_norm": 2.639805399214749, "learning_rate": 7.274787093380825e-06, "loss": 0.7228, "step": 4517 }, { "epoch": 0.37, "grad_norm": 3.0174849334069562, "learning_rate": 7.273608170879807e-06, "loss": 0.8193, "step": 4518 }, { "epoch": 0.37, "grad_norm": 4.095551984163171, "learning_rate": 7.27242908900875e-06, "loss": 0.9427, "step": 4519 }, { "epoch": 0.37, "grad_norm": 1.704871089442316, "learning_rate": 7.271249847850306e-06, "loss": 0.429, "step": 4520 }, { "epoch": 0.37, "grad_norm": 4.820630439574544, "learning_rate": 7.27007044748713e-06, "loss": 0.9946, "step": 4521 }, { "epoch": 0.37, "grad_norm": 3.82734560676249, "learning_rate": 7.268890888001896e-06, "loss": 0.9962, "step": 4522 }, { "epoch": 0.37, "grad_norm": 3.9606092407357973, "learning_rate": 7.267711169477284e-06, "loss": 0.8341, "step": 4523 }, { "epoch": 0.37, "grad_norm": 3.245772471440562, "learning_rate": 7.266531291995989e-06, "loss": 0.6565, "step": 4524 }, { "epoch": 0.37, "grad_norm": 3.211696537207009, "learning_rate": 7.265351255640713e-06, "loss": 0.983, "step": 4525 }, { "epoch": 0.37, "grad_norm": 4.468724734753304, "learning_rate": 7.2641710604941754e-06, "loss": 0.8992, "step": 4526 }, { "epoch": 0.37, "grad_norm": 4.586107456241061, "learning_rate": 7.262990706639097e-06, "loss": 1.0833, "step": 4527 }, { "epoch": 0.37, "grad_norm": 4.473517836636879, "learning_rate": 7.261810194158221e-06, "loss": 0.8026, "step": 4528 }, { "epoch": 0.37, "grad_norm": 3.502267514552507, "learning_rate": 7.260629523134293e-06, "loss": 0.8131, "step": 4529 }, { "epoch": 0.37, "grad_norm": 2.220683416304137, "learning_rate": 7.259448693650073e-06, "loss": 0.42, "step": 4530 }, { "epoch": 0.37, "grad_norm": 2.8593087635128094, "learning_rate": 7.258267705788334e-06, "loss": 0.3321, "step": 4531 }, { "epoch": 0.37, "grad_norm": 2.6463886519185356, "learning_rate": 7.2570865596318565e-06, "loss": 0.5994, "step": 4532 }, { "epoch": 0.37, "grad_norm": 3.4815018579928054, "learning_rate": 7.255905255263434e-06, "loss": 0.6225, "step": 4533 }, { "epoch": 0.37, "grad_norm": 4.056716362932539, "learning_rate": 7.254723792765872e-06, "loss": 1.0677, "step": 4534 }, { "epoch": 0.37, "grad_norm": 4.332364971141776, "learning_rate": 7.253542172221982e-06, "loss": 0.7248, "step": 4535 }, { "epoch": 0.37, "grad_norm": 3.471393689780005, "learning_rate": 7.252360393714595e-06, "loss": 0.8082, "step": 4536 }, { "epoch": 0.37, "grad_norm": 3.980817164222064, "learning_rate": 7.251178457326547e-06, "loss": 0.6879, "step": 4537 }, { "epoch": 0.37, "grad_norm": 4.319603041963911, "learning_rate": 7.249996363140686e-06, "loss": 0.7517, "step": 4538 }, { "epoch": 0.37, "grad_norm": 3.220525399600135, "learning_rate": 7.24881411123987e-06, "loss": 0.9474, "step": 4539 }, { "epoch": 0.37, "grad_norm": 3.4638507168662445, "learning_rate": 7.247631701706974e-06, "loss": 0.6042, "step": 4540 }, { "epoch": 0.37, "grad_norm": 2.2917495569563706, "learning_rate": 7.246449134624878e-06, "loss": 0.2501, "step": 4541 }, { "epoch": 0.37, "grad_norm": 2.9933586455885357, "learning_rate": 7.2452664100764725e-06, "loss": 0.5591, "step": 4542 }, { "epoch": 0.37, "grad_norm": 3.604732548760846, "learning_rate": 7.244083528144663e-06, "loss": 0.6803, "step": 4543 }, { "epoch": 0.37, "grad_norm": 4.220440325230384, "learning_rate": 7.242900488912364e-06, "loss": 0.7553, "step": 4544 }, { "epoch": 0.37, "grad_norm": 4.861147346123085, "learning_rate": 7.241717292462505e-06, "loss": 1.7752, "step": 4545 }, { "epoch": 0.37, "grad_norm": 4.233779493000437, "learning_rate": 7.240533938878016e-06, "loss": 0.6548, "step": 4546 }, { "epoch": 0.37, "grad_norm": 5.0855895495993035, "learning_rate": 7.239350428241851e-06, "loss": 1.3626, "step": 4547 }, { "epoch": 0.37, "grad_norm": 4.136242258149596, "learning_rate": 7.238166760636966e-06, "loss": 1.0945, "step": 4548 }, { "epoch": 0.37, "grad_norm": 2.536209408736868, "learning_rate": 7.236982936146332e-06, "loss": 0.3704, "step": 4549 }, { "epoch": 0.37, "grad_norm": 2.679206527422662, "learning_rate": 7.235798954852929e-06, "loss": 0.4141, "step": 4550 }, { "epoch": 0.37, "grad_norm": 5.609558168860129, "learning_rate": 7.23461481683975e-06, "loss": 1.0022, "step": 4551 }, { "epoch": 0.37, "grad_norm": 3.9874756521860313, "learning_rate": 7.233430522189797e-06, "loss": 0.8539, "step": 4552 }, { "epoch": 0.37, "grad_norm": 4.117818020127755, "learning_rate": 7.232246070986084e-06, "loss": 0.8997, "step": 4553 }, { "epoch": 0.37, "grad_norm": 3.8538029708155297, "learning_rate": 7.2310614633116376e-06, "loss": 0.7597, "step": 4554 }, { "epoch": 0.37, "grad_norm": 3.0481157383577426, "learning_rate": 7.22987669924949e-06, "loss": 0.6272, "step": 4555 }, { "epoch": 0.37, "grad_norm": 3.4563412200333063, "learning_rate": 7.2286917788826926e-06, "loss": 0.8794, "step": 4556 }, { "epoch": 0.37, "grad_norm": 5.077233686569749, "learning_rate": 7.2275067022943005e-06, "loss": 1.1632, "step": 4557 }, { "epoch": 0.37, "grad_norm": 3.0498537414819245, "learning_rate": 7.226321469567381e-06, "loss": 0.4963, "step": 4558 }, { "epoch": 0.37, "grad_norm": 4.4327224589889855, "learning_rate": 7.225136080785016e-06, "loss": 1.0744, "step": 4559 }, { "epoch": 0.37, "grad_norm": 3.911606630606663, "learning_rate": 7.223950536030297e-06, "loss": 1.0902, "step": 4560 }, { "epoch": 0.37, "grad_norm": 5.122772441786447, "learning_rate": 7.2227648353863225e-06, "loss": 1.3166, "step": 4561 }, { "epoch": 0.37, "grad_norm": 4.281493251686354, "learning_rate": 7.221578978936207e-06, "loss": 0.7545, "step": 4562 }, { "epoch": 0.37, "grad_norm": 3.8442070070178835, "learning_rate": 7.220392966763072e-06, "loss": 0.7848, "step": 4563 }, { "epoch": 0.37, "grad_norm": 4.704511580743091, "learning_rate": 7.219206798950056e-06, "loss": 0.9293, "step": 4564 }, { "epoch": 0.37, "grad_norm": 3.8800793017540376, "learning_rate": 7.218020475580301e-06, "loss": 0.7855, "step": 4565 }, { "epoch": 0.37, "grad_norm": 4.285617047839879, "learning_rate": 7.216833996736963e-06, "loss": 0.8195, "step": 4566 }, { "epoch": 0.37, "grad_norm": 5.473580101703317, "learning_rate": 7.2156473625032075e-06, "loss": 1.4403, "step": 4567 }, { "epoch": 0.37, "grad_norm": 4.510327755197747, "learning_rate": 7.2144605729622166e-06, "loss": 1.1779, "step": 4568 }, { "epoch": 0.37, "grad_norm": 3.0337100328946316, "learning_rate": 7.213273628197176e-06, "loss": 0.642, "step": 4569 }, { "epoch": 0.37, "grad_norm": 4.993271610598595, "learning_rate": 7.212086528291286e-06, "loss": 0.9385, "step": 4570 }, { "epoch": 0.37, "grad_norm": 3.5938483151970684, "learning_rate": 7.2108992733277595e-06, "loss": 0.5875, "step": 4571 }, { "epoch": 0.37, "grad_norm": 2.905467746768345, "learning_rate": 7.209711863389815e-06, "loss": 0.7408, "step": 4572 }, { "epoch": 0.37, "grad_norm": 4.3494342996801905, "learning_rate": 7.208524298560684e-06, "loss": 0.7769, "step": 4573 }, { "epoch": 0.37, "grad_norm": 3.3883729246932255, "learning_rate": 7.207336578923613e-06, "loss": 0.5969, "step": 4574 }, { "epoch": 0.37, "grad_norm": 1.1952773938067043, "learning_rate": 7.206148704561853e-06, "loss": 0.2003, "step": 4575 }, { "epoch": 0.37, "grad_norm": 3.2361353989913852, "learning_rate": 7.20496067555867e-06, "loss": 0.7028, "step": 4576 }, { "epoch": 0.37, "grad_norm": 3.4319513704107956, "learning_rate": 7.20377249199734e-06, "loss": 0.6851, "step": 4577 }, { "epoch": 0.37, "grad_norm": 4.76818205154338, "learning_rate": 7.2025841539611485e-06, "loss": 0.7971, "step": 4578 }, { "epoch": 0.37, "grad_norm": 2.088168301961054, "learning_rate": 7.201395661533395e-06, "loss": 0.3357, "step": 4579 }, { "epoch": 0.37, "grad_norm": 4.965093420537595, "learning_rate": 7.200207014797385e-06, "loss": 0.8798, "step": 4580 }, { "epoch": 0.37, "grad_norm": 3.1078580794068604, "learning_rate": 7.19901821383644e-06, "loss": 0.4883, "step": 4581 }, { "epoch": 0.37, "grad_norm": 3.9574296718617967, "learning_rate": 7.197829258733886e-06, "loss": 1.0293, "step": 4582 }, { "epoch": 0.37, "grad_norm": 3.736266789641212, "learning_rate": 7.1966401495730675e-06, "loss": 0.6452, "step": 4583 }, { "epoch": 0.37, "grad_norm": 2.842006382916651, "learning_rate": 7.195450886437334e-06, "loss": 0.3723, "step": 4584 }, { "epoch": 0.37, "grad_norm": 3.885450874992221, "learning_rate": 7.1942614694100476e-06, "loss": 0.8921, "step": 4585 }, { "epoch": 0.37, "grad_norm": 4.33205324545829, "learning_rate": 7.1930718985745815e-06, "loss": 1.0317, "step": 4586 }, { "epoch": 0.37, "grad_norm": 3.603549135962034, "learning_rate": 7.1918821740143196e-06, "loss": 0.88, "step": 4587 }, { "epoch": 0.38, "grad_norm": 2.2702414115235223, "learning_rate": 7.190692295812658e-06, "loss": 0.4342, "step": 4588 }, { "epoch": 0.38, "grad_norm": 3.7315755603115504, "learning_rate": 7.189502264053e-06, "loss": 0.8002, "step": 4589 }, { "epoch": 0.38, "grad_norm": 5.350066189445501, "learning_rate": 7.188312078818761e-06, "loss": 1.4728, "step": 4590 }, { "epoch": 0.38, "grad_norm": 3.6486634707171044, "learning_rate": 7.18712174019337e-06, "loss": 0.8281, "step": 4591 }, { "epoch": 0.38, "grad_norm": 3.2919211998095594, "learning_rate": 7.185931248260262e-06, "loss": 0.78, "step": 4592 }, { "epoch": 0.38, "grad_norm": 3.5585559243371536, "learning_rate": 7.1847406031028866e-06, "loss": 0.4944, "step": 4593 }, { "epoch": 0.38, "grad_norm": 2.0813044061523454, "learning_rate": 7.183549804804704e-06, "loss": 0.3836, "step": 4594 }, { "epoch": 0.38, "grad_norm": 2.3181687589362014, "learning_rate": 7.182358853449183e-06, "loss": 0.4667, "step": 4595 }, { "epoch": 0.38, "grad_norm": 2.826918066825633, "learning_rate": 7.181167749119804e-06, "loss": 0.5078, "step": 4596 }, { "epoch": 0.38, "grad_norm": 4.630806612184872, "learning_rate": 7.179976491900058e-06, "loss": 1.3874, "step": 4597 }, { "epoch": 0.38, "grad_norm": 3.0602457597906483, "learning_rate": 7.178785081873446e-06, "loss": 0.5455, "step": 4598 }, { "epoch": 0.38, "grad_norm": 2.5222743372079512, "learning_rate": 7.177593519123483e-06, "loss": 0.4905, "step": 4599 }, { "epoch": 0.38, "grad_norm": 3.188427047383891, "learning_rate": 7.176401803733691e-06, "loss": 0.7658, "step": 4600 }, { "epoch": 0.38, "grad_norm": 3.7162178644039856, "learning_rate": 7.175209935787605e-06, "loss": 0.9036, "step": 4601 }, { "epoch": 0.38, "grad_norm": 4.602496845755888, "learning_rate": 7.174017915368769e-06, "loss": 0.9856, "step": 4602 }, { "epoch": 0.38, "grad_norm": 3.381026091099391, "learning_rate": 7.172825742560737e-06, "loss": 0.6102, "step": 4603 }, { "epoch": 0.38, "grad_norm": 4.774135585966469, "learning_rate": 7.171633417447078e-06, "loss": 0.8705, "step": 4604 }, { "epoch": 0.38, "grad_norm": 2.7188104057779228, "learning_rate": 7.170440940111367e-06, "loss": 0.6872, "step": 4605 }, { "epoch": 0.38, "grad_norm": 3.4582576816064883, "learning_rate": 7.169248310637192e-06, "loss": 0.9781, "step": 4606 }, { "epoch": 0.38, "grad_norm": 3.6689730999616215, "learning_rate": 7.168055529108151e-06, "loss": 0.8557, "step": 4607 }, { "epoch": 0.38, "grad_norm": 3.716459932166737, "learning_rate": 7.166862595607853e-06, "loss": 0.8339, "step": 4608 }, { "epoch": 0.38, "grad_norm": 4.144471309239014, "learning_rate": 7.165669510219917e-06, "loss": 1.0876, "step": 4609 }, { "epoch": 0.38, "grad_norm": 4.411698165275617, "learning_rate": 7.164476273027973e-06, "loss": 0.961, "step": 4610 }, { "epoch": 0.38, "grad_norm": 3.257966379763284, "learning_rate": 7.163282884115662e-06, "loss": 0.6124, "step": 4611 }, { "epoch": 0.38, "grad_norm": 4.2342756996710795, "learning_rate": 7.1620893435666375e-06, "loss": 0.8242, "step": 4612 }, { "epoch": 0.38, "grad_norm": 1.9126104647795454, "learning_rate": 7.160895651464557e-06, "loss": 0.4754, "step": 4613 }, { "epoch": 0.38, "grad_norm": 4.460995081548863, "learning_rate": 7.159701807893097e-06, "loss": 1.2291, "step": 4614 }, { "epoch": 0.38, "grad_norm": 5.141431103320525, "learning_rate": 7.1585078129359385e-06, "loss": 0.9833, "step": 4615 }, { "epoch": 0.38, "grad_norm": 4.277701753729068, "learning_rate": 7.157313666676775e-06, "loss": 0.9546, "step": 4616 }, { "epoch": 0.38, "grad_norm": 2.7258342660221495, "learning_rate": 7.156119369199315e-06, "loss": 0.5565, "step": 4617 }, { "epoch": 0.38, "grad_norm": 3.75708470979647, "learning_rate": 7.154924920587269e-06, "loss": 0.8413, "step": 4618 }, { "epoch": 0.38, "grad_norm": 4.422246019971207, "learning_rate": 7.153730320924365e-06, "loss": 0.9829, "step": 4619 }, { "epoch": 0.38, "grad_norm": 5.067999875100732, "learning_rate": 7.152535570294339e-06, "loss": 0.9425, "step": 4620 }, { "epoch": 0.38, "grad_norm": 3.242856394561573, "learning_rate": 7.151340668780935e-06, "loss": 0.8913, "step": 4621 }, { "epoch": 0.38, "grad_norm": 2.493206777102948, "learning_rate": 7.150145616467916e-06, "loss": 0.5349, "step": 4622 }, { "epoch": 0.38, "grad_norm": 3.618796024916159, "learning_rate": 7.148950413439044e-06, "loss": 0.8845, "step": 4623 }, { "epoch": 0.38, "grad_norm": 2.8203092018052547, "learning_rate": 7.147755059778101e-06, "loss": 0.4983, "step": 4624 }, { "epoch": 0.38, "grad_norm": 4.312465249288361, "learning_rate": 7.146559555568876e-06, "loss": 0.6341, "step": 4625 }, { "epoch": 0.38, "grad_norm": 5.007985232158787, "learning_rate": 7.145363900895168e-06, "loss": 0.8231, "step": 4626 }, { "epoch": 0.38, "grad_norm": 3.36413579721577, "learning_rate": 7.144168095840786e-06, "loss": 0.6309, "step": 4627 }, { "epoch": 0.38, "grad_norm": 3.760414558968082, "learning_rate": 7.142972140489555e-06, "loss": 0.9974, "step": 4628 }, { "epoch": 0.38, "grad_norm": 3.611181049430095, "learning_rate": 7.1417760349253005e-06, "loss": 0.9156, "step": 4629 }, { "epoch": 0.38, "grad_norm": 3.496988257817793, "learning_rate": 7.140579779231866e-06, "loss": 0.5388, "step": 4630 }, { "epoch": 0.38, "grad_norm": 3.9493649618885285, "learning_rate": 7.139383373493107e-06, "loss": 0.5925, "step": 4631 }, { "epoch": 0.38, "grad_norm": 4.630837403273287, "learning_rate": 7.1381868177928834e-06, "loss": 0.7775, "step": 4632 }, { "epoch": 0.38, "grad_norm": 4.618547196665511, "learning_rate": 7.1369901122150694e-06, "loss": 0.8912, "step": 4633 }, { "epoch": 0.38, "grad_norm": 3.5624447846221794, "learning_rate": 7.1357932568435496e-06, "loss": 0.9665, "step": 4634 }, { "epoch": 0.38, "grad_norm": 3.767445364020535, "learning_rate": 7.134596251762217e-06, "loss": 1.3131, "step": 4635 }, { "epoch": 0.38, "grad_norm": 4.482513277230487, "learning_rate": 7.1333990970549764e-06, "loss": 1.0189, "step": 4636 }, { "epoch": 0.38, "grad_norm": 4.301321079555022, "learning_rate": 7.132201792805744e-06, "loss": 0.8224, "step": 4637 }, { "epoch": 0.38, "grad_norm": 3.05399806054526, "learning_rate": 7.131004339098445e-06, "loss": 0.5469, "step": 4638 }, { "epoch": 0.38, "grad_norm": 4.792266429876065, "learning_rate": 7.129806736017015e-06, "loss": 0.8165, "step": 4639 }, { "epoch": 0.38, "grad_norm": 4.72525028465202, "learning_rate": 7.128608983645404e-06, "loss": 0.6924, "step": 4640 }, { "epoch": 0.38, "grad_norm": 3.587819774226734, "learning_rate": 7.127411082067566e-06, "loss": 1.0171, "step": 4641 }, { "epoch": 0.38, "grad_norm": 3.247042866915213, "learning_rate": 7.126213031367471e-06, "loss": 0.5919, "step": 4642 }, { "epoch": 0.38, "grad_norm": 3.3236833483018917, "learning_rate": 7.1250148316290936e-06, "loss": 0.5729, "step": 4643 }, { "epoch": 0.38, "grad_norm": 4.0220944780262355, "learning_rate": 7.123816482936425e-06, "loss": 0.9583, "step": 4644 }, { "epoch": 0.38, "grad_norm": 3.455372502724746, "learning_rate": 7.122617985373466e-06, "loss": 0.6043, "step": 4645 }, { "epoch": 0.38, "grad_norm": 4.784670669336466, "learning_rate": 7.12141933902422e-06, "loss": 1.4889, "step": 4646 }, { "epoch": 0.38, "grad_norm": 3.5993468761311838, "learning_rate": 7.120220543972714e-06, "loss": 0.7301, "step": 4647 }, { "epoch": 0.38, "grad_norm": 4.180978435085142, "learning_rate": 7.119021600302973e-06, "loss": 0.9515, "step": 4648 }, { "epoch": 0.38, "grad_norm": 3.3315021353161502, "learning_rate": 7.117822508099042e-06, "loss": 0.7091, "step": 4649 }, { "epoch": 0.38, "grad_norm": 3.7542941945456128, "learning_rate": 7.116623267444969e-06, "loss": 0.7678, "step": 4650 }, { "epoch": 0.38, "grad_norm": 3.664733599625183, "learning_rate": 7.115423878424817e-06, "loss": 1.1193, "step": 4651 }, { "epoch": 0.38, "grad_norm": 4.516391532806129, "learning_rate": 7.114224341122655e-06, "loss": 1.5861, "step": 4652 }, { "epoch": 0.38, "grad_norm": 2.9343289636014185, "learning_rate": 7.113024655622571e-06, "loss": 0.4885, "step": 4653 }, { "epoch": 0.38, "grad_norm": 3.356400620423507, "learning_rate": 7.111824822008653e-06, "loss": 0.6609, "step": 4654 }, { "epoch": 0.38, "grad_norm": 4.128175145715575, "learning_rate": 7.110624840365005e-06, "loss": 0.9746, "step": 4655 }, { "epoch": 0.38, "grad_norm": 3.2156393306671833, "learning_rate": 7.109424710775742e-06, "loss": 0.484, "step": 4656 }, { "epoch": 0.38, "grad_norm": 3.1678849959490245, "learning_rate": 7.108224433324987e-06, "loss": 0.555, "step": 4657 }, { "epoch": 0.38, "grad_norm": 2.5124720518160513, "learning_rate": 7.107024008096874e-06, "loss": 0.5308, "step": 4658 }, { "epoch": 0.38, "grad_norm": 2.8215022406943118, "learning_rate": 7.105823435175549e-06, "loss": 0.4427, "step": 4659 }, { "epoch": 0.38, "grad_norm": 4.791148483114985, "learning_rate": 7.104622714645165e-06, "loss": 1.3376, "step": 4660 }, { "epoch": 0.38, "grad_norm": 3.2189910061655698, "learning_rate": 7.103421846589888e-06, "loss": 0.5059, "step": 4661 }, { "epoch": 0.38, "grad_norm": 3.2099581856724844, "learning_rate": 7.102220831093893e-06, "loss": 0.5016, "step": 4662 }, { "epoch": 0.38, "grad_norm": 3.551899396842402, "learning_rate": 7.101019668241368e-06, "loss": 0.7423, "step": 4663 }, { "epoch": 0.38, "grad_norm": 2.839274902607117, "learning_rate": 7.099818358116506e-06, "loss": 0.8159, "step": 4664 }, { "epoch": 0.38, "grad_norm": 3.091721458837044, "learning_rate": 7.0986169008035175e-06, "loss": 0.7227, "step": 4665 }, { "epoch": 0.38, "grad_norm": 4.497212176209524, "learning_rate": 7.097415296386617e-06, "loss": 0.9619, "step": 4666 }, { "epoch": 0.38, "grad_norm": 4.133435638548798, "learning_rate": 7.096213544950032e-06, "loss": 0.9201, "step": 4667 }, { "epoch": 0.38, "grad_norm": 1.7868724618128153, "learning_rate": 7.095011646578001e-06, "loss": 0.3228, "step": 4668 }, { "epoch": 0.38, "grad_norm": 2.6219947054937496, "learning_rate": 7.093809601354769e-06, "loss": 0.615, "step": 4669 }, { "epoch": 0.38, "grad_norm": 3.4273523018159104, "learning_rate": 7.092607409364597e-06, "loss": 0.6523, "step": 4670 }, { "epoch": 0.38, "grad_norm": 4.217834982538416, "learning_rate": 7.0914050706917536e-06, "loss": 0.7628, "step": 4671 }, { "epoch": 0.38, "grad_norm": 4.023922645696487, "learning_rate": 7.090202585420516e-06, "loss": 1.1023, "step": 4672 }, { "epoch": 0.38, "grad_norm": 3.1929940805848345, "learning_rate": 7.088999953635174e-06, "loss": 0.6617, "step": 4673 }, { "epoch": 0.38, "grad_norm": 4.27375063412841, "learning_rate": 7.087797175420028e-06, "loss": 1.1346, "step": 4674 }, { "epoch": 0.38, "grad_norm": 5.14383253909754, "learning_rate": 7.086594250859383e-06, "loss": 1.0806, "step": 4675 }, { "epoch": 0.38, "grad_norm": 3.798510194907834, "learning_rate": 7.085391180037564e-06, "loss": 0.8079, "step": 4676 }, { "epoch": 0.38, "grad_norm": 4.836075392491869, "learning_rate": 7.084187963038899e-06, "loss": 0.9677, "step": 4677 }, { "epoch": 0.38, "grad_norm": 4.100173145603222, "learning_rate": 7.082984599947727e-06, "loss": 0.8584, "step": 4678 }, { "epoch": 0.38, "grad_norm": 1.6888219478857118, "learning_rate": 7.0817810908483995e-06, "loss": 0.2592, "step": 4679 }, { "epoch": 0.38, "grad_norm": 3.1288526522734137, "learning_rate": 7.080577435825279e-06, "loss": 0.8195, "step": 4680 }, { "epoch": 0.38, "grad_norm": 3.1526606107911697, "learning_rate": 7.079373634962735e-06, "loss": 0.8578, "step": 4681 }, { "epoch": 0.38, "grad_norm": 3.7193182479352145, "learning_rate": 7.0781696883451486e-06, "loss": 0.9729, "step": 4682 }, { "epoch": 0.38, "grad_norm": 1.8929230926532878, "learning_rate": 7.076965596056911e-06, "loss": 0.4022, "step": 4683 }, { "epoch": 0.38, "grad_norm": 2.720322440920711, "learning_rate": 7.075761358182423e-06, "loss": 0.5376, "step": 4684 }, { "epoch": 0.38, "grad_norm": 4.45190880528166, "learning_rate": 7.074556974806098e-06, "loss": 0.9728, "step": 4685 }, { "epoch": 0.38, "grad_norm": 3.503468316807604, "learning_rate": 7.073352446012357e-06, "loss": 0.7926, "step": 4686 }, { "epoch": 0.38, "grad_norm": 5.194545270931051, "learning_rate": 7.072147771885633e-06, "loss": 1.4915, "step": 4687 }, { "epoch": 0.38, "grad_norm": 5.941519706504127, "learning_rate": 7.07094295251037e-06, "loss": 1.0565, "step": 4688 }, { "epoch": 0.38, "grad_norm": 4.2702531812199265, "learning_rate": 7.069737987971017e-06, "loss": 0.771, "step": 4689 }, { "epoch": 0.38, "grad_norm": 2.864746525153197, "learning_rate": 7.06853287835204e-06, "loss": 0.6395, "step": 4690 }, { "epoch": 0.38, "grad_norm": 4.320869939847818, "learning_rate": 7.06732762373791e-06, "loss": 0.8968, "step": 4691 }, { "epoch": 0.38, "grad_norm": 3.8603919227263654, "learning_rate": 7.06612222421311e-06, "loss": 0.5715, "step": 4692 }, { "epoch": 0.38, "grad_norm": 2.815299350955375, "learning_rate": 7.064916679862134e-06, "loss": 0.2505, "step": 4693 }, { "epoch": 0.38, "grad_norm": 3.416622876392271, "learning_rate": 7.0637109907694855e-06, "loss": 0.6977, "step": 4694 }, { "epoch": 0.38, "grad_norm": 4.325136418083608, "learning_rate": 7.062505157019678e-06, "loss": 1.155, "step": 4695 }, { "epoch": 0.38, "grad_norm": 4.29304724423045, "learning_rate": 7.061299178697234e-06, "loss": 0.8864, "step": 4696 }, { "epoch": 0.38, "grad_norm": 4.2348968803133, "learning_rate": 7.06009305588669e-06, "loss": 1.2064, "step": 4697 }, { "epoch": 0.38, "grad_norm": 4.325225458239991, "learning_rate": 7.058886788672588e-06, "loss": 0.816, "step": 4698 }, { "epoch": 0.38, "grad_norm": 2.7366570370091066, "learning_rate": 7.057680377139482e-06, "loss": 0.549, "step": 4699 }, { "epoch": 0.38, "grad_norm": 3.6770758782743433, "learning_rate": 7.056473821371936e-06, "loss": 1.0477, "step": 4700 }, { "epoch": 0.38, "grad_norm": 3.9101510171746092, "learning_rate": 7.055267121454525e-06, "loss": 1.0068, "step": 4701 }, { "epoch": 0.38, "grad_norm": 4.387053921551004, "learning_rate": 7.054060277471834e-06, "loss": 1.2587, "step": 4702 }, { "epoch": 0.38, "grad_norm": 3.8716796704049807, "learning_rate": 7.052853289508458e-06, "loss": 0.7437, "step": 4703 }, { "epoch": 0.38, "grad_norm": 3.101094078305496, "learning_rate": 7.051646157648998e-06, "loss": 0.5624, "step": 4704 }, { "epoch": 0.38, "grad_norm": 3.2949686093473995, "learning_rate": 7.050438881978073e-06, "loss": 0.5994, "step": 4705 }, { "epoch": 0.38, "grad_norm": 4.680571162710411, "learning_rate": 7.049231462580306e-06, "loss": 0.984, "step": 4706 }, { "epoch": 0.38, "grad_norm": 3.764122359448531, "learning_rate": 7.0480238995403305e-06, "loss": 1.0566, "step": 4707 }, { "epoch": 0.38, "grad_norm": 3.3283288351922162, "learning_rate": 7.046816192942794e-06, "loss": 0.867, "step": 4708 }, { "epoch": 0.38, "grad_norm": 3.8178504026156137, "learning_rate": 7.045608342872349e-06, "loss": 0.6565, "step": 4709 }, { "epoch": 0.38, "grad_norm": 4.594460551811602, "learning_rate": 7.044400349413661e-06, "loss": 1.315, "step": 4710 }, { "epoch": 0.39, "grad_norm": 2.49430484143887, "learning_rate": 7.043192212651407e-06, "loss": 0.326, "step": 4711 }, { "epoch": 0.39, "grad_norm": 3.924263522486229, "learning_rate": 7.041983932670271e-06, "loss": 1.0455, "step": 4712 }, { "epoch": 0.39, "grad_norm": 3.046648117170458, "learning_rate": 7.040775509554948e-06, "loss": 0.531, "step": 4713 }, { "epoch": 0.39, "grad_norm": 3.7765304854224135, "learning_rate": 7.039566943390144e-06, "loss": 0.717, "step": 4714 }, { "epoch": 0.39, "grad_norm": 3.7657904788378427, "learning_rate": 7.038358234260572e-06, "loss": 0.9202, "step": 4715 }, { "epoch": 0.39, "grad_norm": 2.952201527018048, "learning_rate": 7.037149382250959e-06, "loss": 0.5853, "step": 4716 }, { "epoch": 0.39, "grad_norm": 4.1458126725943245, "learning_rate": 7.035940387446041e-06, "loss": 0.6387, "step": 4717 }, { "epoch": 0.39, "grad_norm": 4.345177420459916, "learning_rate": 7.03473124993056e-06, "loss": 0.7602, "step": 4718 }, { "epoch": 0.39, "grad_norm": 1.9731307704986518, "learning_rate": 7.033521969789275e-06, "loss": 0.3163, "step": 4719 }, { "epoch": 0.39, "grad_norm": 3.9952518683111844, "learning_rate": 7.03231254710695e-06, "loss": 0.7432, "step": 4720 }, { "epoch": 0.39, "grad_norm": 2.6574995418647163, "learning_rate": 7.031102981968361e-06, "loss": 0.5555, "step": 4721 }, { "epoch": 0.39, "grad_norm": 2.1569025298242925, "learning_rate": 7.029893274458291e-06, "loss": 0.4483, "step": 4722 }, { "epoch": 0.39, "grad_norm": 3.535936462310264, "learning_rate": 7.028683424661538e-06, "loss": 0.9622, "step": 4723 }, { "epoch": 0.39, "grad_norm": 2.7605565413311677, "learning_rate": 7.0274734326629035e-06, "loss": 0.6126, "step": 4724 }, { "epoch": 0.39, "grad_norm": 2.8761513436527633, "learning_rate": 7.026263298547207e-06, "loss": 0.7268, "step": 4725 }, { "epoch": 0.39, "grad_norm": 4.849127956931701, "learning_rate": 7.025053022399271e-06, "loss": 0.9776, "step": 4726 }, { "epoch": 0.39, "grad_norm": 2.2426848629555685, "learning_rate": 7.02384260430393e-06, "loss": 0.4053, "step": 4727 }, { "epoch": 0.39, "grad_norm": 4.547617522634692, "learning_rate": 7.022632044346032e-06, "loss": 1.2811, "step": 4728 }, { "epoch": 0.39, "grad_norm": 2.7078112908147984, "learning_rate": 7.0214213426104295e-06, "loss": 0.6024, "step": 4729 }, { "epoch": 0.39, "grad_norm": 3.5512443500532593, "learning_rate": 7.020210499181988e-06, "loss": 0.7873, "step": 4730 }, { "epoch": 0.39, "grad_norm": 3.8017442415960017, "learning_rate": 7.0189995141455836e-06, "loss": 0.6312, "step": 4731 }, { "epoch": 0.39, "grad_norm": 4.198275952054718, "learning_rate": 7.017788387586097e-06, "loss": 0.9264, "step": 4732 }, { "epoch": 0.39, "grad_norm": 3.9401947156048407, "learning_rate": 7.016577119588428e-06, "loss": 0.8761, "step": 4733 }, { "epoch": 0.39, "grad_norm": 3.407470671577025, "learning_rate": 7.015365710237479e-06, "loss": 0.8748, "step": 4734 }, { "epoch": 0.39, "grad_norm": 5.3772780839984895, "learning_rate": 7.0141541596181654e-06, "loss": 1.3098, "step": 4735 }, { "epoch": 0.39, "grad_norm": 4.125854112287967, "learning_rate": 7.01294246781541e-06, "loss": 0.606, "step": 4736 }, { "epoch": 0.39, "grad_norm": 2.953373275840162, "learning_rate": 7.0117306349141485e-06, "loss": 0.5249, "step": 4737 }, { "epoch": 0.39, "grad_norm": 4.831197244885152, "learning_rate": 7.010518660999324e-06, "loss": 1.3313, "step": 4738 }, { "epoch": 0.39, "grad_norm": 3.08326069466575, "learning_rate": 7.009306546155889e-06, "loss": 0.6701, "step": 4739 }, { "epoch": 0.39, "grad_norm": 3.6870448741867894, "learning_rate": 7.008094290468813e-06, "loss": 0.9529, "step": 4740 }, { "epoch": 0.39, "grad_norm": 4.124650538400718, "learning_rate": 7.006881894023065e-06, "loss": 0.8456, "step": 4741 }, { "epoch": 0.39, "grad_norm": 3.983367153795829, "learning_rate": 7.005669356903631e-06, "loss": 1.1963, "step": 4742 }, { "epoch": 0.39, "grad_norm": 4.0868385441047606, "learning_rate": 7.004456679195503e-06, "loss": 0.8454, "step": 4743 }, { "epoch": 0.39, "grad_norm": 3.0532609433396725, "learning_rate": 7.003243860983686e-06, "loss": 0.6778, "step": 4744 }, { "epoch": 0.39, "grad_norm": 4.0415860533912085, "learning_rate": 7.002030902353193e-06, "loss": 0.8216, "step": 4745 }, { "epoch": 0.39, "grad_norm": 4.584568112521553, "learning_rate": 7.000817803389045e-06, "loss": 0.9213, "step": 4746 }, { "epoch": 0.39, "grad_norm": 3.4185218446827954, "learning_rate": 6.999604564176277e-06, "loss": 0.8299, "step": 4747 }, { "epoch": 0.39, "grad_norm": 5.722918145577284, "learning_rate": 6.998391184799932e-06, "loss": 1.7364, "step": 4748 }, { "epoch": 0.39, "grad_norm": 4.229748668312871, "learning_rate": 6.99717766534506e-06, "loss": 0.8441, "step": 4749 }, { "epoch": 0.39, "grad_norm": 3.1861083971798494, "learning_rate": 6.995964005896727e-06, "loss": 0.4733, "step": 4750 }, { "epoch": 0.39, "grad_norm": 3.2484214364248305, "learning_rate": 6.994750206540004e-06, "loss": 0.7676, "step": 4751 }, { "epoch": 0.39, "grad_norm": 5.449813091236215, "learning_rate": 6.993536267359974e-06, "loss": 0.9787, "step": 4752 }, { "epoch": 0.39, "grad_norm": 3.6520084624347993, "learning_rate": 6.992322188441725e-06, "loss": 0.9034, "step": 4753 }, { "epoch": 0.39, "grad_norm": 2.1529575535292254, "learning_rate": 6.991107969870363e-06, "loss": 0.4434, "step": 4754 }, { "epoch": 0.39, "grad_norm": 4.303898302529973, "learning_rate": 6.989893611730996e-06, "loss": 0.6798, "step": 4755 }, { "epoch": 0.39, "grad_norm": 3.822231187335992, "learning_rate": 6.988679114108747e-06, "loss": 1.0074, "step": 4756 }, { "epoch": 0.39, "grad_norm": 3.588487698818553, "learning_rate": 6.987464477088748e-06, "loss": 0.8526, "step": 4757 }, { "epoch": 0.39, "grad_norm": 2.1697715748788413, "learning_rate": 6.986249700756138e-06, "loss": 0.3875, "step": 4758 }, { "epoch": 0.39, "grad_norm": 4.4852047598958436, "learning_rate": 6.985034785196069e-06, "loss": 1.0415, "step": 4759 }, { "epoch": 0.39, "grad_norm": 4.216136059924069, "learning_rate": 6.983819730493699e-06, "loss": 0.6872, "step": 4760 }, { "epoch": 0.39, "grad_norm": 3.5697743111338145, "learning_rate": 6.9826045367342e-06, "loss": 0.6744, "step": 4761 }, { "epoch": 0.39, "grad_norm": 3.286634141739833, "learning_rate": 6.981389204002751e-06, "loss": 0.7238, "step": 4762 }, { "epoch": 0.39, "grad_norm": 3.5571073222616736, "learning_rate": 6.980173732384543e-06, "loss": 0.5277, "step": 4763 }, { "epoch": 0.39, "grad_norm": 4.818274543221162, "learning_rate": 6.978958121964773e-06, "loss": 1.4853, "step": 4764 }, { "epoch": 0.39, "grad_norm": 3.9200206363249097, "learning_rate": 6.977742372828652e-06, "loss": 0.9199, "step": 4765 }, { "epoch": 0.39, "grad_norm": 3.176623938334342, "learning_rate": 6.976526485061397e-06, "loss": 0.6575, "step": 4766 }, { "epoch": 0.39, "grad_norm": 5.174931941643446, "learning_rate": 6.975310458748238e-06, "loss": 1.1418, "step": 4767 }, { "epoch": 0.39, "grad_norm": 2.2719845268830756, "learning_rate": 6.9740942939744125e-06, "loss": 0.5741, "step": 4768 }, { "epoch": 0.39, "grad_norm": 5.788764754495829, "learning_rate": 6.972877990825167e-06, "loss": 0.7534, "step": 4769 }, { "epoch": 0.39, "grad_norm": 3.6073263780058684, "learning_rate": 6.971661549385762e-06, "loss": 0.6959, "step": 4770 }, { "epoch": 0.39, "grad_norm": 5.604762311779483, "learning_rate": 6.970444969741462e-06, "loss": 1.6464, "step": 4771 }, { "epoch": 0.39, "grad_norm": 2.7151129366464, "learning_rate": 6.969228251977545e-06, "loss": 0.552, "step": 4772 }, { "epoch": 0.39, "grad_norm": 2.972356743488502, "learning_rate": 6.968011396179297e-06, "loss": 0.7818, "step": 4773 }, { "epoch": 0.39, "grad_norm": 2.365531384342218, "learning_rate": 6.966794402432017e-06, "loss": 0.469, "step": 4774 }, { "epoch": 0.39, "grad_norm": 5.091976058428769, "learning_rate": 6.965577270821008e-06, "loss": 0.8084, "step": 4775 }, { "epoch": 0.39, "grad_norm": 4.840710225808364, "learning_rate": 6.964360001431586e-06, "loss": 1.1058, "step": 4776 }, { "epoch": 0.39, "grad_norm": 4.307707946660783, "learning_rate": 6.963142594349077e-06, "loss": 1.0741, "step": 4777 }, { "epoch": 0.39, "grad_norm": 5.7821674714992835, "learning_rate": 6.961925049658816e-06, "loss": 1.1528, "step": 4778 }, { "epoch": 0.39, "grad_norm": 3.610844148978979, "learning_rate": 6.960707367446147e-06, "loss": 0.9325, "step": 4779 }, { "epoch": 0.39, "grad_norm": 3.9451373481729353, "learning_rate": 6.959489547796426e-06, "loss": 0.6211, "step": 4780 }, { "epoch": 0.39, "grad_norm": 3.697845570706351, "learning_rate": 6.958271590795014e-06, "loss": 0.5125, "step": 4781 }, { "epoch": 0.39, "grad_norm": 3.993889001188721, "learning_rate": 6.957053496527286e-06, "loss": 1.0208, "step": 4782 }, { "epoch": 0.39, "grad_norm": 2.9045862533892004, "learning_rate": 6.955835265078626e-06, "loss": 1.0204, "step": 4783 }, { "epoch": 0.39, "grad_norm": 3.574727633134075, "learning_rate": 6.954616896534424e-06, "loss": 0.8613, "step": 4784 }, { "epoch": 0.39, "grad_norm": 3.0174056463238688, "learning_rate": 6.953398390980086e-06, "loss": 0.5144, "step": 4785 }, { "epoch": 0.39, "grad_norm": 2.568291338854759, "learning_rate": 6.95217974850102e-06, "loss": 0.7935, "step": 4786 }, { "epoch": 0.39, "grad_norm": 3.635262649776755, "learning_rate": 6.950960969182649e-06, "loss": 0.6803, "step": 4787 }, { "epoch": 0.39, "grad_norm": 4.877628339574649, "learning_rate": 6.949742053110408e-06, "loss": 0.9124, "step": 4788 }, { "epoch": 0.39, "grad_norm": 3.122801296062488, "learning_rate": 6.9485230003697325e-06, "loss": 0.6189, "step": 4789 }, { "epoch": 0.39, "grad_norm": 3.4939221191245924, "learning_rate": 6.947303811046074e-06, "loss": 0.6689, "step": 4790 }, { "epoch": 0.39, "grad_norm": 3.140755038586852, "learning_rate": 6.946084485224895e-06, "loss": 0.6994, "step": 4791 }, { "epoch": 0.39, "grad_norm": 4.215834949502133, "learning_rate": 6.944865022991661e-06, "loss": 0.986, "step": 4792 }, { "epoch": 0.39, "grad_norm": 3.4420112654537522, "learning_rate": 6.943645424431854e-06, "loss": 0.6215, "step": 4793 }, { "epoch": 0.39, "grad_norm": 4.709803801079052, "learning_rate": 6.942425689630962e-06, "loss": 0.7751, "step": 4794 }, { "epoch": 0.39, "grad_norm": 4.797934906655287, "learning_rate": 6.941205818674482e-06, "loss": 1.1691, "step": 4795 }, { "epoch": 0.39, "grad_norm": 4.187981917818049, "learning_rate": 6.93998581164792e-06, "loss": 1.2553, "step": 4796 }, { "epoch": 0.39, "grad_norm": 4.89920836804541, "learning_rate": 6.9387656686367985e-06, "loss": 1.244, "step": 4797 }, { "epoch": 0.39, "grad_norm": 2.9961056564406827, "learning_rate": 6.93754538972664e-06, "loss": 0.5682, "step": 4798 }, { "epoch": 0.39, "grad_norm": 3.6553136476349426, "learning_rate": 6.936324975002983e-06, "loss": 0.8373, "step": 4799 }, { "epoch": 0.39, "grad_norm": 3.015444678582228, "learning_rate": 6.935104424551372e-06, "loss": 0.7753, "step": 4800 }, { "epoch": 0.39, "grad_norm": 3.835661988834962, "learning_rate": 6.933883738457361e-06, "loss": 0.7163, "step": 4801 }, { "epoch": 0.39, "grad_norm": 3.730630702598074, "learning_rate": 6.932662916806516e-06, "loss": 0.9613, "step": 4802 }, { "epoch": 0.39, "grad_norm": 4.070507776187825, "learning_rate": 6.931441959684414e-06, "loss": 0.7724, "step": 4803 }, { "epoch": 0.39, "grad_norm": 4.472275772354918, "learning_rate": 6.930220867176633e-06, "loss": 0.8967, "step": 4804 }, { "epoch": 0.39, "grad_norm": 5.17464067657258, "learning_rate": 6.928999639368773e-06, "loss": 1.3153, "step": 4805 }, { "epoch": 0.39, "grad_norm": 3.7710448689759484, "learning_rate": 6.927778276346431e-06, "loss": 0.7007, "step": 4806 }, { "epoch": 0.39, "grad_norm": 2.7912421280625472, "learning_rate": 6.926556778195224e-06, "loss": 0.7555, "step": 4807 }, { "epoch": 0.39, "grad_norm": 2.6303058642059765, "learning_rate": 6.925335145000769e-06, "loss": 0.4378, "step": 4808 }, { "epoch": 0.39, "grad_norm": 3.552899346086931, "learning_rate": 6.9241133768487005e-06, "loss": 0.8362, "step": 4809 }, { "epoch": 0.39, "grad_norm": 3.691047824294737, "learning_rate": 6.922891473824655e-06, "loss": 0.8027, "step": 4810 }, { "epoch": 0.39, "grad_norm": 4.868971910272877, "learning_rate": 6.92166943601429e-06, "loss": 1.427, "step": 4811 }, { "epoch": 0.39, "grad_norm": 3.197455573046435, "learning_rate": 6.9204472635032586e-06, "loss": 0.8535, "step": 4812 }, { "epoch": 0.39, "grad_norm": 2.943066512717961, "learning_rate": 6.9192249563772304e-06, "loss": 0.7115, "step": 4813 }, { "epoch": 0.39, "grad_norm": 2.7013442623838446, "learning_rate": 6.918002514721887e-06, "loss": 0.6296, "step": 4814 }, { "epoch": 0.39, "grad_norm": 3.3205044878653345, "learning_rate": 6.916779938622916e-06, "loss": 0.6106, "step": 4815 }, { "epoch": 0.39, "grad_norm": 2.576051596597386, "learning_rate": 6.9155572281660114e-06, "loss": 0.4611, "step": 4816 }, { "epoch": 0.39, "grad_norm": 4.3346066642862535, "learning_rate": 6.914334383436882e-06, "loss": 1.1357, "step": 4817 }, { "epoch": 0.39, "grad_norm": 4.324620550091956, "learning_rate": 6.913111404521242e-06, "loss": 0.8622, "step": 4818 }, { "epoch": 0.39, "grad_norm": 5.50650194741537, "learning_rate": 6.9118882915048204e-06, "loss": 1.2789, "step": 4819 }, { "epoch": 0.39, "grad_norm": 2.085677384828208, "learning_rate": 6.9106650444733495e-06, "loss": 0.3836, "step": 4820 }, { "epoch": 0.39, "grad_norm": 5.459250967794102, "learning_rate": 6.909441663512574e-06, "loss": 0.9297, "step": 4821 }, { "epoch": 0.39, "grad_norm": 3.723734873755664, "learning_rate": 6.908218148708248e-06, "loss": 1.0447, "step": 4822 }, { "epoch": 0.39, "grad_norm": 3.082525983452769, "learning_rate": 6.906994500146134e-06, "loss": 0.7147, "step": 4823 }, { "epoch": 0.39, "grad_norm": 4.72503396605867, "learning_rate": 6.905770717912006e-06, "loss": 1.0701, "step": 4824 }, { "epoch": 0.39, "grad_norm": 4.2688936455944555, "learning_rate": 6.904546802091644e-06, "loss": 1.1725, "step": 4825 }, { "epoch": 0.39, "grad_norm": 3.991151581247189, "learning_rate": 6.903322752770839e-06, "loss": 0.8043, "step": 4826 }, { "epoch": 0.39, "grad_norm": 4.291152192141413, "learning_rate": 6.9020985700353925e-06, "loss": 1.0158, "step": 4827 }, { "epoch": 0.39, "grad_norm": 3.054654981801179, "learning_rate": 6.900874253971115e-06, "loss": 0.7262, "step": 4828 }, { "epoch": 0.39, "grad_norm": 3.352789982756976, "learning_rate": 6.899649804663824e-06, "loss": 0.4879, "step": 4829 }, { "epoch": 0.39, "grad_norm": 4.69679378842051, "learning_rate": 6.898425222199349e-06, "loss": 0.7028, "step": 4830 }, { "epoch": 0.39, "grad_norm": 4.316293810375166, "learning_rate": 6.897200506663531e-06, "loss": 1.0383, "step": 4831 }, { "epoch": 0.39, "grad_norm": 3.9655308923279713, "learning_rate": 6.89597565814221e-06, "loss": 0.7912, "step": 4832 }, { "epoch": 0.4, "grad_norm": 3.3248114459066294, "learning_rate": 6.894750676721248e-06, "loss": 0.6337, "step": 4833 }, { "epoch": 0.4, "grad_norm": 4.877496016078539, "learning_rate": 6.893525562486511e-06, "loss": 1.5636, "step": 4834 }, { "epoch": 0.4, "grad_norm": 2.0601769129749345, "learning_rate": 6.89230031552387e-06, "loss": 0.6569, "step": 4835 }, { "epoch": 0.4, "grad_norm": 3.5687417670290493, "learning_rate": 6.891074935919213e-06, "loss": 0.9939, "step": 4836 }, { "epoch": 0.4, "grad_norm": 4.141206013039817, "learning_rate": 6.889849423758435e-06, "loss": 1.0678, "step": 4837 }, { "epoch": 0.4, "grad_norm": 4.782425765103793, "learning_rate": 6.888623779127436e-06, "loss": 0.8534, "step": 4838 }, { "epoch": 0.4, "grad_norm": 3.249219968307424, "learning_rate": 6.887398002112129e-06, "loss": 1.0996, "step": 4839 }, { "epoch": 0.4, "grad_norm": 4.089008116773115, "learning_rate": 6.886172092798436e-06, "loss": 1.0721, "step": 4840 }, { "epoch": 0.4, "grad_norm": 2.914132826221444, "learning_rate": 6.8849460512722874e-06, "loss": 0.4047, "step": 4841 }, { "epoch": 0.4, "grad_norm": 1.2686723684403072, "learning_rate": 6.883719877619625e-06, "loss": 0.2041, "step": 4842 }, { "epoch": 0.4, "grad_norm": 5.565081196331538, "learning_rate": 6.882493571926396e-06, "loss": 1.047, "step": 4843 }, { "epoch": 0.4, "grad_norm": 2.948217418062479, "learning_rate": 6.881267134278562e-06, "loss": 0.5319, "step": 4844 }, { "epoch": 0.4, "grad_norm": 4.177452726225325, "learning_rate": 6.880040564762089e-06, "loss": 1.0535, "step": 4845 }, { "epoch": 0.4, "grad_norm": 3.215548927000322, "learning_rate": 6.878813863462953e-06, "loss": 0.9454, "step": 4846 }, { "epoch": 0.4, "grad_norm": 3.090912258099464, "learning_rate": 6.877587030467142e-06, "loss": 0.7074, "step": 4847 }, { "epoch": 0.4, "grad_norm": 3.7783187665986246, "learning_rate": 6.876360065860653e-06, "loss": 0.7078, "step": 4848 }, { "epoch": 0.4, "grad_norm": 3.5361133983410573, "learning_rate": 6.875132969729488e-06, "loss": 0.7058, "step": 4849 }, { "epoch": 0.4, "grad_norm": 3.377776290693036, "learning_rate": 6.873905742159661e-06, "loss": 0.8566, "step": 4850 }, { "epoch": 0.4, "grad_norm": 2.8147401543582333, "learning_rate": 6.872678383237199e-06, "loss": 0.5288, "step": 4851 }, { "epoch": 0.4, "grad_norm": 4.080223092160162, "learning_rate": 6.871450893048132e-06, "loss": 1.1785, "step": 4852 }, { "epoch": 0.4, "grad_norm": 3.8363511837021016, "learning_rate": 6.8702232716785015e-06, "loss": 1.0499, "step": 4853 }, { "epoch": 0.4, "grad_norm": 3.670029911106436, "learning_rate": 6.868995519214359e-06, "loss": 1.0351, "step": 4854 }, { "epoch": 0.4, "grad_norm": 4.94734432167073, "learning_rate": 6.867767635741765e-06, "loss": 1.0276, "step": 4855 }, { "epoch": 0.4, "grad_norm": 3.5924768730461567, "learning_rate": 6.866539621346786e-06, "loss": 0.4919, "step": 4856 }, { "epoch": 0.4, "grad_norm": 1.6637855824823422, "learning_rate": 6.865311476115506e-06, "loss": 0.3427, "step": 4857 }, { "epoch": 0.4, "grad_norm": 4.41025917393053, "learning_rate": 6.864083200134007e-06, "loss": 0.7974, "step": 4858 }, { "epoch": 0.4, "grad_norm": 2.333879761950064, "learning_rate": 6.862854793488388e-06, "loss": 0.4166, "step": 4859 }, { "epoch": 0.4, "grad_norm": 2.7012765849009965, "learning_rate": 6.861626256264757e-06, "loss": 0.5799, "step": 4860 }, { "epoch": 0.4, "grad_norm": 4.0504789769281935, "learning_rate": 6.860397588549227e-06, "loss": 1.1706, "step": 4861 }, { "epoch": 0.4, "grad_norm": 2.9441842501381243, "learning_rate": 6.859168790427921e-06, "loss": 0.3816, "step": 4862 }, { "epoch": 0.4, "grad_norm": 3.2684089653925614, "learning_rate": 6.857939861986976e-06, "loss": 0.7132, "step": 4863 }, { "epoch": 0.4, "grad_norm": 4.049869237519961, "learning_rate": 6.856710803312531e-06, "loss": 0.99, "step": 4864 }, { "epoch": 0.4, "grad_norm": 2.98686435202232, "learning_rate": 6.85548161449074e-06, "loss": 0.5555, "step": 4865 }, { "epoch": 0.4, "grad_norm": 3.66733054774265, "learning_rate": 6.854252295607761e-06, "loss": 0.9083, "step": 4866 }, { "epoch": 0.4, "grad_norm": 3.0811161745650453, "learning_rate": 6.8530228467497685e-06, "loss": 0.6796, "step": 4867 }, { "epoch": 0.4, "grad_norm": 3.4239574539894875, "learning_rate": 6.8517932680029374e-06, "loss": 0.9727, "step": 4868 }, { "epoch": 0.4, "grad_norm": 5.419282291306946, "learning_rate": 6.850563559453458e-06, "loss": 0.8212, "step": 4869 }, { "epoch": 0.4, "grad_norm": 4.033435941073197, "learning_rate": 6.849333721187525e-06, "loss": 0.8775, "step": 4870 }, { "epoch": 0.4, "grad_norm": 4.107942841853004, "learning_rate": 6.848103753291349e-06, "loss": 1.1675, "step": 4871 }, { "epoch": 0.4, "grad_norm": 4.517224443664284, "learning_rate": 6.8468736558511405e-06, "loss": 1.021, "step": 4872 }, { "epoch": 0.4, "grad_norm": 3.4227119961292236, "learning_rate": 6.845643428953127e-06, "loss": 0.9713, "step": 4873 }, { "epoch": 0.4, "grad_norm": 4.666843444544057, "learning_rate": 6.844413072683542e-06, "loss": 0.7418, "step": 4874 }, { "epoch": 0.4, "grad_norm": 3.315576569835842, "learning_rate": 6.8431825871286275e-06, "loss": 0.7384, "step": 4875 }, { "epoch": 0.4, "grad_norm": 3.1521853040591217, "learning_rate": 6.841951972374636e-06, "loss": 0.5464, "step": 4876 }, { "epoch": 0.4, "grad_norm": 3.3331402848579645, "learning_rate": 6.840721228507826e-06, "loss": 0.9008, "step": 4877 }, { "epoch": 0.4, "grad_norm": 4.730462628966787, "learning_rate": 6.839490355614468e-06, "loss": 1.1883, "step": 4878 }, { "epoch": 0.4, "grad_norm": 4.152897251668137, "learning_rate": 6.838259353780843e-06, "loss": 0.6253, "step": 4879 }, { "epoch": 0.4, "grad_norm": 1.4914931988995255, "learning_rate": 6.8370282230932375e-06, "loss": 0.2271, "step": 4880 }, { "epoch": 0.4, "grad_norm": 3.220985605585712, "learning_rate": 6.835796963637947e-06, "loss": 0.5998, "step": 4881 }, { "epoch": 0.4, "grad_norm": 4.224144863216822, "learning_rate": 6.83456557550128e-06, "loss": 1.1766, "step": 4882 }, { "epoch": 0.4, "grad_norm": 4.6436117844737055, "learning_rate": 6.83333405876955e-06, "loss": 1.1108, "step": 4883 }, { "epoch": 0.4, "grad_norm": 4.825779692682705, "learning_rate": 6.832102413529082e-06, "loss": 0.8362, "step": 4884 }, { "epoch": 0.4, "grad_norm": 3.0609627060824423, "learning_rate": 6.830870639866207e-06, "loss": 0.5508, "step": 4885 }, { "epoch": 0.4, "grad_norm": 3.765868409693761, "learning_rate": 6.829638737867268e-06, "loss": 1.0675, "step": 4886 }, { "epoch": 0.4, "grad_norm": 0.8755726868889399, "learning_rate": 6.828406707618616e-06, "loss": 0.1566, "step": 4887 }, { "epoch": 0.4, "grad_norm": 3.431109974323921, "learning_rate": 6.827174549206612e-06, "loss": 0.9188, "step": 4888 }, { "epoch": 0.4, "grad_norm": 3.6741186744873215, "learning_rate": 6.825942262717623e-06, "loss": 1.0093, "step": 4889 }, { "epoch": 0.4, "grad_norm": 2.414868717211905, "learning_rate": 6.824709848238028e-06, "loss": 0.3435, "step": 4890 }, { "epoch": 0.4, "grad_norm": 3.4953640571943225, "learning_rate": 6.823477305854215e-06, "loss": 0.7189, "step": 4891 }, { "epoch": 0.4, "grad_norm": 3.071480562210039, "learning_rate": 6.822244635652579e-06, "loss": 0.5268, "step": 4892 }, { "epoch": 0.4, "grad_norm": 3.9820593782485667, "learning_rate": 6.821011837719522e-06, "loss": 0.8389, "step": 4893 }, { "epoch": 0.4, "grad_norm": 2.7425034378072586, "learning_rate": 6.819778912141461e-06, "loss": 0.7943, "step": 4894 }, { "epoch": 0.4, "grad_norm": 1.5328573063544362, "learning_rate": 6.818545859004819e-06, "loss": 0.1973, "step": 4895 }, { "epoch": 0.4, "grad_norm": 5.588916469323275, "learning_rate": 6.817312678396026e-06, "loss": 1.0831, "step": 4896 }, { "epoch": 0.4, "grad_norm": 3.555749972340232, "learning_rate": 6.8160793704015234e-06, "loss": 0.6709, "step": 4897 }, { "epoch": 0.4, "grad_norm": 2.8676428553444286, "learning_rate": 6.81484593510776e-06, "loss": 0.7756, "step": 4898 }, { "epoch": 0.4, "grad_norm": 3.780370674790176, "learning_rate": 6.813612372601196e-06, "loss": 0.5553, "step": 4899 }, { "epoch": 0.4, "grad_norm": 4.940664038442654, "learning_rate": 6.812378682968297e-06, "loss": 0.9143, "step": 4900 }, { "epoch": 0.4, "grad_norm": 3.548389415177721, "learning_rate": 6.811144866295541e-06, "loss": 0.7957, "step": 4901 }, { "epoch": 0.4, "grad_norm": 3.0121381310770756, "learning_rate": 6.8099109226694095e-06, "loss": 0.6322, "step": 4902 }, { "epoch": 0.4, "grad_norm": 4.40491945442787, "learning_rate": 6.8086768521764006e-06, "loss": 1.3037, "step": 4903 }, { "epoch": 0.4, "grad_norm": 3.4569409355456053, "learning_rate": 6.807442654903015e-06, "loss": 0.7545, "step": 4904 }, { "epoch": 0.4, "grad_norm": 4.398706886637341, "learning_rate": 6.806208330935766e-06, "loss": 1.2965, "step": 4905 }, { "epoch": 0.4, "grad_norm": 4.009674249846179, "learning_rate": 6.804973880361172e-06, "loss": 0.4626, "step": 4906 }, { "epoch": 0.4, "grad_norm": 4.770078543481889, "learning_rate": 6.8037393032657665e-06, "loss": 1.2375, "step": 4907 }, { "epoch": 0.4, "grad_norm": 3.9386078500281094, "learning_rate": 6.802504599736085e-06, "loss": 1.0183, "step": 4908 }, { "epoch": 0.4, "grad_norm": 3.705145761727528, "learning_rate": 6.801269769858676e-06, "loss": 0.773, "step": 4909 }, { "epoch": 0.4, "grad_norm": 3.7872201645622794, "learning_rate": 6.800034813720093e-06, "loss": 0.5834, "step": 4910 }, { "epoch": 0.4, "grad_norm": 1.5884509932720416, "learning_rate": 6.798799731406906e-06, "loss": 0.222, "step": 4911 }, { "epoch": 0.4, "grad_norm": 3.9759992294640036, "learning_rate": 6.797564523005685e-06, "loss": 0.691, "step": 4912 }, { "epoch": 0.4, "grad_norm": 4.130717058700437, "learning_rate": 6.796329188603015e-06, "loss": 1.0033, "step": 4913 }, { "epoch": 0.4, "grad_norm": 2.3091944709110024, "learning_rate": 6.795093728285485e-06, "loss": 0.5313, "step": 4914 }, { "epoch": 0.4, "grad_norm": 4.183029532855425, "learning_rate": 6.793858142139698e-06, "loss": 0.9509, "step": 4915 }, { "epoch": 0.4, "grad_norm": 3.924495972395091, "learning_rate": 6.792622430252263e-06, "loss": 0.9398, "step": 4916 }, { "epoch": 0.4, "grad_norm": 4.319719235075423, "learning_rate": 6.791386592709795e-06, "loss": 0.906, "step": 4917 }, { "epoch": 0.4, "grad_norm": 3.862168877419175, "learning_rate": 6.790150629598924e-06, "loss": 0.8313, "step": 4918 }, { "epoch": 0.4, "grad_norm": 3.5343487433565732, "learning_rate": 6.788914541006284e-06, "loss": 0.8099, "step": 4919 }, { "epoch": 0.4, "grad_norm": 6.140203940052827, "learning_rate": 6.787678327018521e-06, "loss": 1.2256, "step": 4920 }, { "epoch": 0.4, "grad_norm": 5.050709535933509, "learning_rate": 6.786441987722288e-06, "loss": 0.8507, "step": 4921 }, { "epoch": 0.4, "grad_norm": 2.55419400867438, "learning_rate": 6.785205523204244e-06, "loss": 0.4897, "step": 4922 }, { "epoch": 0.4, "grad_norm": 2.2000464736683516, "learning_rate": 6.783968933551064e-06, "loss": 0.3428, "step": 4923 }, { "epoch": 0.4, "grad_norm": 5.170422918668265, "learning_rate": 6.782732218849425e-06, "loss": 1.2579, "step": 4924 }, { "epoch": 0.4, "grad_norm": 3.0477546435397898, "learning_rate": 6.781495379186016e-06, "loss": 0.7338, "step": 4925 }, { "epoch": 0.4, "grad_norm": 4.076146588457267, "learning_rate": 6.780258414647534e-06, "loss": 0.9534, "step": 4926 }, { "epoch": 0.4, "grad_norm": 3.711725811895843, "learning_rate": 6.779021325320684e-06, "loss": 0.6164, "step": 4927 }, { "epoch": 0.4, "grad_norm": 2.6802883358122496, "learning_rate": 6.7777841112921825e-06, "loss": 0.3153, "step": 4928 }, { "epoch": 0.4, "grad_norm": 2.8613066773022577, "learning_rate": 6.776546772648751e-06, "loss": 0.7101, "step": 4929 }, { "epoch": 0.4, "grad_norm": 2.823045677823471, "learning_rate": 6.775309309477123e-06, "loss": 0.401, "step": 4930 }, { "epoch": 0.4, "grad_norm": 3.2928797004917074, "learning_rate": 6.7740717218640374e-06, "loss": 0.6771, "step": 4931 }, { "epoch": 0.4, "grad_norm": 3.8684327129382496, "learning_rate": 6.772834009896248e-06, "loss": 0.9103, "step": 4932 }, { "epoch": 0.4, "grad_norm": 3.648521213162987, "learning_rate": 6.771596173660506e-06, "loss": 0.7071, "step": 4933 }, { "epoch": 0.4, "grad_norm": 3.804648349814525, "learning_rate": 6.770358213243584e-06, "loss": 1.1199, "step": 4934 }, { "epoch": 0.4, "grad_norm": 2.1312186278869145, "learning_rate": 6.769120128732256e-06, "loss": 0.3414, "step": 4935 }, { "epoch": 0.4, "grad_norm": 2.662486515215794, "learning_rate": 6.7678819202133054e-06, "loss": 0.2188, "step": 4936 }, { "epoch": 0.4, "grad_norm": 3.5205019556917874, "learning_rate": 6.766643587773527e-06, "loss": 0.5434, "step": 4937 }, { "epoch": 0.4, "grad_norm": 3.6374269679557463, "learning_rate": 6.76540513149972e-06, "loss": 0.878, "step": 4938 }, { "epoch": 0.4, "grad_norm": 4.7359192378862724, "learning_rate": 6.764166551478699e-06, "loss": 0.842, "step": 4939 }, { "epoch": 0.4, "grad_norm": 4.778568949153722, "learning_rate": 6.762927847797279e-06, "loss": 1.1776, "step": 4940 }, { "epoch": 0.4, "grad_norm": 4.048195079758534, "learning_rate": 6.761689020542288e-06, "loss": 0.6763, "step": 4941 }, { "epoch": 0.4, "grad_norm": 1.911603612677543, "learning_rate": 6.760450069800565e-06, "loss": 0.4051, "step": 4942 }, { "epoch": 0.4, "grad_norm": 3.2709982715047503, "learning_rate": 6.759210995658953e-06, "loss": 1.0407, "step": 4943 }, { "epoch": 0.4, "grad_norm": 4.101935661507507, "learning_rate": 6.757971798204307e-06, "loss": 0.6369, "step": 4944 }, { "epoch": 0.4, "grad_norm": 4.350693122627702, "learning_rate": 6.756732477523489e-06, "loss": 0.9884, "step": 4945 }, { "epoch": 0.4, "grad_norm": 3.426764541479174, "learning_rate": 6.755493033703367e-06, "loss": 0.6377, "step": 4946 }, { "epoch": 0.4, "grad_norm": 5.511797157726579, "learning_rate": 6.754253466830827e-06, "loss": 1.0663, "step": 4947 }, { "epoch": 0.4, "grad_norm": 4.986812080177097, "learning_rate": 6.753013776992752e-06, "loss": 1.2665, "step": 4948 }, { "epoch": 0.4, "grad_norm": 3.622379462419554, "learning_rate": 6.751773964276039e-06, "loss": 0.523, "step": 4949 }, { "epoch": 0.4, "grad_norm": 4.52878001591544, "learning_rate": 6.750534028767596e-06, "loss": 0.6513, "step": 4950 }, { "epoch": 0.4, "grad_norm": 5.057689733164265, "learning_rate": 6.7492939705543355e-06, "loss": 1.0922, "step": 4951 }, { "epoch": 0.4, "grad_norm": 4.326429923094338, "learning_rate": 6.748053789723181e-06, "loss": 1.0292, "step": 4952 }, { "epoch": 0.4, "grad_norm": 3.5933440981978935, "learning_rate": 6.746813486361063e-06, "loss": 0.7417, "step": 4953 }, { "epoch": 0.4, "grad_norm": 3.0535524993846352, "learning_rate": 6.745573060554922e-06, "loss": 0.6412, "step": 4954 }, { "epoch": 0.41, "grad_norm": 5.09612405979385, "learning_rate": 6.744332512391707e-06, "loss": 1.0326, "step": 4955 }, { "epoch": 0.41, "grad_norm": 3.9193694002324544, "learning_rate": 6.743091841958373e-06, "loss": 0.8915, "step": 4956 }, { "epoch": 0.41, "grad_norm": 3.4841669402172886, "learning_rate": 6.741851049341888e-06, "loss": 0.7924, "step": 4957 }, { "epoch": 0.41, "grad_norm": 4.163209288640059, "learning_rate": 6.740610134629224e-06, "loss": 1.0377, "step": 4958 }, { "epoch": 0.41, "grad_norm": 3.709614902603784, "learning_rate": 6.739369097907365e-06, "loss": 0.6028, "step": 4959 }, { "epoch": 0.41, "grad_norm": 4.164803894961905, "learning_rate": 6.7381279392633025e-06, "loss": 0.6679, "step": 4960 }, { "epoch": 0.41, "grad_norm": 3.2438255695122615, "learning_rate": 6.736886658784034e-06, "loss": 0.5955, "step": 4961 }, { "epoch": 0.41, "grad_norm": 5.618511131153098, "learning_rate": 6.735645256556572e-06, "loss": 1.2125, "step": 4962 }, { "epoch": 0.41, "grad_norm": 3.0953174041968565, "learning_rate": 6.734403732667931e-06, "loss": 0.6487, "step": 4963 }, { "epoch": 0.41, "grad_norm": 3.3826746576217044, "learning_rate": 6.733162087205135e-06, "loss": 0.7166, "step": 4964 }, { "epoch": 0.41, "grad_norm": 3.7470232526694387, "learning_rate": 6.731920320255221e-06, "loss": 0.7716, "step": 4965 }, { "epoch": 0.41, "grad_norm": 3.0457182226061286, "learning_rate": 6.730678431905228e-06, "loss": 0.7626, "step": 4966 }, { "epoch": 0.41, "grad_norm": 4.626770885537802, "learning_rate": 6.7294364222422104e-06, "loss": 1.2037, "step": 4967 }, { "epoch": 0.41, "grad_norm": 4.745128912279462, "learning_rate": 6.728194291353226e-06, "loss": 1.2067, "step": 4968 }, { "epoch": 0.41, "grad_norm": 2.6009434566161396, "learning_rate": 6.7269520393253414e-06, "loss": 0.3551, "step": 4969 }, { "epoch": 0.41, "grad_norm": 4.0433053176479525, "learning_rate": 6.725709666245637e-06, "loss": 0.8137, "step": 4970 }, { "epoch": 0.41, "grad_norm": 2.612255664240316, "learning_rate": 6.7244671722011946e-06, "loss": 0.5242, "step": 4971 }, { "epoch": 0.41, "grad_norm": 5.227490852490809, "learning_rate": 6.723224557279107e-06, "loss": 1.0358, "step": 4972 }, { "epoch": 0.41, "grad_norm": 3.9207008712420093, "learning_rate": 6.721981821566476e-06, "loss": 0.8894, "step": 4973 }, { "epoch": 0.41, "grad_norm": 3.6462179783860873, "learning_rate": 6.7207389651504175e-06, "loss": 1.1477, "step": 4974 }, { "epoch": 0.41, "grad_norm": 3.202301954200226, "learning_rate": 6.719495988118043e-06, "loss": 0.7103, "step": 4975 }, { "epoch": 0.41, "grad_norm": 2.6300639065375386, "learning_rate": 6.718252890556485e-06, "loss": 0.5941, "step": 4976 }, { "epoch": 0.41, "grad_norm": 2.548896662632624, "learning_rate": 6.717009672552877e-06, "loss": 0.6483, "step": 4977 }, { "epoch": 0.41, "grad_norm": 3.513029543800947, "learning_rate": 6.715766334194362e-06, "loss": 0.7638, "step": 4978 }, { "epoch": 0.41, "grad_norm": 4.354263444583725, "learning_rate": 6.714522875568095e-06, "loss": 0.8984, "step": 4979 }, { "epoch": 0.41, "grad_norm": 2.937833410720314, "learning_rate": 6.713279296761237e-06, "loss": 0.4593, "step": 4980 }, { "epoch": 0.41, "grad_norm": 2.705053803134201, "learning_rate": 6.712035597860955e-06, "loss": 0.6847, "step": 4981 }, { "epoch": 0.41, "grad_norm": 4.107071080317969, "learning_rate": 6.710791778954429e-06, "loss": 1.013, "step": 4982 }, { "epoch": 0.41, "grad_norm": 4.549944846724847, "learning_rate": 6.709547840128844e-06, "loss": 0.8946, "step": 4983 }, { "epoch": 0.41, "grad_norm": 9.743906409309483, "learning_rate": 6.708303781471396e-06, "loss": 0.6407, "step": 4984 }, { "epoch": 0.41, "grad_norm": 3.9118164028408535, "learning_rate": 6.707059603069288e-06, "loss": 0.947, "step": 4985 }, { "epoch": 0.41, "grad_norm": 4.0791859941056, "learning_rate": 6.705815305009731e-06, "loss": 0.6561, "step": 4986 }, { "epoch": 0.41, "grad_norm": 2.574974857317388, "learning_rate": 6.7045708873799435e-06, "loss": 0.6531, "step": 4987 }, { "epoch": 0.41, "grad_norm": 4.563827132306292, "learning_rate": 6.703326350267157e-06, "loss": 1.0643, "step": 4988 }, { "epoch": 0.41, "grad_norm": 3.784500592588589, "learning_rate": 6.7020816937586046e-06, "loss": 0.7969, "step": 4989 }, { "epoch": 0.41, "grad_norm": 3.9799773996493015, "learning_rate": 6.7008369179415324e-06, "loss": 0.7725, "step": 4990 }, { "epoch": 0.41, "grad_norm": 3.510790901955249, "learning_rate": 6.699592022903197e-06, "loss": 0.9653, "step": 4991 }, { "epoch": 0.41, "grad_norm": 3.180373643015554, "learning_rate": 6.698347008730854e-06, "loss": 0.676, "step": 4992 }, { "epoch": 0.41, "grad_norm": 2.9645250432962915, "learning_rate": 6.697101875511779e-06, "loss": 0.6309, "step": 4993 }, { "epoch": 0.41, "grad_norm": 3.2167881441671975, "learning_rate": 6.695856623333249e-06, "loss": 0.5562, "step": 4994 }, { "epoch": 0.41, "grad_norm": 1.7854948379887672, "learning_rate": 6.694611252282549e-06, "loss": 0.3688, "step": 4995 }, { "epoch": 0.41, "grad_norm": 4.119302845105482, "learning_rate": 6.693365762446975e-06, "loss": 1.0407, "step": 4996 }, { "epoch": 0.41, "grad_norm": 4.19548604602088, "learning_rate": 6.692120153913831e-06, "loss": 0.5511, "step": 4997 }, { "epoch": 0.41, "grad_norm": 3.9396715204049366, "learning_rate": 6.690874426770428e-06, "loss": 0.7435, "step": 4998 }, { "epoch": 0.41, "grad_norm": 5.067757458057823, "learning_rate": 6.6896285811040865e-06, "loss": 0.8161, "step": 4999 }, { "epoch": 0.41, "grad_norm": 4.027032846338074, "learning_rate": 6.688382617002135e-06, "loss": 1.0669, "step": 5000 }, { "epoch": 0.41, "grad_norm": 3.804959960690094, "learning_rate": 6.687136534551909e-06, "loss": 1.174, "step": 5001 }, { "epoch": 0.41, "grad_norm": 3.4409453582294325, "learning_rate": 6.685890333840757e-06, "loss": 0.5652, "step": 5002 }, { "epoch": 0.41, "grad_norm": 3.6361198348967276, "learning_rate": 6.6846440149560276e-06, "loss": 0.7007, "step": 5003 }, { "epoch": 0.41, "grad_norm": 4.93626377499621, "learning_rate": 6.683397577985084e-06, "loss": 0.9234, "step": 5004 }, { "epoch": 0.41, "grad_norm": 3.49474660728893, "learning_rate": 6.6821510230152975e-06, "loss": 0.4029, "step": 5005 }, { "epoch": 0.41, "grad_norm": 2.615337291292612, "learning_rate": 6.680904350134044e-06, "loss": 0.5586, "step": 5006 }, { "epoch": 0.41, "grad_norm": 1.4573510011122839, "learning_rate": 6.679657559428712e-06, "loss": 0.4093, "step": 5007 }, { "epoch": 0.41, "grad_norm": 4.085463953356308, "learning_rate": 6.678410650986694e-06, "loss": 0.7689, "step": 5008 }, { "epoch": 0.41, "grad_norm": 2.620326708654164, "learning_rate": 6.677163624895393e-06, "loss": 0.4192, "step": 5009 }, { "epoch": 0.41, "grad_norm": 4.145993087796699, "learning_rate": 6.6759164812422225e-06, "loss": 0.6969, "step": 5010 }, { "epoch": 0.41, "grad_norm": 4.507549152610467, "learning_rate": 6.674669220114601e-06, "loss": 1.1422, "step": 5011 }, { "epoch": 0.41, "grad_norm": 3.4986053398734898, "learning_rate": 6.673421841599954e-06, "loss": 0.6505, "step": 5012 }, { "epoch": 0.41, "grad_norm": 2.8082322958596393, "learning_rate": 6.672174345785718e-06, "loss": 0.3428, "step": 5013 }, { "epoch": 0.41, "grad_norm": 3.544467121237856, "learning_rate": 6.6709267327593396e-06, "loss": 0.625, "step": 5014 }, { "epoch": 0.41, "grad_norm": 3.9603559962796795, "learning_rate": 6.669679002608267e-06, "loss": 0.6185, "step": 5015 }, { "epoch": 0.41, "grad_norm": 4.217658841777538, "learning_rate": 6.668431155419963e-06, "loss": 0.9385, "step": 5016 }, { "epoch": 0.41, "grad_norm": 3.1587212821777464, "learning_rate": 6.6671831912818985e-06, "loss": 0.5103, "step": 5017 }, { "epoch": 0.41, "grad_norm": 1.0508066768483084, "learning_rate": 6.6659351102815475e-06, "loss": 0.1753, "step": 5018 }, { "epoch": 0.41, "grad_norm": 3.1266872495955336, "learning_rate": 6.664686912506393e-06, "loss": 0.3366, "step": 5019 }, { "epoch": 0.41, "grad_norm": 3.31249644910684, "learning_rate": 6.663438598043932e-06, "loss": 0.6046, "step": 5020 }, { "epoch": 0.41, "grad_norm": 3.1792768803257747, "learning_rate": 6.662190166981665e-06, "loss": 0.8227, "step": 5021 }, { "epoch": 0.41, "grad_norm": 2.0539249160343376, "learning_rate": 6.6609416194071e-06, "loss": 0.3148, "step": 5022 }, { "epoch": 0.41, "grad_norm": 2.899219170184907, "learning_rate": 6.659692955407757e-06, "loss": 0.6692, "step": 5023 }, { "epoch": 0.41, "grad_norm": 4.542525499890373, "learning_rate": 6.65844417507116e-06, "loss": 0.9865, "step": 5024 }, { "epoch": 0.41, "grad_norm": 2.938901666456711, "learning_rate": 6.657195278484845e-06, "loss": 0.8797, "step": 5025 }, { "epoch": 0.41, "grad_norm": 3.461312687276681, "learning_rate": 6.6559462657363525e-06, "loss": 0.8987, "step": 5026 }, { "epoch": 0.41, "grad_norm": 3.123690030958761, "learning_rate": 6.654697136913233e-06, "loss": 0.9183, "step": 5027 }, { "epoch": 0.41, "grad_norm": 3.4249287381361073, "learning_rate": 6.653447892103047e-06, "loss": 0.4914, "step": 5028 }, { "epoch": 0.41, "grad_norm": 3.428473580726565, "learning_rate": 6.652198531393358e-06, "loss": 0.8932, "step": 5029 }, { "epoch": 0.41, "grad_norm": 5.264837748812746, "learning_rate": 6.650949054871742e-06, "loss": 1.3844, "step": 5030 }, { "epoch": 0.41, "grad_norm": 2.8526595143421294, "learning_rate": 6.649699462625784e-06, "loss": 0.6511, "step": 5031 }, { "epoch": 0.41, "grad_norm": 3.886729359381155, "learning_rate": 6.648449754743072e-06, "loss": 0.951, "step": 5032 }, { "epoch": 0.41, "grad_norm": 4.10215164550763, "learning_rate": 6.647199931311207e-06, "loss": 0.7978, "step": 5033 }, { "epoch": 0.41, "grad_norm": 3.4323786311499314, "learning_rate": 6.645949992417795e-06, "loss": 0.9083, "step": 5034 }, { "epoch": 0.41, "grad_norm": 2.0846317512520756, "learning_rate": 6.644699938150452e-06, "loss": 0.333, "step": 5035 }, { "epoch": 0.41, "grad_norm": 4.03665806830898, "learning_rate": 6.6434497685968e-06, "loss": 0.7356, "step": 5036 }, { "epoch": 0.41, "grad_norm": 3.1071041268719446, "learning_rate": 6.642199483844473e-06, "loss": 0.4502, "step": 5037 }, { "epoch": 0.41, "grad_norm": 3.7463856308086165, "learning_rate": 6.640949083981108e-06, "loss": 0.9525, "step": 5038 }, { "epoch": 0.41, "grad_norm": 3.950570863817365, "learning_rate": 6.639698569094353e-06, "loss": 1.06, "step": 5039 }, { "epoch": 0.41, "grad_norm": 4.304358286996472, "learning_rate": 6.638447939271866e-06, "loss": 0.7415, "step": 5040 }, { "epoch": 0.41, "grad_norm": 4.653097435243491, "learning_rate": 6.637197194601309e-06, "loss": 1.3601, "step": 5041 }, { "epoch": 0.41, "grad_norm": 3.391416070719546, "learning_rate": 6.635946335170352e-06, "loss": 0.8694, "step": 5042 }, { "epoch": 0.41, "grad_norm": 2.807569406655368, "learning_rate": 6.634695361066679e-06, "loss": 0.643, "step": 5043 }, { "epoch": 0.41, "grad_norm": 3.5645027321889433, "learning_rate": 6.633444272377974e-06, "loss": 0.7314, "step": 5044 }, { "epoch": 0.41, "grad_norm": 1.8905815390481095, "learning_rate": 6.632193069191934e-06, "loss": 0.4264, "step": 5045 }, { "epoch": 0.41, "grad_norm": 4.678692969410927, "learning_rate": 6.630941751596264e-06, "loss": 0.9051, "step": 5046 }, { "epoch": 0.41, "grad_norm": 2.0461687288099317, "learning_rate": 6.629690319678674e-06, "loss": 0.3569, "step": 5047 }, { "epoch": 0.41, "grad_norm": 3.4543454838653753, "learning_rate": 6.6284387735268865e-06, "loss": 0.8512, "step": 5048 }, { "epoch": 0.41, "grad_norm": 3.1412107825446887, "learning_rate": 6.627187113228627e-06, "loss": 0.7086, "step": 5049 }, { "epoch": 0.41, "grad_norm": 2.3819860329330944, "learning_rate": 6.625935338871632e-06, "loss": 0.4417, "step": 5050 }, { "epoch": 0.41, "grad_norm": 3.70029031761103, "learning_rate": 6.624683450543647e-06, "loss": 0.7293, "step": 5051 }, { "epoch": 0.41, "grad_norm": 2.5089283676811505, "learning_rate": 6.623431448332421e-06, "loss": 0.3025, "step": 5052 }, { "epoch": 0.41, "grad_norm": 2.9728192523556984, "learning_rate": 6.622179332325718e-06, "loss": 0.7548, "step": 5053 }, { "epoch": 0.41, "grad_norm": 2.8230702830268473, "learning_rate": 6.620927102611302e-06, "loss": 0.7466, "step": 5054 }, { "epoch": 0.41, "grad_norm": 4.773342740786333, "learning_rate": 6.619674759276951e-06, "loss": 1.38, "step": 5055 }, { "epoch": 0.41, "grad_norm": 4.359982031316425, "learning_rate": 6.6184223024104474e-06, "loss": 1.1946, "step": 5056 }, { "epoch": 0.41, "grad_norm": 4.312823363429766, "learning_rate": 6.6171697320995855e-06, "loss": 1.1666, "step": 5057 }, { "epoch": 0.41, "grad_norm": 4.862686956689697, "learning_rate": 6.615917048432161e-06, "loss": 1.3333, "step": 5058 }, { "epoch": 0.41, "grad_norm": 5.1330910039124475, "learning_rate": 6.614664251495986e-06, "loss": 1.6148, "step": 5059 }, { "epoch": 0.41, "grad_norm": 3.906527461127112, "learning_rate": 6.613411341378872e-06, "loss": 0.7501, "step": 5060 }, { "epoch": 0.41, "grad_norm": 4.627117957234848, "learning_rate": 6.6121583181686466e-06, "loss": 1.197, "step": 5061 }, { "epoch": 0.41, "grad_norm": 4.1981226308634, "learning_rate": 6.610905181953138e-06, "loss": 0.9891, "step": 5062 }, { "epoch": 0.41, "grad_norm": 3.7675032094973404, "learning_rate": 6.609651932820187e-06, "loss": 0.4652, "step": 5063 }, { "epoch": 0.41, "grad_norm": 4.3054489860527765, "learning_rate": 6.608398570857642e-06, "loss": 0.9609, "step": 5064 }, { "epoch": 0.41, "grad_norm": 4.036451784571845, "learning_rate": 6.607145096153355e-06, "loss": 0.8836, "step": 5065 }, { "epoch": 0.41, "grad_norm": 2.7342217509523232, "learning_rate": 6.605891508795193e-06, "loss": 0.4318, "step": 5066 }, { "epoch": 0.41, "grad_norm": 4.884013713134834, "learning_rate": 6.604637808871023e-06, "loss": 0.915, "step": 5067 }, { "epoch": 0.41, "grad_norm": 2.360546766458889, "learning_rate": 6.603383996468727e-06, "loss": 0.3974, "step": 5068 }, { "epoch": 0.41, "grad_norm": 1.9703620324064468, "learning_rate": 6.602130071676191e-06, "loss": 0.4134, "step": 5069 }, { "epoch": 0.41, "grad_norm": 3.6625200237642317, "learning_rate": 6.600876034581308e-06, "loss": 0.9043, "step": 5070 }, { "epoch": 0.41, "grad_norm": 3.4910179839017763, "learning_rate": 6.599621885271984e-06, "loss": 0.6769, "step": 5071 }, { "epoch": 0.41, "grad_norm": 4.013895703767678, "learning_rate": 6.5983676238361284e-06, "loss": 1.139, "step": 5072 }, { "epoch": 0.41, "grad_norm": 2.950931574451492, "learning_rate": 6.5971132503616554e-06, "loss": 0.5599, "step": 5073 }, { "epoch": 0.41, "grad_norm": 3.736885945190558, "learning_rate": 6.5958587649364955e-06, "loss": 1.0086, "step": 5074 }, { "epoch": 0.41, "grad_norm": 4.896035336353438, "learning_rate": 6.5946041676485815e-06, "loss": 1.1428, "step": 5075 }, { "epoch": 0.41, "grad_norm": 3.677453534155014, "learning_rate": 6.593349458585855e-06, "loss": 0.864, "step": 5076 }, { "epoch": 0.41, "grad_norm": 1.7658904975909975, "learning_rate": 6.592094637836266e-06, "loss": 0.4789, "step": 5077 }, { "epoch": 0.42, "grad_norm": 3.702326669703679, "learning_rate": 6.5908397054877715e-06, "loss": 0.8829, "step": 5078 }, { "epoch": 0.42, "grad_norm": 3.204164682234299, "learning_rate": 6.589584661628338e-06, "loss": 0.7511, "step": 5079 }, { "epoch": 0.42, "grad_norm": 2.0722343576460274, "learning_rate": 6.588329506345936e-06, "loss": 0.3892, "step": 5080 }, { "epoch": 0.42, "grad_norm": 1.3100602640670984, "learning_rate": 6.587074239728549e-06, "loss": 0.2382, "step": 5081 }, { "epoch": 0.42, "grad_norm": 3.720024571628034, "learning_rate": 6.585818861864164e-06, "loss": 0.9731, "step": 5082 }, { "epoch": 0.42, "grad_norm": 4.2744365581092225, "learning_rate": 6.584563372840779e-06, "loss": 1.0198, "step": 5083 }, { "epoch": 0.42, "grad_norm": 3.324752110388792, "learning_rate": 6.583307772746397e-06, "loss": 0.5498, "step": 5084 }, { "epoch": 0.42, "grad_norm": 5.008820777817378, "learning_rate": 6.582052061669032e-06, "loss": 1.1406, "step": 5085 }, { "epoch": 0.42, "grad_norm": 3.971044782311768, "learning_rate": 6.580796239696701e-06, "loss": 1.0095, "step": 5086 }, { "epoch": 0.42, "grad_norm": 3.7736435523199208, "learning_rate": 6.579540306917434e-06, "loss": 0.9718, "step": 5087 }, { "epoch": 0.42, "grad_norm": 1.449587139629432, "learning_rate": 6.578284263419266e-06, "loss": 0.1805, "step": 5088 }, { "epoch": 0.42, "grad_norm": 4.2797921454680115, "learning_rate": 6.5770281092902385e-06, "loss": 0.7411, "step": 5089 }, { "epoch": 0.42, "grad_norm": 2.885074877905152, "learning_rate": 6.575771844618405e-06, "loss": 0.5535, "step": 5090 }, { "epoch": 0.42, "grad_norm": 3.8296362782063436, "learning_rate": 6.574515469491823e-06, "loss": 0.7637, "step": 5091 }, { "epoch": 0.42, "grad_norm": 3.861826826046879, "learning_rate": 6.573258983998558e-06, "loss": 0.612, "step": 5092 }, { "epoch": 0.42, "grad_norm": 4.555017757308554, "learning_rate": 6.572002388226686e-06, "loss": 1.2151, "step": 5093 }, { "epoch": 0.42, "grad_norm": 4.497326683154025, "learning_rate": 6.570745682264288e-06, "loss": 0.7634, "step": 5094 }, { "epoch": 0.42, "grad_norm": 3.187260057733963, "learning_rate": 6.569488866199454e-06, "loss": 0.5992, "step": 5095 }, { "epoch": 0.42, "grad_norm": 2.917652628497841, "learning_rate": 6.568231940120279e-06, "loss": 0.9032, "step": 5096 }, { "epoch": 0.42, "grad_norm": 5.302888393164354, "learning_rate": 6.566974904114871e-06, "loss": 0.9961, "step": 5097 }, { "epoch": 0.42, "grad_norm": 1.9869652572565435, "learning_rate": 6.565717758271342e-06, "loss": 0.488, "step": 5098 }, { "epoch": 0.42, "grad_norm": 3.894664576934961, "learning_rate": 6.5644605026778115e-06, "loss": 0.9216, "step": 5099 }, { "epoch": 0.42, "grad_norm": 6.1909555246544485, "learning_rate": 6.563203137422409e-06, "loss": 1.1481, "step": 5100 }, { "epoch": 0.42, "grad_norm": 4.448891006913339, "learning_rate": 6.561945662593268e-06, "loss": 1.0079, "step": 5101 }, { "epoch": 0.42, "grad_norm": 3.68256971253895, "learning_rate": 6.5606880782785365e-06, "loss": 0.9523, "step": 5102 }, { "epoch": 0.42, "grad_norm": 2.2626027984745805, "learning_rate": 6.559430384566361e-06, "loss": 0.4791, "step": 5103 }, { "epoch": 0.42, "grad_norm": 2.2812898718664627, "learning_rate": 6.558172581544904e-06, "loss": 0.6137, "step": 5104 }, { "epoch": 0.42, "grad_norm": 2.9689861187037994, "learning_rate": 6.5569146693023285e-06, "loss": 0.4125, "step": 5105 }, { "epoch": 0.42, "grad_norm": 3.3895446517519527, "learning_rate": 6.5556566479268105e-06, "loss": 0.7106, "step": 5106 }, { "epoch": 0.42, "grad_norm": 4.13831278802137, "learning_rate": 6.5543985175065315e-06, "loss": 1.0889, "step": 5107 }, { "epoch": 0.42, "grad_norm": 3.307667915755631, "learning_rate": 6.553140278129683e-06, "loss": 0.4637, "step": 5108 }, { "epoch": 0.42, "grad_norm": 4.620203642781845, "learning_rate": 6.551881929884458e-06, "loss": 1.3451, "step": 5109 }, { "epoch": 0.42, "grad_norm": 2.818227667612198, "learning_rate": 6.550623472859063e-06, "loss": 0.4817, "step": 5110 }, { "epoch": 0.42, "grad_norm": 2.744101031433427, "learning_rate": 6.549364907141713e-06, "loss": 0.4434, "step": 5111 }, { "epoch": 0.42, "grad_norm": 2.6282621358430904, "learning_rate": 6.5481062328206265e-06, "loss": 0.5363, "step": 5112 }, { "epoch": 0.42, "grad_norm": 3.7443038737454217, "learning_rate": 6.546847449984028e-06, "loss": 0.8433, "step": 5113 }, { "epoch": 0.42, "grad_norm": 3.5481004467431294, "learning_rate": 6.5455885587201574e-06, "loss": 0.5598, "step": 5114 }, { "epoch": 0.42, "grad_norm": 3.827802691559211, "learning_rate": 6.544329559117254e-06, "loss": 0.896, "step": 5115 }, { "epoch": 0.42, "grad_norm": 2.65077598090812, "learning_rate": 6.543070451263569e-06, "loss": 0.4954, "step": 5116 }, { "epoch": 0.42, "grad_norm": 3.689678199470181, "learning_rate": 6.5418112352473616e-06, "loss": 0.6008, "step": 5117 }, { "epoch": 0.42, "grad_norm": 2.3606194456689513, "learning_rate": 6.540551911156896e-06, "loss": 0.4282, "step": 5118 }, { "epoch": 0.42, "grad_norm": 3.317531169578305, "learning_rate": 6.5392924790804475e-06, "loss": 0.7708, "step": 5119 }, { "epoch": 0.42, "grad_norm": 2.7304560986881934, "learning_rate": 6.538032939106295e-06, "loss": 0.6627, "step": 5120 }, { "epoch": 0.42, "grad_norm": 2.198971090333942, "learning_rate": 6.536773291322726e-06, "loss": 0.3177, "step": 5121 }, { "epoch": 0.42, "grad_norm": 3.217417961186106, "learning_rate": 6.5355135358180365e-06, "loss": 0.5996, "step": 5122 }, { "epoch": 0.42, "grad_norm": 3.115620532143598, "learning_rate": 6.5342536726805325e-06, "loss": 0.7306, "step": 5123 }, { "epoch": 0.42, "grad_norm": 2.446784294414177, "learning_rate": 6.532993701998522e-06, "loss": 0.4508, "step": 5124 }, { "epoch": 0.42, "grad_norm": 2.8874074345338547, "learning_rate": 6.531733623860326e-06, "loss": 0.4286, "step": 5125 }, { "epoch": 0.42, "grad_norm": 4.661011299093553, "learning_rate": 6.5304734383542664e-06, "loss": 1.1787, "step": 5126 }, { "epoch": 0.42, "grad_norm": 2.880231091993628, "learning_rate": 6.5292131455686825e-06, "loss": 0.6272, "step": 5127 }, { "epoch": 0.42, "grad_norm": 4.639270364858419, "learning_rate": 6.527952745591911e-06, "loss": 1.0348, "step": 5128 }, { "epoch": 0.42, "grad_norm": 3.2309889550281254, "learning_rate": 6.526692238512301e-06, "loss": 0.7215, "step": 5129 }, { "epoch": 0.42, "grad_norm": 2.240868644277337, "learning_rate": 6.5254316244182096e-06, "loss": 0.4796, "step": 5130 }, { "epoch": 0.42, "grad_norm": 4.479293937527797, "learning_rate": 6.524170903398001e-06, "loss": 0.9256, "step": 5131 }, { "epoch": 0.42, "grad_norm": 3.3407275596290247, "learning_rate": 6.522910075540043e-06, "loss": 0.9344, "step": 5132 }, { "epoch": 0.42, "grad_norm": 2.091374171071857, "learning_rate": 6.5216491409327174e-06, "loss": 0.3943, "step": 5133 }, { "epoch": 0.42, "grad_norm": 3.700049334269497, "learning_rate": 6.5203880996644105e-06, "loss": 1.0703, "step": 5134 }, { "epoch": 0.42, "grad_norm": 2.604011669234343, "learning_rate": 6.519126951823516e-06, "loss": 0.4824, "step": 5135 }, { "epoch": 0.42, "grad_norm": 3.522206485399803, "learning_rate": 6.51786569749843e-06, "loss": 0.4807, "step": 5136 }, { "epoch": 0.42, "grad_norm": 3.886140827984647, "learning_rate": 6.516604336777565e-06, "loss": 0.8842, "step": 5137 }, { "epoch": 0.42, "grad_norm": 5.020753694642666, "learning_rate": 6.515342869749337e-06, "loss": 0.9779, "step": 5138 }, { "epoch": 0.42, "grad_norm": 2.913893544134915, "learning_rate": 6.5140812965021685e-06, "loss": 0.6798, "step": 5139 }, { "epoch": 0.42, "grad_norm": 3.839134577631026, "learning_rate": 6.512819617124491e-06, "loss": 0.9442, "step": 5140 }, { "epoch": 0.42, "grad_norm": 3.5516407268408408, "learning_rate": 6.511557831704741e-06, "loss": 0.7877, "step": 5141 }, { "epoch": 0.42, "grad_norm": 0.8731293715755902, "learning_rate": 6.510295940331367e-06, "loss": 0.1423, "step": 5142 }, { "epoch": 0.42, "grad_norm": 5.0190805563422805, "learning_rate": 6.509033943092819e-06, "loss": 0.8424, "step": 5143 }, { "epoch": 0.42, "grad_norm": 4.14674799879688, "learning_rate": 6.50777184007756e-06, "loss": 0.6919, "step": 5144 }, { "epoch": 0.42, "grad_norm": 2.113716663688054, "learning_rate": 6.506509631374056e-06, "loss": 0.419, "step": 5145 }, { "epoch": 0.42, "grad_norm": 3.937709065235011, "learning_rate": 6.5052473170707844e-06, "loss": 1.1737, "step": 5146 }, { "epoch": 0.42, "grad_norm": 4.277353913858537, "learning_rate": 6.5039848972562246e-06, "loss": 0.7087, "step": 5147 }, { "epoch": 0.42, "grad_norm": 3.3910715221425365, "learning_rate": 6.50272237201887e-06, "loss": 0.7598, "step": 5148 }, { "epoch": 0.42, "grad_norm": 3.939373388917265, "learning_rate": 6.501459741447217e-06, "loss": 0.8829, "step": 5149 }, { "epoch": 0.42, "grad_norm": 3.0479883517194137, "learning_rate": 6.500197005629772e-06, "loss": 0.5932, "step": 5150 }, { "epoch": 0.42, "grad_norm": 4.306912013821991, "learning_rate": 6.498934164655044e-06, "loss": 1.0467, "step": 5151 }, { "epoch": 0.42, "grad_norm": 2.8791463716466894, "learning_rate": 6.4976712186115545e-06, "loss": 0.5539, "step": 5152 }, { "epoch": 0.42, "grad_norm": 4.319894481396109, "learning_rate": 6.49640816758783e-06, "loss": 0.813, "step": 5153 }, { "epoch": 0.42, "grad_norm": 3.2850483337946716, "learning_rate": 6.495145011672406e-06, "loss": 0.8408, "step": 5154 }, { "epoch": 0.42, "grad_norm": 3.7827482983780927, "learning_rate": 6.493881750953823e-06, "loss": 0.9218, "step": 5155 }, { "epoch": 0.42, "grad_norm": 3.6812886117281316, "learning_rate": 6.49261838552063e-06, "loss": 0.7084, "step": 5156 }, { "epoch": 0.42, "grad_norm": 3.422633886344834, "learning_rate": 6.491354915461387e-06, "loss": 0.907, "step": 5157 }, { "epoch": 0.42, "grad_norm": 3.8343098758964165, "learning_rate": 6.490091340864654e-06, "loss": 1.0196, "step": 5158 }, { "epoch": 0.42, "grad_norm": 4.662496099515557, "learning_rate": 6.488827661819002e-06, "loss": 0.9483, "step": 5159 }, { "epoch": 0.42, "grad_norm": 5.168576346105122, "learning_rate": 6.48756387841301e-06, "loss": 1.0537, "step": 5160 }, { "epoch": 0.42, "grad_norm": 4.17476297436785, "learning_rate": 6.486299990735263e-06, "loss": 1.0399, "step": 5161 }, { "epoch": 0.42, "grad_norm": 2.3953438108264753, "learning_rate": 6.485035998874356e-06, "loss": 0.3847, "step": 5162 }, { "epoch": 0.42, "grad_norm": 4.156931565798611, "learning_rate": 6.48377190291889e-06, "loss": 0.9419, "step": 5163 }, { "epoch": 0.42, "grad_norm": 3.7204080529191734, "learning_rate": 6.482507702957469e-06, "loss": 0.804, "step": 5164 }, { "epoch": 0.42, "grad_norm": 4.306466886058456, "learning_rate": 6.481243399078712e-06, "loss": 0.8862, "step": 5165 }, { "epoch": 0.42, "grad_norm": 1.9172894698382654, "learning_rate": 6.479978991371239e-06, "loss": 0.3973, "step": 5166 }, { "epoch": 0.42, "grad_norm": 3.6456024896313104, "learning_rate": 6.478714479923677e-06, "loss": 0.7912, "step": 5167 }, { "epoch": 0.42, "grad_norm": 1.752028880061259, "learning_rate": 6.4774498648246675e-06, "loss": 0.3674, "step": 5168 }, { "epoch": 0.42, "grad_norm": 3.2718369483666523, "learning_rate": 6.4761851461628514e-06, "loss": 0.5094, "step": 5169 }, { "epoch": 0.42, "grad_norm": 3.5861334535536775, "learning_rate": 6.47492032402688e-06, "loss": 0.8346, "step": 5170 }, { "epoch": 0.42, "grad_norm": 3.514568973146496, "learning_rate": 6.473655398505414e-06, "loss": 0.9248, "step": 5171 }, { "epoch": 0.42, "grad_norm": 5.151764429495408, "learning_rate": 6.472390369687118e-06, "loss": 0.8877, "step": 5172 }, { "epoch": 0.42, "grad_norm": 2.7227006795204702, "learning_rate": 6.471125237660665e-06, "loss": 0.532, "step": 5173 }, { "epoch": 0.42, "grad_norm": 4.548160455981264, "learning_rate": 6.469860002514736e-06, "loss": 0.7771, "step": 5174 }, { "epoch": 0.42, "grad_norm": 2.223658539674634, "learning_rate": 6.468594664338016e-06, "loss": 0.4935, "step": 5175 }, { "epoch": 0.42, "grad_norm": 1.4522936109620626, "learning_rate": 6.467329223219201e-06, "loss": 0.2192, "step": 5176 }, { "epoch": 0.42, "grad_norm": 4.913587802323358, "learning_rate": 6.4660636792469955e-06, "loss": 1.3289, "step": 5177 }, { "epoch": 0.42, "grad_norm": 3.732764437814705, "learning_rate": 6.464798032510104e-06, "loss": 0.9273, "step": 5178 }, { "epoch": 0.42, "grad_norm": 3.9186169441628973, "learning_rate": 6.4635322830972465e-06, "loss": 0.9121, "step": 5179 }, { "epoch": 0.42, "grad_norm": 4.9273731689932845, "learning_rate": 6.462266431097146e-06, "loss": 0.7626, "step": 5180 }, { "epoch": 0.42, "grad_norm": 4.427628339205233, "learning_rate": 6.461000476598532e-06, "loss": 1.3493, "step": 5181 }, { "epoch": 0.42, "grad_norm": 2.0967838122151363, "learning_rate": 6.459734419690143e-06, "loss": 0.2514, "step": 5182 }, { "epoch": 0.42, "grad_norm": 1.88698206894186, "learning_rate": 6.458468260460724e-06, "loss": 0.4262, "step": 5183 }, { "epoch": 0.42, "grad_norm": 3.9352789215009603, "learning_rate": 6.457201998999025e-06, "loss": 1.0131, "step": 5184 }, { "epoch": 0.42, "grad_norm": 3.9065289517117976, "learning_rate": 6.455935635393811e-06, "loss": 0.7477, "step": 5185 }, { "epoch": 0.42, "grad_norm": 3.289868895267918, "learning_rate": 6.454669169733843e-06, "loss": 0.8929, "step": 5186 }, { "epoch": 0.42, "grad_norm": 2.8693499507499465, "learning_rate": 6.4534026021078966e-06, "loss": 0.5043, "step": 5187 }, { "epoch": 0.42, "grad_norm": 5.947055324633199, "learning_rate": 6.452135932604755e-06, "loss": 1.3338, "step": 5188 }, { "epoch": 0.42, "grad_norm": 4.990863485491423, "learning_rate": 6.450869161313205e-06, "loss": 1.1023, "step": 5189 }, { "epoch": 0.42, "grad_norm": 2.6732958963792015, "learning_rate": 6.4496022883220376e-06, "loss": 0.4853, "step": 5190 }, { "epoch": 0.42, "grad_norm": 4.5877289700187065, "learning_rate": 6.448335313720061e-06, "loss": 1.2412, "step": 5191 }, { "epoch": 0.42, "grad_norm": 3.922699463637487, "learning_rate": 6.447068237596081e-06, "loss": 0.7607, "step": 5192 }, { "epoch": 0.42, "grad_norm": 2.6481712138131246, "learning_rate": 6.445801060038915e-06, "loss": 0.5285, "step": 5193 }, { "epoch": 0.42, "grad_norm": 4.319125009354408, "learning_rate": 6.444533781137387e-06, "loss": 0.9255, "step": 5194 }, { "epoch": 0.42, "grad_norm": 3.0171710686961, "learning_rate": 6.443266400980328e-06, "loss": 0.8706, "step": 5195 }, { "epoch": 0.42, "grad_norm": 4.344644252269986, "learning_rate": 6.441998919656575e-06, "loss": 1.2445, "step": 5196 }, { "epoch": 0.42, "grad_norm": 2.909068690829836, "learning_rate": 6.440731337254975e-06, "loss": 0.7838, "step": 5197 }, { "epoch": 0.42, "grad_norm": 1.7922868575018684, "learning_rate": 6.439463653864376e-06, "loss": 0.318, "step": 5198 }, { "epoch": 0.42, "grad_norm": 4.713543045431059, "learning_rate": 6.43819586957364e-06, "loss": 0.693, "step": 5199 }, { "epoch": 0.43, "grad_norm": 3.86145422753484, "learning_rate": 6.436927984471634e-06, "loss": 1.1019, "step": 5200 }, { "epoch": 0.43, "grad_norm": 2.9079994710944934, "learning_rate": 6.435659998647228e-06, "loss": 0.8679, "step": 5201 }, { "epoch": 0.43, "grad_norm": 4.172939734126654, "learning_rate": 6.434391912189304e-06, "loss": 0.6581, "step": 5202 }, { "epoch": 0.43, "grad_norm": 2.971402010277261, "learning_rate": 6.433123725186752e-06, "loss": 0.8514, "step": 5203 }, { "epoch": 0.43, "grad_norm": 4.570102971289309, "learning_rate": 6.431855437728463e-06, "loss": 1.2454, "step": 5204 }, { "epoch": 0.43, "grad_norm": 2.120223974832511, "learning_rate": 6.430587049903336e-06, "loss": 0.743, "step": 5205 }, { "epoch": 0.43, "grad_norm": 4.162178464204561, "learning_rate": 6.429318561800286e-06, "loss": 1.0354, "step": 5206 }, { "epoch": 0.43, "grad_norm": 3.501800227045963, "learning_rate": 6.428049973508225e-06, "loss": 0.7937, "step": 5207 }, { "epoch": 0.43, "grad_norm": 2.94871567949659, "learning_rate": 6.426781285116075e-06, "loss": 0.6766, "step": 5208 }, { "epoch": 0.43, "grad_norm": 2.466894583246494, "learning_rate": 6.4255124967127665e-06, "loss": 0.4047, "step": 5209 }, { "epoch": 0.43, "grad_norm": 5.43354768501383, "learning_rate": 6.424243608387235e-06, "loss": 0.734, "step": 5210 }, { "epoch": 0.43, "grad_norm": 5.646302110542977, "learning_rate": 6.422974620228426e-06, "loss": 1.5648, "step": 5211 }, { "epoch": 0.43, "grad_norm": 2.5900228242446808, "learning_rate": 6.421705532325289e-06, "loss": 0.6069, "step": 5212 }, { "epoch": 0.43, "grad_norm": 4.402590196811873, "learning_rate": 6.420436344766781e-06, "loss": 0.8138, "step": 5213 }, { "epoch": 0.43, "grad_norm": 4.417897697444654, "learning_rate": 6.419167057641868e-06, "loss": 0.6705, "step": 5214 }, { "epoch": 0.43, "grad_norm": 2.35920113079431, "learning_rate": 6.417897671039519e-06, "loss": 0.4583, "step": 5215 }, { "epoch": 0.43, "grad_norm": 3.834644706331093, "learning_rate": 6.4166281850487135e-06, "loss": 0.9184, "step": 5216 }, { "epoch": 0.43, "grad_norm": 4.201190354437537, "learning_rate": 6.415358599758439e-06, "loss": 0.6057, "step": 5217 }, { "epoch": 0.43, "grad_norm": 2.498817582268024, "learning_rate": 6.414088915257686e-06, "loss": 0.4364, "step": 5218 }, { "epoch": 0.43, "grad_norm": 2.101896399093929, "learning_rate": 6.4128191316354525e-06, "loss": 0.4627, "step": 5219 }, { "epoch": 0.43, "grad_norm": 3.7323505431124886, "learning_rate": 6.411549248980748e-06, "loss": 0.8131, "step": 5220 }, { "epoch": 0.43, "grad_norm": 3.0423803714767366, "learning_rate": 6.410279267382585e-06, "loss": 0.7822, "step": 5221 }, { "epoch": 0.43, "grad_norm": 2.115267896822398, "learning_rate": 6.409009186929982e-06, "loss": 0.4279, "step": 5222 }, { "epoch": 0.43, "grad_norm": 2.012951896962024, "learning_rate": 6.407739007711969e-06, "loss": 0.3176, "step": 5223 }, { "epoch": 0.43, "grad_norm": 2.608909053480998, "learning_rate": 6.406468729817574e-06, "loss": 0.5814, "step": 5224 }, { "epoch": 0.43, "grad_norm": 3.4885622809302306, "learning_rate": 6.405198353335844e-06, "loss": 0.7136, "step": 5225 }, { "epoch": 0.43, "grad_norm": 3.275506002331668, "learning_rate": 6.403927878355825e-06, "loss": 0.5962, "step": 5226 }, { "epoch": 0.43, "grad_norm": 3.760382784091925, "learning_rate": 6.402657304966572e-06, "loss": 1.0417, "step": 5227 }, { "epoch": 0.43, "grad_norm": 3.418888169136374, "learning_rate": 6.401386633257146e-06, "loss": 0.6734, "step": 5228 }, { "epoch": 0.43, "grad_norm": 2.440478521109242, "learning_rate": 6.400115863316616e-06, "loss": 0.4783, "step": 5229 }, { "epoch": 0.43, "grad_norm": 4.641165967006061, "learning_rate": 6.398844995234057e-06, "loss": 0.8536, "step": 5230 }, { "epoch": 0.43, "grad_norm": 3.7431611684166457, "learning_rate": 6.397574029098552e-06, "loss": 0.5524, "step": 5231 }, { "epoch": 0.43, "grad_norm": 3.040278877345919, "learning_rate": 6.39630296499919e-06, "loss": 0.9451, "step": 5232 }, { "epoch": 0.43, "grad_norm": 3.409153375979241, "learning_rate": 6.3950318030250654e-06, "loss": 0.6427, "step": 5233 }, { "epoch": 0.43, "grad_norm": 3.7850491565280184, "learning_rate": 6.393760543265285e-06, "loss": 0.8784, "step": 5234 }, { "epoch": 0.43, "grad_norm": 3.452661896158122, "learning_rate": 6.392489185808954e-06, "loss": 0.7516, "step": 5235 }, { "epoch": 0.43, "grad_norm": 4.916918257978019, "learning_rate": 6.391217730745193e-06, "loss": 0.9683, "step": 5236 }, { "epoch": 0.43, "grad_norm": 4.432841017110807, "learning_rate": 6.3899461781631225e-06, "loss": 1.1874, "step": 5237 }, { "epoch": 0.43, "grad_norm": 3.5810353135567894, "learning_rate": 6.388674528151875e-06, "loss": 0.8334, "step": 5238 }, { "epoch": 0.43, "grad_norm": 3.7601699256718675, "learning_rate": 6.387402780800585e-06, "loss": 0.7938, "step": 5239 }, { "epoch": 0.43, "grad_norm": 3.490327124441589, "learning_rate": 6.386130936198399e-06, "loss": 0.8184, "step": 5240 }, { "epoch": 0.43, "grad_norm": 3.657950700172878, "learning_rate": 6.384858994434467e-06, "loss": 0.7991, "step": 5241 }, { "epoch": 0.43, "grad_norm": 5.360273681567106, "learning_rate": 6.383586955597945e-06, "loss": 1.0199, "step": 5242 }, { "epoch": 0.43, "grad_norm": 3.221993123701766, "learning_rate": 6.382314819778e-06, "loss": 0.4936, "step": 5243 }, { "epoch": 0.43, "grad_norm": 3.0920500320981583, "learning_rate": 6.381042587063803e-06, "loss": 0.5727, "step": 5244 }, { "epoch": 0.43, "grad_norm": 1.2347677618701909, "learning_rate": 6.379770257544529e-06, "loss": 0.1976, "step": 5245 }, { "epoch": 0.43, "grad_norm": 3.033352049988148, "learning_rate": 6.378497831309367e-06, "loss": 0.6842, "step": 5246 }, { "epoch": 0.43, "grad_norm": 4.4052660445723, "learning_rate": 6.377225308447503e-06, "loss": 0.8753, "step": 5247 }, { "epoch": 0.43, "grad_norm": 4.323082370180678, "learning_rate": 6.375952689048141e-06, "loss": 1.2976, "step": 5248 }, { "epoch": 0.43, "grad_norm": 2.142553012636817, "learning_rate": 6.374679973200484e-06, "loss": 0.3026, "step": 5249 }, { "epoch": 0.43, "grad_norm": 2.790216347839119, "learning_rate": 6.373407160993742e-06, "loss": 0.6913, "step": 5250 }, { "epoch": 0.43, "grad_norm": 3.9260276299107923, "learning_rate": 6.372134252517136e-06, "loss": 1.0458, "step": 5251 }, { "epoch": 0.43, "grad_norm": 3.4639919518325244, "learning_rate": 6.370861247859891e-06, "loss": 0.4821, "step": 5252 }, { "epoch": 0.43, "grad_norm": 3.3230537659127353, "learning_rate": 6.369588147111236e-06, "loss": 0.5219, "step": 5253 }, { "epoch": 0.43, "grad_norm": 4.368258655052682, "learning_rate": 6.368314950360416e-06, "loss": 0.8872, "step": 5254 }, { "epoch": 0.43, "grad_norm": 2.3266571921352304, "learning_rate": 6.36704165769667e-06, "loss": 0.3647, "step": 5255 }, { "epoch": 0.43, "grad_norm": 3.475615810880121, "learning_rate": 6.365768269209254e-06, "loss": 1.0257, "step": 5256 }, { "epoch": 0.43, "grad_norm": 4.926406979750642, "learning_rate": 6.364494784987427e-06, "loss": 1.1206, "step": 5257 }, { "epoch": 0.43, "grad_norm": 2.387279390308762, "learning_rate": 6.363221205120452e-06, "loss": 0.5091, "step": 5258 }, { "epoch": 0.43, "grad_norm": 4.051417370757732, "learning_rate": 6.361947529697605e-06, "loss": 1.0578, "step": 5259 }, { "epoch": 0.43, "grad_norm": 4.161788346493241, "learning_rate": 6.360673758808163e-06, "loss": 0.781, "step": 5260 }, { "epoch": 0.43, "grad_norm": 3.948150103760491, "learning_rate": 6.359399892541412e-06, "loss": 0.726, "step": 5261 }, { "epoch": 0.43, "grad_norm": 3.113461144271678, "learning_rate": 6.358125930986645e-06, "loss": 0.3269, "step": 5262 }, { "epoch": 0.43, "grad_norm": 2.04931175796063, "learning_rate": 6.356851874233161e-06, "loss": 0.5767, "step": 5263 }, { "epoch": 0.43, "grad_norm": 2.9176332686041966, "learning_rate": 6.355577722370264e-06, "loss": 0.7563, "step": 5264 }, { "epoch": 0.43, "grad_norm": 4.59725461401139, "learning_rate": 6.354303475487269e-06, "loss": 1.1339, "step": 5265 }, { "epoch": 0.43, "grad_norm": 2.889559779448481, "learning_rate": 6.353029133673496e-06, "loss": 0.9163, "step": 5266 }, { "epoch": 0.43, "grad_norm": 4.026137753654765, "learning_rate": 6.351754697018269e-06, "loss": 1.1465, "step": 5267 }, { "epoch": 0.43, "grad_norm": 3.1978137486717033, "learning_rate": 6.3504801656109195e-06, "loss": 0.6188, "step": 5268 }, { "epoch": 0.43, "grad_norm": 2.443129720076294, "learning_rate": 6.349205539540786e-06, "loss": 0.444, "step": 5269 }, { "epoch": 0.43, "grad_norm": 2.5323700976741597, "learning_rate": 6.3479308188972175e-06, "loss": 0.6203, "step": 5270 }, { "epoch": 0.43, "grad_norm": 4.506653631760839, "learning_rate": 6.346656003769565e-06, "loss": 0.8935, "step": 5271 }, { "epoch": 0.43, "grad_norm": 4.51929605166405, "learning_rate": 6.345381094247188e-06, "loss": 0.8745, "step": 5272 }, { "epoch": 0.43, "grad_norm": 2.7618715703681103, "learning_rate": 6.34410609041945e-06, "loss": 0.7381, "step": 5273 }, { "epoch": 0.43, "grad_norm": 2.9031600701897586, "learning_rate": 6.342830992375725e-06, "loss": 0.8362, "step": 5274 }, { "epoch": 0.43, "grad_norm": 4.069650157218168, "learning_rate": 6.341555800205392e-06, "loss": 1.0269, "step": 5275 }, { "epoch": 0.43, "grad_norm": 2.452049394631239, "learning_rate": 6.340280513997835e-06, "loss": 0.2514, "step": 5276 }, { "epoch": 0.43, "grad_norm": 4.893706571124117, "learning_rate": 6.3390051338424485e-06, "loss": 1.5367, "step": 5277 }, { "epoch": 0.43, "grad_norm": 3.659535833865813, "learning_rate": 6.337729659828627e-06, "loss": 0.9222, "step": 5278 }, { "epoch": 0.43, "grad_norm": 3.2514738051998213, "learning_rate": 6.33645409204578e-06, "loss": 0.7972, "step": 5279 }, { "epoch": 0.43, "grad_norm": 3.6843164318194446, "learning_rate": 6.3351784305833175e-06, "loss": 0.6152, "step": 5280 }, { "epoch": 0.43, "grad_norm": 3.5362471559949156, "learning_rate": 6.333902675530657e-06, "loss": 1.0854, "step": 5281 }, { "epoch": 0.43, "grad_norm": 3.556924473924575, "learning_rate": 6.332626826977224e-06, "loss": 1.0025, "step": 5282 }, { "epoch": 0.43, "grad_norm": 3.0381830710652, "learning_rate": 6.33135088501245e-06, "loss": 0.6271, "step": 5283 }, { "epoch": 0.43, "grad_norm": 4.457149041496543, "learning_rate": 6.330074849725774e-06, "loss": 0.7812, "step": 5284 }, { "epoch": 0.43, "grad_norm": 2.794346012715579, "learning_rate": 6.328798721206638e-06, "loss": 0.4939, "step": 5285 }, { "epoch": 0.43, "grad_norm": 2.158328439921532, "learning_rate": 6.327522499544496e-06, "loss": 0.4347, "step": 5286 }, { "epoch": 0.43, "grad_norm": 5.111621810371288, "learning_rate": 6.3262461848288034e-06, "loss": 1.3857, "step": 5287 }, { "epoch": 0.43, "grad_norm": 2.8505774832006874, "learning_rate": 6.324969777149026e-06, "loss": 0.6636, "step": 5288 }, { "epoch": 0.43, "grad_norm": 3.506604162373283, "learning_rate": 6.323693276594632e-06, "loss": 0.5305, "step": 5289 }, { "epoch": 0.43, "grad_norm": 2.8964055593354967, "learning_rate": 6.322416683255103e-06, "loss": 0.5667, "step": 5290 }, { "epoch": 0.43, "grad_norm": 2.76693745022548, "learning_rate": 6.321139997219917e-06, "loss": 0.5249, "step": 5291 }, { "epoch": 0.43, "grad_norm": 2.3431205458148905, "learning_rate": 6.319863218578568e-06, "loss": 0.6691, "step": 5292 }, { "epoch": 0.43, "grad_norm": 4.317627231571605, "learning_rate": 6.31858634742055e-06, "loss": 1.1668, "step": 5293 }, { "epoch": 0.43, "grad_norm": 6.154345697947195, "learning_rate": 6.317309383835368e-06, "loss": 1.2021, "step": 5294 }, { "epoch": 0.43, "grad_norm": 2.437607805714208, "learning_rate": 6.316032327912532e-06, "loss": 0.4886, "step": 5295 }, { "epoch": 0.43, "grad_norm": 2.6188452267712865, "learning_rate": 6.314755179741556e-06, "loss": 0.4352, "step": 5296 }, { "epoch": 0.43, "grad_norm": 3.2353476662810428, "learning_rate": 6.313477939411965e-06, "loss": 0.4746, "step": 5297 }, { "epoch": 0.43, "grad_norm": 3.243223467921312, "learning_rate": 6.312200607013287e-06, "loss": 1.0017, "step": 5298 }, { "epoch": 0.43, "grad_norm": 5.310191103900768, "learning_rate": 6.310923182635056e-06, "loss": 1.0909, "step": 5299 }, { "epoch": 0.43, "grad_norm": 3.4573373782516903, "learning_rate": 6.309645666366816e-06, "loss": 0.9911, "step": 5300 }, { "epoch": 0.43, "grad_norm": 3.5494617356408997, "learning_rate": 6.308368058298114e-06, "loss": 0.6491, "step": 5301 }, { "epoch": 0.43, "grad_norm": 1.0475818745256136, "learning_rate": 6.307090358518504e-06, "loss": 0.1533, "step": 5302 }, { "epoch": 0.43, "grad_norm": 4.288289841775113, "learning_rate": 6.30581256711755e-06, "loss": 0.9734, "step": 5303 }, { "epoch": 0.43, "grad_norm": 3.3351673026973065, "learning_rate": 6.304534684184816e-06, "loss": 0.5136, "step": 5304 }, { "epoch": 0.43, "grad_norm": 4.0374178588832, "learning_rate": 6.303256709809879e-06, "loss": 0.8494, "step": 5305 }, { "epoch": 0.43, "grad_norm": 1.5981949932303068, "learning_rate": 6.301978644082321e-06, "loss": 0.3312, "step": 5306 }, { "epoch": 0.43, "grad_norm": 1.8521095385310853, "learning_rate": 6.300700487091723e-06, "loss": 0.3675, "step": 5307 }, { "epoch": 0.43, "grad_norm": 4.542418793742306, "learning_rate": 6.299422238927683e-06, "loss": 1.0892, "step": 5308 }, { "epoch": 0.43, "grad_norm": 3.363744970945015, "learning_rate": 6.298143899679798e-06, "loss": 0.6085, "step": 5309 }, { "epoch": 0.43, "grad_norm": 3.358168052292183, "learning_rate": 6.296865469437675e-06, "loss": 0.8132, "step": 5310 }, { "epoch": 0.43, "grad_norm": 3.9215527271968007, "learning_rate": 6.295586948290928e-06, "loss": 1.1532, "step": 5311 }, { "epoch": 0.43, "grad_norm": 4.222417314593268, "learning_rate": 6.294308336329174e-06, "loss": 0.7616, "step": 5312 }, { "epoch": 0.43, "grad_norm": 2.7126852976017393, "learning_rate": 6.293029633642038e-06, "loss": 0.5614, "step": 5313 }, { "epoch": 0.43, "grad_norm": 2.361404869047012, "learning_rate": 6.291750840319152e-06, "loss": 0.495, "step": 5314 }, { "epoch": 0.43, "grad_norm": 2.676970942660408, "learning_rate": 6.2904719564501545e-06, "loss": 0.5076, "step": 5315 }, { "epoch": 0.43, "grad_norm": 3.7089502354975656, "learning_rate": 6.2891929821246875e-06, "loss": 0.7755, "step": 5316 }, { "epoch": 0.43, "grad_norm": 6.006691414153541, "learning_rate": 6.287913917432405e-06, "loss": 1.0736, "step": 5317 }, { "epoch": 0.43, "grad_norm": 3.859693190142713, "learning_rate": 6.286634762462961e-06, "loss": 0.6757, "step": 5318 }, { "epoch": 0.43, "grad_norm": 4.372561771884962, "learning_rate": 6.285355517306019e-06, "loss": 0.9634, "step": 5319 }, { "epoch": 0.43, "grad_norm": 5.077137875843918, "learning_rate": 6.2840761820512505e-06, "loss": 1.1677, "step": 5320 }, { "epoch": 0.43, "grad_norm": 1.1370738600649346, "learning_rate": 6.282796756788328e-06, "loss": 0.1414, "step": 5321 }, { "epoch": 0.43, "grad_norm": 3.220375852691493, "learning_rate": 6.281517241606938e-06, "loss": 0.4744, "step": 5322 }, { "epoch": 0.44, "grad_norm": 3.5808255674719076, "learning_rate": 6.280237636596765e-06, "loss": 0.5416, "step": 5323 }, { "epoch": 0.44, "grad_norm": 3.384436992824313, "learning_rate": 6.278957941847506e-06, "loss": 0.708, "step": 5324 }, { "epoch": 0.44, "grad_norm": 3.3301604915033787, "learning_rate": 6.27767815744886e-06, "loss": 0.8014, "step": 5325 }, { "epoch": 0.44, "grad_norm": 2.8885651689869096, "learning_rate": 6.276398283490537e-06, "loss": 0.6231, "step": 5326 }, { "epoch": 0.44, "grad_norm": 4.355467328726323, "learning_rate": 6.275118320062248e-06, "loss": 1.1165, "step": 5327 }, { "epoch": 0.44, "grad_norm": 4.197077206685935, "learning_rate": 6.273838267253716e-06, "loss": 1.0601, "step": 5328 }, { "epoch": 0.44, "grad_norm": 5.5896109313530795, "learning_rate": 6.272558125154663e-06, "loss": 1.3009, "step": 5329 }, { "epoch": 0.44, "grad_norm": 2.9650801372699567, "learning_rate": 6.271277893854825e-06, "loss": 0.6123, "step": 5330 }, { "epoch": 0.44, "grad_norm": 3.2756477223518687, "learning_rate": 6.26999757344394e-06, "loss": 0.5752, "step": 5331 }, { "epoch": 0.44, "grad_norm": 2.172137842304576, "learning_rate": 6.268717164011751e-06, "loss": 0.44, "step": 5332 }, { "epoch": 0.44, "grad_norm": 4.484492362826107, "learning_rate": 6.2674366656480105e-06, "loss": 1.3716, "step": 5333 }, { "epoch": 0.44, "grad_norm": 5.409549960962839, "learning_rate": 6.266156078442476e-06, "loss": 1.2073, "step": 5334 }, { "epoch": 0.44, "grad_norm": 2.4379919669246224, "learning_rate": 6.264875402484909e-06, "loss": 0.6082, "step": 5335 }, { "epoch": 0.44, "grad_norm": 2.4813981703057624, "learning_rate": 6.263594637865081e-06, "loss": 0.5548, "step": 5336 }, { "epoch": 0.44, "grad_norm": 5.174097987149481, "learning_rate": 6.262313784672771e-06, "loss": 0.7561, "step": 5337 }, { "epoch": 0.44, "grad_norm": 4.338098544074793, "learning_rate": 6.261032842997756e-06, "loss": 0.9866, "step": 5338 }, { "epoch": 0.44, "grad_norm": 3.5408474628918287, "learning_rate": 6.259751812929829e-06, "loss": 0.7631, "step": 5339 }, { "epoch": 0.44, "grad_norm": 3.8321054319008008, "learning_rate": 6.25847069455878e-06, "loss": 0.6125, "step": 5340 }, { "epoch": 0.44, "grad_norm": 2.8415935691356777, "learning_rate": 6.257189487974414e-06, "loss": 0.6153, "step": 5341 }, { "epoch": 0.44, "grad_norm": 4.96059793607359, "learning_rate": 6.255908193266533e-06, "loss": 0.7248, "step": 5342 }, { "epoch": 0.44, "grad_norm": 4.367845793763824, "learning_rate": 6.254626810524956e-06, "loss": 0.8438, "step": 5343 }, { "epoch": 0.44, "grad_norm": 2.9224084609222465, "learning_rate": 6.2533453398395e-06, "loss": 0.6552, "step": 5344 }, { "epoch": 0.44, "grad_norm": 3.990580969392164, "learning_rate": 6.25206378129999e-06, "loss": 0.9274, "step": 5345 }, { "epoch": 0.44, "grad_norm": 3.6908493507615985, "learning_rate": 6.250782134996257e-06, "loss": 0.6803, "step": 5346 }, { "epoch": 0.44, "grad_norm": 4.576204374110863, "learning_rate": 6.24950040101814e-06, "loss": 1.2582, "step": 5347 }, { "epoch": 0.44, "grad_norm": 1.4265933546341147, "learning_rate": 6.248218579455484e-06, "loss": 0.2474, "step": 5348 }, { "epoch": 0.44, "grad_norm": 3.1413602211082585, "learning_rate": 6.246936670398136e-06, "loss": 0.9275, "step": 5349 }, { "epoch": 0.44, "grad_norm": 3.004104808189576, "learning_rate": 6.245654673935955e-06, "loss": 0.7209, "step": 5350 }, { "epoch": 0.44, "grad_norm": 4.363007538770154, "learning_rate": 6.244372590158802e-06, "loss": 1.1466, "step": 5351 }, { "epoch": 0.44, "grad_norm": 5.362132366906052, "learning_rate": 6.243090419156547e-06, "loss": 1.5448, "step": 5352 }, { "epoch": 0.44, "grad_norm": 2.5829805256999094, "learning_rate": 6.241808161019063e-06, "loss": 0.5098, "step": 5353 }, { "epoch": 0.44, "grad_norm": 3.414527589241339, "learning_rate": 6.2405258158362315e-06, "loss": 0.7025, "step": 5354 }, { "epoch": 0.44, "grad_norm": 5.349450647296863, "learning_rate": 6.239243383697938e-06, "loss": 1.1286, "step": 5355 }, { "epoch": 0.44, "grad_norm": 3.115393927866742, "learning_rate": 6.237960864694077e-06, "loss": 0.913, "step": 5356 }, { "epoch": 0.44, "grad_norm": 3.3695423402768703, "learning_rate": 6.236678258914548e-06, "loss": 1.0175, "step": 5357 }, { "epoch": 0.44, "grad_norm": 3.415987708355679, "learning_rate": 6.235395566449253e-06, "loss": 0.9599, "step": 5358 }, { "epoch": 0.44, "grad_norm": 3.2467334640139955, "learning_rate": 6.234112787388107e-06, "loss": 0.448, "step": 5359 }, { "epoch": 0.44, "grad_norm": 3.8813058250264225, "learning_rate": 6.232829921821025e-06, "loss": 0.8177, "step": 5360 }, { "epoch": 0.44, "grad_norm": 4.876710756602804, "learning_rate": 6.2315469698379316e-06, "loss": 0.6011, "step": 5361 }, { "epoch": 0.44, "grad_norm": 2.4807173168245664, "learning_rate": 6.230263931528755e-06, "loss": 0.5266, "step": 5362 }, { "epoch": 0.44, "grad_norm": 3.781292797993209, "learning_rate": 6.2289808069834315e-06, "loss": 0.7123, "step": 5363 }, { "epoch": 0.44, "grad_norm": 2.1198830841349747, "learning_rate": 6.227697596291899e-06, "loss": 0.4698, "step": 5364 }, { "epoch": 0.44, "grad_norm": 3.6855322557577477, "learning_rate": 6.22641429954411e-06, "loss": 0.73, "step": 5365 }, { "epoch": 0.44, "grad_norm": 3.8384085488713446, "learning_rate": 6.225130916830017e-06, "loss": 0.8108, "step": 5366 }, { "epoch": 0.44, "grad_norm": 2.4435267183396716, "learning_rate": 6.223847448239577e-06, "loss": 0.5509, "step": 5367 }, { "epoch": 0.44, "grad_norm": 4.270433996869262, "learning_rate": 6.222563893862758e-06, "loss": 0.9696, "step": 5368 }, { "epoch": 0.44, "grad_norm": 2.5873863164036046, "learning_rate": 6.22128025378953e-06, "loss": 0.6469, "step": 5369 }, { "epoch": 0.44, "grad_norm": 3.631306525128808, "learning_rate": 6.219996528109872e-06, "loss": 0.6957, "step": 5370 }, { "epoch": 0.44, "grad_norm": 3.8230625464072774, "learning_rate": 6.218712716913766e-06, "loss": 0.7993, "step": 5371 }, { "epoch": 0.44, "grad_norm": 3.009789421544894, "learning_rate": 6.217428820291202e-06, "loss": 0.3, "step": 5372 }, { "epoch": 0.44, "grad_norm": 3.322712395684982, "learning_rate": 6.2161448383321766e-06, "loss": 0.7037, "step": 5373 }, { "epoch": 0.44, "grad_norm": 5.018911762920762, "learning_rate": 6.214860771126692e-06, "loss": 1.1273, "step": 5374 }, { "epoch": 0.44, "grad_norm": 5.427465808351938, "learning_rate": 6.213576618764752e-06, "loss": 1.2406, "step": 5375 }, { "epoch": 0.44, "grad_norm": 3.7019203466619572, "learning_rate": 6.2122923813363746e-06, "loss": 0.6044, "step": 5376 }, { "epoch": 0.44, "grad_norm": 3.4004793599648444, "learning_rate": 6.211008058931577e-06, "loss": 0.5663, "step": 5377 }, { "epoch": 0.44, "grad_norm": 5.592096067149256, "learning_rate": 6.209723651640383e-06, "loss": 1.1116, "step": 5378 }, { "epoch": 0.44, "grad_norm": 3.6651568911112204, "learning_rate": 6.208439159552826e-06, "loss": 0.7627, "step": 5379 }, { "epoch": 0.44, "grad_norm": 2.456885830028118, "learning_rate": 6.207154582758945e-06, "loss": 0.4029, "step": 5380 }, { "epoch": 0.44, "grad_norm": 1.2173491774046696, "learning_rate": 6.205869921348779e-06, "loss": 0.1786, "step": 5381 }, { "epoch": 0.44, "grad_norm": 3.2110055439490357, "learning_rate": 6.204585175412381e-06, "loss": 0.7859, "step": 5382 }, { "epoch": 0.44, "grad_norm": 4.813704688609762, "learning_rate": 6.203300345039804e-06, "loss": 1.3696, "step": 5383 }, { "epoch": 0.44, "grad_norm": 2.059360791876233, "learning_rate": 6.202015430321111e-06, "loss": 0.2938, "step": 5384 }, { "epoch": 0.44, "grad_norm": 4.339930811234842, "learning_rate": 6.200730431346366e-06, "loss": 1.0934, "step": 5385 }, { "epoch": 0.44, "grad_norm": 2.948427648322673, "learning_rate": 6.1994453482056436e-06, "loss": 0.642, "step": 5386 }, { "epoch": 0.44, "grad_norm": 3.2495473070357863, "learning_rate": 6.198160180989022e-06, "loss": 0.8292, "step": 5387 }, { "epoch": 0.44, "grad_norm": 3.579186010590315, "learning_rate": 6.196874929786587e-06, "loss": 0.5365, "step": 5388 }, { "epoch": 0.44, "grad_norm": 5.12675507132254, "learning_rate": 6.195589594688428e-06, "loss": 0.8934, "step": 5389 }, { "epoch": 0.44, "grad_norm": 3.3617677981629064, "learning_rate": 6.194304175784641e-06, "loss": 0.6774, "step": 5390 }, { "epoch": 0.44, "grad_norm": 4.848370066636426, "learning_rate": 6.19301867316533e-06, "loss": 1.2827, "step": 5391 }, { "epoch": 0.44, "grad_norm": 4.417159823089846, "learning_rate": 6.1917330869206015e-06, "loss": 1.1245, "step": 5392 }, { "epoch": 0.44, "grad_norm": 3.440488032675081, "learning_rate": 6.190447417140569e-06, "loss": 0.7665, "step": 5393 }, { "epoch": 0.44, "grad_norm": 3.7880391211632882, "learning_rate": 6.189161663915355e-06, "loss": 0.6494, "step": 5394 }, { "epoch": 0.44, "grad_norm": 4.784454644022496, "learning_rate": 6.187875827335082e-06, "loss": 0.8974, "step": 5395 }, { "epoch": 0.44, "grad_norm": 2.6564929135237456, "learning_rate": 6.186589907489884e-06, "loss": 0.4535, "step": 5396 }, { "epoch": 0.44, "grad_norm": 3.6595258116723364, "learning_rate": 6.185303904469898e-06, "loss": 0.7704, "step": 5397 }, { "epoch": 0.44, "grad_norm": 4.558221882463503, "learning_rate": 6.1840178183652665e-06, "loss": 0.9053, "step": 5398 }, { "epoch": 0.44, "grad_norm": 4.009016678164901, "learning_rate": 6.1827316492661395e-06, "loss": 0.934, "step": 5399 }, { "epoch": 0.44, "grad_norm": 2.933120148917259, "learning_rate": 6.181445397262671e-06, "loss": 0.5614, "step": 5400 }, { "epoch": 0.44, "grad_norm": 4.563426634437689, "learning_rate": 6.180159062445021e-06, "loss": 1.2745, "step": 5401 }, { "epoch": 0.44, "grad_norm": 3.930257212761975, "learning_rate": 6.178872644903355e-06, "loss": 0.8114, "step": 5402 }, { "epoch": 0.44, "grad_norm": 3.4791722485318206, "learning_rate": 6.177586144727851e-06, "loss": 0.85, "step": 5403 }, { "epoch": 0.44, "grad_norm": 4.404753109780831, "learning_rate": 6.17629956200868e-06, "loss": 0.9254, "step": 5404 }, { "epoch": 0.44, "grad_norm": 3.9481351872811214, "learning_rate": 6.17501289683603e-06, "loss": 0.8583, "step": 5405 }, { "epoch": 0.44, "grad_norm": 2.9426272635727204, "learning_rate": 6.17372614930009e-06, "loss": 0.9742, "step": 5406 }, { "epoch": 0.44, "grad_norm": 4.7767531685817355, "learning_rate": 6.172439319491055e-06, "loss": 1.239, "step": 5407 }, { "epoch": 0.44, "grad_norm": 4.140978720203091, "learning_rate": 6.171152407499127e-06, "loss": 1.0022, "step": 5408 }, { "epoch": 0.44, "grad_norm": 3.5839408873445144, "learning_rate": 6.16986541341451e-06, "loss": 0.4733, "step": 5409 }, { "epoch": 0.44, "grad_norm": 2.6971096050248082, "learning_rate": 6.168578337327419e-06, "loss": 0.6547, "step": 5410 }, { "epoch": 0.44, "grad_norm": 3.608322605111382, "learning_rate": 6.167291179328074e-06, "loss": 0.6168, "step": 5411 }, { "epoch": 0.44, "grad_norm": 3.6416010801486935, "learning_rate": 6.166003939506696e-06, "loss": 0.7506, "step": 5412 }, { "epoch": 0.44, "grad_norm": 4.090256949971821, "learning_rate": 6.164716617953515e-06, "loss": 0.8866, "step": 5413 }, { "epoch": 0.44, "grad_norm": 4.722911518681974, "learning_rate": 6.163429214758772e-06, "loss": 0.7578, "step": 5414 }, { "epoch": 0.44, "grad_norm": 3.9354120613448753, "learning_rate": 6.1621417300127015e-06, "loss": 0.6877, "step": 5415 }, { "epoch": 0.44, "grad_norm": 4.216105671503533, "learning_rate": 6.160854163805554e-06, "loss": 0.9651, "step": 5416 }, { "epoch": 0.44, "grad_norm": 4.631811758347738, "learning_rate": 6.159566516227582e-06, "loss": 1.1827, "step": 5417 }, { "epoch": 0.44, "grad_norm": 3.0660770200994407, "learning_rate": 6.158278787369043e-06, "loss": 0.7919, "step": 5418 }, { "epoch": 0.44, "grad_norm": 4.432485108761104, "learning_rate": 6.156990977320201e-06, "loss": 1.1963, "step": 5419 }, { "epoch": 0.44, "grad_norm": 3.591645022195573, "learning_rate": 6.155703086171328e-06, "loss": 0.9741, "step": 5420 }, { "epoch": 0.44, "grad_norm": 3.2928527957734675, "learning_rate": 6.154415114012697e-06, "loss": 0.5989, "step": 5421 }, { "epoch": 0.44, "grad_norm": 4.5588751254931115, "learning_rate": 6.1531270609345915e-06, "loss": 1.0163, "step": 5422 }, { "epoch": 0.44, "grad_norm": 4.696751098953714, "learning_rate": 6.151838927027299e-06, "loss": 1.1034, "step": 5423 }, { "epoch": 0.44, "grad_norm": 5.01743567800604, "learning_rate": 6.150550712381109e-06, "loss": 0.7967, "step": 5424 }, { "epoch": 0.44, "grad_norm": 4.214745265293675, "learning_rate": 6.149262417086321e-06, "loss": 0.9011, "step": 5425 }, { "epoch": 0.44, "grad_norm": 3.5396276536538127, "learning_rate": 6.14797404123324e-06, "loss": 0.9816, "step": 5426 }, { "epoch": 0.44, "grad_norm": 2.9307304526840565, "learning_rate": 6.146685584912174e-06, "loss": 0.3536, "step": 5427 }, { "epoch": 0.44, "grad_norm": 3.9738710837402182, "learning_rate": 6.1453970482134395e-06, "loss": 1.1024, "step": 5428 }, { "epoch": 0.44, "grad_norm": 3.634119645787071, "learning_rate": 6.1441084312273555e-06, "loss": 0.6975, "step": 5429 }, { "epoch": 0.44, "grad_norm": 3.9498673560304196, "learning_rate": 6.142819734044251e-06, "loss": 1.0039, "step": 5430 }, { "epoch": 0.44, "grad_norm": 3.557682544770897, "learning_rate": 6.141530956754457e-06, "loss": 0.8699, "step": 5431 }, { "epoch": 0.44, "grad_norm": 3.506189181916139, "learning_rate": 6.1402420994483104e-06, "loss": 0.7495, "step": 5432 }, { "epoch": 0.44, "grad_norm": 4.374658163440085, "learning_rate": 6.138953162216154e-06, "loss": 0.6774, "step": 5433 }, { "epoch": 0.44, "grad_norm": 2.5169033704099713, "learning_rate": 6.137664145148339e-06, "loss": 0.6085, "step": 5434 }, { "epoch": 0.44, "grad_norm": 3.7937381584883245, "learning_rate": 6.136375048335217e-06, "loss": 0.6849, "step": 5435 }, { "epoch": 0.44, "grad_norm": 4.520608587272846, "learning_rate": 6.13508587186715e-06, "loss": 0.9342, "step": 5436 }, { "epoch": 0.44, "grad_norm": 4.906253332892987, "learning_rate": 6.133796615834504e-06, "loss": 1.0895, "step": 5437 }, { "epoch": 0.44, "grad_norm": 5.032434857952008, "learning_rate": 6.13250728032765e-06, "loss": 1.4765, "step": 5438 }, { "epoch": 0.44, "grad_norm": 4.800388318250968, "learning_rate": 6.131217865436964e-06, "loss": 1.2376, "step": 5439 }, { "epoch": 0.44, "grad_norm": 4.600806621811746, "learning_rate": 6.129928371252829e-06, "loss": 0.8767, "step": 5440 }, { "epoch": 0.44, "grad_norm": 5.008348225563147, "learning_rate": 6.128638797865631e-06, "loss": 1.0546, "step": 5441 }, { "epoch": 0.44, "grad_norm": 4.12472089271931, "learning_rate": 6.127349145365766e-06, "loss": 1.1961, "step": 5442 }, { "epoch": 0.44, "grad_norm": 4.319833628907693, "learning_rate": 6.126059413843633e-06, "loss": 1.1684, "step": 5443 }, { "epoch": 0.44, "grad_norm": 3.48468306527288, "learning_rate": 6.1247696033896345e-06, "loss": 0.9538, "step": 5444 }, { "epoch": 0.45, "grad_norm": 2.1059854657164463, "learning_rate": 6.123479714094181e-06, "loss": 0.3268, "step": 5445 }, { "epoch": 0.45, "grad_norm": 2.9646551102691605, "learning_rate": 6.122189746047691e-06, "loss": 0.7663, "step": 5446 }, { "epoch": 0.45, "grad_norm": 3.4451680586833144, "learning_rate": 6.1208996993405835e-06, "loss": 1.1906, "step": 5447 }, { "epoch": 0.45, "grad_norm": 3.69245122401176, "learning_rate": 6.119609574063285e-06, "loss": 0.9925, "step": 5448 }, { "epoch": 0.45, "grad_norm": 4.7749565835361505, "learning_rate": 6.118319370306227e-06, "loss": 1.0335, "step": 5449 }, { "epoch": 0.45, "grad_norm": 3.74924286006014, "learning_rate": 6.117029088159849e-06, "loss": 0.7541, "step": 5450 }, { "epoch": 0.45, "grad_norm": 3.710865070074655, "learning_rate": 6.115738727714593e-06, "loss": 0.7989, "step": 5451 }, { "epoch": 0.45, "grad_norm": 3.197289043979481, "learning_rate": 6.114448289060908e-06, "loss": 0.539, "step": 5452 }, { "epoch": 0.45, "grad_norm": 3.880621264997749, "learning_rate": 6.113157772289246e-06, "loss": 0.8333, "step": 5453 }, { "epoch": 0.45, "grad_norm": 3.72748977598308, "learning_rate": 6.111867177490072e-06, "loss": 0.9621, "step": 5454 }, { "epoch": 0.45, "grad_norm": 3.43236946265601, "learning_rate": 6.1105765047538465e-06, "loss": 0.9359, "step": 5455 }, { "epoch": 0.45, "grad_norm": 3.8075159373402525, "learning_rate": 6.10928575417104e-06, "loss": 1.1357, "step": 5456 }, { "epoch": 0.45, "grad_norm": 4.3112380522958835, "learning_rate": 6.107994925832131e-06, "loss": 0.6632, "step": 5457 }, { "epoch": 0.45, "grad_norm": 5.6203833931869545, "learning_rate": 6.106704019827599e-06, "loss": 1.3663, "step": 5458 }, { "epoch": 0.45, "grad_norm": 2.6398419742734265, "learning_rate": 6.105413036247933e-06, "loss": 0.6478, "step": 5459 }, { "epoch": 0.45, "grad_norm": 4.119880358959511, "learning_rate": 6.104121975183623e-06, "loss": 0.9093, "step": 5460 }, { "epoch": 0.45, "grad_norm": 3.932584298045523, "learning_rate": 6.102830836725167e-06, "loss": 1.1778, "step": 5461 }, { "epoch": 0.45, "grad_norm": 3.117612576846759, "learning_rate": 6.10153962096307e-06, "loss": 0.9118, "step": 5462 }, { "epoch": 0.45, "grad_norm": 3.6671893745898414, "learning_rate": 6.100248327987839e-06, "loss": 1.0006, "step": 5463 }, { "epoch": 0.45, "grad_norm": 6.39738255545773, "learning_rate": 6.0989569578899885e-06, "loss": 1.1004, "step": 5464 }, { "epoch": 0.45, "grad_norm": 2.3645479788769417, "learning_rate": 6.097665510760037e-06, "loss": 0.4135, "step": 5465 }, { "epoch": 0.45, "grad_norm": 2.8817906212282347, "learning_rate": 6.096373986688512e-06, "loss": 0.499, "step": 5466 }, { "epoch": 0.45, "grad_norm": 3.431268970812689, "learning_rate": 6.09508238576594e-06, "loss": 0.9452, "step": 5467 }, { "epoch": 0.45, "grad_norm": 3.3163539589645685, "learning_rate": 6.093790708082861e-06, "loss": 0.4327, "step": 5468 }, { "epoch": 0.45, "grad_norm": 3.8907058897093836, "learning_rate": 6.092498953729812e-06, "loss": 1.168, "step": 5469 }, { "epoch": 0.45, "grad_norm": 2.771897691342272, "learning_rate": 6.091207122797341e-06, "loss": 0.4317, "step": 5470 }, { "epoch": 0.45, "grad_norm": 4.400698623834575, "learning_rate": 6.089915215376001e-06, "loss": 0.9681, "step": 5471 }, { "epoch": 0.45, "grad_norm": 2.2667063679311665, "learning_rate": 6.088623231556345e-06, "loss": 0.5894, "step": 5472 }, { "epoch": 0.45, "grad_norm": 3.1379010219634256, "learning_rate": 6.087331171428941e-06, "loss": 0.581, "step": 5473 }, { "epoch": 0.45, "grad_norm": 2.992031232407942, "learning_rate": 6.086039035084353e-06, "loss": 0.6718, "step": 5474 }, { "epoch": 0.45, "grad_norm": 4.954322977592264, "learning_rate": 6.084746822613154e-06, "loss": 1.194, "step": 5475 }, { "epoch": 0.45, "grad_norm": 2.707176933117051, "learning_rate": 6.083454534105924e-06, "loss": 0.4213, "step": 5476 }, { "epoch": 0.45, "grad_norm": 6.327659835343956, "learning_rate": 6.082162169653247e-06, "loss": 1.5028, "step": 5477 }, { "epoch": 0.45, "grad_norm": 4.502937909209937, "learning_rate": 6.080869729345712e-06, "loss": 0.5243, "step": 5478 }, { "epoch": 0.45, "grad_norm": 3.2022708381766205, "learning_rate": 6.079577213273911e-06, "loss": 0.9597, "step": 5479 }, { "epoch": 0.45, "grad_norm": 3.902687279888713, "learning_rate": 6.078284621528448e-06, "loss": 1.1178, "step": 5480 }, { "epoch": 0.45, "grad_norm": 2.3045093198536692, "learning_rate": 6.076991954199923e-06, "loss": 0.3561, "step": 5481 }, { "epoch": 0.45, "grad_norm": 2.5323094331506057, "learning_rate": 6.0756992113789514e-06, "loss": 0.7133, "step": 5482 }, { "epoch": 0.45, "grad_norm": 4.904239608362607, "learning_rate": 6.074406393156146e-06, "loss": 1.0177, "step": 5483 }, { "epoch": 0.45, "grad_norm": 2.155375095499139, "learning_rate": 6.073113499622127e-06, "loss": 0.3451, "step": 5484 }, { "epoch": 0.45, "grad_norm": 2.6957454743955696, "learning_rate": 6.071820530867524e-06, "loss": 0.4902, "step": 5485 }, { "epoch": 0.45, "grad_norm": 3.7827946076692256, "learning_rate": 6.070527486982965e-06, "loss": 0.782, "step": 5486 }, { "epoch": 0.45, "grad_norm": 4.670415124750267, "learning_rate": 6.0692343680590894e-06, "loss": 1.0284, "step": 5487 }, { "epoch": 0.45, "grad_norm": 2.632010818389444, "learning_rate": 6.067941174186537e-06, "loss": 0.72, "step": 5488 }, { "epoch": 0.45, "grad_norm": 4.421765823790791, "learning_rate": 6.066647905455955e-06, "loss": 1.3704, "step": 5489 }, { "epoch": 0.45, "grad_norm": 3.4884903471133044, "learning_rate": 6.065354561957998e-06, "loss": 0.9789, "step": 5490 }, { "epoch": 0.45, "grad_norm": 4.738458251237575, "learning_rate": 6.064061143783323e-06, "loss": 0.7934, "step": 5491 }, { "epoch": 0.45, "grad_norm": 4.025182361176615, "learning_rate": 6.0627676510225915e-06, "loss": 0.8666, "step": 5492 }, { "epoch": 0.45, "grad_norm": 3.467433330714114, "learning_rate": 6.061474083766475e-06, "loss": 0.6233, "step": 5493 }, { "epoch": 0.45, "grad_norm": 3.3534751064256008, "learning_rate": 6.060180442105643e-06, "loss": 0.5152, "step": 5494 }, { "epoch": 0.45, "grad_norm": 5.071042799742737, "learning_rate": 6.058886726130776e-06, "loss": 1.062, "step": 5495 }, { "epoch": 0.45, "grad_norm": 3.362853383119304, "learning_rate": 6.057592935932557e-06, "loss": 0.4955, "step": 5496 }, { "epoch": 0.45, "grad_norm": 4.536563141462291, "learning_rate": 6.056299071601678e-06, "loss": 0.9823, "step": 5497 }, { "epoch": 0.45, "grad_norm": 2.8697421361151076, "learning_rate": 6.055005133228829e-06, "loss": 0.6875, "step": 5498 }, { "epoch": 0.45, "grad_norm": 3.109542289630285, "learning_rate": 6.0537111209047115e-06, "loss": 0.4757, "step": 5499 }, { "epoch": 0.45, "grad_norm": 4.5212861195477325, "learning_rate": 6.052417034720032e-06, "loss": 0.8824, "step": 5500 }, { "epoch": 0.45, "grad_norm": 4.002153356204538, "learning_rate": 6.0511228747654985e-06, "loss": 0.9321, "step": 5501 }, { "epoch": 0.45, "grad_norm": 3.0857396379637962, "learning_rate": 6.0498286411318255e-06, "loss": 0.9694, "step": 5502 }, { "epoch": 0.45, "grad_norm": 2.853963101535627, "learning_rate": 6.0485343339097326e-06, "loss": 0.8505, "step": 5503 }, { "epoch": 0.45, "grad_norm": 3.3368021160183443, "learning_rate": 6.047239953189947e-06, "loss": 0.9013, "step": 5504 }, { "epoch": 0.45, "grad_norm": 4.310716106274053, "learning_rate": 6.045945499063197e-06, "loss": 0.8569, "step": 5505 }, { "epoch": 0.45, "grad_norm": 4.6831018651989345, "learning_rate": 6.044650971620222e-06, "loss": 1.0725, "step": 5506 }, { "epoch": 0.45, "grad_norm": 2.9558960430298806, "learning_rate": 6.043356370951757e-06, "loss": 0.5028, "step": 5507 }, { "epoch": 0.45, "grad_norm": 0.8734062377687183, "learning_rate": 6.042061697148555e-06, "loss": 0.1543, "step": 5508 }, { "epoch": 0.45, "grad_norm": 4.0318544937798455, "learning_rate": 6.040766950301361e-06, "loss": 0.8827, "step": 5509 }, { "epoch": 0.45, "grad_norm": 4.356019493312616, "learning_rate": 6.039472130500933e-06, "loss": 0.6415, "step": 5510 }, { "epoch": 0.45, "grad_norm": 0.9896690571284249, "learning_rate": 6.038177237838034e-06, "loss": 0.1408, "step": 5511 }, { "epoch": 0.45, "grad_norm": 3.614899518715243, "learning_rate": 6.036882272403426e-06, "loss": 0.8803, "step": 5512 }, { "epoch": 0.45, "grad_norm": 4.004401352590362, "learning_rate": 6.035587234287884e-06, "loss": 0.7291, "step": 5513 }, { "epoch": 0.45, "grad_norm": 4.7206832528416385, "learning_rate": 6.034292123582185e-06, "loss": 1.5733, "step": 5514 }, { "epoch": 0.45, "grad_norm": 5.143042002035592, "learning_rate": 6.032996940377108e-06, "loss": 1.1401, "step": 5515 }, { "epoch": 0.45, "grad_norm": 3.4042392691211365, "learning_rate": 6.031701684763443e-06, "loss": 1.0504, "step": 5516 }, { "epoch": 0.45, "grad_norm": 3.4200691856860685, "learning_rate": 6.030406356831979e-06, "loss": 0.9079, "step": 5517 }, { "epoch": 0.45, "grad_norm": 3.323923364132357, "learning_rate": 6.029110956673513e-06, "loss": 0.6282, "step": 5518 }, { "epoch": 0.45, "grad_norm": 4.098465470441731, "learning_rate": 6.027815484378848e-06, "loss": 0.9255, "step": 5519 }, { "epoch": 0.45, "grad_norm": 2.5134793061531457, "learning_rate": 6.0265199400387904e-06, "loss": 0.6663, "step": 5520 }, { "epoch": 0.45, "grad_norm": 3.517252099688381, "learning_rate": 6.025224323744153e-06, "loss": 0.9122, "step": 5521 }, { "epoch": 0.45, "grad_norm": 3.236853991100827, "learning_rate": 6.023928635585752e-06, "loss": 0.5387, "step": 5522 }, { "epoch": 0.45, "grad_norm": 2.0916988080878007, "learning_rate": 6.0226328756544105e-06, "loss": 0.3724, "step": 5523 }, { "epoch": 0.45, "grad_norm": 4.777582158576558, "learning_rate": 6.021337044040954e-06, "loss": 0.889, "step": 5524 }, { "epoch": 0.45, "grad_norm": 2.314986998203574, "learning_rate": 6.020041140836217e-06, "loss": 0.3922, "step": 5525 }, { "epoch": 0.45, "grad_norm": 4.836377061609312, "learning_rate": 6.0187451661310345e-06, "loss": 0.7884, "step": 5526 }, { "epoch": 0.45, "grad_norm": 4.28583804011794, "learning_rate": 6.017449120016249e-06, "loss": 0.9602, "step": 5527 }, { "epoch": 0.45, "grad_norm": 2.8415936348387962, "learning_rate": 6.016153002582708e-06, "loss": 0.3775, "step": 5528 }, { "epoch": 0.45, "grad_norm": 4.3430676896119875, "learning_rate": 6.014856813921264e-06, "loss": 1.0213, "step": 5529 }, { "epoch": 0.45, "grad_norm": 2.3657222141846157, "learning_rate": 6.013560554122773e-06, "loss": 0.3764, "step": 5530 }, { "epoch": 0.45, "grad_norm": 2.845682187224199, "learning_rate": 6.0122642232781e-06, "loss": 0.472, "step": 5531 }, { "epoch": 0.45, "grad_norm": 2.7079274159398072, "learning_rate": 6.01096782147811e-06, "loss": 0.469, "step": 5532 }, { "epoch": 0.45, "grad_norm": 1.345960476458339, "learning_rate": 6.009671348813675e-06, "loss": 0.2141, "step": 5533 }, { "epoch": 0.45, "grad_norm": 3.052975998276666, "learning_rate": 6.008374805375674e-06, "loss": 0.595, "step": 5534 }, { "epoch": 0.45, "grad_norm": 4.3505565706139375, "learning_rate": 6.0070781912549855e-06, "loss": 1.0399, "step": 5535 }, { "epoch": 0.45, "grad_norm": 2.2468596412968216, "learning_rate": 6.005781506542498e-06, "loss": 0.6398, "step": 5536 }, { "epoch": 0.45, "grad_norm": 4.073450939851316, "learning_rate": 6.004484751329107e-06, "loss": 0.8609, "step": 5537 }, { "epoch": 0.45, "grad_norm": 4.93717082896763, "learning_rate": 6.003187925705704e-06, "loss": 1.2229, "step": 5538 }, { "epoch": 0.45, "grad_norm": 3.492738577572085, "learning_rate": 6.001891029763194e-06, "loss": 0.7971, "step": 5539 }, { "epoch": 0.45, "grad_norm": 5.101721588026555, "learning_rate": 6.000594063592484e-06, "loss": 0.7578, "step": 5540 }, { "epoch": 0.45, "grad_norm": 3.8164989870699646, "learning_rate": 5.999297027284484e-06, "loss": 0.7848, "step": 5541 }, { "epoch": 0.45, "grad_norm": 3.4856305895260355, "learning_rate": 5.997999920930111e-06, "loss": 0.8738, "step": 5542 }, { "epoch": 0.45, "grad_norm": 2.6225289921817803, "learning_rate": 5.9967027446202885e-06, "loss": 0.7648, "step": 5543 }, { "epoch": 0.45, "grad_norm": 1.766273112541162, "learning_rate": 5.995405498445939e-06, "loss": 0.3674, "step": 5544 }, { "epoch": 0.45, "grad_norm": 2.7646141585134343, "learning_rate": 5.994108182497997e-06, "loss": 0.7503, "step": 5545 }, { "epoch": 0.45, "grad_norm": 3.8461361156409706, "learning_rate": 5.992810796867398e-06, "loss": 0.7506, "step": 5546 }, { "epoch": 0.45, "grad_norm": 5.003624900489704, "learning_rate": 5.991513341645082e-06, "loss": 0.9627, "step": 5547 }, { "epoch": 0.45, "grad_norm": 4.417943959323835, "learning_rate": 5.990215816921998e-06, "loss": 1.0619, "step": 5548 }, { "epoch": 0.45, "grad_norm": 4.733192583725168, "learning_rate": 5.988918222789093e-06, "loss": 0.8055, "step": 5549 }, { "epoch": 0.45, "grad_norm": 3.890782699748269, "learning_rate": 5.987620559337325e-06, "loss": 0.7575, "step": 5550 }, { "epoch": 0.45, "grad_norm": 4.791496139999864, "learning_rate": 5.9863228266576535e-06, "loss": 1.1452, "step": 5551 }, { "epoch": 0.45, "grad_norm": 1.8890858540861521, "learning_rate": 5.985025024841043e-06, "loss": 0.3557, "step": 5552 }, { "epoch": 0.45, "grad_norm": 2.912391990086859, "learning_rate": 5.983727153978467e-06, "loss": 0.5329, "step": 5553 }, { "epoch": 0.45, "grad_norm": 2.0390393911337132, "learning_rate": 5.982429214160899e-06, "loss": 0.5287, "step": 5554 }, { "epoch": 0.45, "grad_norm": 2.299533614708435, "learning_rate": 5.981131205479317e-06, "loss": 0.3959, "step": 5555 }, { "epoch": 0.45, "grad_norm": 5.61051813457963, "learning_rate": 5.9798331280247094e-06, "loss": 1.3886, "step": 5556 }, { "epoch": 0.45, "grad_norm": 3.8045804554973506, "learning_rate": 5.9785349818880626e-06, "loss": 0.7375, "step": 5557 }, { "epoch": 0.45, "grad_norm": 3.170660867053804, "learning_rate": 5.9772367671603715e-06, "loss": 0.5208, "step": 5558 }, { "epoch": 0.45, "grad_norm": 2.786348074967104, "learning_rate": 5.975938483932636e-06, "loss": 0.7823, "step": 5559 }, { "epoch": 0.45, "grad_norm": 2.6829597297233416, "learning_rate": 5.974640132295862e-06, "loss": 0.5209, "step": 5560 }, { "epoch": 0.45, "grad_norm": 3.311060390431268, "learning_rate": 5.973341712341054e-06, "loss": 0.5302, "step": 5561 }, { "epoch": 0.45, "grad_norm": 3.122646154918164, "learning_rate": 5.9720432241592285e-06, "loss": 0.835, "step": 5562 }, { "epoch": 0.45, "grad_norm": 4.768375739659902, "learning_rate": 5.970744667841404e-06, "loss": 1.0994, "step": 5563 }, { "epoch": 0.45, "grad_norm": 3.145289986517489, "learning_rate": 5.9694460434786035e-06, "loss": 0.7075, "step": 5564 }, { "epoch": 0.45, "grad_norm": 3.2116496176499525, "learning_rate": 5.968147351161854e-06, "loss": 0.3714, "step": 5565 }, { "epoch": 0.45, "grad_norm": 4.668740981570834, "learning_rate": 5.9668485909821886e-06, "loss": 0.9293, "step": 5566 }, { "epoch": 0.46, "grad_norm": 3.489957152442339, "learning_rate": 5.965549763030643e-06, "loss": 0.438, "step": 5567 }, { "epoch": 0.46, "grad_norm": 1.2246574479366155, "learning_rate": 5.9642508673982634e-06, "loss": 0.225, "step": 5568 }, { "epoch": 0.46, "grad_norm": 1.3514524458334403, "learning_rate": 5.9629519041760934e-06, "loss": 0.1995, "step": 5569 }, { "epoch": 0.46, "grad_norm": 3.902275863099431, "learning_rate": 5.961652873455186e-06, "loss": 0.7944, "step": 5570 }, { "epoch": 0.46, "grad_norm": 3.4541717446544973, "learning_rate": 5.9603537753265975e-06, "loss": 0.6926, "step": 5571 }, { "epoch": 0.46, "grad_norm": 3.036517138546423, "learning_rate": 5.959054609881388e-06, "loss": 0.7789, "step": 5572 }, { "epoch": 0.46, "grad_norm": 3.843901626875687, "learning_rate": 5.957755377210624e-06, "loss": 0.7952, "step": 5573 }, { "epoch": 0.46, "grad_norm": 3.240485021356367, "learning_rate": 5.956456077405378e-06, "loss": 0.4053, "step": 5574 }, { "epoch": 0.46, "grad_norm": 4.836154350544739, "learning_rate": 5.955156710556722e-06, "loss": 1.0861, "step": 5575 }, { "epoch": 0.46, "grad_norm": 4.504086906465851, "learning_rate": 5.953857276755737e-06, "loss": 1.4191, "step": 5576 }, { "epoch": 0.46, "grad_norm": 2.627717249324198, "learning_rate": 5.95255777609351e-06, "loss": 0.4167, "step": 5577 }, { "epoch": 0.46, "grad_norm": 3.0684441155294637, "learning_rate": 5.951258208661126e-06, "loss": 0.4953, "step": 5578 }, { "epoch": 0.46, "grad_norm": 2.802080977568901, "learning_rate": 5.949958574549683e-06, "loss": 0.5834, "step": 5579 }, { "epoch": 0.46, "grad_norm": 4.077596077455358, "learning_rate": 5.948658873850279e-06, "loss": 0.9969, "step": 5580 }, { "epoch": 0.46, "grad_norm": 4.116797514430086, "learning_rate": 5.947359106654016e-06, "loss": 0.7344, "step": 5581 }, { "epoch": 0.46, "grad_norm": 4.413224521799694, "learning_rate": 5.946059273052001e-06, "loss": 1.4048, "step": 5582 }, { "epoch": 0.46, "grad_norm": 4.876379618779488, "learning_rate": 5.944759373135349e-06, "loss": 0.8151, "step": 5583 }, { "epoch": 0.46, "grad_norm": 3.721767456197241, "learning_rate": 5.943459406995177e-06, "loss": 0.7432, "step": 5584 }, { "epoch": 0.46, "grad_norm": 3.366332111754268, "learning_rate": 5.942159374722606e-06, "loss": 0.7185, "step": 5585 }, { "epoch": 0.46, "grad_norm": 4.0003025244338435, "learning_rate": 5.940859276408764e-06, "loss": 0.8708, "step": 5586 }, { "epoch": 0.46, "grad_norm": 3.4982341148824743, "learning_rate": 5.939559112144781e-06, "loss": 0.7583, "step": 5587 }, { "epoch": 0.46, "grad_norm": 2.4006679307302226, "learning_rate": 5.938258882021793e-06, "loss": 0.3947, "step": 5588 }, { "epoch": 0.46, "grad_norm": 4.548102068329933, "learning_rate": 5.936958586130941e-06, "loss": 0.8525, "step": 5589 }, { "epoch": 0.46, "grad_norm": 4.541643842186359, "learning_rate": 5.935658224563369e-06, "loss": 1.1481, "step": 5590 }, { "epoch": 0.46, "grad_norm": 3.6352525308965022, "learning_rate": 5.934357797410229e-06, "loss": 0.6617, "step": 5591 }, { "epoch": 0.46, "grad_norm": 3.956310662055934, "learning_rate": 5.933057304762672e-06, "loss": 0.7801, "step": 5592 }, { "epoch": 0.46, "grad_norm": 3.0028046951097123, "learning_rate": 5.9317567467118585e-06, "loss": 0.5619, "step": 5593 }, { "epoch": 0.46, "grad_norm": 3.959436889256036, "learning_rate": 5.930456123348953e-06, "loss": 0.9281, "step": 5594 }, { "epoch": 0.46, "grad_norm": 1.907500813877162, "learning_rate": 5.929155434765122e-06, "loss": 0.4004, "step": 5595 }, { "epoch": 0.46, "grad_norm": 4.106542104042912, "learning_rate": 5.927854681051539e-06, "loss": 0.622, "step": 5596 }, { "epoch": 0.46, "grad_norm": 1.5786093027769945, "learning_rate": 5.926553862299382e-06, "loss": 0.3419, "step": 5597 }, { "epoch": 0.46, "grad_norm": 3.5457708150946003, "learning_rate": 5.92525297859983e-06, "loss": 0.7155, "step": 5598 }, { "epoch": 0.46, "grad_norm": 3.684123186030232, "learning_rate": 5.923952030044071e-06, "loss": 0.6645, "step": 5599 }, { "epoch": 0.46, "grad_norm": 3.085103688310422, "learning_rate": 5.922651016723298e-06, "loss": 0.4691, "step": 5600 }, { "epoch": 0.46, "grad_norm": 3.366456362481405, "learning_rate": 5.9213499387287025e-06, "loss": 0.4627, "step": 5601 }, { "epoch": 0.46, "grad_norm": 4.945239115251609, "learning_rate": 5.9200487961514855e-06, "loss": 1.3532, "step": 5602 }, { "epoch": 0.46, "grad_norm": 4.0888492208054945, "learning_rate": 5.918747589082853e-06, "loss": 1.133, "step": 5603 }, { "epoch": 0.46, "grad_norm": 3.8174920053002035, "learning_rate": 5.917446317614012e-06, "loss": 0.9695, "step": 5604 }, { "epoch": 0.46, "grad_norm": 3.6787831396291586, "learning_rate": 5.916144981836177e-06, "loss": 0.5809, "step": 5605 }, { "epoch": 0.46, "grad_norm": 1.2059337241792771, "learning_rate": 5.914843581840566e-06, "loss": 0.2091, "step": 5606 }, { "epoch": 0.46, "grad_norm": 3.134426566217191, "learning_rate": 5.913542117718401e-06, "loss": 0.556, "step": 5607 }, { "epoch": 0.46, "grad_norm": 2.488473168302814, "learning_rate": 5.91224058956091e-06, "loss": 0.4394, "step": 5608 }, { "epoch": 0.46, "grad_norm": 4.674172646517885, "learning_rate": 5.9109389974593234e-06, "loss": 1.0567, "step": 5609 }, { "epoch": 0.46, "grad_norm": 3.098756970166008, "learning_rate": 5.909637341504878e-06, "loss": 0.731, "step": 5610 }, { "epoch": 0.46, "grad_norm": 3.020079420394541, "learning_rate": 5.908335621788814e-06, "loss": 0.7689, "step": 5611 }, { "epoch": 0.46, "grad_norm": 3.9346032106283833, "learning_rate": 5.907033838402375e-06, "loss": 0.8292, "step": 5612 }, { "epoch": 0.46, "grad_norm": 3.3910071745667127, "learning_rate": 5.90573199143681e-06, "loss": 0.8922, "step": 5613 }, { "epoch": 0.46, "grad_norm": 2.5611325907497147, "learning_rate": 5.904430080983378e-06, "loss": 0.5135, "step": 5614 }, { "epoch": 0.46, "grad_norm": 4.210320865357581, "learning_rate": 5.9031281071333305e-06, "loss": 0.87, "step": 5615 }, { "epoch": 0.46, "grad_norm": 2.9804731507309135, "learning_rate": 5.901826069977933e-06, "loss": 0.5034, "step": 5616 }, { "epoch": 0.46, "grad_norm": 1.7768985879771326, "learning_rate": 5.900523969608454e-06, "loss": 0.3578, "step": 5617 }, { "epoch": 0.46, "grad_norm": 4.0134309205375125, "learning_rate": 5.8992218061161645e-06, "loss": 0.6261, "step": 5618 }, { "epoch": 0.46, "grad_norm": 4.525221805410668, "learning_rate": 5.897919579592337e-06, "loss": 1.1076, "step": 5619 }, { "epoch": 0.46, "grad_norm": 4.80410281650793, "learning_rate": 5.896617290128258e-06, "loss": 1.1153, "step": 5620 }, { "epoch": 0.46, "grad_norm": 3.8945697187673267, "learning_rate": 5.895314937815206e-06, "loss": 0.9155, "step": 5621 }, { "epoch": 0.46, "grad_norm": 2.4692131851393646, "learning_rate": 5.894012522744474e-06, "loss": 0.3466, "step": 5622 }, { "epoch": 0.46, "grad_norm": 4.439566624699234, "learning_rate": 5.892710045007357e-06, "loss": 1.1799, "step": 5623 }, { "epoch": 0.46, "grad_norm": 1.3540267704180882, "learning_rate": 5.891407504695149e-06, "loss": 0.2322, "step": 5624 }, { "epoch": 0.46, "grad_norm": 3.8034982849943217, "learning_rate": 5.8901049018991564e-06, "loss": 0.7363, "step": 5625 }, { "epoch": 0.46, "grad_norm": 2.844189826181136, "learning_rate": 5.888802236710681e-06, "loss": 0.568, "step": 5626 }, { "epoch": 0.46, "grad_norm": 4.288461366974695, "learning_rate": 5.88749950922104e-06, "loss": 1.1385, "step": 5627 }, { "epoch": 0.46, "grad_norm": 5.448119701622255, "learning_rate": 5.886196719521544e-06, "loss": 1.1424, "step": 5628 }, { "epoch": 0.46, "grad_norm": 3.2093745245117726, "learning_rate": 5.884893867703515e-06, "loss": 0.6699, "step": 5629 }, { "epoch": 0.46, "grad_norm": 2.6306971758348965, "learning_rate": 5.883590953858276e-06, "loss": 0.4229, "step": 5630 }, { "epoch": 0.46, "grad_norm": 3.804745123512171, "learning_rate": 5.882287978077158e-06, "loss": 0.4848, "step": 5631 }, { "epoch": 0.46, "grad_norm": 5.331107191514581, "learning_rate": 5.880984940451491e-06, "loss": 1.6346, "step": 5632 }, { "epoch": 0.46, "grad_norm": 3.233822282708715, "learning_rate": 5.879681841072614e-06, "loss": 0.7985, "step": 5633 }, { "epoch": 0.46, "grad_norm": 3.374574431792657, "learning_rate": 5.87837868003187e-06, "loss": 0.5632, "step": 5634 }, { "epoch": 0.46, "grad_norm": 3.36991586919913, "learning_rate": 5.877075457420602e-06, "loss": 0.7271, "step": 5635 }, { "epoch": 0.46, "grad_norm": 1.221505425581795, "learning_rate": 5.875772173330162e-06, "loss": 0.1695, "step": 5636 }, { "epoch": 0.46, "grad_norm": 3.6084175650892627, "learning_rate": 5.874468827851903e-06, "loss": 0.5352, "step": 5637 }, { "epoch": 0.46, "grad_norm": 3.3842514466431557, "learning_rate": 5.873165421077186e-06, "loss": 0.6894, "step": 5638 }, { "epoch": 0.46, "grad_norm": 4.907343104798318, "learning_rate": 5.871861953097372e-06, "loss": 1.0439, "step": 5639 }, { "epoch": 0.46, "grad_norm": 1.8433580518949468, "learning_rate": 5.87055842400383e-06, "loss": 0.3318, "step": 5640 }, { "epoch": 0.46, "grad_norm": 4.193401909883628, "learning_rate": 5.869254833887931e-06, "loss": 0.6984, "step": 5641 }, { "epoch": 0.46, "grad_norm": 3.215790357501764, "learning_rate": 5.867951182841052e-06, "loss": 0.9167, "step": 5642 }, { "epoch": 0.46, "grad_norm": 3.2390874229381965, "learning_rate": 5.866647470954572e-06, "loss": 0.4804, "step": 5643 }, { "epoch": 0.46, "grad_norm": 2.5895580562867244, "learning_rate": 5.8653436983198755e-06, "loss": 0.4501, "step": 5644 }, { "epoch": 0.46, "grad_norm": 3.5130031528387895, "learning_rate": 5.864039865028351e-06, "loss": 0.64, "step": 5645 }, { "epoch": 0.46, "grad_norm": 3.5725355905213174, "learning_rate": 5.862735971171394e-06, "loss": 0.8459, "step": 5646 }, { "epoch": 0.46, "grad_norm": 2.600874218765597, "learning_rate": 5.8614320168403986e-06, "loss": 0.5834, "step": 5647 }, { "epoch": 0.46, "grad_norm": 3.7204376203085254, "learning_rate": 5.860128002126769e-06, "loss": 0.8484, "step": 5648 }, { "epoch": 0.46, "grad_norm": 2.766384604117466, "learning_rate": 5.858823927121908e-06, "loss": 0.5566, "step": 5649 }, { "epoch": 0.46, "grad_norm": 6.210756108565881, "learning_rate": 5.85751979191723e-06, "loss": 1.1673, "step": 5650 }, { "epoch": 0.46, "grad_norm": 4.199498809687956, "learning_rate": 5.856215596604146e-06, "loss": 0.7733, "step": 5651 }, { "epoch": 0.46, "grad_norm": 3.9279068313710535, "learning_rate": 5.854911341274074e-06, "loss": 0.9522, "step": 5652 }, { "epoch": 0.46, "grad_norm": 2.0719494374336302, "learning_rate": 5.853607026018435e-06, "loss": 0.519, "step": 5653 }, { "epoch": 0.46, "grad_norm": 3.687955377889996, "learning_rate": 5.852302650928663e-06, "loss": 0.8059, "step": 5654 }, { "epoch": 0.46, "grad_norm": 4.218504325638327, "learning_rate": 5.850998216096181e-06, "loss": 0.8211, "step": 5655 }, { "epoch": 0.46, "grad_norm": 5.226698442187045, "learning_rate": 5.849693721612428e-06, "loss": 1.1432, "step": 5656 }, { "epoch": 0.46, "grad_norm": 4.8147250107784245, "learning_rate": 5.848389167568845e-06, "loss": 1.0348, "step": 5657 }, { "epoch": 0.46, "grad_norm": 3.9113101562177413, "learning_rate": 5.847084554056873e-06, "loss": 0.8063, "step": 5658 }, { "epoch": 0.46, "grad_norm": 4.512599912384629, "learning_rate": 5.845779881167959e-06, "loss": 0.9087, "step": 5659 }, { "epoch": 0.46, "grad_norm": 3.254628577072085, "learning_rate": 5.844475148993558e-06, "loss": 0.7667, "step": 5660 }, { "epoch": 0.46, "grad_norm": 3.965014543590636, "learning_rate": 5.843170357625122e-06, "loss": 0.6832, "step": 5661 }, { "epoch": 0.46, "grad_norm": 3.922702768592842, "learning_rate": 5.8418655071541145e-06, "loss": 0.5666, "step": 5662 }, { "epoch": 0.46, "grad_norm": 3.1475215219336645, "learning_rate": 5.840560597671999e-06, "loss": 0.3893, "step": 5663 }, { "epoch": 0.46, "grad_norm": 2.9612076985975904, "learning_rate": 5.8392556292702425e-06, "loss": 0.5127, "step": 5664 }, { "epoch": 0.46, "grad_norm": 3.8902169840323704, "learning_rate": 5.837950602040321e-06, "loss": 0.6394, "step": 5665 }, { "epoch": 0.46, "grad_norm": 5.313920807203621, "learning_rate": 5.836645516073709e-06, "loss": 1.2423, "step": 5666 }, { "epoch": 0.46, "grad_norm": 4.533505413128344, "learning_rate": 5.835340371461886e-06, "loss": 0.7282, "step": 5667 }, { "epoch": 0.46, "grad_norm": 3.180132481219037, "learning_rate": 5.83403516829634e-06, "loss": 0.5309, "step": 5668 }, { "epoch": 0.46, "grad_norm": 3.42936288406953, "learning_rate": 5.832729906668556e-06, "loss": 0.7588, "step": 5669 }, { "epoch": 0.46, "grad_norm": 5.619655411931857, "learning_rate": 5.83142458667003e-06, "loss": 1.2244, "step": 5670 }, { "epoch": 0.46, "grad_norm": 3.3241952225594344, "learning_rate": 5.83011920839226e-06, "loss": 0.6462, "step": 5671 }, { "epoch": 0.46, "grad_norm": 3.3524127689735126, "learning_rate": 5.828813771926746e-06, "loss": 0.563, "step": 5672 }, { "epoch": 0.46, "grad_norm": 3.7958909598404076, "learning_rate": 5.827508277364994e-06, "loss": 0.9086, "step": 5673 }, { "epoch": 0.46, "grad_norm": 3.6682499503574486, "learning_rate": 5.826202724798513e-06, "loss": 1.0416, "step": 5674 }, { "epoch": 0.46, "grad_norm": 4.167000503301076, "learning_rate": 5.824897114318815e-06, "loss": 0.9161, "step": 5675 }, { "epoch": 0.46, "grad_norm": 2.1926145434582027, "learning_rate": 5.82359144601742e-06, "loss": 0.4624, "step": 5676 }, { "epoch": 0.46, "grad_norm": 4.785094822337679, "learning_rate": 5.8222857199858495e-06, "loss": 1.0595, "step": 5677 }, { "epoch": 0.46, "grad_norm": 3.5222900636213748, "learning_rate": 5.820979936315628e-06, "loss": 0.3336, "step": 5678 }, { "epoch": 0.46, "grad_norm": 3.0167878091794575, "learning_rate": 5.819674095098286e-06, "loss": 0.4347, "step": 5679 }, { "epoch": 0.46, "grad_norm": 2.9541820770494716, "learning_rate": 5.818368196425358e-06, "loss": 0.5592, "step": 5680 }, { "epoch": 0.46, "grad_norm": 5.438792495249288, "learning_rate": 5.8170622403883815e-06, "loss": 1.0044, "step": 5681 }, { "epoch": 0.46, "grad_norm": 2.7398885866053737, "learning_rate": 5.815756227078896e-06, "loss": 0.4634, "step": 5682 }, { "epoch": 0.46, "grad_norm": 3.352284171432852, "learning_rate": 5.814450156588451e-06, "loss": 0.8685, "step": 5683 }, { "epoch": 0.46, "grad_norm": 3.1546835878369515, "learning_rate": 5.813144029008593e-06, "loss": 0.7393, "step": 5684 }, { "epoch": 0.46, "grad_norm": 3.0705438955166504, "learning_rate": 5.811837844430877e-06, "loss": 0.6707, "step": 5685 }, { "epoch": 0.46, "grad_norm": 5.250858309091342, "learning_rate": 5.810531602946863e-06, "loss": 0.9572, "step": 5686 }, { "epoch": 0.46, "grad_norm": 2.595430117641484, "learning_rate": 5.8092253046481095e-06, "loss": 0.2611, "step": 5687 }, { "epoch": 0.46, "grad_norm": 4.0958849916024, "learning_rate": 5.807918949626184e-06, "loss": 0.7519, "step": 5688 }, { "epoch": 0.46, "grad_norm": 2.858938120379014, "learning_rate": 5.806612537972658e-06, "loss": 0.3707, "step": 5689 }, { "epoch": 0.47, "grad_norm": 2.710846347541895, "learning_rate": 5.805306069779102e-06, "loss": 0.7015, "step": 5690 }, { "epoch": 0.47, "grad_norm": 3.6940713544638384, "learning_rate": 5.803999545137096e-06, "loss": 0.8145, "step": 5691 }, { "epoch": 0.47, "grad_norm": 2.9538177719718988, "learning_rate": 5.80269296413822e-06, "loss": 0.5931, "step": 5692 }, { "epoch": 0.47, "grad_norm": 4.266763180046466, "learning_rate": 5.80138632687406e-06, "loss": 0.7787, "step": 5693 }, { "epoch": 0.47, "grad_norm": 3.5477191117174707, "learning_rate": 5.8000796334362074e-06, "loss": 0.5738, "step": 5694 }, { "epoch": 0.47, "grad_norm": 2.7226809880218266, "learning_rate": 5.798772883916254e-06, "loss": 0.6034, "step": 5695 }, { "epoch": 0.47, "grad_norm": 1.7352513352651564, "learning_rate": 5.797466078405798e-06, "loss": 0.4274, "step": 5696 }, { "epoch": 0.47, "grad_norm": 3.044027972306153, "learning_rate": 5.796159216996441e-06, "loss": 0.4223, "step": 5697 }, { "epoch": 0.47, "grad_norm": 2.2023661997985142, "learning_rate": 5.794852299779787e-06, "loss": 0.3714, "step": 5698 }, { "epoch": 0.47, "grad_norm": 4.603670980340674, "learning_rate": 5.7935453268474454e-06, "loss": 1.1581, "step": 5699 }, { "epoch": 0.47, "grad_norm": 3.592738406110725, "learning_rate": 5.792238298291031e-06, "loss": 1.0247, "step": 5700 }, { "epoch": 0.47, "grad_norm": 3.7736571140746995, "learning_rate": 5.790931214202159e-06, "loss": 0.7871, "step": 5701 }, { "epoch": 0.47, "grad_norm": 3.3027765022500475, "learning_rate": 5.7896240746724505e-06, "loss": 0.7136, "step": 5702 }, { "epoch": 0.47, "grad_norm": 3.1869716893594364, "learning_rate": 5.788316879793533e-06, "loss": 0.5946, "step": 5703 }, { "epoch": 0.47, "grad_norm": 4.43303866822753, "learning_rate": 5.787009629657032e-06, "loss": 0.6631, "step": 5704 }, { "epoch": 0.47, "grad_norm": 4.369661125838084, "learning_rate": 5.78570232435458e-06, "loss": 0.8293, "step": 5705 }, { "epoch": 0.47, "grad_norm": 4.144263937869944, "learning_rate": 5.784394963977815e-06, "loss": 0.9573, "step": 5706 }, { "epoch": 0.47, "grad_norm": 3.1146427071289295, "learning_rate": 5.783087548618377e-06, "loss": 0.6525, "step": 5707 }, { "epoch": 0.47, "grad_norm": 4.547824438666656, "learning_rate": 5.78178007836791e-06, "loss": 0.966, "step": 5708 }, { "epoch": 0.47, "grad_norm": 4.950261807048099, "learning_rate": 5.7804725533180615e-06, "loss": 1.0529, "step": 5709 }, { "epoch": 0.47, "grad_norm": 2.9747306031892378, "learning_rate": 5.779164973560483e-06, "loss": 0.6283, "step": 5710 }, { "epoch": 0.47, "grad_norm": 4.529311563326823, "learning_rate": 5.777857339186832e-06, "loss": 0.7368, "step": 5711 }, { "epoch": 0.47, "grad_norm": 4.246827561234806, "learning_rate": 5.776549650288767e-06, "loss": 1.0219, "step": 5712 }, { "epoch": 0.47, "grad_norm": 4.408726329418388, "learning_rate": 5.775241906957949e-06, "loss": 0.8025, "step": 5713 }, { "epoch": 0.47, "grad_norm": 4.653669605183592, "learning_rate": 5.7739341092860505e-06, "loss": 1.1074, "step": 5714 }, { "epoch": 0.47, "grad_norm": 3.2976226358559413, "learning_rate": 5.772626257364736e-06, "loss": 0.503, "step": 5715 }, { "epoch": 0.47, "grad_norm": 3.2205688378314914, "learning_rate": 5.771318351285684e-06, "loss": 0.4389, "step": 5716 }, { "epoch": 0.47, "grad_norm": 4.054750227078472, "learning_rate": 5.7700103911405735e-06, "loss": 0.8102, "step": 5717 }, { "epoch": 0.47, "grad_norm": 3.245210152402887, "learning_rate": 5.7687023770210835e-06, "loss": 0.57, "step": 5718 }, { "epoch": 0.47, "grad_norm": 4.907066907446562, "learning_rate": 5.767394309018905e-06, "loss": 0.8984, "step": 5719 }, { "epoch": 0.47, "grad_norm": 4.286741875986198, "learning_rate": 5.766086187225725e-06, "loss": 0.9761, "step": 5720 }, { "epoch": 0.47, "grad_norm": 4.224340502273579, "learning_rate": 5.764778011733235e-06, "loss": 1.224, "step": 5721 }, { "epoch": 0.47, "grad_norm": 3.559377456310667, "learning_rate": 5.763469782633136e-06, "loss": 0.9564, "step": 5722 }, { "epoch": 0.47, "grad_norm": 2.497098893514583, "learning_rate": 5.762161500017128e-06, "loss": 0.5805, "step": 5723 }, { "epoch": 0.47, "grad_norm": 4.434473663235255, "learning_rate": 5.760853163976915e-06, "loss": 0.8909, "step": 5724 }, { "epoch": 0.47, "grad_norm": 3.9448167601439676, "learning_rate": 5.759544774604207e-06, "loss": 0.8595, "step": 5725 }, { "epoch": 0.47, "grad_norm": 3.743227996145874, "learning_rate": 5.758236331990717e-06, "loss": 0.7532, "step": 5726 }, { "epoch": 0.47, "grad_norm": 4.024226223243954, "learning_rate": 5.756927836228158e-06, "loss": 0.8766, "step": 5727 }, { "epoch": 0.47, "grad_norm": 2.5246596586966805, "learning_rate": 5.755619287408253e-06, "loss": 0.4241, "step": 5728 }, { "epoch": 0.47, "grad_norm": 2.005558144090475, "learning_rate": 5.754310685622724e-06, "loss": 0.2407, "step": 5729 }, { "epoch": 0.47, "grad_norm": 3.4679414027430684, "learning_rate": 5.753002030963298e-06, "loss": 0.7409, "step": 5730 }, { "epoch": 0.47, "grad_norm": 3.7261723466337284, "learning_rate": 5.751693323521709e-06, "loss": 0.7915, "step": 5731 }, { "epoch": 0.47, "grad_norm": 3.9821460241633777, "learning_rate": 5.750384563389687e-06, "loss": 0.8028, "step": 5732 }, { "epoch": 0.47, "grad_norm": 4.857185247382448, "learning_rate": 5.749075750658973e-06, "loss": 0.8735, "step": 5733 }, { "epoch": 0.47, "grad_norm": 3.846167569519588, "learning_rate": 5.747766885421309e-06, "loss": 0.5309, "step": 5734 }, { "epoch": 0.47, "grad_norm": 3.7474054280595746, "learning_rate": 5.7464579677684415e-06, "loss": 0.8645, "step": 5735 }, { "epoch": 0.47, "grad_norm": 4.295387737306311, "learning_rate": 5.745148997792119e-06, "loss": 0.8833, "step": 5736 }, { "epoch": 0.47, "grad_norm": 3.1438553701194087, "learning_rate": 5.743839975584096e-06, "loss": 0.4879, "step": 5737 }, { "epoch": 0.47, "grad_norm": 4.732617652793853, "learning_rate": 5.7425309012361255e-06, "loss": 1.003, "step": 5738 }, { "epoch": 0.47, "grad_norm": 4.8092772920083044, "learning_rate": 5.741221774839971e-06, "loss": 0.903, "step": 5739 }, { "epoch": 0.47, "grad_norm": 3.2358834200477284, "learning_rate": 5.739912596487396e-06, "loss": 0.7134, "step": 5740 }, { "epoch": 0.47, "grad_norm": 2.775149360538276, "learning_rate": 5.738603366270168e-06, "loss": 0.436, "step": 5741 }, { "epoch": 0.47, "grad_norm": 3.6307972809973466, "learning_rate": 5.737294084280058e-06, "loss": 0.7147, "step": 5742 }, { "epoch": 0.47, "grad_norm": 3.3848807124944083, "learning_rate": 5.735984750608843e-06, "loss": 1.0273, "step": 5743 }, { "epoch": 0.47, "grad_norm": 2.759591221597147, "learning_rate": 5.734675365348299e-06, "loss": 0.5455, "step": 5744 }, { "epoch": 0.47, "grad_norm": 4.205319482526289, "learning_rate": 5.733365928590208e-06, "loss": 0.9674, "step": 5745 }, { "epoch": 0.47, "grad_norm": 3.888076685253536, "learning_rate": 5.732056440426359e-06, "loss": 0.8197, "step": 5746 }, { "epoch": 0.47, "grad_norm": 4.375645006216542, "learning_rate": 5.730746900948538e-06, "loss": 1.0402, "step": 5747 }, { "epoch": 0.47, "grad_norm": 3.742123547621015, "learning_rate": 5.729437310248541e-06, "loss": 0.4881, "step": 5748 }, { "epoch": 0.47, "grad_norm": 4.354753309388454, "learning_rate": 5.728127668418162e-06, "loss": 0.9882, "step": 5749 }, { "epoch": 0.47, "grad_norm": 4.141328133970809, "learning_rate": 5.726817975549201e-06, "loss": 1.0981, "step": 5750 }, { "epoch": 0.47, "grad_norm": 4.362439718903292, "learning_rate": 5.7255082317334665e-06, "loss": 0.895, "step": 5751 }, { "epoch": 0.47, "grad_norm": 3.465193722877705, "learning_rate": 5.72419843706276e-06, "loss": 0.9494, "step": 5752 }, { "epoch": 0.47, "grad_norm": 2.394136592870638, "learning_rate": 5.722888591628895e-06, "loss": 0.6267, "step": 5753 }, { "epoch": 0.47, "grad_norm": 2.902125420421986, "learning_rate": 5.7215786955236865e-06, "loss": 0.6764, "step": 5754 }, { "epoch": 0.47, "grad_norm": 7.066351910581949, "learning_rate": 5.72026874883895e-06, "loss": 1.0081, "step": 5755 }, { "epoch": 0.47, "grad_norm": 4.120000950336103, "learning_rate": 5.7189587516665105e-06, "loss": 0.5777, "step": 5756 }, { "epoch": 0.47, "grad_norm": 3.7120891216701657, "learning_rate": 5.717648704098191e-06, "loss": 0.8645, "step": 5757 }, { "epoch": 0.47, "grad_norm": 3.5467828767445257, "learning_rate": 5.716338606225821e-06, "loss": 0.5317, "step": 5758 }, { "epoch": 0.47, "grad_norm": 3.9582341832144654, "learning_rate": 5.715028458141232e-06, "loss": 0.948, "step": 5759 }, { "epoch": 0.47, "grad_norm": 3.9280643667834787, "learning_rate": 5.71371825993626e-06, "loss": 0.7351, "step": 5760 }, { "epoch": 0.47, "grad_norm": 4.238562911035449, "learning_rate": 5.7124080117027435e-06, "loss": 0.6953, "step": 5761 }, { "epoch": 0.47, "grad_norm": 4.01743293033794, "learning_rate": 5.711097713532525e-06, "loss": 0.92, "step": 5762 }, { "epoch": 0.47, "grad_norm": 1.9946077563575941, "learning_rate": 5.709787365517453e-06, "loss": 0.2586, "step": 5763 }, { "epoch": 0.47, "grad_norm": 3.9150789608805256, "learning_rate": 5.708476967749375e-06, "loss": 0.6597, "step": 5764 }, { "epoch": 0.47, "grad_norm": 1.8986227363890973, "learning_rate": 5.7071665203201444e-06, "loss": 0.4035, "step": 5765 }, { "epoch": 0.47, "grad_norm": 5.552870877519428, "learning_rate": 5.70585602332162e-06, "loss": 1.4425, "step": 5766 }, { "epoch": 0.47, "grad_norm": 3.336830414117216, "learning_rate": 5.704545476845659e-06, "loss": 0.8516, "step": 5767 }, { "epoch": 0.47, "grad_norm": 2.513803551281201, "learning_rate": 5.703234880984126e-06, "loss": 0.454, "step": 5768 }, { "epoch": 0.47, "grad_norm": 2.9759675928845812, "learning_rate": 5.70192423582889e-06, "loss": 0.5834, "step": 5769 }, { "epoch": 0.47, "grad_norm": 3.596271191049582, "learning_rate": 5.700613541471818e-06, "loss": 0.5559, "step": 5770 }, { "epoch": 0.47, "grad_norm": 4.074366001164237, "learning_rate": 5.6993027980047866e-06, "loss": 0.6883, "step": 5771 }, { "epoch": 0.47, "grad_norm": 4.394754836340234, "learning_rate": 5.6979920055196725e-06, "loss": 1.3086, "step": 5772 }, { "epoch": 0.47, "grad_norm": 3.611169159436164, "learning_rate": 5.696681164108355e-06, "loss": 0.5912, "step": 5773 }, { "epoch": 0.47, "grad_norm": 4.738071640705117, "learning_rate": 5.6953702738627215e-06, "loss": 0.8901, "step": 5774 }, { "epoch": 0.47, "grad_norm": 4.026026575450983, "learning_rate": 5.694059334874658e-06, "loss": 0.8682, "step": 5775 }, { "epoch": 0.47, "grad_norm": 3.7881691134910755, "learning_rate": 5.692748347236055e-06, "loss": 0.5597, "step": 5776 }, { "epoch": 0.47, "grad_norm": 4.318696413394652, "learning_rate": 5.69143731103881e-06, "loss": 1.2268, "step": 5777 }, { "epoch": 0.47, "grad_norm": 4.482657348304525, "learning_rate": 5.6901262263748155e-06, "loss": 0.7588, "step": 5778 }, { "epoch": 0.47, "grad_norm": 4.181824507035459, "learning_rate": 5.6888150933359765e-06, "loss": 1.1801, "step": 5779 }, { "epoch": 0.47, "grad_norm": 4.050095910926002, "learning_rate": 5.687503912014199e-06, "loss": 0.9628, "step": 5780 }, { "epoch": 0.47, "grad_norm": 4.63451128404456, "learning_rate": 5.686192682501388e-06, "loss": 1.1779, "step": 5781 }, { "epoch": 0.47, "grad_norm": 2.2602723144577888, "learning_rate": 5.684881404889456e-06, "loss": 0.4366, "step": 5782 }, { "epoch": 0.47, "grad_norm": 4.9475423541631125, "learning_rate": 5.68357007927032e-06, "loss": 0.968, "step": 5783 }, { "epoch": 0.47, "grad_norm": 2.903104647376368, "learning_rate": 5.682258705735895e-06, "loss": 0.5192, "step": 5784 }, { "epoch": 0.47, "grad_norm": 4.125000797006016, "learning_rate": 5.680947284378102e-06, "loss": 1.1176, "step": 5785 }, { "epoch": 0.47, "grad_norm": 3.9315500926892883, "learning_rate": 5.679635815288871e-06, "loss": 0.7608, "step": 5786 }, { "epoch": 0.47, "grad_norm": 4.367998612990884, "learning_rate": 5.678324298560125e-06, "loss": 0.9233, "step": 5787 }, { "epoch": 0.47, "grad_norm": 3.4874247781562575, "learning_rate": 5.677012734283799e-06, "loss": 0.6668, "step": 5788 }, { "epoch": 0.47, "grad_norm": 2.371404386512218, "learning_rate": 5.675701122551827e-06, "loss": 0.4671, "step": 5789 }, { "epoch": 0.47, "grad_norm": 3.789388752179784, "learning_rate": 5.674389463456146e-06, "loss": 0.5927, "step": 5790 }, { "epoch": 0.47, "grad_norm": 3.8202185280672785, "learning_rate": 5.6730777570887e-06, "loss": 0.6714, "step": 5791 }, { "epoch": 0.47, "grad_norm": 3.733175702563129, "learning_rate": 5.67176600354143e-06, "loss": 0.8003, "step": 5792 }, { "epoch": 0.47, "grad_norm": 4.072108180130317, "learning_rate": 5.670454202906288e-06, "loss": 0.7722, "step": 5793 }, { "epoch": 0.47, "grad_norm": 3.9407350730100346, "learning_rate": 5.669142355275225e-06, "loss": 0.5922, "step": 5794 }, { "epoch": 0.47, "grad_norm": 3.7537621502854615, "learning_rate": 5.6678304607401934e-06, "loss": 0.7926, "step": 5795 }, { "epoch": 0.47, "grad_norm": 3.5741520687458026, "learning_rate": 5.6665185193931535e-06, "loss": 0.6796, "step": 5796 }, { "epoch": 0.47, "grad_norm": 5.1724419860360955, "learning_rate": 5.6652065313260675e-06, "loss": 0.8818, "step": 5797 }, { "epoch": 0.47, "grad_norm": 5.183680157685205, "learning_rate": 5.663894496630898e-06, "loss": 1.269, "step": 5798 }, { "epoch": 0.47, "grad_norm": 3.298214517678515, "learning_rate": 5.662582415399612e-06, "loss": 0.6199, "step": 5799 }, { "epoch": 0.47, "grad_norm": 3.2366070249034333, "learning_rate": 5.661270287724184e-06, "loss": 0.5728, "step": 5800 }, { "epoch": 0.47, "grad_norm": 3.9758275887289702, "learning_rate": 5.6599581136965855e-06, "loss": 0.4086, "step": 5801 }, { "epoch": 0.47, "grad_norm": 3.251838524436833, "learning_rate": 5.658645893408795e-06, "loss": 0.6051, "step": 5802 }, { "epoch": 0.47, "grad_norm": 3.1888373776623515, "learning_rate": 5.657333626952796e-06, "loss": 0.893, "step": 5803 }, { "epoch": 0.47, "grad_norm": 3.505630791060191, "learning_rate": 5.656021314420568e-06, "loss": 0.8322, "step": 5804 }, { "epoch": 0.47, "grad_norm": 3.5915373058970728, "learning_rate": 5.6547089559041025e-06, "loss": 0.6709, "step": 5805 }, { "epoch": 0.47, "grad_norm": 2.9530410208834805, "learning_rate": 5.65339655149539e-06, "loss": 0.6155, "step": 5806 }, { "epoch": 0.47, "grad_norm": 3.0412472608721006, "learning_rate": 5.652084101286419e-06, "loss": 0.553, "step": 5807 }, { "epoch": 0.47, "grad_norm": 1.7818611388354708, "learning_rate": 5.6507716053691916e-06, "loss": 0.2103, "step": 5808 }, { "epoch": 0.47, "grad_norm": 1.8957158313893403, "learning_rate": 5.649459063835708e-06, "loss": 0.3198, "step": 5809 }, { "epoch": 0.47, "grad_norm": 4.033966939716265, "learning_rate": 5.648146476777969e-06, "loss": 0.5737, "step": 5810 }, { "epoch": 0.47, "grad_norm": 4.0922371158650925, "learning_rate": 5.646833844287985e-06, "loss": 1.1988, "step": 5811 }, { "epoch": 0.48, "grad_norm": 4.005342843854818, "learning_rate": 5.6455211664577615e-06, "loss": 0.8087, "step": 5812 }, { "epoch": 0.48, "grad_norm": 2.9302402031710386, "learning_rate": 5.644208443379315e-06, "loss": 0.5335, "step": 5813 }, { "epoch": 0.48, "grad_norm": 5.093569074041109, "learning_rate": 5.642895675144659e-06, "loss": 0.9548, "step": 5814 }, { "epoch": 0.48, "grad_norm": 5.564522932067168, "learning_rate": 5.641582861845815e-06, "loss": 1.2332, "step": 5815 }, { "epoch": 0.48, "grad_norm": 2.296403499703234, "learning_rate": 5.640270003574804e-06, "loss": 0.3742, "step": 5816 }, { "epoch": 0.48, "grad_norm": 2.5994409059679557, "learning_rate": 5.638957100423652e-06, "loss": 0.3849, "step": 5817 }, { "epoch": 0.48, "grad_norm": 4.550878428964589, "learning_rate": 5.637644152484389e-06, "loss": 1.1475, "step": 5818 }, { "epoch": 0.48, "grad_norm": 4.33694740793171, "learning_rate": 5.6363311598490444e-06, "loss": 0.8275, "step": 5819 }, { "epoch": 0.48, "grad_norm": 3.4951912625209527, "learning_rate": 5.635018122609656e-06, "loss": 0.5433, "step": 5820 }, { "epoch": 0.48, "grad_norm": 2.888249678559443, "learning_rate": 5.633705040858262e-06, "loss": 0.4817, "step": 5821 }, { "epoch": 0.48, "grad_norm": 1.9934104155114218, "learning_rate": 5.6323919146869e-06, "loss": 0.4254, "step": 5822 }, { "epoch": 0.48, "grad_norm": 2.9285482506530185, "learning_rate": 5.631078744187618e-06, "loss": 0.6149, "step": 5823 }, { "epoch": 0.48, "grad_norm": 4.701760545520466, "learning_rate": 5.629765529452463e-06, "loss": 1.0096, "step": 5824 }, { "epoch": 0.48, "grad_norm": 3.097522815975042, "learning_rate": 5.628452270573483e-06, "loss": 0.8303, "step": 5825 }, { "epoch": 0.48, "grad_norm": 3.130182625184013, "learning_rate": 5.6271389676427365e-06, "loss": 0.6292, "step": 5826 }, { "epoch": 0.48, "grad_norm": 3.911329866629304, "learning_rate": 5.625825620752277e-06, "loss": 0.9716, "step": 5827 }, { "epoch": 0.48, "grad_norm": 3.1791773291730485, "learning_rate": 5.624512229994165e-06, "loss": 0.5456, "step": 5828 }, { "epoch": 0.48, "grad_norm": 4.022555284755302, "learning_rate": 5.623198795460463e-06, "loss": 0.7795, "step": 5829 }, { "epoch": 0.48, "grad_norm": 4.251633314084586, "learning_rate": 5.621885317243238e-06, "loss": 0.9052, "step": 5830 }, { "epoch": 0.48, "grad_norm": 3.1305021462405858, "learning_rate": 5.620571795434559e-06, "loss": 0.8813, "step": 5831 }, { "epoch": 0.48, "grad_norm": 4.017284324772185, "learning_rate": 5.619258230126497e-06, "loss": 0.7426, "step": 5832 }, { "epoch": 0.48, "grad_norm": 3.6937448691485972, "learning_rate": 5.617944621411128e-06, "loss": 1.0898, "step": 5833 }, { "epoch": 0.48, "grad_norm": 4.990890063867452, "learning_rate": 5.616630969380532e-06, "loss": 1.0355, "step": 5834 }, { "epoch": 0.48, "grad_norm": 3.1770042370908653, "learning_rate": 5.615317274126787e-06, "loss": 0.4759, "step": 5835 }, { "epoch": 0.48, "grad_norm": 3.3327796202482585, "learning_rate": 5.614003535741979e-06, "loss": 0.4879, "step": 5836 }, { "epoch": 0.48, "grad_norm": 3.616353182769598, "learning_rate": 5.612689754318196e-06, "loss": 0.899, "step": 5837 }, { "epoch": 0.48, "grad_norm": 3.9915988752037297, "learning_rate": 5.611375929947528e-06, "loss": 0.5136, "step": 5838 }, { "epoch": 0.48, "grad_norm": 4.015772413753597, "learning_rate": 5.610062062722067e-06, "loss": 0.9687, "step": 5839 }, { "epoch": 0.48, "grad_norm": 3.8811532612639135, "learning_rate": 5.608748152733911e-06, "loss": 0.8158, "step": 5840 }, { "epoch": 0.48, "grad_norm": 2.7737396464658843, "learning_rate": 5.607434200075159e-06, "loss": 0.5984, "step": 5841 }, { "epoch": 0.48, "grad_norm": 2.641508917195879, "learning_rate": 5.6061202048379125e-06, "loss": 0.6941, "step": 5842 }, { "epoch": 0.48, "grad_norm": 2.8913397236093714, "learning_rate": 5.6048061671142784e-06, "loss": 0.5939, "step": 5843 }, { "epoch": 0.48, "grad_norm": 3.8629156087235854, "learning_rate": 5.603492086996362e-06, "loss": 0.7298, "step": 5844 }, { "epoch": 0.48, "grad_norm": 2.405977959179416, "learning_rate": 5.602177964576279e-06, "loss": 0.5936, "step": 5845 }, { "epoch": 0.48, "grad_norm": 3.454230608280289, "learning_rate": 5.600863799946142e-06, "loss": 0.7115, "step": 5846 }, { "epoch": 0.48, "grad_norm": 3.032858118379392, "learning_rate": 5.599549593198066e-06, "loss": 0.7614, "step": 5847 }, { "epoch": 0.48, "grad_norm": 1.8293281378783064, "learning_rate": 5.598235344424172e-06, "loss": 0.3763, "step": 5848 }, { "epoch": 0.48, "grad_norm": 3.9752145053909422, "learning_rate": 5.596921053716585e-06, "loss": 0.5239, "step": 5849 }, { "epoch": 0.48, "grad_norm": 3.8362708691057366, "learning_rate": 5.59560672116743e-06, "loss": 0.8695, "step": 5850 }, { "epoch": 0.48, "grad_norm": 2.093146394011919, "learning_rate": 5.594292346868836e-06, "loss": 0.3435, "step": 5851 }, { "epoch": 0.48, "grad_norm": 4.654731671404846, "learning_rate": 5.592977930912934e-06, "loss": 0.9871, "step": 5852 }, { "epoch": 0.48, "grad_norm": 2.566857605599453, "learning_rate": 5.5916634733918604e-06, "loss": 0.3263, "step": 5853 }, { "epoch": 0.48, "grad_norm": 4.165932241309278, "learning_rate": 5.590348974397754e-06, "loss": 0.7467, "step": 5854 }, { "epoch": 0.48, "grad_norm": 4.921543603924678, "learning_rate": 5.589034434022751e-06, "loss": 0.878, "step": 5855 }, { "epoch": 0.48, "grad_norm": 3.4275927453103914, "learning_rate": 5.587719852358998e-06, "loss": 0.7094, "step": 5856 }, { "epoch": 0.48, "grad_norm": 3.42932349890855, "learning_rate": 5.586405229498641e-06, "loss": 0.717, "step": 5857 }, { "epoch": 0.48, "grad_norm": 2.4348472387267925, "learning_rate": 5.58509056553383e-06, "loss": 0.3761, "step": 5858 }, { "epoch": 0.48, "grad_norm": 5.504870214130669, "learning_rate": 5.583775860556717e-06, "loss": 1.402, "step": 5859 }, { "epoch": 0.48, "grad_norm": 2.994133857225235, "learning_rate": 5.582461114659456e-06, "loss": 0.6329, "step": 5860 }, { "epoch": 0.48, "grad_norm": 2.549347230680468, "learning_rate": 5.581146327934207e-06, "loss": 0.5622, "step": 5861 }, { "epoch": 0.48, "grad_norm": 1.511334130940213, "learning_rate": 5.579831500473129e-06, "loss": 0.3354, "step": 5862 }, { "epoch": 0.48, "grad_norm": 3.567918857164272, "learning_rate": 5.578516632368387e-06, "loss": 0.7942, "step": 5863 }, { "epoch": 0.48, "grad_norm": 4.184583826317246, "learning_rate": 5.577201723712145e-06, "loss": 0.9476, "step": 5864 }, { "epoch": 0.48, "grad_norm": 3.20165301221488, "learning_rate": 5.575886774596574e-06, "loss": 0.5796, "step": 5865 }, { "epoch": 0.48, "grad_norm": 3.294679591078616, "learning_rate": 5.574571785113848e-06, "loss": 0.7292, "step": 5866 }, { "epoch": 0.48, "grad_norm": 3.3593511678914845, "learning_rate": 5.57325675535614e-06, "loss": 0.7505, "step": 5867 }, { "epoch": 0.48, "grad_norm": 3.96636084138706, "learning_rate": 5.571941685415628e-06, "loss": 0.9361, "step": 5868 }, { "epoch": 0.48, "grad_norm": 4.014532399988285, "learning_rate": 5.570626575384494e-06, "loss": 1.1275, "step": 5869 }, { "epoch": 0.48, "grad_norm": 3.261551974745053, "learning_rate": 5.569311425354918e-06, "loss": 0.4729, "step": 5870 }, { "epoch": 0.48, "grad_norm": 2.3052957847977624, "learning_rate": 5.567996235419092e-06, "loss": 0.428, "step": 5871 }, { "epoch": 0.48, "grad_norm": 2.925265009700976, "learning_rate": 5.566681005669199e-06, "loss": 0.7657, "step": 5872 }, { "epoch": 0.48, "grad_norm": 4.014876049573892, "learning_rate": 5.565365736197434e-06, "loss": 0.904, "step": 5873 }, { "epoch": 0.48, "grad_norm": 4.136471844997838, "learning_rate": 5.564050427095993e-06, "loss": 0.6476, "step": 5874 }, { "epoch": 0.48, "grad_norm": 3.9040217539038466, "learning_rate": 5.56273507845707e-06, "loss": 1.1665, "step": 5875 }, { "epoch": 0.48, "grad_norm": 2.9449207137628903, "learning_rate": 5.561419690372869e-06, "loss": 0.5986, "step": 5876 }, { "epoch": 0.48, "grad_norm": 2.207911771550932, "learning_rate": 5.56010426293559e-06, "loss": 0.3904, "step": 5877 }, { "epoch": 0.48, "grad_norm": 4.728565173368823, "learning_rate": 5.55878879623744e-06, "loss": 1.4535, "step": 5878 }, { "epoch": 0.48, "grad_norm": 2.8645212034986347, "learning_rate": 5.557473290370626e-06, "loss": 0.552, "step": 5879 }, { "epoch": 0.48, "grad_norm": 6.05123044463225, "learning_rate": 5.556157745427362e-06, "loss": 1.2705, "step": 5880 }, { "epoch": 0.48, "grad_norm": 2.530542306620343, "learning_rate": 5.554842161499859e-06, "loss": 0.3485, "step": 5881 }, { "epoch": 0.48, "grad_norm": 2.2538968371779498, "learning_rate": 5.553526538680336e-06, "loss": 0.4511, "step": 5882 }, { "epoch": 0.48, "grad_norm": 4.075364276320732, "learning_rate": 5.552210877061013e-06, "loss": 0.8227, "step": 5883 }, { "epoch": 0.48, "grad_norm": 3.914795662493378, "learning_rate": 5.550895176734109e-06, "loss": 0.7729, "step": 5884 }, { "epoch": 0.48, "grad_norm": 5.610903928405641, "learning_rate": 5.549579437791851e-06, "loss": 0.959, "step": 5885 }, { "epoch": 0.48, "grad_norm": 1.9201182277283213, "learning_rate": 5.548263660326466e-06, "loss": 0.3125, "step": 5886 }, { "epoch": 0.48, "grad_norm": 3.6869314822047303, "learning_rate": 5.546947844430185e-06, "loss": 0.9981, "step": 5887 }, { "epoch": 0.48, "grad_norm": 4.61344356519624, "learning_rate": 5.5456319901952395e-06, "loss": 1.2744, "step": 5888 }, { "epoch": 0.48, "grad_norm": 4.566978454501438, "learning_rate": 5.5443160977138665e-06, "loss": 1.2574, "step": 5889 }, { "epoch": 0.48, "grad_norm": 5.499805201646772, "learning_rate": 5.543000167078304e-06, "loss": 1.0968, "step": 5890 }, { "epoch": 0.48, "grad_norm": 4.3004879299099406, "learning_rate": 5.541684198380793e-06, "loss": 0.7849, "step": 5891 }, { "epoch": 0.48, "grad_norm": 2.486444310642127, "learning_rate": 5.5403681917135785e-06, "loss": 0.2836, "step": 5892 }, { "epoch": 0.48, "grad_norm": 3.346837915740214, "learning_rate": 5.539052147168903e-06, "loss": 0.4871, "step": 5893 }, { "epoch": 0.48, "grad_norm": 4.654557335604403, "learning_rate": 5.53773606483902e-06, "loss": 0.5943, "step": 5894 }, { "epoch": 0.48, "grad_norm": 5.12528844521019, "learning_rate": 5.536419944816177e-06, "loss": 0.8638, "step": 5895 }, { "epoch": 0.48, "grad_norm": 4.340640069400724, "learning_rate": 5.535103787192631e-06, "loss": 0.8913, "step": 5896 }, { "epoch": 0.48, "grad_norm": 3.315891110519818, "learning_rate": 5.53378759206064e-06, "loss": 0.6577, "step": 5897 }, { "epoch": 0.48, "grad_norm": 4.025794917047234, "learning_rate": 5.53247135951246e-06, "loss": 0.851, "step": 5898 }, { "epoch": 0.48, "grad_norm": 3.9692707729717536, "learning_rate": 5.531155089640357e-06, "loss": 0.8836, "step": 5899 }, { "epoch": 0.48, "grad_norm": 3.5622011013882013, "learning_rate": 5.529838782536591e-06, "loss": 0.5705, "step": 5900 }, { "epoch": 0.48, "grad_norm": 4.680175791348038, "learning_rate": 5.528522438293434e-06, "loss": 0.7379, "step": 5901 }, { "epoch": 0.48, "grad_norm": 3.8857911704118298, "learning_rate": 5.527206057003154e-06, "loss": 0.9183, "step": 5902 }, { "epoch": 0.48, "grad_norm": 1.635674392930614, "learning_rate": 5.525889638758024e-06, "loss": 0.3244, "step": 5903 }, { "epoch": 0.48, "grad_norm": 5.16657170536579, "learning_rate": 5.524573183650318e-06, "loss": 1.1687, "step": 5904 }, { "epoch": 0.48, "grad_norm": 3.6165556998196884, "learning_rate": 5.523256691772315e-06, "loss": 0.9416, "step": 5905 }, { "epoch": 0.48, "grad_norm": 3.649624359146059, "learning_rate": 5.521940163216296e-06, "loss": 0.7414, "step": 5906 }, { "epoch": 0.48, "grad_norm": 3.278093059658078, "learning_rate": 5.5206235980745435e-06, "loss": 0.8448, "step": 5907 }, { "epoch": 0.48, "grad_norm": 2.904017485476948, "learning_rate": 5.519306996439342e-06, "loss": 0.5803, "step": 5908 }, { "epoch": 0.48, "grad_norm": 2.9049980254893626, "learning_rate": 5.5179903584029805e-06, "loss": 0.4991, "step": 5909 }, { "epoch": 0.48, "grad_norm": 4.362250538158217, "learning_rate": 5.516673684057747e-06, "loss": 0.8793, "step": 5910 }, { "epoch": 0.48, "grad_norm": 3.8059950212682, "learning_rate": 5.515356973495939e-06, "loss": 0.9024, "step": 5911 }, { "epoch": 0.48, "grad_norm": 3.2889491763754126, "learning_rate": 5.514040226809849e-06, "loss": 0.8642, "step": 5912 }, { "epoch": 0.48, "grad_norm": 4.014061500307177, "learning_rate": 5.512723444091776e-06, "loss": 0.8543, "step": 5913 }, { "epoch": 0.48, "grad_norm": 2.317930909121491, "learning_rate": 5.5114066254340215e-06, "loss": 0.3756, "step": 5914 }, { "epoch": 0.48, "grad_norm": 4.296118836384158, "learning_rate": 5.510089770928889e-06, "loss": 0.7751, "step": 5915 }, { "epoch": 0.48, "grad_norm": 4.17047831966923, "learning_rate": 5.508772880668682e-06, "loss": 0.9673, "step": 5916 }, { "epoch": 0.48, "grad_norm": 3.36608965530601, "learning_rate": 5.507455954745712e-06, "loss": 0.7936, "step": 5917 }, { "epoch": 0.48, "grad_norm": 3.3478663979393133, "learning_rate": 5.506138993252285e-06, "loss": 0.7502, "step": 5918 }, { "epoch": 0.48, "grad_norm": 2.997363442657447, "learning_rate": 5.504821996280719e-06, "loss": 0.8172, "step": 5919 }, { "epoch": 0.48, "grad_norm": 4.639148310727434, "learning_rate": 5.50350496392333e-06, "loss": 0.7692, "step": 5920 }, { "epoch": 0.48, "grad_norm": 4.705950501431014, "learning_rate": 5.502187896272432e-06, "loss": 0.8484, "step": 5921 }, { "epoch": 0.48, "grad_norm": 5.888276588328582, "learning_rate": 5.500870793420349e-06, "loss": 1.4463, "step": 5922 }, { "epoch": 0.48, "grad_norm": 6.138729432535756, "learning_rate": 5.4995536554594035e-06, "loss": 1.5104, "step": 5923 }, { "epoch": 0.48, "grad_norm": 3.424694881068357, "learning_rate": 5.498236482481919e-06, "loss": 0.6552, "step": 5924 }, { "epoch": 0.48, "grad_norm": 2.7475488823187413, "learning_rate": 5.496919274580226e-06, "loss": 0.449, "step": 5925 }, { "epoch": 0.48, "grad_norm": 3.8115374734444227, "learning_rate": 5.495602031846655e-06, "loss": 0.8435, "step": 5926 }, { "epoch": 0.48, "grad_norm": 3.8308256770443916, "learning_rate": 5.494284754373538e-06, "loss": 0.56, "step": 5927 }, { "epoch": 0.48, "grad_norm": 4.414466118736038, "learning_rate": 5.492967442253211e-06, "loss": 0.9561, "step": 5928 }, { "epoch": 0.48, "grad_norm": 3.5556606784233122, "learning_rate": 5.491650095578013e-06, "loss": 0.5788, "step": 5929 }, { "epoch": 0.48, "grad_norm": 5.00760104064909, "learning_rate": 5.4903327144402814e-06, "loss": 0.9597, "step": 5930 }, { "epoch": 0.48, "grad_norm": 3.699846439981498, "learning_rate": 5.489015298932362e-06, "loss": 0.8504, "step": 5931 }, { "epoch": 0.48, "grad_norm": 2.5765579870648723, "learning_rate": 5.487697849146596e-06, "loss": 0.7532, "step": 5932 }, { "epoch": 0.48, "grad_norm": 4.194937056636086, "learning_rate": 5.4863803651753345e-06, "loss": 0.5019, "step": 5933 }, { "epoch": 0.49, "grad_norm": 2.387994818204489, "learning_rate": 5.485062847110927e-06, "loss": 0.3907, "step": 5934 }, { "epoch": 0.49, "grad_norm": 3.7829552978663625, "learning_rate": 5.483745295045724e-06, "loss": 0.8088, "step": 5935 }, { "epoch": 0.49, "grad_norm": 2.194828419721978, "learning_rate": 5.48242770907208e-06, "loss": 0.3704, "step": 5936 }, { "epoch": 0.49, "grad_norm": 4.16157216863073, "learning_rate": 5.481110089282355e-06, "loss": 1.063, "step": 5937 }, { "epoch": 0.49, "grad_norm": 5.462414446566765, "learning_rate": 5.4797924357689045e-06, "loss": 0.9265, "step": 5938 }, { "epoch": 0.49, "grad_norm": 3.1440044922376944, "learning_rate": 5.478474748624095e-06, "loss": 0.6075, "step": 5939 }, { "epoch": 0.49, "grad_norm": 2.715496593630727, "learning_rate": 5.477157027940286e-06, "loss": 0.5834, "step": 5940 }, { "epoch": 0.49, "grad_norm": 4.899814377063645, "learning_rate": 5.475839273809846e-06, "loss": 1.2268, "step": 5941 }, { "epoch": 0.49, "grad_norm": 3.6442247505119956, "learning_rate": 5.474521486325145e-06, "loss": 0.5574, "step": 5942 }, { "epoch": 0.49, "grad_norm": 2.2709048047438967, "learning_rate": 5.473203665578553e-06, "loss": 0.411, "step": 5943 }, { "epoch": 0.49, "grad_norm": 3.2479962450882995, "learning_rate": 5.471885811662442e-06, "loss": 0.5546, "step": 5944 }, { "epoch": 0.49, "grad_norm": 3.4924594522157797, "learning_rate": 5.470567924669189e-06, "loss": 0.9061, "step": 5945 }, { "epoch": 0.49, "grad_norm": 4.707626979273108, "learning_rate": 5.469250004691174e-06, "loss": 1.1444, "step": 5946 }, { "epoch": 0.49, "grad_norm": 3.7467684133691357, "learning_rate": 5.467932051820776e-06, "loss": 0.8675, "step": 5947 }, { "epoch": 0.49, "grad_norm": 4.129339723411537, "learning_rate": 5.466614066150375e-06, "loss": 1.0813, "step": 5948 }, { "epoch": 0.49, "grad_norm": 3.2643072965740694, "learning_rate": 5.465296047772362e-06, "loss": 0.6233, "step": 5949 }, { "epoch": 0.49, "grad_norm": 3.1355986530047826, "learning_rate": 5.463977996779119e-06, "loss": 0.8856, "step": 5950 }, { "epoch": 0.49, "grad_norm": 4.443501834756944, "learning_rate": 5.4626599132630384e-06, "loss": 1.0806, "step": 5951 }, { "epoch": 0.49, "grad_norm": 3.627958424176721, "learning_rate": 5.46134179731651e-06, "loss": 0.659, "step": 5952 }, { "epoch": 0.49, "grad_norm": 3.309435183852325, "learning_rate": 5.4600236490319305e-06, "loss": 0.7406, "step": 5953 }, { "epoch": 0.49, "grad_norm": 4.533187208048489, "learning_rate": 5.458705468501696e-06, "loss": 0.6347, "step": 5954 }, { "epoch": 0.49, "grad_norm": 1.5210903979167285, "learning_rate": 5.457387255818204e-06, "loss": 0.39, "step": 5955 }, { "epoch": 0.49, "grad_norm": 3.4005814640182117, "learning_rate": 5.456069011073854e-06, "loss": 0.753, "step": 5956 }, { "epoch": 0.49, "grad_norm": 5.28949496031527, "learning_rate": 5.454750734361054e-06, "loss": 1.3707, "step": 5957 }, { "epoch": 0.49, "grad_norm": 4.28649999961873, "learning_rate": 5.453432425772205e-06, "loss": 0.6708, "step": 5958 }, { "epoch": 0.49, "grad_norm": 5.050799746874854, "learning_rate": 5.4521140853997166e-06, "loss": 0.8119, "step": 5959 }, { "epoch": 0.49, "grad_norm": 3.9711602718109136, "learning_rate": 5.450795713335999e-06, "loss": 0.8896, "step": 5960 }, { "epoch": 0.49, "grad_norm": 2.5967403665073654, "learning_rate": 5.449477309673462e-06, "loss": 0.4791, "step": 5961 }, { "epoch": 0.49, "grad_norm": 4.957273264260848, "learning_rate": 5.4481588745045245e-06, "loss": 1.1205, "step": 5962 }, { "epoch": 0.49, "grad_norm": 2.5226503055402056, "learning_rate": 5.446840407921599e-06, "loss": 0.3371, "step": 5963 }, { "epoch": 0.49, "grad_norm": 2.6492716318407035, "learning_rate": 5.445521910017104e-06, "loss": 0.7335, "step": 5964 }, { "epoch": 0.49, "grad_norm": 4.534855989455732, "learning_rate": 5.444203380883464e-06, "loss": 0.5681, "step": 5965 }, { "epoch": 0.49, "grad_norm": 3.8778627977130533, "learning_rate": 5.442884820613099e-06, "loss": 0.5447, "step": 5966 }, { "epoch": 0.49, "grad_norm": 5.641037162912989, "learning_rate": 5.441566229298436e-06, "loss": 0.8469, "step": 5967 }, { "epoch": 0.49, "grad_norm": 5.211550326231087, "learning_rate": 5.440247607031901e-06, "loss": 1.1026, "step": 5968 }, { "epoch": 0.49, "grad_norm": 5.769302466784294, "learning_rate": 5.438928953905926e-06, "loss": 1.2086, "step": 5969 }, { "epoch": 0.49, "grad_norm": 4.350234579846934, "learning_rate": 5.437610270012943e-06, "loss": 0.889, "step": 5970 }, { "epoch": 0.49, "grad_norm": 3.712562852846676, "learning_rate": 5.436291555445383e-06, "loss": 1.0308, "step": 5971 }, { "epoch": 0.49, "grad_norm": 3.5171631949861393, "learning_rate": 5.434972810295683e-06, "loss": 0.671, "step": 5972 }, { "epoch": 0.49, "grad_norm": 3.662786074086821, "learning_rate": 5.433654034656283e-06, "loss": 0.932, "step": 5973 }, { "epoch": 0.49, "grad_norm": 4.598717293441438, "learning_rate": 5.4323352286196215e-06, "loss": 0.695, "step": 5974 }, { "epoch": 0.49, "grad_norm": 5.191268318394292, "learning_rate": 5.431016392278142e-06, "loss": 1.0992, "step": 5975 }, { "epoch": 0.49, "grad_norm": 1.7364795990950455, "learning_rate": 5.429697525724289e-06, "loss": 0.3564, "step": 5976 }, { "epoch": 0.49, "grad_norm": 2.4752863215518532, "learning_rate": 5.428378629050511e-06, "loss": 0.5215, "step": 5977 }, { "epoch": 0.49, "grad_norm": 2.6326295764189065, "learning_rate": 5.427059702349255e-06, "loss": 0.7813, "step": 5978 }, { "epoch": 0.49, "grad_norm": 3.4500554392551557, "learning_rate": 5.425740745712972e-06, "loss": 0.5694, "step": 5979 }, { "epoch": 0.49, "grad_norm": 3.664928275209959, "learning_rate": 5.4244217592341165e-06, "loss": 0.7722, "step": 5980 }, { "epoch": 0.49, "grad_norm": 3.2221681323965026, "learning_rate": 5.423102743005141e-06, "loss": 0.4881, "step": 5981 }, { "epoch": 0.49, "grad_norm": 5.414440890088172, "learning_rate": 5.421783697118506e-06, "loss": 1.3149, "step": 5982 }, { "epoch": 0.49, "grad_norm": 4.183253117450436, "learning_rate": 5.420464621666669e-06, "loss": 1.2179, "step": 5983 }, { "epoch": 0.49, "grad_norm": 3.7352526455230937, "learning_rate": 5.4191455167420905e-06, "loss": 0.581, "step": 5984 }, { "epoch": 0.49, "grad_norm": 3.8724811227526703, "learning_rate": 5.417826382437238e-06, "loss": 0.9634, "step": 5985 }, { "epoch": 0.49, "grad_norm": 6.3110195488406875, "learning_rate": 5.4165072188445734e-06, "loss": 1.7039, "step": 5986 }, { "epoch": 0.49, "grad_norm": 3.598222072883564, "learning_rate": 5.415188026056565e-06, "loss": 0.6402, "step": 5987 }, { "epoch": 0.49, "grad_norm": 5.553321277530451, "learning_rate": 5.413868804165682e-06, "loss": 1.097, "step": 5988 }, { "epoch": 0.49, "grad_norm": 3.4588707286392295, "learning_rate": 5.412549553264399e-06, "loss": 0.6014, "step": 5989 }, { "epoch": 0.49, "grad_norm": 4.733512858828318, "learning_rate": 5.411230273445186e-06, "loss": 1.325, "step": 5990 }, { "epoch": 0.49, "grad_norm": 4.39984972508648, "learning_rate": 5.409910964800522e-06, "loss": 0.8622, "step": 5991 }, { "epoch": 0.49, "grad_norm": 2.2789657345602468, "learning_rate": 5.4085916274228825e-06, "loss": 0.5902, "step": 5992 }, { "epoch": 0.49, "grad_norm": 2.1865544219152837, "learning_rate": 5.407272261404748e-06, "loss": 0.4441, "step": 5993 }, { "epoch": 0.49, "grad_norm": 2.593265950272095, "learning_rate": 5.405952866838602e-06, "loss": 0.384, "step": 5994 }, { "epoch": 0.49, "grad_norm": 4.557730997750645, "learning_rate": 5.4046334438169245e-06, "loss": 1.0818, "step": 5995 }, { "epoch": 0.49, "grad_norm": 3.413490038438569, "learning_rate": 5.403313992432203e-06, "loss": 0.5347, "step": 5996 }, { "epoch": 0.49, "grad_norm": 6.295959641215288, "learning_rate": 5.401994512776928e-06, "loss": 1.3955, "step": 5997 }, { "epoch": 0.49, "grad_norm": 2.796602048676265, "learning_rate": 5.4006750049435864e-06, "loss": 0.3992, "step": 5998 }, { "epoch": 0.49, "grad_norm": 5.21319372425049, "learning_rate": 5.3993554690246695e-06, "loss": 1.3535, "step": 5999 }, { "epoch": 0.49, "grad_norm": 3.4363423550782217, "learning_rate": 5.398035905112675e-06, "loss": 0.9095, "step": 6000 }, { "epoch": 0.49, "grad_norm": 4.160294196177696, "learning_rate": 5.396716313300094e-06, "loss": 0.6002, "step": 6001 }, { "epoch": 0.49, "grad_norm": 4.867758171193, "learning_rate": 5.395396693679427e-06, "loss": 0.987, "step": 6002 }, { "epoch": 0.49, "grad_norm": 1.9501210659651882, "learning_rate": 5.394077046343172e-06, "loss": 0.4098, "step": 6003 }, { "epoch": 0.49, "grad_norm": 3.628940380104001, "learning_rate": 5.39275737138383e-06, "loss": 0.8241, "step": 6004 }, { "epoch": 0.49, "grad_norm": 3.718599098761617, "learning_rate": 5.3914376688939065e-06, "loss": 0.919, "step": 6005 }, { "epoch": 0.49, "grad_norm": 3.045665200502083, "learning_rate": 5.390117938965906e-06, "loss": 0.7792, "step": 6006 }, { "epoch": 0.49, "grad_norm": 3.8960198644132213, "learning_rate": 5.388798181692335e-06, "loss": 0.8319, "step": 6007 }, { "epoch": 0.49, "grad_norm": 4.9175820202923095, "learning_rate": 5.387478397165704e-06, "loss": 1.3319, "step": 6008 }, { "epoch": 0.49, "grad_norm": 2.8390832555859196, "learning_rate": 5.386158585478525e-06, "loss": 0.6011, "step": 6009 }, { "epoch": 0.49, "grad_norm": 3.63872663669692, "learning_rate": 5.384838746723308e-06, "loss": 0.6307, "step": 6010 }, { "epoch": 0.49, "grad_norm": 3.2631288297706647, "learning_rate": 5.383518880992571e-06, "loss": 0.45, "step": 6011 }, { "epoch": 0.49, "grad_norm": 4.06846177820315, "learning_rate": 5.382198988378829e-06, "loss": 0.9493, "step": 6012 }, { "epoch": 0.49, "grad_norm": 2.471610145989451, "learning_rate": 5.380879068974599e-06, "loss": 0.426, "step": 6013 }, { "epoch": 0.49, "grad_norm": 1.05154167912491, "learning_rate": 5.3795591228724065e-06, "loss": 0.1397, "step": 6014 }, { "epoch": 0.49, "grad_norm": 4.269178799749522, "learning_rate": 5.37823915016477e-06, "loss": 0.868, "step": 6015 }, { "epoch": 0.49, "grad_norm": 2.6985714026759426, "learning_rate": 5.376919150944218e-06, "loss": 0.3655, "step": 6016 }, { "epoch": 0.49, "grad_norm": 3.5445661109934754, "learning_rate": 5.375599125303272e-06, "loss": 0.5883, "step": 6017 }, { "epoch": 0.49, "grad_norm": 1.865373846194751, "learning_rate": 5.3742790733344604e-06, "loss": 0.3618, "step": 6018 }, { "epoch": 0.49, "grad_norm": 2.484862095770465, "learning_rate": 5.372958995130315e-06, "loss": 0.5057, "step": 6019 }, { "epoch": 0.49, "grad_norm": 4.6606923982642225, "learning_rate": 5.37163889078337e-06, "loss": 0.748, "step": 6020 }, { "epoch": 0.49, "grad_norm": 4.877526635623892, "learning_rate": 5.3703187603861525e-06, "loss": 0.7487, "step": 6021 }, { "epoch": 0.49, "grad_norm": 3.2999462796503423, "learning_rate": 5.368998604031202e-06, "loss": 0.8011, "step": 6022 }, { "epoch": 0.49, "grad_norm": 3.3706799983308726, "learning_rate": 5.367678421811058e-06, "loss": 0.4402, "step": 6023 }, { "epoch": 0.49, "grad_norm": 2.3353806291212935, "learning_rate": 5.366358213818256e-06, "loss": 0.4826, "step": 6024 }, { "epoch": 0.49, "grad_norm": 4.558573600171047, "learning_rate": 5.365037980145337e-06, "loss": 1.0699, "step": 6025 }, { "epoch": 0.49, "grad_norm": 3.7952480984435892, "learning_rate": 5.3637177208848435e-06, "loss": 0.5923, "step": 6026 }, { "epoch": 0.49, "grad_norm": 3.223010186727015, "learning_rate": 5.362397436129321e-06, "loss": 0.6329, "step": 6027 }, { "epoch": 0.49, "grad_norm": 3.1927538308642323, "learning_rate": 5.361077125971316e-06, "loss": 0.5016, "step": 6028 }, { "epoch": 0.49, "grad_norm": 4.116713591518483, "learning_rate": 5.359756790503376e-06, "loss": 0.6132, "step": 6029 }, { "epoch": 0.49, "grad_norm": 4.433978801843281, "learning_rate": 5.358436429818049e-06, "loss": 0.6, "step": 6030 }, { "epoch": 0.49, "grad_norm": 1.227780056767419, "learning_rate": 5.357116044007889e-06, "loss": 0.2047, "step": 6031 }, { "epoch": 0.49, "grad_norm": 2.257502964558977, "learning_rate": 5.35579563316545e-06, "loss": 0.3571, "step": 6032 }, { "epoch": 0.49, "grad_norm": 5.06788660150286, "learning_rate": 5.354475197383284e-06, "loss": 1.581, "step": 6033 }, { "epoch": 0.49, "grad_norm": 3.744145590496548, "learning_rate": 5.353154736753951e-06, "loss": 0.5749, "step": 6034 }, { "epoch": 0.49, "grad_norm": 4.09242606935554, "learning_rate": 5.351834251370006e-06, "loss": 0.9254, "step": 6035 }, { "epoch": 0.49, "grad_norm": 1.771394588947226, "learning_rate": 5.350513741324011e-06, "loss": 0.3295, "step": 6036 }, { "epoch": 0.49, "grad_norm": 5.021615405626378, "learning_rate": 5.349193206708529e-06, "loss": 1.1829, "step": 6037 }, { "epoch": 0.49, "grad_norm": 5.202720037583286, "learning_rate": 5.347872647616122e-06, "loss": 1.2606, "step": 6038 }, { "epoch": 0.49, "grad_norm": 3.6572563355449743, "learning_rate": 5.3465520641393585e-06, "loss": 0.7677, "step": 6039 }, { "epoch": 0.49, "grad_norm": 3.8552302348051444, "learning_rate": 5.345231456370802e-06, "loss": 0.7232, "step": 6040 }, { "epoch": 0.49, "grad_norm": 4.806366339065954, "learning_rate": 5.3439108244030234e-06, "loss": 1.2052, "step": 6041 }, { "epoch": 0.49, "grad_norm": 4.29929892416941, "learning_rate": 5.342590168328592e-06, "loss": 0.7961, "step": 6042 }, { "epoch": 0.49, "grad_norm": 2.1248953723976363, "learning_rate": 5.341269488240082e-06, "loss": 0.4013, "step": 6043 }, { "epoch": 0.49, "grad_norm": 5.355464824501211, "learning_rate": 5.3399487842300646e-06, "loss": 1.2155, "step": 6044 }, { "epoch": 0.49, "grad_norm": 4.124892612547487, "learning_rate": 5.338628056391118e-06, "loss": 1.2099, "step": 6045 }, { "epoch": 0.49, "grad_norm": 2.18600294308645, "learning_rate": 5.337307304815817e-06, "loss": 0.343, "step": 6046 }, { "epoch": 0.49, "grad_norm": 2.844076954517568, "learning_rate": 5.335986529596743e-06, "loss": 0.6046, "step": 6047 }, { "epoch": 0.49, "grad_norm": 6.3044142195789785, "learning_rate": 5.334665730826476e-06, "loss": 1.1133, "step": 6048 }, { "epoch": 0.49, "grad_norm": 1.3205392762697725, "learning_rate": 5.333344908597597e-06, "loss": 0.1942, "step": 6049 }, { "epoch": 0.49, "grad_norm": 4.773085470119928, "learning_rate": 5.332024063002691e-06, "loss": 0.9705, "step": 6050 }, { "epoch": 0.49, "grad_norm": 5.0438150380204245, "learning_rate": 5.330703194134342e-06, "loss": 1.1824, "step": 6051 }, { "epoch": 0.49, "grad_norm": 5.62512093898664, "learning_rate": 5.32938230208514e-06, "loss": 1.051, "step": 6052 }, { "epoch": 0.49, "grad_norm": 4.944231217257914, "learning_rate": 5.328061386947671e-06, "loss": 0.9748, "step": 6053 }, { "epoch": 0.49, "grad_norm": 4.627745480283834, "learning_rate": 5.326740448814527e-06, "loss": 0.8678, "step": 6054 }, { "epoch": 0.49, "grad_norm": 3.288780733394661, "learning_rate": 5.325419487778299e-06, "loss": 0.7457, "step": 6055 }, { "epoch": 0.49, "grad_norm": 3.2709540185919006, "learning_rate": 5.324098503931581e-06, "loss": 0.4442, "step": 6056 }, { "epoch": 0.5, "grad_norm": 4.528635346577336, "learning_rate": 5.3227774973669695e-06, "loss": 1.0207, "step": 6057 }, { "epoch": 0.5, "grad_norm": 3.666482261079773, "learning_rate": 5.3214564681770585e-06, "loss": 0.9279, "step": 6058 }, { "epoch": 0.5, "grad_norm": 3.9690261119748724, "learning_rate": 5.320135416454448e-06, "loss": 0.9664, "step": 6059 }, { "epoch": 0.5, "grad_norm": 3.266908773209643, "learning_rate": 5.3188143422917405e-06, "loss": 0.8157, "step": 6060 }, { "epoch": 0.5, "grad_norm": 3.098677175359244, "learning_rate": 5.317493245781533e-06, "loss": 0.5497, "step": 6061 }, { "epoch": 0.5, "grad_norm": 4.276565373552879, "learning_rate": 5.316172127016431e-06, "loss": 0.7332, "step": 6062 }, { "epoch": 0.5, "grad_norm": 1.2610912101476668, "learning_rate": 5.31485098608904e-06, "loss": 0.2074, "step": 6063 }, { "epoch": 0.5, "grad_norm": 2.128417890513426, "learning_rate": 5.313529823091964e-06, "loss": 0.3317, "step": 6064 }, { "epoch": 0.5, "grad_norm": 3.5045172622483607, "learning_rate": 5.312208638117812e-06, "loss": 1.0077, "step": 6065 }, { "epoch": 0.5, "grad_norm": 4.508505709824206, "learning_rate": 5.310887431259194e-06, "loss": 0.9134, "step": 6066 }, { "epoch": 0.5, "grad_norm": 2.28128242779539, "learning_rate": 5.309566202608719e-06, "loss": 0.3332, "step": 6067 }, { "epoch": 0.5, "grad_norm": 3.8888855207366695, "learning_rate": 5.3082449522590005e-06, "loss": 1.0126, "step": 6068 }, { "epoch": 0.5, "grad_norm": 3.5042120031476163, "learning_rate": 5.306923680302654e-06, "loss": 1.0205, "step": 6069 }, { "epoch": 0.5, "grad_norm": 3.3608982135572427, "learning_rate": 5.30560238683229e-06, "loss": 0.6756, "step": 6070 }, { "epoch": 0.5, "grad_norm": 3.1357296635241734, "learning_rate": 5.304281071940532e-06, "loss": 0.6604, "step": 6071 }, { "epoch": 0.5, "grad_norm": 3.487126975007773, "learning_rate": 5.302959735719995e-06, "loss": 0.7766, "step": 6072 }, { "epoch": 0.5, "grad_norm": 4.043631275556565, "learning_rate": 5.301638378263296e-06, "loss": 0.8359, "step": 6073 }, { "epoch": 0.5, "grad_norm": 3.251135154978727, "learning_rate": 5.300316999663062e-06, "loss": 0.7924, "step": 6074 }, { "epoch": 0.5, "grad_norm": 3.0227537491324274, "learning_rate": 5.298995600011912e-06, "loss": 0.5663, "step": 6075 }, { "epoch": 0.5, "grad_norm": 2.749566650478806, "learning_rate": 5.2976741794024725e-06, "loss": 0.4459, "step": 6076 }, { "epoch": 0.5, "grad_norm": 2.2032780087289554, "learning_rate": 5.296352737927368e-06, "loss": 0.2584, "step": 6077 }, { "epoch": 0.5, "grad_norm": 2.4978819867980775, "learning_rate": 5.295031275679226e-06, "loss": 0.5388, "step": 6078 }, { "epoch": 0.5, "grad_norm": 3.7285154432660215, "learning_rate": 5.293709792750677e-06, "loss": 0.7644, "step": 6079 }, { "epoch": 0.5, "grad_norm": 3.7192776482024046, "learning_rate": 5.292388289234349e-06, "loss": 0.9788, "step": 6080 }, { "epoch": 0.5, "grad_norm": 4.711842440350777, "learning_rate": 5.2910667652228735e-06, "loss": 0.7359, "step": 6081 }, { "epoch": 0.5, "grad_norm": 3.673850306310747, "learning_rate": 5.289745220808885e-06, "loss": 0.924, "step": 6082 }, { "epoch": 0.5, "grad_norm": 4.4479694632064355, "learning_rate": 5.288423656085018e-06, "loss": 0.8344, "step": 6083 }, { "epoch": 0.5, "grad_norm": 2.3718104486576355, "learning_rate": 5.287102071143907e-06, "loss": 0.3449, "step": 6084 }, { "epoch": 0.5, "grad_norm": 1.9814963008856785, "learning_rate": 5.28578046607819e-06, "loss": 0.2176, "step": 6085 }, { "epoch": 0.5, "grad_norm": 3.0309721955972324, "learning_rate": 5.284458840980507e-06, "loss": 0.7822, "step": 6086 }, { "epoch": 0.5, "grad_norm": 3.8376790697897962, "learning_rate": 5.283137195943499e-06, "loss": 0.6084, "step": 6087 }, { "epoch": 0.5, "grad_norm": 4.032045832585798, "learning_rate": 5.281815531059803e-06, "loss": 0.7281, "step": 6088 }, { "epoch": 0.5, "grad_norm": 2.844734848963236, "learning_rate": 5.280493846422066e-06, "loss": 0.5976, "step": 6089 }, { "epoch": 0.5, "grad_norm": 4.2656608970985594, "learning_rate": 5.27917214212293e-06, "loss": 1.0984, "step": 6090 }, { "epoch": 0.5, "grad_norm": 4.5982297716675875, "learning_rate": 5.2778504182550436e-06, "loss": 1.1228, "step": 6091 }, { "epoch": 0.5, "grad_norm": 3.9160003755177946, "learning_rate": 5.27652867491105e-06, "loss": 0.9268, "step": 6092 }, { "epoch": 0.5, "grad_norm": 2.7159953624083575, "learning_rate": 5.2752069121836e-06, "loss": 0.7194, "step": 6093 }, { "epoch": 0.5, "grad_norm": 3.542153783435038, "learning_rate": 5.273885130165345e-06, "loss": 0.7737, "step": 6094 }, { "epoch": 0.5, "grad_norm": 2.4585385374304933, "learning_rate": 5.2725633289489345e-06, "loss": 0.5026, "step": 6095 }, { "epoch": 0.5, "grad_norm": 3.0917108234195774, "learning_rate": 5.2712415086270185e-06, "loss": 0.655, "step": 6096 }, { "epoch": 0.5, "grad_norm": 5.3591110112259255, "learning_rate": 5.2699196692922546e-06, "loss": 0.8652, "step": 6097 }, { "epoch": 0.5, "grad_norm": 4.300957812764537, "learning_rate": 5.268597811037296e-06, "loss": 0.538, "step": 6098 }, { "epoch": 0.5, "grad_norm": 3.2486258407781285, "learning_rate": 5.2672759339547995e-06, "loss": 0.6235, "step": 6099 }, { "epoch": 0.5, "grad_norm": 4.393035122156463, "learning_rate": 5.265954038137424e-06, "loss": 0.7664, "step": 6100 }, { "epoch": 0.5, "grad_norm": 3.9881075428557233, "learning_rate": 5.264632123677827e-06, "loss": 0.5406, "step": 6101 }, { "epoch": 0.5, "grad_norm": 4.355037993476339, "learning_rate": 5.2633101906686715e-06, "loss": 1.1116, "step": 6102 }, { "epoch": 0.5, "grad_norm": 1.8713903279823412, "learning_rate": 5.261988239202617e-06, "loss": 0.396, "step": 6103 }, { "epoch": 0.5, "grad_norm": 2.868093306142094, "learning_rate": 5.260666269372327e-06, "loss": 0.3755, "step": 6104 }, { "epoch": 0.5, "grad_norm": 3.782449515322387, "learning_rate": 5.259344281270464e-06, "loss": 0.8487, "step": 6105 }, { "epoch": 0.5, "grad_norm": 4.327406827144394, "learning_rate": 5.258022274989698e-06, "loss": 0.8965, "step": 6106 }, { "epoch": 0.5, "grad_norm": 5.280276130996807, "learning_rate": 5.256700250622692e-06, "loss": 1.35, "step": 6107 }, { "epoch": 0.5, "grad_norm": 2.8376123252688825, "learning_rate": 5.2553782082621155e-06, "loss": 0.6479, "step": 6108 }, { "epoch": 0.5, "grad_norm": 4.390103833093076, "learning_rate": 5.2540561480006395e-06, "loss": 0.9966, "step": 6109 }, { "epoch": 0.5, "grad_norm": 3.1779116496556297, "learning_rate": 5.252734069930933e-06, "loss": 0.5502, "step": 6110 }, { "epoch": 0.5, "grad_norm": 4.844221005521417, "learning_rate": 5.251411974145667e-06, "loss": 1.1597, "step": 6111 }, { "epoch": 0.5, "grad_norm": 3.0666922562733765, "learning_rate": 5.250089860737516e-06, "loss": 0.7088, "step": 6112 }, { "epoch": 0.5, "grad_norm": 2.681093129070991, "learning_rate": 5.248767729799153e-06, "loss": 0.567, "step": 6113 }, { "epoch": 0.5, "grad_norm": 5.172353609353667, "learning_rate": 5.247445581423257e-06, "loss": 1.5891, "step": 6114 }, { "epoch": 0.5, "grad_norm": 4.01038345251461, "learning_rate": 5.246123415702502e-06, "loss": 1.059, "step": 6115 }, { "epoch": 0.5, "grad_norm": 3.3530754017556252, "learning_rate": 5.244801232729566e-06, "loss": 0.9773, "step": 6116 }, { "epoch": 0.5, "grad_norm": 5.0136245644424715, "learning_rate": 5.2434790325971295e-06, "loss": 0.901, "step": 6117 }, { "epoch": 0.5, "grad_norm": 2.13231353398788, "learning_rate": 5.242156815397873e-06, "loss": 0.3978, "step": 6118 }, { "epoch": 0.5, "grad_norm": 1.7810045992983448, "learning_rate": 5.240834581224476e-06, "loss": 0.3124, "step": 6119 }, { "epoch": 0.5, "grad_norm": 4.194978801765924, "learning_rate": 5.239512330169625e-06, "loss": 0.5603, "step": 6120 }, { "epoch": 0.5, "grad_norm": 4.131501788834688, "learning_rate": 5.238190062326001e-06, "loss": 1.1013, "step": 6121 }, { "epoch": 0.5, "grad_norm": 2.673214352109646, "learning_rate": 5.23686777778629e-06, "loss": 0.3919, "step": 6122 }, { "epoch": 0.5, "grad_norm": 2.9256462722989505, "learning_rate": 5.235545476643179e-06, "loss": 0.8098, "step": 6123 }, { "epoch": 0.5, "grad_norm": 4.373331473673059, "learning_rate": 5.234223158989354e-06, "loss": 1.2181, "step": 6124 }, { "epoch": 0.5, "grad_norm": 2.1702997037447873, "learning_rate": 5.232900824917507e-06, "loss": 0.5741, "step": 6125 }, { "epoch": 0.5, "grad_norm": 4.186281271164072, "learning_rate": 5.231578474520324e-06, "loss": 0.8313, "step": 6126 }, { "epoch": 0.5, "grad_norm": 4.9746730564601425, "learning_rate": 5.230256107890499e-06, "loss": 0.8272, "step": 6127 }, { "epoch": 0.5, "grad_norm": 2.1480789590876506, "learning_rate": 5.228933725120722e-06, "loss": 0.2823, "step": 6128 }, { "epoch": 0.5, "grad_norm": 4.664596301551104, "learning_rate": 5.227611326303688e-06, "loss": 1.1076, "step": 6129 }, { "epoch": 0.5, "grad_norm": 3.57939184414976, "learning_rate": 5.2262889115320895e-06, "loss": 0.8099, "step": 6130 }, { "epoch": 0.5, "grad_norm": 3.7668009677088365, "learning_rate": 5.224966480898624e-06, "loss": 0.772, "step": 6131 }, { "epoch": 0.5, "grad_norm": 4.9670168505602925, "learning_rate": 5.2236440344959875e-06, "loss": 1.3492, "step": 6132 }, { "epoch": 0.5, "grad_norm": 3.797135588049277, "learning_rate": 5.2223215724168764e-06, "loss": 0.8794, "step": 6133 }, { "epoch": 0.5, "grad_norm": 3.8553143869273296, "learning_rate": 5.220999094753992e-06, "loss": 0.4619, "step": 6134 }, { "epoch": 0.5, "grad_norm": 3.0458033099846755, "learning_rate": 5.2196766016000325e-06, "loss": 0.7381, "step": 6135 }, { "epoch": 0.5, "grad_norm": 3.306552937733528, "learning_rate": 5.218354093047697e-06, "loss": 0.6208, "step": 6136 }, { "epoch": 0.5, "grad_norm": 3.4600195846023656, "learning_rate": 5.217031569189692e-06, "loss": 0.6653, "step": 6137 }, { "epoch": 0.5, "grad_norm": 1.174761429229127, "learning_rate": 5.215709030118718e-06, "loss": 0.1854, "step": 6138 }, { "epoch": 0.5, "grad_norm": 2.469828478962, "learning_rate": 5.21438647592748e-06, "loss": 0.425, "step": 6139 }, { "epoch": 0.5, "grad_norm": 4.028015358329215, "learning_rate": 5.213063906708683e-06, "loss": 0.7259, "step": 6140 }, { "epoch": 0.5, "grad_norm": 3.181936284915569, "learning_rate": 5.211741322555034e-06, "loss": 0.6182, "step": 6141 }, { "epoch": 0.5, "grad_norm": 3.5836489621305465, "learning_rate": 5.2104187235592395e-06, "loss": 0.797, "step": 6142 }, { "epoch": 0.5, "grad_norm": 3.5992488846652266, "learning_rate": 5.209096109814008e-06, "loss": 0.6928, "step": 6143 }, { "epoch": 0.5, "grad_norm": 3.4981195911857967, "learning_rate": 5.207773481412049e-06, "loss": 0.8769, "step": 6144 }, { "epoch": 0.5, "grad_norm": 3.6780818571798677, "learning_rate": 5.206450838446072e-06, "loss": 0.5248, "step": 6145 }, { "epoch": 0.5, "grad_norm": 3.7670184348930027, "learning_rate": 5.205128181008791e-06, "loss": 0.9068, "step": 6146 }, { "epoch": 0.5, "grad_norm": 3.5566362645193568, "learning_rate": 5.203805509192917e-06, "loss": 0.6849, "step": 6147 }, { "epoch": 0.5, "grad_norm": 2.4882825346129476, "learning_rate": 5.202482823091165e-06, "loss": 0.8195, "step": 6148 }, { "epoch": 0.5, "grad_norm": 2.611746586855792, "learning_rate": 5.201160122796247e-06, "loss": 0.3935, "step": 6149 }, { "epoch": 0.5, "grad_norm": 3.2290167217233137, "learning_rate": 5.19983740840088e-06, "loss": 0.3995, "step": 6150 }, { "epoch": 0.5, "grad_norm": 2.734378775660485, "learning_rate": 5.198514679997782e-06, "loss": 0.6287, "step": 6151 }, { "epoch": 0.5, "grad_norm": 4.602912475170529, "learning_rate": 5.197191937679667e-06, "loss": 0.9427, "step": 6152 }, { "epoch": 0.5, "grad_norm": 3.735567133731998, "learning_rate": 5.195869181539255e-06, "loss": 0.8277, "step": 6153 }, { "epoch": 0.5, "grad_norm": 6.010553948935826, "learning_rate": 5.194546411669267e-06, "loss": 0.8849, "step": 6154 }, { "epoch": 0.5, "grad_norm": 4.871751178871961, "learning_rate": 5.193223628162421e-06, "loss": 0.8736, "step": 6155 }, { "epoch": 0.5, "grad_norm": 0.9287174402242686, "learning_rate": 5.19190083111144e-06, "loss": 0.1051, "step": 6156 }, { "epoch": 0.5, "grad_norm": 2.7633676823644064, "learning_rate": 5.190578020609047e-06, "loss": 0.7295, "step": 6157 }, { "epoch": 0.5, "grad_norm": 4.490580213260556, "learning_rate": 5.189255196747964e-06, "loss": 0.7785, "step": 6158 }, { "epoch": 0.5, "grad_norm": 3.2016378207556, "learning_rate": 5.187932359620914e-06, "loss": 0.7724, "step": 6159 }, { "epoch": 0.5, "grad_norm": 3.911124554425165, "learning_rate": 5.186609509320625e-06, "loss": 0.5837, "step": 6160 }, { "epoch": 0.5, "grad_norm": 2.103299272865019, "learning_rate": 5.18528664593982e-06, "loss": 0.3336, "step": 6161 }, { "epoch": 0.5, "grad_norm": 3.763653361213138, "learning_rate": 5.183963769571227e-06, "loss": 0.8515, "step": 6162 }, { "epoch": 0.5, "grad_norm": 2.0625643276009, "learning_rate": 5.1826408803075765e-06, "loss": 0.4117, "step": 6163 }, { "epoch": 0.5, "grad_norm": 5.418750418805527, "learning_rate": 5.181317978241595e-06, "loss": 1.5736, "step": 6164 }, { "epoch": 0.5, "grad_norm": 3.5187906029484766, "learning_rate": 5.179995063466011e-06, "loss": 0.7462, "step": 6165 }, { "epoch": 0.5, "grad_norm": 4.098670287517148, "learning_rate": 5.178672136073558e-06, "loss": 0.7015, "step": 6166 }, { "epoch": 0.5, "grad_norm": 4.194162905885539, "learning_rate": 5.177349196156964e-06, "loss": 0.7261, "step": 6167 }, { "epoch": 0.5, "grad_norm": 4.153913117205848, "learning_rate": 5.1760262438089636e-06, "loss": 0.8085, "step": 6168 }, { "epoch": 0.5, "grad_norm": 4.997762346294353, "learning_rate": 5.174703279122291e-06, "loss": 0.8826, "step": 6169 }, { "epoch": 0.5, "grad_norm": 3.476328227990508, "learning_rate": 5.173380302189676e-06, "loss": 0.9064, "step": 6170 }, { "epoch": 0.5, "grad_norm": 5.570371625510055, "learning_rate": 5.172057313103859e-06, "loss": 1.4817, "step": 6171 }, { "epoch": 0.5, "grad_norm": 1.9306079358977413, "learning_rate": 5.170734311957572e-06, "loss": 0.3788, "step": 6172 }, { "epoch": 0.5, "grad_norm": 4.549389996259477, "learning_rate": 5.169411298843554e-06, "loss": 0.7813, "step": 6173 }, { "epoch": 0.5, "grad_norm": 5.499649837486986, "learning_rate": 5.16808827385454e-06, "loss": 0.7076, "step": 6174 }, { "epoch": 0.5, "grad_norm": 3.989391726629326, "learning_rate": 5.16676523708327e-06, "loss": 0.9742, "step": 6175 }, { "epoch": 0.5, "grad_norm": 3.67099707567816, "learning_rate": 5.165442188622482e-06, "loss": 0.879, "step": 6176 }, { "epoch": 0.5, "grad_norm": 4.558072060843167, "learning_rate": 5.164119128564917e-06, "loss": 0.8436, "step": 6177 }, { "epoch": 0.5, "grad_norm": 1.8061979255149667, "learning_rate": 5.162796057003316e-06, "loss": 0.4058, "step": 6178 }, { "epoch": 0.51, "grad_norm": 3.1751579642441854, "learning_rate": 5.161472974030418e-06, "loss": 0.7461, "step": 6179 }, { "epoch": 0.51, "grad_norm": 4.176957644498074, "learning_rate": 5.1601498797389695e-06, "loss": 0.758, "step": 6180 }, { "epoch": 0.51, "grad_norm": 3.112975382854141, "learning_rate": 5.158826774221711e-06, "loss": 0.5926, "step": 6181 }, { "epoch": 0.51, "grad_norm": 2.8319967477483363, "learning_rate": 5.157503657571386e-06, "loss": 0.6073, "step": 6182 }, { "epoch": 0.51, "grad_norm": 0.9127903859105102, "learning_rate": 5.156180529880741e-06, "loss": 0.1528, "step": 6183 }, { "epoch": 0.51, "grad_norm": 3.757988523389512, "learning_rate": 5.15485739124252e-06, "loss": 0.8195, "step": 6184 }, { "epoch": 0.51, "grad_norm": 5.149178222477253, "learning_rate": 5.153534241749468e-06, "loss": 1.2277, "step": 6185 }, { "epoch": 0.51, "grad_norm": 3.1744426702739177, "learning_rate": 5.152211081494336e-06, "loss": 0.5887, "step": 6186 }, { "epoch": 0.51, "grad_norm": 2.2991471817823994, "learning_rate": 5.150887910569868e-06, "loss": 0.3999, "step": 6187 }, { "epoch": 0.51, "grad_norm": 3.973632128969777, "learning_rate": 5.149564729068816e-06, "loss": 0.6487, "step": 6188 }, { "epoch": 0.51, "grad_norm": 2.4212412938391443, "learning_rate": 5.148241537083928e-06, "loss": 0.5501, "step": 6189 }, { "epoch": 0.51, "grad_norm": 3.9912273125515885, "learning_rate": 5.146918334707952e-06, "loss": 0.9781, "step": 6190 }, { "epoch": 0.51, "grad_norm": 1.7338531351318034, "learning_rate": 5.145595122033641e-06, "loss": 0.5279, "step": 6191 }, { "epoch": 0.51, "grad_norm": 3.797698382201967, "learning_rate": 5.144271899153743e-06, "loss": 1.1141, "step": 6192 }, { "epoch": 0.51, "grad_norm": 5.051888552737093, "learning_rate": 5.142948666161015e-06, "loss": 0.962, "step": 6193 }, { "epoch": 0.51, "grad_norm": 2.695575880327883, "learning_rate": 5.1416254231482075e-06, "loss": 0.7048, "step": 6194 }, { "epoch": 0.51, "grad_norm": 4.149596836313373, "learning_rate": 5.140302170208073e-06, "loss": 0.7707, "step": 6195 }, { "epoch": 0.51, "grad_norm": 2.8069664635200313, "learning_rate": 5.138978907433368e-06, "loss": 0.4488, "step": 6196 }, { "epoch": 0.51, "grad_norm": 1.8215426670677781, "learning_rate": 5.137655634916847e-06, "loss": 0.3643, "step": 6197 }, { "epoch": 0.51, "grad_norm": 3.3808154312253578, "learning_rate": 5.136332352751264e-06, "loss": 0.6463, "step": 6198 }, { "epoch": 0.51, "grad_norm": 4.444167956178485, "learning_rate": 5.1350090610293765e-06, "loss": 0.7728, "step": 6199 }, { "epoch": 0.51, "grad_norm": 3.866901181736248, "learning_rate": 5.133685759843942e-06, "loss": 0.7861, "step": 6200 }, { "epoch": 0.51, "grad_norm": 3.2801548514287724, "learning_rate": 5.132362449287717e-06, "loss": 0.8588, "step": 6201 }, { "epoch": 0.51, "grad_norm": 3.5811069303386147, "learning_rate": 5.13103912945346e-06, "loss": 0.9135, "step": 6202 }, { "epoch": 0.51, "grad_norm": 3.447064912064372, "learning_rate": 5.129715800433931e-06, "loss": 0.6497, "step": 6203 }, { "epoch": 0.51, "grad_norm": 3.5511670926001306, "learning_rate": 5.128392462321889e-06, "loss": 0.621, "step": 6204 }, { "epoch": 0.51, "grad_norm": 3.2851645891357455, "learning_rate": 5.127069115210094e-06, "loss": 0.6323, "step": 6205 }, { "epoch": 0.51, "grad_norm": 3.0718547433639816, "learning_rate": 5.125745759191307e-06, "loss": 0.4425, "step": 6206 }, { "epoch": 0.51, "grad_norm": 4.205788564576086, "learning_rate": 5.124422394358289e-06, "loss": 1.1922, "step": 6207 }, { "epoch": 0.51, "grad_norm": 4.570194497022711, "learning_rate": 5.123099020803803e-06, "loss": 1.1588, "step": 6208 }, { "epoch": 0.51, "grad_norm": 4.123753768682675, "learning_rate": 5.1217756386206115e-06, "loss": 0.9596, "step": 6209 }, { "epoch": 0.51, "grad_norm": 3.1374146278778574, "learning_rate": 5.120452247901477e-06, "loss": 0.7494, "step": 6210 }, { "epoch": 0.51, "grad_norm": 5.131343355026282, "learning_rate": 5.119128848739165e-06, "loss": 0.8035, "step": 6211 }, { "epoch": 0.51, "grad_norm": 5.085041690104158, "learning_rate": 5.11780544122644e-06, "loss": 0.8113, "step": 6212 }, { "epoch": 0.51, "grad_norm": 2.71463247425855, "learning_rate": 5.116482025456066e-06, "loss": 0.6242, "step": 6213 }, { "epoch": 0.51, "grad_norm": 3.9995805170970105, "learning_rate": 5.115158601520807e-06, "loss": 1.2192, "step": 6214 }, { "epoch": 0.51, "grad_norm": 4.687765821380948, "learning_rate": 5.1138351695134325e-06, "loss": 1.2597, "step": 6215 }, { "epoch": 0.51, "grad_norm": 4.4346026177724935, "learning_rate": 5.112511729526708e-06, "loss": 1.0865, "step": 6216 }, { "epoch": 0.51, "grad_norm": 2.6694227711778957, "learning_rate": 5.111188281653401e-06, "loss": 0.3491, "step": 6217 }, { "epoch": 0.51, "grad_norm": 2.883057852039323, "learning_rate": 5.109864825986278e-06, "loss": 0.4154, "step": 6218 }, { "epoch": 0.51, "grad_norm": 4.028272430272317, "learning_rate": 5.1085413626181115e-06, "loss": 1.1474, "step": 6219 }, { "epoch": 0.51, "grad_norm": 6.025093841644022, "learning_rate": 5.107217891641666e-06, "loss": 1.2059, "step": 6220 }, { "epoch": 0.51, "grad_norm": 3.402164700317038, "learning_rate": 5.1058944131497136e-06, "loss": 0.5818, "step": 6221 }, { "epoch": 0.51, "grad_norm": 2.7144367551172395, "learning_rate": 5.104570927235022e-06, "loss": 0.7003, "step": 6222 }, { "epoch": 0.51, "grad_norm": 3.4100266315263363, "learning_rate": 5.103247433990366e-06, "loss": 0.7946, "step": 6223 }, { "epoch": 0.51, "grad_norm": 5.265470392284644, "learning_rate": 5.1019239335085125e-06, "loss": 1.2367, "step": 6224 }, { "epoch": 0.51, "grad_norm": 3.9032990061558483, "learning_rate": 5.100600425882235e-06, "loss": 0.713, "step": 6225 }, { "epoch": 0.51, "grad_norm": 3.8693250297510664, "learning_rate": 5.099276911204306e-06, "loss": 0.7159, "step": 6226 }, { "epoch": 0.51, "grad_norm": 4.0870859928610015, "learning_rate": 5.097953389567498e-06, "loss": 0.8153, "step": 6227 }, { "epoch": 0.51, "grad_norm": 4.029878114306394, "learning_rate": 5.096629861064582e-06, "loss": 0.9994, "step": 6228 }, { "epoch": 0.51, "grad_norm": 3.951793258420478, "learning_rate": 5.095306325788335e-06, "loss": 0.7461, "step": 6229 }, { "epoch": 0.51, "grad_norm": 5.776917444102281, "learning_rate": 5.093982783831528e-06, "loss": 1.5689, "step": 6230 }, { "epoch": 0.51, "grad_norm": 2.812739997250496, "learning_rate": 5.092659235286938e-06, "loss": 0.5859, "step": 6231 }, { "epoch": 0.51, "grad_norm": 3.7644079508415316, "learning_rate": 5.091335680247339e-06, "loss": 0.6422, "step": 6232 }, { "epoch": 0.51, "grad_norm": 0.9220303257829688, "learning_rate": 5.090012118805505e-06, "loss": 0.1435, "step": 6233 }, { "epoch": 0.51, "grad_norm": 2.62449968094465, "learning_rate": 5.088688551054214e-06, "loss": 0.5911, "step": 6234 }, { "epoch": 0.51, "grad_norm": 3.742055006600364, "learning_rate": 5.0873649770862425e-06, "loss": 0.7453, "step": 6235 }, { "epoch": 0.51, "grad_norm": 3.9706011107453003, "learning_rate": 5.086041396994365e-06, "loss": 0.9127, "step": 6236 }, { "epoch": 0.51, "grad_norm": 3.6868597009845514, "learning_rate": 5.08471781087136e-06, "loss": 0.8972, "step": 6237 }, { "epoch": 0.51, "grad_norm": 3.5694862211533263, "learning_rate": 5.083394218810006e-06, "loss": 0.6879, "step": 6238 }, { "epoch": 0.51, "grad_norm": 2.4643485370094247, "learning_rate": 5.082070620903079e-06, "loss": 0.5668, "step": 6239 }, { "epoch": 0.51, "grad_norm": 5.66966876203938, "learning_rate": 5.0807470172433594e-06, "loss": 1.0946, "step": 6240 }, { "epoch": 0.51, "grad_norm": 4.886788766329076, "learning_rate": 5.079423407923625e-06, "loss": 0.9192, "step": 6241 }, { "epoch": 0.51, "grad_norm": 2.5662657650469645, "learning_rate": 5.078099793036656e-06, "loss": 0.5687, "step": 6242 }, { "epoch": 0.51, "grad_norm": 2.2423877659804696, "learning_rate": 5.076776172675232e-06, "loss": 0.284, "step": 6243 }, { "epoch": 0.51, "grad_norm": 3.700604243524799, "learning_rate": 5.0754525469321305e-06, "loss": 0.6439, "step": 6244 }, { "epoch": 0.51, "grad_norm": 3.5641119785064723, "learning_rate": 5.074128915900134e-06, "loss": 1.022, "step": 6245 }, { "epoch": 0.51, "grad_norm": 3.13662079686435, "learning_rate": 5.072805279672025e-06, "loss": 0.6375, "step": 6246 }, { "epoch": 0.51, "grad_norm": 2.752477270526784, "learning_rate": 5.071481638340581e-06, "loss": 0.5996, "step": 6247 }, { "epoch": 0.51, "grad_norm": 4.72802022968144, "learning_rate": 5.070157991998586e-06, "loss": 0.9262, "step": 6248 }, { "epoch": 0.51, "grad_norm": 3.94412339728263, "learning_rate": 5.06883434073882e-06, "loss": 0.9916, "step": 6249 }, { "epoch": 0.51, "grad_norm": 2.737079520523553, "learning_rate": 5.067510684654069e-06, "loss": 0.459, "step": 6250 }, { "epoch": 0.51, "grad_norm": 2.5092207035448744, "learning_rate": 5.06618702383711e-06, "loss": 0.5877, "step": 6251 }, { "epoch": 0.51, "grad_norm": 3.1853215059093363, "learning_rate": 5.06486335838073e-06, "loss": 0.6728, "step": 6252 }, { "epoch": 0.51, "grad_norm": 4.408146409003843, "learning_rate": 5.06353968837771e-06, "loss": 0.8563, "step": 6253 }, { "epoch": 0.51, "grad_norm": 3.7415881893911864, "learning_rate": 5.062216013920836e-06, "loss": 0.6281, "step": 6254 }, { "epoch": 0.51, "grad_norm": 5.423419492453897, "learning_rate": 5.060892335102888e-06, "loss": 1.5519, "step": 6255 }, { "epoch": 0.51, "grad_norm": 2.1369108485311883, "learning_rate": 5.0595686520166535e-06, "loss": 0.4109, "step": 6256 }, { "epoch": 0.51, "grad_norm": 2.6276554918514528, "learning_rate": 5.058244964754916e-06, "loss": 0.4997, "step": 6257 }, { "epoch": 0.51, "grad_norm": 4.678298545740144, "learning_rate": 5.056921273410459e-06, "loss": 1.0561, "step": 6258 }, { "epoch": 0.51, "grad_norm": 5.04000145001499, "learning_rate": 5.05559757807607e-06, "loss": 0.6706, "step": 6259 }, { "epoch": 0.51, "grad_norm": 4.713966562253011, "learning_rate": 5.054273878844532e-06, "loss": 0.8137, "step": 6260 }, { "epoch": 0.51, "grad_norm": 1.8093419343800716, "learning_rate": 5.052950175808631e-06, "loss": 0.217, "step": 6261 }, { "epoch": 0.51, "grad_norm": 8.562365238511848, "learning_rate": 5.051626469061153e-06, "loss": 0.5402, "step": 6262 }, { "epoch": 0.51, "grad_norm": 5.0469827554424525, "learning_rate": 5.050302758694885e-06, "loss": 1.1309, "step": 6263 }, { "epoch": 0.51, "grad_norm": 4.855241797223289, "learning_rate": 5.048979044802611e-06, "loss": 1.0927, "step": 6264 }, { "epoch": 0.51, "grad_norm": 3.7073773922637976, "learning_rate": 5.047655327477119e-06, "loss": 0.8027, "step": 6265 }, { "epoch": 0.51, "grad_norm": 3.4058276992984235, "learning_rate": 5.0463316068111975e-06, "loss": 0.8036, "step": 6266 }, { "epoch": 0.51, "grad_norm": 4.921621492746431, "learning_rate": 5.0450078828976326e-06, "loss": 1.1416, "step": 6267 }, { "epoch": 0.51, "grad_norm": 2.8577221966367548, "learning_rate": 5.0436841558292096e-06, "loss": 0.4846, "step": 6268 }, { "epoch": 0.51, "grad_norm": 4.471053589326682, "learning_rate": 5.042360425698718e-06, "loss": 0.9808, "step": 6269 }, { "epoch": 0.51, "grad_norm": 6.235702521038686, "learning_rate": 5.041036692598944e-06, "loss": 1.4737, "step": 6270 }, { "epoch": 0.51, "grad_norm": 4.378368906345673, "learning_rate": 5.039712956622678e-06, "loss": 1.4435, "step": 6271 }, { "epoch": 0.51, "grad_norm": 4.277791036639248, "learning_rate": 5.038389217862705e-06, "loss": 1.0241, "step": 6272 }, { "epoch": 0.51, "grad_norm": 3.796578394912698, "learning_rate": 5.037065476411816e-06, "loss": 0.8263, "step": 6273 }, { "epoch": 0.51, "grad_norm": 3.3092776679300604, "learning_rate": 5.035741732362798e-06, "loss": 0.8006, "step": 6274 }, { "epoch": 0.51, "grad_norm": 4.584746533139775, "learning_rate": 5.0344179858084395e-06, "loss": 1.0898, "step": 6275 }, { "epoch": 0.51, "grad_norm": 4.850175703684064, "learning_rate": 5.033094236841531e-06, "loss": 1.0502, "step": 6276 }, { "epoch": 0.51, "grad_norm": 3.6000543964963483, "learning_rate": 5.03177048555486e-06, "loss": 0.5403, "step": 6277 }, { "epoch": 0.51, "grad_norm": 1.8518070760604197, "learning_rate": 5.030446732041216e-06, "loss": 0.6082, "step": 6278 }, { "epoch": 0.51, "grad_norm": 1.5225956717460523, "learning_rate": 5.029122976393388e-06, "loss": 0.2299, "step": 6279 }, { "epoch": 0.51, "grad_norm": 3.0498073241758967, "learning_rate": 5.027799218704168e-06, "loss": 0.6301, "step": 6280 }, { "epoch": 0.51, "grad_norm": 4.102132833210754, "learning_rate": 5.026475459066342e-06, "loss": 0.5903, "step": 6281 }, { "epoch": 0.51, "grad_norm": 3.6469790492142407, "learning_rate": 5.025151697572703e-06, "loss": 0.8519, "step": 6282 }, { "epoch": 0.51, "grad_norm": 4.220063325470204, "learning_rate": 5.023827934316039e-06, "loss": 0.9139, "step": 6283 }, { "epoch": 0.51, "grad_norm": 1.1852192562946298, "learning_rate": 5.02250416938914e-06, "loss": 0.2314, "step": 6284 }, { "epoch": 0.51, "grad_norm": 2.9635533768960713, "learning_rate": 5.021180402884796e-06, "loss": 0.386, "step": 6285 }, { "epoch": 0.51, "grad_norm": 2.860632087202993, "learning_rate": 5.0198566348958e-06, "loss": 0.4964, "step": 6286 }, { "epoch": 0.51, "grad_norm": 3.56821484008037, "learning_rate": 5.018532865514938e-06, "loss": 0.7766, "step": 6287 }, { "epoch": 0.51, "grad_norm": 3.670637243869879, "learning_rate": 5.0172090948350036e-06, "loss": 0.7336, "step": 6288 }, { "epoch": 0.51, "grad_norm": 4.17472118045534, "learning_rate": 5.015885322948787e-06, "loss": 1.0167, "step": 6289 }, { "epoch": 0.51, "grad_norm": 2.121084537108153, "learning_rate": 5.01456154994908e-06, "loss": 0.4099, "step": 6290 }, { "epoch": 0.51, "grad_norm": 4.323932905163277, "learning_rate": 5.01323777592867e-06, "loss": 0.7663, "step": 6291 }, { "epoch": 0.51, "grad_norm": 3.36491687728099, "learning_rate": 5.011914000980349e-06, "loss": 0.6422, "step": 6292 }, { "epoch": 0.51, "grad_norm": 3.666396995504404, "learning_rate": 5.0105902251969084e-06, "loss": 0.904, "step": 6293 }, { "epoch": 0.51, "grad_norm": 3.3936856557765145, "learning_rate": 5.00926644867114e-06, "loss": 0.9992, "step": 6294 }, { "epoch": 0.51, "grad_norm": 4.7203280943851595, "learning_rate": 5.007942671495832e-06, "loss": 1.2324, "step": 6295 }, { "epoch": 0.51, "grad_norm": 3.3110721080028003, "learning_rate": 5.006618893763779e-06, "loss": 0.6198, "step": 6296 }, { "epoch": 0.51, "grad_norm": 3.771613517384583, "learning_rate": 5.005295115567771e-06, "loss": 0.8101, "step": 6297 }, { "epoch": 0.51, "grad_norm": 3.2345002368899296, "learning_rate": 5.003971337000597e-06, "loss": 0.4533, "step": 6298 }, { "epoch": 0.51, "grad_norm": 4.523863241460051, "learning_rate": 5.00264755815505e-06, "loss": 0.8525, "step": 6299 }, { "epoch": 0.51, "grad_norm": 2.7989059343538045, "learning_rate": 5.001323779123921e-06, "loss": 0.5445, "step": 6300 }, { "epoch": 0.52, "grad_norm": 4.8013321983918775, "learning_rate": 5e-06, "loss": 0.8655, "step": 6301 }, { "epoch": 0.52, "grad_norm": 4.150377577679675, "learning_rate": 4.99867622087608e-06, "loss": 0.8885, "step": 6302 }, { "epoch": 0.52, "grad_norm": 4.902739896702521, "learning_rate": 4.99735244184495e-06, "loss": 0.8761, "step": 6303 }, { "epoch": 0.52, "grad_norm": 3.774687840814019, "learning_rate": 4.996028662999405e-06, "loss": 0.5963, "step": 6304 }, { "epoch": 0.52, "grad_norm": 4.410054218319989, "learning_rate": 4.994704884432231e-06, "loss": 1.0872, "step": 6305 }, { "epoch": 0.52, "grad_norm": 1.0244463177855765, "learning_rate": 4.9933811062362224e-06, "loss": 0.1778, "step": 6306 }, { "epoch": 0.52, "grad_norm": 4.6620882328925575, "learning_rate": 4.992057328504169e-06, "loss": 0.884, "step": 6307 }, { "epoch": 0.52, "grad_norm": 3.403301419532233, "learning_rate": 4.990733551328862e-06, "loss": 0.5739, "step": 6308 }, { "epoch": 0.52, "grad_norm": 3.9845782164457884, "learning_rate": 4.989409774803092e-06, "loss": 0.7148, "step": 6309 }, { "epoch": 0.52, "grad_norm": 3.2284050123991626, "learning_rate": 4.988085999019654e-06, "loss": 0.5754, "step": 6310 }, { "epoch": 0.52, "grad_norm": 2.799427569150806, "learning_rate": 4.9867622240713325e-06, "loss": 0.585, "step": 6311 }, { "epoch": 0.52, "grad_norm": 4.501545703443735, "learning_rate": 4.985438450050922e-06, "loss": 0.901, "step": 6312 }, { "epoch": 0.52, "grad_norm": 3.6819769592865965, "learning_rate": 4.984114677051214e-06, "loss": 0.7882, "step": 6313 }, { "epoch": 0.52, "grad_norm": 3.440819132850262, "learning_rate": 4.982790905164997e-06, "loss": 0.7031, "step": 6314 }, { "epoch": 0.52, "grad_norm": 3.6972404685211484, "learning_rate": 4.981467134485062e-06, "loss": 1.0036, "step": 6315 }, { "epoch": 0.52, "grad_norm": 2.9079783686070577, "learning_rate": 4.980143365104203e-06, "loss": 0.5343, "step": 6316 }, { "epoch": 0.52, "grad_norm": 3.982005751028777, "learning_rate": 4.978819597115205e-06, "loss": 0.4965, "step": 6317 }, { "epoch": 0.52, "grad_norm": 2.893228389137094, "learning_rate": 4.977495830610862e-06, "loss": 0.4195, "step": 6318 }, { "epoch": 0.52, "grad_norm": 2.552228796695302, "learning_rate": 4.976172065683963e-06, "loss": 0.7378, "step": 6319 }, { "epoch": 0.52, "grad_norm": 3.264345436792313, "learning_rate": 4.974848302427299e-06, "loss": 0.8076, "step": 6320 }, { "epoch": 0.52, "grad_norm": 2.7932063119244797, "learning_rate": 4.9735245409336586e-06, "loss": 0.5244, "step": 6321 }, { "epoch": 0.52, "grad_norm": 5.227818897280464, "learning_rate": 4.972200781295835e-06, "loss": 1.6158, "step": 6322 }, { "epoch": 0.52, "grad_norm": 3.2963786876135655, "learning_rate": 4.970877023606613e-06, "loss": 0.9132, "step": 6323 }, { "epoch": 0.52, "grad_norm": 5.589089310372882, "learning_rate": 4.969553267958785e-06, "loss": 1.3736, "step": 6324 }, { "epoch": 0.52, "grad_norm": 3.6672605443108566, "learning_rate": 4.9682295144451415e-06, "loss": 1.0031, "step": 6325 }, { "epoch": 0.52, "grad_norm": 2.800499484512619, "learning_rate": 4.96690576315847e-06, "loss": 0.59, "step": 6326 }, { "epoch": 0.52, "grad_norm": 3.3517919780931447, "learning_rate": 4.965582014191562e-06, "loss": 0.5405, "step": 6327 }, { "epoch": 0.52, "grad_norm": 3.1593004177129544, "learning_rate": 4.964258267637204e-06, "loss": 0.4237, "step": 6328 }, { "epoch": 0.52, "grad_norm": 2.1968301978355282, "learning_rate": 4.962934523588187e-06, "loss": 0.3499, "step": 6329 }, { "epoch": 0.52, "grad_norm": 4.644271439604117, "learning_rate": 4.961610782137297e-06, "loss": 0.8678, "step": 6330 }, { "epoch": 0.52, "grad_norm": 5.526329531410269, "learning_rate": 4.960287043377324e-06, "loss": 0.9463, "step": 6331 }, { "epoch": 0.52, "grad_norm": 4.135163169342016, "learning_rate": 4.958963307401056e-06, "loss": 1.0386, "step": 6332 }, { "epoch": 0.52, "grad_norm": 3.074959112540754, "learning_rate": 4.957639574301285e-06, "loss": 0.588, "step": 6333 }, { "epoch": 0.52, "grad_norm": 3.475894793094254, "learning_rate": 4.956315844170792e-06, "loss": 0.4042, "step": 6334 }, { "epoch": 0.52, "grad_norm": 3.1667108394781, "learning_rate": 4.954992117102369e-06, "loss": 0.6566, "step": 6335 }, { "epoch": 0.52, "grad_norm": 2.6318038286029357, "learning_rate": 4.953668393188803e-06, "loss": 0.6653, "step": 6336 }, { "epoch": 0.52, "grad_norm": 3.5598441681408564, "learning_rate": 4.9523446725228805e-06, "loss": 0.9093, "step": 6337 }, { "epoch": 0.52, "grad_norm": 4.312639300371545, "learning_rate": 4.95102095519739e-06, "loss": 0.6447, "step": 6338 }, { "epoch": 0.52, "grad_norm": 3.2869108321149354, "learning_rate": 4.949697241305118e-06, "loss": 0.5326, "step": 6339 }, { "epoch": 0.52, "grad_norm": 3.2398638722023074, "learning_rate": 4.948373530938849e-06, "loss": 0.5487, "step": 6340 }, { "epoch": 0.52, "grad_norm": 2.6746133544438657, "learning_rate": 4.947049824191371e-06, "loss": 0.3259, "step": 6341 }, { "epoch": 0.52, "grad_norm": 5.023439343154598, "learning_rate": 4.945726121155469e-06, "loss": 0.7726, "step": 6342 }, { "epoch": 0.52, "grad_norm": 3.423088694007279, "learning_rate": 4.9444024219239315e-06, "loss": 0.599, "step": 6343 }, { "epoch": 0.52, "grad_norm": 4.657343174210334, "learning_rate": 4.943078726589543e-06, "loss": 0.8292, "step": 6344 }, { "epoch": 0.52, "grad_norm": 4.069730396690454, "learning_rate": 4.941755035245087e-06, "loss": 0.7747, "step": 6345 }, { "epoch": 0.52, "grad_norm": 3.907570960554669, "learning_rate": 4.940431347983348e-06, "loss": 0.5811, "step": 6346 }, { "epoch": 0.52, "grad_norm": 3.9121204580647655, "learning_rate": 4.9391076648971135e-06, "loss": 0.8157, "step": 6347 }, { "epoch": 0.52, "grad_norm": 4.920499319357153, "learning_rate": 4.937783986079165e-06, "loss": 0.8814, "step": 6348 }, { "epoch": 0.52, "grad_norm": 4.867359978831976, "learning_rate": 4.93646031162229e-06, "loss": 1.0379, "step": 6349 }, { "epoch": 0.52, "grad_norm": 4.988426299490411, "learning_rate": 4.935136641619272e-06, "loss": 1.4079, "step": 6350 }, { "epoch": 0.52, "grad_norm": 2.295221225986864, "learning_rate": 4.933812976162892e-06, "loss": 0.4422, "step": 6351 }, { "epoch": 0.52, "grad_norm": 2.7696669405353247, "learning_rate": 4.932489315345933e-06, "loss": 0.6909, "step": 6352 }, { "epoch": 0.52, "grad_norm": 3.1369585460443123, "learning_rate": 4.9311656592611804e-06, "loss": 0.645, "step": 6353 }, { "epoch": 0.52, "grad_norm": 2.3681224044111864, "learning_rate": 4.929842008001415e-06, "loss": 0.4803, "step": 6354 }, { "epoch": 0.52, "grad_norm": 4.98677924723652, "learning_rate": 4.92851836165942e-06, "loss": 1.2134, "step": 6355 }, { "epoch": 0.52, "grad_norm": 2.4940831075783816, "learning_rate": 4.927194720327978e-06, "loss": 0.6613, "step": 6356 }, { "epoch": 0.52, "grad_norm": 2.3360524861854586, "learning_rate": 4.925871084099867e-06, "loss": 0.5912, "step": 6357 }, { "epoch": 0.52, "grad_norm": 3.13829777093247, "learning_rate": 4.924547453067871e-06, "loss": 0.5976, "step": 6358 }, { "epoch": 0.52, "grad_norm": 3.864907012819576, "learning_rate": 4.923223827324769e-06, "loss": 0.7684, "step": 6359 }, { "epoch": 0.52, "grad_norm": 3.654007313074135, "learning_rate": 4.921900206963345e-06, "loss": 1.1836, "step": 6360 }, { "epoch": 0.52, "grad_norm": 2.7205381939259325, "learning_rate": 4.920576592076375e-06, "loss": 0.4543, "step": 6361 }, { "epoch": 0.52, "grad_norm": 4.207080349387025, "learning_rate": 4.919252982756643e-06, "loss": 1.1061, "step": 6362 }, { "epoch": 0.52, "grad_norm": 4.497492926051889, "learning_rate": 4.9179293790969225e-06, "loss": 1.2731, "step": 6363 }, { "epoch": 0.52, "grad_norm": 3.9592866021194957, "learning_rate": 4.916605781189996e-06, "loss": 0.7913, "step": 6364 }, { "epoch": 0.52, "grad_norm": 1.789903155601039, "learning_rate": 4.9152821891286404e-06, "loss": 0.3982, "step": 6365 }, { "epoch": 0.52, "grad_norm": 3.6060905109286803, "learning_rate": 4.913958603005636e-06, "loss": 0.6695, "step": 6366 }, { "epoch": 0.52, "grad_norm": 2.7958090281529833, "learning_rate": 4.91263502291376e-06, "loss": 0.5765, "step": 6367 }, { "epoch": 0.52, "grad_norm": 3.5029185894999735, "learning_rate": 4.911311448945787e-06, "loss": 0.9681, "step": 6368 }, { "epoch": 0.52, "grad_norm": 4.238696692533975, "learning_rate": 4.9099878811944965e-06, "loss": 0.7806, "step": 6369 }, { "epoch": 0.52, "grad_norm": 3.8919735811733625, "learning_rate": 4.908664319752663e-06, "loss": 0.9318, "step": 6370 }, { "epoch": 0.52, "grad_norm": 3.736754873965216, "learning_rate": 4.9073407647130625e-06, "loss": 0.6905, "step": 6371 }, { "epoch": 0.52, "grad_norm": 4.136277462289465, "learning_rate": 4.906017216168471e-06, "loss": 1.0667, "step": 6372 }, { "epoch": 0.52, "grad_norm": 2.2902996833226683, "learning_rate": 4.904693674211667e-06, "loss": 0.6025, "step": 6373 }, { "epoch": 0.52, "grad_norm": 3.8055175213525025, "learning_rate": 4.9033701389354185e-06, "loss": 0.7545, "step": 6374 }, { "epoch": 0.52, "grad_norm": 1.9379557571515578, "learning_rate": 4.902046610432504e-06, "loss": 0.3936, "step": 6375 }, { "epoch": 0.52, "grad_norm": 5.6127646401142055, "learning_rate": 4.900723088795695e-06, "loss": 0.9573, "step": 6376 }, { "epoch": 0.52, "grad_norm": 3.634383651385539, "learning_rate": 4.899399574117766e-06, "loss": 0.928, "step": 6377 }, { "epoch": 0.52, "grad_norm": 4.3841366044644765, "learning_rate": 4.898076066491488e-06, "loss": 0.8414, "step": 6378 }, { "epoch": 0.52, "grad_norm": 2.8380715593656936, "learning_rate": 4.896752566009637e-06, "loss": 0.6047, "step": 6379 }, { "epoch": 0.52, "grad_norm": 2.3478182534713485, "learning_rate": 4.8954290727649785e-06, "loss": 0.3525, "step": 6380 }, { "epoch": 0.52, "grad_norm": 3.2704191305723413, "learning_rate": 4.894105586850288e-06, "loss": 0.5744, "step": 6381 }, { "epoch": 0.52, "grad_norm": 5.296254206143929, "learning_rate": 4.892782108358335e-06, "loss": 0.9147, "step": 6382 }, { "epoch": 0.52, "grad_norm": 3.577524099642258, "learning_rate": 4.891458637381891e-06, "loss": 0.6596, "step": 6383 }, { "epoch": 0.52, "grad_norm": 3.5392351497191736, "learning_rate": 4.8901351740137235e-06, "loss": 0.7526, "step": 6384 }, { "epoch": 0.52, "grad_norm": 3.0395114611346887, "learning_rate": 4.888811718346602e-06, "loss": 0.7119, "step": 6385 }, { "epoch": 0.52, "grad_norm": 2.111179056014712, "learning_rate": 4.887488270473294e-06, "loss": 0.6011, "step": 6386 }, { "epoch": 0.52, "grad_norm": 3.5648992088187637, "learning_rate": 4.886164830486569e-06, "loss": 0.5581, "step": 6387 }, { "epoch": 0.52, "grad_norm": 4.3273150749747025, "learning_rate": 4.8848413984791935e-06, "loss": 0.8686, "step": 6388 }, { "epoch": 0.52, "grad_norm": 4.942505465751809, "learning_rate": 4.883517974543935e-06, "loss": 0.9702, "step": 6389 }, { "epoch": 0.52, "grad_norm": 3.8595179418584245, "learning_rate": 4.882194558773562e-06, "loss": 0.9561, "step": 6390 }, { "epoch": 0.52, "grad_norm": 4.280663704527302, "learning_rate": 4.8808711512608355e-06, "loss": 0.8193, "step": 6391 }, { "epoch": 0.52, "grad_norm": 3.2210982354831654, "learning_rate": 4.879547752098524e-06, "loss": 0.5106, "step": 6392 }, { "epoch": 0.52, "grad_norm": 4.337642173676028, "learning_rate": 4.878224361379389e-06, "loss": 1.0615, "step": 6393 }, { "epoch": 0.52, "grad_norm": 4.14583018619186, "learning_rate": 4.8769009791961975e-06, "loss": 0.7129, "step": 6394 }, { "epoch": 0.52, "grad_norm": 2.1424088613650816, "learning_rate": 4.875577605641711e-06, "loss": 0.3761, "step": 6395 }, { "epoch": 0.52, "grad_norm": 3.8310450197505426, "learning_rate": 4.8742542408086955e-06, "loss": 0.8652, "step": 6396 }, { "epoch": 0.52, "grad_norm": 3.2018217096329407, "learning_rate": 4.8729308847899075e-06, "loss": 0.5413, "step": 6397 }, { "epoch": 0.52, "grad_norm": 3.362855687172706, "learning_rate": 4.8716075376781115e-06, "loss": 0.5127, "step": 6398 }, { "epoch": 0.52, "grad_norm": 2.967190852418591, "learning_rate": 4.87028419956607e-06, "loss": 0.5075, "step": 6399 }, { "epoch": 0.52, "grad_norm": 3.1975202187908796, "learning_rate": 4.86896087054654e-06, "loss": 0.5307, "step": 6400 }, { "epoch": 0.52, "grad_norm": 5.1581629182550195, "learning_rate": 4.867637550712283e-06, "loss": 0.9677, "step": 6401 }, { "epoch": 0.52, "grad_norm": 3.6676487285144606, "learning_rate": 4.86631424015606e-06, "loss": 0.6706, "step": 6402 }, { "epoch": 0.52, "grad_norm": 3.6524160507678043, "learning_rate": 4.864990938970624e-06, "loss": 0.6049, "step": 6403 }, { "epoch": 0.52, "grad_norm": 4.650624751612126, "learning_rate": 4.863667647248737e-06, "loss": 1.1215, "step": 6404 }, { "epoch": 0.52, "grad_norm": 4.530953242466552, "learning_rate": 4.862344365083154e-06, "loss": 0.9914, "step": 6405 }, { "epoch": 0.52, "grad_norm": 3.1659566501334755, "learning_rate": 4.861021092566633e-06, "loss": 0.5622, "step": 6406 }, { "epoch": 0.52, "grad_norm": 4.536544577037245, "learning_rate": 4.859697829791927e-06, "loss": 1.2066, "step": 6407 }, { "epoch": 0.52, "grad_norm": 4.33958469940971, "learning_rate": 4.858374576851795e-06, "loss": 1.2222, "step": 6408 }, { "epoch": 0.52, "grad_norm": 3.381614360695345, "learning_rate": 4.857051333838987e-06, "loss": 0.65, "step": 6409 }, { "epoch": 0.52, "grad_norm": 3.2292333130820845, "learning_rate": 4.855728100846258e-06, "loss": 0.6844, "step": 6410 }, { "epoch": 0.52, "grad_norm": 4.292140738399272, "learning_rate": 4.854404877966361e-06, "loss": 0.9661, "step": 6411 }, { "epoch": 0.52, "grad_norm": 2.2999953285623556, "learning_rate": 4.8530816652920485e-06, "loss": 0.5868, "step": 6412 }, { "epoch": 0.52, "grad_norm": 3.0015489970748885, "learning_rate": 4.851758462916075e-06, "loss": 0.7151, "step": 6413 }, { "epoch": 0.52, "grad_norm": 0.9300620487820964, "learning_rate": 4.850435270931184e-06, "loss": 0.1667, "step": 6414 }, { "epoch": 0.52, "grad_norm": 2.440839701309359, "learning_rate": 4.849112089430133e-06, "loss": 0.3568, "step": 6415 }, { "epoch": 0.52, "grad_norm": 3.295708322856452, "learning_rate": 4.847788918505665e-06, "loss": 0.8336, "step": 6416 }, { "epoch": 0.52, "grad_norm": 2.7894794538723233, "learning_rate": 4.846465758250532e-06, "loss": 0.8585, "step": 6417 }, { "epoch": 0.52, "grad_norm": 1.0371011365815022, "learning_rate": 4.845142608757481e-06, "loss": 0.1409, "step": 6418 }, { "epoch": 0.52, "grad_norm": 4.118848364575265, "learning_rate": 4.843819470119262e-06, "loss": 0.608, "step": 6419 }, { "epoch": 0.52, "grad_norm": 2.2868040020579445, "learning_rate": 4.842496342428616e-06, "loss": 0.471, "step": 6420 }, { "epoch": 0.52, "grad_norm": 4.479520859133651, "learning_rate": 4.84117322577829e-06, "loss": 0.8966, "step": 6421 }, { "epoch": 0.52, "grad_norm": 4.594747610138365, "learning_rate": 4.839850120261032e-06, "loss": 1.0153, "step": 6422 }, { "epoch": 0.52, "grad_norm": 3.33839452827434, "learning_rate": 4.838527025969582e-06, "loss": 0.8278, "step": 6423 }, { "epoch": 0.53, "grad_norm": 3.867393160179802, "learning_rate": 4.837203942996687e-06, "loss": 0.7852, "step": 6424 }, { "epoch": 0.53, "grad_norm": 3.9408893881153704, "learning_rate": 4.8358808714350856e-06, "loss": 0.3435, "step": 6425 }, { "epoch": 0.53, "grad_norm": 2.157718397068329, "learning_rate": 4.834557811377519e-06, "loss": 0.3427, "step": 6426 }, { "epoch": 0.53, "grad_norm": 3.9450794345286235, "learning_rate": 4.833234762916731e-06, "loss": 0.8995, "step": 6427 }, { "epoch": 0.53, "grad_norm": 4.727524322944186, "learning_rate": 4.831911726145461e-06, "loss": 1.1968, "step": 6428 }, { "epoch": 0.53, "grad_norm": 2.605545754356394, "learning_rate": 4.830588701156448e-06, "loss": 0.5252, "step": 6429 }, { "epoch": 0.53, "grad_norm": 3.202165484465828, "learning_rate": 4.829265688042429e-06, "loss": 0.7731, "step": 6430 }, { "epoch": 0.53, "grad_norm": 5.30692738832285, "learning_rate": 4.827942686896143e-06, "loss": 0.9813, "step": 6431 }, { "epoch": 0.53, "grad_norm": 3.3485257582972343, "learning_rate": 4.8266196978103245e-06, "loss": 0.5746, "step": 6432 }, { "epoch": 0.53, "grad_norm": 1.9948209222241482, "learning_rate": 4.825296720877711e-06, "loss": 0.4288, "step": 6433 }, { "epoch": 0.53, "grad_norm": 3.359141276541377, "learning_rate": 4.823973756191037e-06, "loss": 0.4782, "step": 6434 }, { "epoch": 0.53, "grad_norm": 3.3460161213266657, "learning_rate": 4.822650803843037e-06, "loss": 0.8516, "step": 6435 }, { "epoch": 0.53, "grad_norm": 3.9047246603491295, "learning_rate": 4.821327863926445e-06, "loss": 0.8164, "step": 6436 }, { "epoch": 0.53, "grad_norm": 4.389933865662566, "learning_rate": 4.8200049365339905e-06, "loss": 0.9688, "step": 6437 }, { "epoch": 0.53, "grad_norm": 3.90959305816749, "learning_rate": 4.818682021758407e-06, "loss": 1.114, "step": 6438 }, { "epoch": 0.53, "grad_norm": 4.746758804316483, "learning_rate": 4.817359119692424e-06, "loss": 1.0432, "step": 6439 }, { "epoch": 0.53, "grad_norm": 2.4099016585770023, "learning_rate": 4.816036230428773e-06, "loss": 0.32, "step": 6440 }, { "epoch": 0.53, "grad_norm": 4.881278816114833, "learning_rate": 4.814713354060181e-06, "loss": 1.0872, "step": 6441 }, { "epoch": 0.53, "grad_norm": 4.622871480181095, "learning_rate": 4.8133904906793776e-06, "loss": 0.9624, "step": 6442 }, { "epoch": 0.53, "grad_norm": 5.9472587561883765, "learning_rate": 4.8120676403790875e-06, "loss": 1.0216, "step": 6443 }, { "epoch": 0.53, "grad_norm": 3.361458801690792, "learning_rate": 4.8107448032520376e-06, "loss": 0.9053, "step": 6444 }, { "epoch": 0.53, "grad_norm": 4.2503964795396, "learning_rate": 4.809421979390954e-06, "loss": 0.943, "step": 6445 }, { "epoch": 0.53, "grad_norm": 2.839225942265717, "learning_rate": 4.8080991688885606e-06, "loss": 0.6044, "step": 6446 }, { "epoch": 0.53, "grad_norm": 2.701583715591647, "learning_rate": 4.806776371837581e-06, "loss": 0.4485, "step": 6447 }, { "epoch": 0.53, "grad_norm": 2.4206292275300285, "learning_rate": 4.805453588330735e-06, "loss": 0.366, "step": 6448 }, { "epoch": 0.53, "grad_norm": 3.9656245324891324, "learning_rate": 4.804130818460746e-06, "loss": 0.8929, "step": 6449 }, { "epoch": 0.53, "grad_norm": 1.2925316239131028, "learning_rate": 4.802808062320334e-06, "loss": 0.181, "step": 6450 }, { "epoch": 0.53, "grad_norm": 3.1121094496228907, "learning_rate": 4.801485320002219e-06, "loss": 0.9133, "step": 6451 }, { "epoch": 0.53, "grad_norm": 4.748963455706853, "learning_rate": 4.8001625915991205e-06, "loss": 1.0718, "step": 6452 }, { "epoch": 0.53, "grad_norm": 4.125724567974992, "learning_rate": 4.798839877203754e-06, "loss": 1.165, "step": 6453 }, { "epoch": 0.53, "grad_norm": 3.5048961563875642, "learning_rate": 4.7975171769088366e-06, "loss": 0.3907, "step": 6454 }, { "epoch": 0.53, "grad_norm": 5.241688967850177, "learning_rate": 4.7961944908070835e-06, "loss": 1.272, "step": 6455 }, { "epoch": 0.53, "grad_norm": 3.530270446774279, "learning_rate": 4.7948718189912095e-06, "loss": 0.7915, "step": 6456 }, { "epoch": 0.53, "grad_norm": 4.395649687464648, "learning_rate": 4.793549161553927e-06, "loss": 1.0097, "step": 6457 }, { "epoch": 0.53, "grad_norm": 4.084798442453911, "learning_rate": 4.792226518587952e-06, "loss": 0.6873, "step": 6458 }, { "epoch": 0.53, "grad_norm": 2.339275731224011, "learning_rate": 4.7909038901859945e-06, "loss": 0.4039, "step": 6459 }, { "epoch": 0.53, "grad_norm": 3.936160088236716, "learning_rate": 4.789581276440762e-06, "loss": 0.7648, "step": 6460 }, { "epoch": 0.53, "grad_norm": 5.014448484107769, "learning_rate": 4.788258677444967e-06, "loss": 0.9322, "step": 6461 }, { "epoch": 0.53, "grad_norm": 2.740184648909575, "learning_rate": 4.786936093291318e-06, "loss": 0.7014, "step": 6462 }, { "epoch": 0.53, "grad_norm": 2.564045541750739, "learning_rate": 4.78561352407252e-06, "loss": 0.4463, "step": 6463 }, { "epoch": 0.53, "grad_norm": 3.101364495462342, "learning_rate": 4.784290969881284e-06, "loss": 0.5848, "step": 6464 }, { "epoch": 0.53, "grad_norm": 4.295584779861668, "learning_rate": 4.78296843081031e-06, "loss": 0.9924, "step": 6465 }, { "epoch": 0.53, "grad_norm": 3.119756801063344, "learning_rate": 4.781645906952304e-06, "loss": 0.4947, "step": 6466 }, { "epoch": 0.53, "grad_norm": 3.2737077807255406, "learning_rate": 4.78032339839997e-06, "loss": 0.6644, "step": 6467 }, { "epoch": 0.53, "grad_norm": 3.500344929627298, "learning_rate": 4.779000905246009e-06, "loss": 0.5359, "step": 6468 }, { "epoch": 0.53, "grad_norm": 4.267609818219153, "learning_rate": 4.777678427583124e-06, "loss": 0.5198, "step": 6469 }, { "epoch": 0.53, "grad_norm": 4.657578711382833, "learning_rate": 4.776355965504015e-06, "loss": 0.5331, "step": 6470 }, { "epoch": 0.53, "grad_norm": 4.994108345839051, "learning_rate": 4.775033519101378e-06, "loss": 0.9264, "step": 6471 }, { "epoch": 0.53, "grad_norm": 2.8699942361158346, "learning_rate": 4.773711088467912e-06, "loss": 0.7568, "step": 6472 }, { "epoch": 0.53, "grad_norm": 5.175424319748729, "learning_rate": 4.772388673696314e-06, "loss": 1.2296, "step": 6473 }, { "epoch": 0.53, "grad_norm": 4.502369361692485, "learning_rate": 4.771066274879279e-06, "loss": 0.9849, "step": 6474 }, { "epoch": 0.53, "grad_norm": 3.651851635025855, "learning_rate": 4.769743892109502e-06, "loss": 0.5328, "step": 6475 }, { "epoch": 0.53, "grad_norm": 4.544804100513222, "learning_rate": 4.768421525479677e-06, "loss": 1.0894, "step": 6476 }, { "epoch": 0.53, "grad_norm": 3.1437570454179444, "learning_rate": 4.767099175082495e-06, "loss": 0.7833, "step": 6477 }, { "epoch": 0.53, "grad_norm": 4.020674057653643, "learning_rate": 4.765776841010647e-06, "loss": 0.7353, "step": 6478 }, { "epoch": 0.53, "grad_norm": 4.518597556006481, "learning_rate": 4.764454523356823e-06, "loss": 1.1217, "step": 6479 }, { "epoch": 0.53, "grad_norm": 2.13953384114246, "learning_rate": 4.763132222213711e-06, "loss": 0.3948, "step": 6480 }, { "epoch": 0.53, "grad_norm": 2.9619726314103363, "learning_rate": 4.761809937673999e-06, "loss": 0.6049, "step": 6481 }, { "epoch": 0.53, "grad_norm": 3.2256903440489735, "learning_rate": 4.760487669830377e-06, "loss": 0.4989, "step": 6482 }, { "epoch": 0.53, "grad_norm": 2.624195051629224, "learning_rate": 4.7591654187755245e-06, "loss": 0.4404, "step": 6483 }, { "epoch": 0.53, "grad_norm": 3.8447451022651813, "learning_rate": 4.757843184602128e-06, "loss": 0.9409, "step": 6484 }, { "epoch": 0.53, "grad_norm": 4.210561404766787, "learning_rate": 4.756520967402871e-06, "loss": 1.0253, "step": 6485 }, { "epoch": 0.53, "grad_norm": 3.2958280305185474, "learning_rate": 4.755198767270434e-06, "loss": 0.6959, "step": 6486 }, { "epoch": 0.53, "grad_norm": 3.8705923794882873, "learning_rate": 4.7538765842975e-06, "loss": 0.9916, "step": 6487 }, { "epoch": 0.53, "grad_norm": 4.17973960051939, "learning_rate": 4.752554418576744e-06, "loss": 0.8638, "step": 6488 }, { "epoch": 0.53, "grad_norm": 3.8058675230780707, "learning_rate": 4.7512322702008475e-06, "loss": 0.9882, "step": 6489 }, { "epoch": 0.53, "grad_norm": 2.584219461622293, "learning_rate": 4.749910139262485e-06, "loss": 0.459, "step": 6490 }, { "epoch": 0.53, "grad_norm": 3.5689914149607276, "learning_rate": 4.748588025854334e-06, "loss": 0.9858, "step": 6491 }, { "epoch": 0.53, "grad_norm": 1.6919630395843155, "learning_rate": 4.747265930069069e-06, "loss": 0.2479, "step": 6492 }, { "epoch": 0.53, "grad_norm": 3.534827490112639, "learning_rate": 4.745943851999362e-06, "loss": 0.8208, "step": 6493 }, { "epoch": 0.53, "grad_norm": 5.060101817436625, "learning_rate": 4.744621791737886e-06, "loss": 1.1179, "step": 6494 }, { "epoch": 0.53, "grad_norm": 4.358457354866757, "learning_rate": 4.74329974937731e-06, "loss": 1.2671, "step": 6495 }, { "epoch": 0.53, "grad_norm": 3.4693194824047855, "learning_rate": 4.741977725010304e-06, "loss": 0.6998, "step": 6496 }, { "epoch": 0.53, "grad_norm": 4.503786475228983, "learning_rate": 4.740655718729537e-06, "loss": 0.9596, "step": 6497 }, { "epoch": 0.53, "grad_norm": 3.5345182678278477, "learning_rate": 4.739333730627674e-06, "loss": 0.966, "step": 6498 }, { "epoch": 0.53, "grad_norm": 3.3117682019112844, "learning_rate": 4.7380117607973855e-06, "loss": 0.5517, "step": 6499 }, { "epoch": 0.53, "grad_norm": 2.778259677896063, "learning_rate": 4.73668980933133e-06, "loss": 0.5979, "step": 6500 }, { "epoch": 0.53, "grad_norm": 4.104106387260651, "learning_rate": 4.735367876322174e-06, "loss": 0.6847, "step": 6501 }, { "epoch": 0.53, "grad_norm": 3.7767560467196026, "learning_rate": 4.734045961862577e-06, "loss": 1.1592, "step": 6502 }, { "epoch": 0.53, "grad_norm": 3.879394886737245, "learning_rate": 4.732724066045201e-06, "loss": 1.032, "step": 6503 }, { "epoch": 0.53, "grad_norm": 4.215580320281627, "learning_rate": 4.731402188962706e-06, "loss": 0.7044, "step": 6504 }, { "epoch": 0.53, "grad_norm": 3.941829061345157, "learning_rate": 4.730080330707748e-06, "loss": 0.7978, "step": 6505 }, { "epoch": 0.53, "grad_norm": 3.2538614465290667, "learning_rate": 4.728758491372983e-06, "loss": 0.5861, "step": 6506 }, { "epoch": 0.53, "grad_norm": 4.061368353748383, "learning_rate": 4.727436671051068e-06, "loss": 0.9944, "step": 6507 }, { "epoch": 0.53, "grad_norm": 3.395148902636811, "learning_rate": 4.726114869834656e-06, "loss": 0.4854, "step": 6508 }, { "epoch": 0.53, "grad_norm": 4.547254005610118, "learning_rate": 4.7247930878164e-06, "loss": 0.892, "step": 6509 }, { "epoch": 0.53, "grad_norm": 2.354189589986255, "learning_rate": 4.723471325088953e-06, "loss": 0.3938, "step": 6510 }, { "epoch": 0.53, "grad_norm": 3.010307150721494, "learning_rate": 4.722149581744959e-06, "loss": 0.4741, "step": 6511 }, { "epoch": 0.53, "grad_norm": 3.220441302810838, "learning_rate": 4.720827857877071e-06, "loss": 0.5945, "step": 6512 }, { "epoch": 0.53, "grad_norm": 2.697154306835642, "learning_rate": 4.719506153577935e-06, "loss": 0.6007, "step": 6513 }, { "epoch": 0.53, "grad_norm": 4.4702241418488216, "learning_rate": 4.718184468940197e-06, "loss": 0.9425, "step": 6514 }, { "epoch": 0.53, "grad_norm": 3.4005861961412664, "learning_rate": 4.716862804056503e-06, "loss": 0.7388, "step": 6515 }, { "epoch": 0.53, "grad_norm": 3.177697256568991, "learning_rate": 4.7155411590194935e-06, "loss": 0.6258, "step": 6516 }, { "epoch": 0.53, "grad_norm": 2.5965537600315365, "learning_rate": 4.714219533921811e-06, "loss": 0.3597, "step": 6517 }, { "epoch": 0.53, "grad_norm": 3.821519678293872, "learning_rate": 4.712897928856095e-06, "loss": 0.6991, "step": 6518 }, { "epoch": 0.53, "grad_norm": 4.183071395135402, "learning_rate": 4.711576343914984e-06, "loss": 0.4475, "step": 6519 }, { "epoch": 0.53, "grad_norm": 3.3883270732955357, "learning_rate": 4.710254779191116e-06, "loss": 0.4484, "step": 6520 }, { "epoch": 0.53, "grad_norm": 3.2618431776639802, "learning_rate": 4.7089332347771265e-06, "loss": 0.4983, "step": 6521 }, { "epoch": 0.53, "grad_norm": 1.8306121244153692, "learning_rate": 4.707611710765654e-06, "loss": 0.3587, "step": 6522 }, { "epoch": 0.53, "grad_norm": 4.443170147328888, "learning_rate": 4.706290207249325e-06, "loss": 1.4299, "step": 6523 }, { "epoch": 0.53, "grad_norm": 4.4569262766400835, "learning_rate": 4.704968724320775e-06, "loss": 0.7632, "step": 6524 }, { "epoch": 0.53, "grad_norm": 2.247821939618158, "learning_rate": 4.703647262072634e-06, "loss": 0.353, "step": 6525 }, { "epoch": 0.53, "grad_norm": 3.1100807442195255, "learning_rate": 4.702325820597528e-06, "loss": 0.5021, "step": 6526 }, { "epoch": 0.53, "grad_norm": 4.355309216517904, "learning_rate": 4.70100439998809e-06, "loss": 0.8495, "step": 6527 }, { "epoch": 0.53, "grad_norm": 3.0247338209320747, "learning_rate": 4.699683000336941e-06, "loss": 0.7517, "step": 6528 }, { "epoch": 0.53, "grad_norm": 4.236116874762068, "learning_rate": 4.698361621736705e-06, "loss": 1.0296, "step": 6529 }, { "epoch": 0.53, "grad_norm": 1.955894405304145, "learning_rate": 4.6970402642800075e-06, "loss": 0.3625, "step": 6530 }, { "epoch": 0.53, "grad_norm": 2.878574051928466, "learning_rate": 4.695718928059469e-06, "loss": 0.506, "step": 6531 }, { "epoch": 0.53, "grad_norm": 3.8906841416371174, "learning_rate": 4.694397613167709e-06, "loss": 0.6025, "step": 6532 }, { "epoch": 0.53, "grad_norm": 3.6763792751475264, "learning_rate": 4.69307631969735e-06, "loss": 0.6466, "step": 6533 }, { "epoch": 0.53, "grad_norm": 3.0301141217686656, "learning_rate": 4.691755047741001e-06, "loss": 0.6439, "step": 6534 }, { "epoch": 0.53, "grad_norm": 4.391320284304078, "learning_rate": 4.690433797391282e-06, "loss": 0.8834, "step": 6535 }, { "epoch": 0.53, "grad_norm": 3.0619941823911168, "learning_rate": 4.689112568740807e-06, "loss": 0.3854, "step": 6536 }, { "epoch": 0.53, "grad_norm": 4.758029545952147, "learning_rate": 4.687791361882188e-06, "loss": 1.0864, "step": 6537 }, { "epoch": 0.53, "grad_norm": 5.363849172380941, "learning_rate": 4.6864701769080364e-06, "loss": 1.0227, "step": 6538 }, { "epoch": 0.53, "grad_norm": 2.9221309019995574, "learning_rate": 4.685149013910962e-06, "loss": 0.3992, "step": 6539 }, { "epoch": 0.53, "grad_norm": 4.769912945729529, "learning_rate": 4.683827872983571e-06, "loss": 0.7576, "step": 6540 }, { "epoch": 0.53, "grad_norm": 3.032375109346851, "learning_rate": 4.682506754218469e-06, "loss": 0.7517, "step": 6541 }, { "epoch": 0.53, "grad_norm": 2.84124810640443, "learning_rate": 4.681185657708261e-06, "loss": 0.3858, "step": 6542 }, { "epoch": 0.53, "grad_norm": 3.26168632843942, "learning_rate": 4.679864583545552e-06, "loss": 0.4775, "step": 6543 }, { "epoch": 0.53, "grad_norm": 2.778346260856777, "learning_rate": 4.678543531822944e-06, "loss": 0.433, "step": 6544 }, { "epoch": 0.53, "grad_norm": 3.001229869305055, "learning_rate": 4.677222502633033e-06, "loss": 0.6559, "step": 6545 }, { "epoch": 0.54, "grad_norm": 4.434317474416247, "learning_rate": 4.67590149606842e-06, "loss": 0.5421, "step": 6546 }, { "epoch": 0.54, "grad_norm": 4.331987535791356, "learning_rate": 4.674580512221703e-06, "loss": 0.6373, "step": 6547 }, { "epoch": 0.54, "grad_norm": 3.1064710446924804, "learning_rate": 4.673259551185475e-06, "loss": 0.6903, "step": 6548 }, { "epoch": 0.54, "grad_norm": 4.482780717530685, "learning_rate": 4.67193861305233e-06, "loss": 0.7344, "step": 6549 }, { "epoch": 0.54, "grad_norm": 2.1185714991572953, "learning_rate": 4.670617697914863e-06, "loss": 0.3799, "step": 6550 }, { "epoch": 0.54, "grad_norm": 2.986888769209969, "learning_rate": 4.66929680586566e-06, "loss": 0.7964, "step": 6551 }, { "epoch": 0.54, "grad_norm": 2.1396347568967338, "learning_rate": 4.667975936997311e-06, "loss": 0.3424, "step": 6552 }, { "epoch": 0.54, "grad_norm": 4.826732914643811, "learning_rate": 4.666655091402404e-06, "loss": 0.8784, "step": 6553 }, { "epoch": 0.54, "grad_norm": 3.2725881096348632, "learning_rate": 4.665334269173526e-06, "loss": 0.8182, "step": 6554 }, { "epoch": 0.54, "grad_norm": 5.082508256581759, "learning_rate": 4.664013470403258e-06, "loss": 1.0632, "step": 6555 }, { "epoch": 0.54, "grad_norm": 2.5090485390867383, "learning_rate": 4.662692695184184e-06, "loss": 0.5078, "step": 6556 }, { "epoch": 0.54, "grad_norm": 3.547058739510734, "learning_rate": 4.661371943608884e-06, "loss": 0.5732, "step": 6557 }, { "epoch": 0.54, "grad_norm": 3.6117772081754747, "learning_rate": 4.660051215769937e-06, "loss": 0.5969, "step": 6558 }, { "epoch": 0.54, "grad_norm": 5.578479510510918, "learning_rate": 4.65873051175992e-06, "loss": 1.3698, "step": 6559 }, { "epoch": 0.54, "grad_norm": 5.660950646030756, "learning_rate": 4.6574098316714086e-06, "loss": 1.2019, "step": 6560 }, { "epoch": 0.54, "grad_norm": 3.683994687913521, "learning_rate": 4.656089175596978e-06, "loss": 0.8219, "step": 6561 }, { "epoch": 0.54, "grad_norm": 5.582453556871559, "learning_rate": 4.6547685436292e-06, "loss": 1.3325, "step": 6562 }, { "epoch": 0.54, "grad_norm": 3.104896611457791, "learning_rate": 4.653447935860642e-06, "loss": 0.4239, "step": 6563 }, { "epoch": 0.54, "grad_norm": 3.6478755011837922, "learning_rate": 4.652127352383879e-06, "loss": 0.3726, "step": 6564 }, { "epoch": 0.54, "grad_norm": 3.3335035105942667, "learning_rate": 4.650806793291472e-06, "loss": 0.3934, "step": 6565 }, { "epoch": 0.54, "grad_norm": 4.783339180629704, "learning_rate": 4.649486258675989e-06, "loss": 1.1521, "step": 6566 }, { "epoch": 0.54, "grad_norm": 1.4381816934589031, "learning_rate": 4.6481657486299965e-06, "loss": 0.2135, "step": 6567 }, { "epoch": 0.54, "grad_norm": 3.2725112225425272, "learning_rate": 4.646845263246052e-06, "loss": 0.8093, "step": 6568 }, { "epoch": 0.54, "grad_norm": 2.744853814936621, "learning_rate": 4.645524802616717e-06, "loss": 0.3797, "step": 6569 }, { "epoch": 0.54, "grad_norm": 3.3123920628629215, "learning_rate": 4.644204366834551e-06, "loss": 0.7096, "step": 6570 }, { "epoch": 0.54, "grad_norm": 5.841646773780519, "learning_rate": 4.6428839559921115e-06, "loss": 0.9956, "step": 6571 }, { "epoch": 0.54, "grad_norm": 3.0695846330423233, "learning_rate": 4.6415635701819515e-06, "loss": 0.5472, "step": 6572 }, { "epoch": 0.54, "grad_norm": 3.7488795781565565, "learning_rate": 4.640243209496627e-06, "loss": 0.8055, "step": 6573 }, { "epoch": 0.54, "grad_norm": 5.29002043121706, "learning_rate": 4.638922874028686e-06, "loss": 1.171, "step": 6574 }, { "epoch": 0.54, "grad_norm": 4.652437636545116, "learning_rate": 4.63760256387068e-06, "loss": 0.746, "step": 6575 }, { "epoch": 0.54, "grad_norm": 3.036073634733401, "learning_rate": 4.636282279115157e-06, "loss": 0.3928, "step": 6576 }, { "epoch": 0.54, "grad_norm": 2.9691247579664775, "learning_rate": 4.634962019854664e-06, "loss": 0.64, "step": 6577 }, { "epoch": 0.54, "grad_norm": 3.546770284448663, "learning_rate": 4.633641786181746e-06, "loss": 1.0569, "step": 6578 }, { "epoch": 0.54, "grad_norm": 3.7128870863572634, "learning_rate": 4.632321578188943e-06, "loss": 0.5789, "step": 6579 }, { "epoch": 0.54, "grad_norm": 2.9631752562069495, "learning_rate": 4.6310013959687985e-06, "loss": 0.7038, "step": 6580 }, { "epoch": 0.54, "grad_norm": 3.112639351331203, "learning_rate": 4.629681239613848e-06, "loss": 0.499, "step": 6581 }, { "epoch": 0.54, "grad_norm": 3.10163436923461, "learning_rate": 4.628361109216633e-06, "loss": 0.5696, "step": 6582 }, { "epoch": 0.54, "grad_norm": 3.5160555089852474, "learning_rate": 4.627041004869684e-06, "loss": 0.7325, "step": 6583 }, { "epoch": 0.54, "grad_norm": 4.279189696864294, "learning_rate": 4.625720926665542e-06, "loss": 0.8291, "step": 6584 }, { "epoch": 0.54, "grad_norm": 3.2144240576872196, "learning_rate": 4.624400874696731e-06, "loss": 0.4094, "step": 6585 }, { "epoch": 0.54, "grad_norm": 3.3151730118113907, "learning_rate": 4.623080849055784e-06, "loss": 0.4029, "step": 6586 }, { "epoch": 0.54, "grad_norm": 4.492740953954248, "learning_rate": 4.6217608498352305e-06, "loss": 0.8886, "step": 6587 }, { "epoch": 0.54, "grad_norm": 2.2091740236208612, "learning_rate": 4.620440877127594e-06, "loss": 0.5968, "step": 6588 }, { "epoch": 0.54, "grad_norm": 5.130614751465802, "learning_rate": 4.619120931025401e-06, "loss": 0.9419, "step": 6589 }, { "epoch": 0.54, "grad_norm": 4.9276537385180665, "learning_rate": 4.617801011621175e-06, "loss": 1.1146, "step": 6590 }, { "epoch": 0.54, "grad_norm": 3.517852696399206, "learning_rate": 4.6164811190074314e-06, "loss": 0.7668, "step": 6591 }, { "epoch": 0.54, "grad_norm": 3.9414529003498098, "learning_rate": 4.615161253276693e-06, "loss": 0.7907, "step": 6592 }, { "epoch": 0.54, "grad_norm": 4.829034509793269, "learning_rate": 4.613841414521477e-06, "loss": 0.7181, "step": 6593 }, { "epoch": 0.54, "grad_norm": 4.935798880433848, "learning_rate": 4.612521602834297e-06, "loss": 1.1686, "step": 6594 }, { "epoch": 0.54, "grad_norm": 3.9536139118306277, "learning_rate": 4.611201818307666e-06, "loss": 1.3524, "step": 6595 }, { "epoch": 0.54, "grad_norm": 5.098698829021364, "learning_rate": 4.609882061034097e-06, "loss": 0.9503, "step": 6596 }, { "epoch": 0.54, "grad_norm": 4.207210196248606, "learning_rate": 4.608562331106096e-06, "loss": 0.9931, "step": 6597 }, { "epoch": 0.54, "grad_norm": 3.2699870899783843, "learning_rate": 4.607242628616171e-06, "loss": 0.6659, "step": 6598 }, { "epoch": 0.54, "grad_norm": 3.2458183261833398, "learning_rate": 4.605922953656829e-06, "loss": 0.8099, "step": 6599 }, { "epoch": 0.54, "grad_norm": 2.6046779267216835, "learning_rate": 4.604603306320574e-06, "loss": 0.418, "step": 6600 }, { "epoch": 0.54, "grad_norm": 2.6416739769033932, "learning_rate": 4.603283686699907e-06, "loss": 0.5294, "step": 6601 }, { "epoch": 0.54, "grad_norm": 2.029470485869123, "learning_rate": 4.601964094887327e-06, "loss": 0.392, "step": 6602 }, { "epoch": 0.54, "grad_norm": 2.069810495616436, "learning_rate": 4.600644530975331e-06, "loss": 0.3439, "step": 6603 }, { "epoch": 0.54, "grad_norm": 2.1883788923176763, "learning_rate": 4.599324995056415e-06, "loss": 0.5165, "step": 6604 }, { "epoch": 0.54, "grad_norm": 2.3009145317121376, "learning_rate": 4.598005487223073e-06, "loss": 0.3408, "step": 6605 }, { "epoch": 0.54, "grad_norm": 4.1050672621062265, "learning_rate": 4.596686007567797e-06, "loss": 1.0344, "step": 6606 }, { "epoch": 0.54, "grad_norm": 2.8337463877223708, "learning_rate": 4.595366556183079e-06, "loss": 0.6772, "step": 6607 }, { "epoch": 0.54, "grad_norm": 3.48425966090482, "learning_rate": 4.5940471331614014e-06, "loss": 0.879, "step": 6608 }, { "epoch": 0.54, "grad_norm": 3.4774955498265934, "learning_rate": 4.592727738595254e-06, "loss": 0.6558, "step": 6609 }, { "epoch": 0.54, "grad_norm": 2.3091363448551796, "learning_rate": 4.59140837257712e-06, "loss": 0.668, "step": 6610 }, { "epoch": 0.54, "grad_norm": 4.8952445582123945, "learning_rate": 4.59008903519948e-06, "loss": 0.9435, "step": 6611 }, { "epoch": 0.54, "grad_norm": 4.7482706881409085, "learning_rate": 4.588769726554814e-06, "loss": 1.3747, "step": 6612 }, { "epoch": 0.54, "grad_norm": 3.4056673960688952, "learning_rate": 4.587450446735604e-06, "loss": 0.5709, "step": 6613 }, { "epoch": 0.54, "grad_norm": 3.7836948198696514, "learning_rate": 4.586131195834319e-06, "loss": 0.8782, "step": 6614 }, { "epoch": 0.54, "grad_norm": 2.9011139557679586, "learning_rate": 4.584811973943437e-06, "loss": 0.4426, "step": 6615 }, { "epoch": 0.54, "grad_norm": 3.6092322214967996, "learning_rate": 4.583492781155428e-06, "loss": 0.5468, "step": 6616 }, { "epoch": 0.54, "grad_norm": 3.779247375258173, "learning_rate": 4.582173617562764e-06, "loss": 0.7156, "step": 6617 }, { "epoch": 0.54, "grad_norm": 3.4377675092726236, "learning_rate": 4.5808544832579095e-06, "loss": 0.6088, "step": 6618 }, { "epoch": 0.54, "grad_norm": 2.2807342570697653, "learning_rate": 4.579535378333334e-06, "loss": 0.31, "step": 6619 }, { "epoch": 0.54, "grad_norm": 2.3071067500491274, "learning_rate": 4.578216302881497e-06, "loss": 0.3559, "step": 6620 }, { "epoch": 0.54, "grad_norm": 3.017469989892803, "learning_rate": 4.576897256994861e-06, "loss": 0.5663, "step": 6621 }, { "epoch": 0.54, "grad_norm": 4.223549494510722, "learning_rate": 4.575578240765885e-06, "loss": 0.5989, "step": 6622 }, { "epoch": 0.54, "grad_norm": 6.180017186692998, "learning_rate": 4.574259254287028e-06, "loss": 1.4431, "step": 6623 }, { "epoch": 0.54, "grad_norm": 4.595082314447604, "learning_rate": 4.572940297650747e-06, "loss": 1.1177, "step": 6624 }, { "epoch": 0.54, "grad_norm": 1.3635989281444825, "learning_rate": 4.57162137094949e-06, "loss": 0.1789, "step": 6625 }, { "epoch": 0.54, "grad_norm": 2.05576306019229, "learning_rate": 4.570302474275712e-06, "loss": 0.3513, "step": 6626 }, { "epoch": 0.54, "grad_norm": 3.255732174985103, "learning_rate": 4.568983607721859e-06, "loss": 0.7319, "step": 6627 }, { "epoch": 0.54, "grad_norm": 2.9994180654619003, "learning_rate": 4.567664771380379e-06, "loss": 0.5239, "step": 6628 }, { "epoch": 0.54, "grad_norm": 3.9709190789507844, "learning_rate": 4.566345965343718e-06, "loss": 0.6596, "step": 6629 }, { "epoch": 0.54, "grad_norm": 3.4005590334116964, "learning_rate": 4.5650271897043195e-06, "loss": 0.6441, "step": 6630 }, { "epoch": 0.54, "grad_norm": 4.121247443116029, "learning_rate": 4.563708444554619e-06, "loss": 0.928, "step": 6631 }, { "epoch": 0.54, "grad_norm": 3.1053500780565817, "learning_rate": 4.562389729987059e-06, "loss": 0.6048, "step": 6632 }, { "epoch": 0.54, "grad_norm": 2.3267540639919053, "learning_rate": 4.561071046094075e-06, "loss": 0.3739, "step": 6633 }, { "epoch": 0.54, "grad_norm": 4.113156196181085, "learning_rate": 4.5597523929680986e-06, "loss": 0.9381, "step": 6634 }, { "epoch": 0.54, "grad_norm": 3.388165656572617, "learning_rate": 4.558433770701565e-06, "loss": 0.5184, "step": 6635 }, { "epoch": 0.54, "grad_norm": 3.799676204479062, "learning_rate": 4.557115179386903e-06, "loss": 0.6542, "step": 6636 }, { "epoch": 0.54, "grad_norm": 4.259239487215517, "learning_rate": 4.555796619116538e-06, "loss": 0.9032, "step": 6637 }, { "epoch": 0.54, "grad_norm": 3.6370771361107526, "learning_rate": 4.554478089982897e-06, "loss": 1.0361, "step": 6638 }, { "epoch": 0.54, "grad_norm": 3.756742079319636, "learning_rate": 4.553159592078403e-06, "loss": 0.6837, "step": 6639 }, { "epoch": 0.54, "grad_norm": 4.149914278587901, "learning_rate": 4.551841125495477e-06, "loss": 0.8688, "step": 6640 }, { "epoch": 0.54, "grad_norm": 3.347737195032477, "learning_rate": 4.550522690326538e-06, "loss": 0.7162, "step": 6641 }, { "epoch": 0.54, "grad_norm": 3.1569733317515665, "learning_rate": 4.5492042866640045e-06, "loss": 0.6957, "step": 6642 }, { "epoch": 0.54, "grad_norm": 4.681665935187307, "learning_rate": 4.547885914600285e-06, "loss": 1.0, "step": 6643 }, { "epoch": 0.54, "grad_norm": 5.004368873239083, "learning_rate": 4.546567574227796e-06, "loss": 1.0405, "step": 6644 }, { "epoch": 0.54, "grad_norm": 3.6669501577059718, "learning_rate": 4.545249265638947e-06, "loss": 0.4618, "step": 6645 }, { "epoch": 0.54, "grad_norm": 3.91651972542203, "learning_rate": 4.543930988926145e-06, "loss": 0.9557, "step": 6646 }, { "epoch": 0.54, "grad_norm": 3.1338220022417818, "learning_rate": 4.542612744181799e-06, "loss": 0.5671, "step": 6647 }, { "epoch": 0.54, "grad_norm": 5.158874878095333, "learning_rate": 4.541294531498306e-06, "loss": 1.2718, "step": 6648 }, { "epoch": 0.54, "grad_norm": 4.720407098990491, "learning_rate": 4.539976350968071e-06, "loss": 0.9709, "step": 6649 }, { "epoch": 0.54, "grad_norm": 3.2245280801283176, "learning_rate": 4.53865820268349e-06, "loss": 0.4954, "step": 6650 }, { "epoch": 0.54, "grad_norm": 2.147232656477744, "learning_rate": 4.537340086736963e-06, "loss": 0.4342, "step": 6651 }, { "epoch": 0.54, "grad_norm": 3.721820243847922, "learning_rate": 4.536022003220882e-06, "loss": 0.9516, "step": 6652 }, { "epoch": 0.54, "grad_norm": 6.140103242181031, "learning_rate": 4.534703952227641e-06, "loss": 1.5263, "step": 6653 }, { "epoch": 0.54, "grad_norm": 1.8124082668299026, "learning_rate": 4.533385933849626e-06, "loss": 0.3272, "step": 6654 }, { "epoch": 0.54, "grad_norm": 4.421852785391417, "learning_rate": 4.532067948179227e-06, "loss": 0.9541, "step": 6655 }, { "epoch": 0.54, "grad_norm": 3.7558805640610675, "learning_rate": 4.530749995308827e-06, "loss": 0.8192, "step": 6656 }, { "epoch": 0.54, "grad_norm": 3.297025578531439, "learning_rate": 4.529432075330811e-06, "loss": 0.7935, "step": 6657 }, { "epoch": 0.54, "grad_norm": 5.023969964098497, "learning_rate": 4.528114188337559e-06, "loss": 1.112, "step": 6658 }, { "epoch": 0.54, "grad_norm": 4.27069614561819, "learning_rate": 4.52679633442145e-06, "loss": 0.7922, "step": 6659 }, { "epoch": 0.54, "grad_norm": 2.3307633982499767, "learning_rate": 4.525478513674857e-06, "loss": 0.3234, "step": 6660 }, { "epoch": 0.54, "grad_norm": 4.15814670184417, "learning_rate": 4.5241607261901545e-06, "loss": 0.3978, "step": 6661 }, { "epoch": 0.54, "grad_norm": 3.2839993498903466, "learning_rate": 4.522842972059715e-06, "loss": 0.46, "step": 6662 }, { "epoch": 0.54, "grad_norm": 4.085813041441138, "learning_rate": 4.5215252513759065e-06, "loss": 0.9939, "step": 6663 }, { "epoch": 0.54, "grad_norm": 4.624647901844173, "learning_rate": 4.520207564231096e-06, "loss": 0.7483, "step": 6664 }, { "epoch": 0.54, "grad_norm": 4.423834373085487, "learning_rate": 4.518889910717646e-06, "loss": 0.892, "step": 6665 }, { "epoch": 0.54, "grad_norm": 1.3592898624579992, "learning_rate": 4.517572290927922e-06, "loss": 0.1814, "step": 6666 }, { "epoch": 0.54, "grad_norm": 4.094091936733789, "learning_rate": 4.516254704954279e-06, "loss": 0.9623, "step": 6667 }, { "epoch": 0.55, "grad_norm": 4.380531451903595, "learning_rate": 4.514937152889074e-06, "loss": 0.9385, "step": 6668 }, { "epoch": 0.55, "grad_norm": 1.548515060595646, "learning_rate": 4.5136196348246655e-06, "loss": 0.2365, "step": 6669 }, { "epoch": 0.55, "grad_norm": 5.871283572351933, "learning_rate": 4.5123021508534055e-06, "loss": 1.5944, "step": 6670 }, { "epoch": 0.55, "grad_norm": 4.586024416815253, "learning_rate": 4.510984701067641e-06, "loss": 0.6512, "step": 6671 }, { "epoch": 0.55, "grad_norm": 4.684192797698732, "learning_rate": 4.509667285559719e-06, "loss": 0.8768, "step": 6672 }, { "epoch": 0.55, "grad_norm": 4.4344216835852714, "learning_rate": 4.5083499044219896e-06, "loss": 0.9899, "step": 6673 }, { "epoch": 0.55, "grad_norm": 3.5196756901261192, "learning_rate": 4.50703255774679e-06, "loss": 0.8292, "step": 6674 }, { "epoch": 0.55, "grad_norm": 1.8425163445693746, "learning_rate": 4.505715245626462e-06, "loss": 0.2697, "step": 6675 }, { "epoch": 0.55, "grad_norm": 2.7258897847745307, "learning_rate": 4.5043979681533475e-06, "loss": 0.5433, "step": 6676 }, { "epoch": 0.55, "grad_norm": 3.688133066641052, "learning_rate": 4.5030807254197755e-06, "loss": 1.1534, "step": 6677 }, { "epoch": 0.55, "grad_norm": 5.01387823574584, "learning_rate": 4.501763517518082e-06, "loss": 0.9374, "step": 6678 }, { "epoch": 0.55, "grad_norm": 4.690055779675321, "learning_rate": 4.500446344540598e-06, "loss": 1.1436, "step": 6679 }, { "epoch": 0.55, "grad_norm": 2.7781687517608864, "learning_rate": 4.499129206579653e-06, "loss": 0.5423, "step": 6680 }, { "epoch": 0.55, "grad_norm": 3.301595233545369, "learning_rate": 4.497812103727569e-06, "loss": 0.8588, "step": 6681 }, { "epoch": 0.55, "grad_norm": 2.7899519352888555, "learning_rate": 4.496495036076673e-06, "loss": 0.6174, "step": 6682 }, { "epoch": 0.55, "grad_norm": 4.606542284391494, "learning_rate": 4.4951780037192814e-06, "loss": 1.2281, "step": 6683 }, { "epoch": 0.55, "grad_norm": 2.4470797538474462, "learning_rate": 4.4938610067477155e-06, "loss": 0.5242, "step": 6684 }, { "epoch": 0.55, "grad_norm": 3.6842774274565326, "learning_rate": 4.4925440452542905e-06, "loss": 0.7994, "step": 6685 }, { "epoch": 0.55, "grad_norm": 2.313781227321232, "learning_rate": 4.491227119331319e-06, "loss": 0.7083, "step": 6686 }, { "epoch": 0.55, "grad_norm": 3.5080033230773133, "learning_rate": 4.489910229071113e-06, "loss": 0.7683, "step": 6687 }, { "epoch": 0.55, "grad_norm": 4.298710508526099, "learning_rate": 4.488593374565979e-06, "loss": 0.8617, "step": 6688 }, { "epoch": 0.55, "grad_norm": 2.9292771100654753, "learning_rate": 4.487276555908225e-06, "loss": 0.342, "step": 6689 }, { "epoch": 0.55, "grad_norm": 3.569969537758229, "learning_rate": 4.4859597731901525e-06, "loss": 0.8084, "step": 6690 }, { "epoch": 0.55, "grad_norm": 3.8138563785328468, "learning_rate": 4.484643026504063e-06, "loss": 0.8283, "step": 6691 }, { "epoch": 0.55, "grad_norm": 3.7839647659751714, "learning_rate": 4.483326315942253e-06, "loss": 0.5061, "step": 6692 }, { "epoch": 0.55, "grad_norm": 2.8229391413619958, "learning_rate": 4.482009641597023e-06, "loss": 0.8583, "step": 6693 }, { "epoch": 0.55, "grad_norm": 1.5440870104220314, "learning_rate": 4.48069300356066e-06, "loss": 0.1245, "step": 6694 }, { "epoch": 0.55, "grad_norm": 2.9891717366161417, "learning_rate": 4.479376401925457e-06, "loss": 0.6948, "step": 6695 }, { "epoch": 0.55, "grad_norm": 3.258479099635694, "learning_rate": 4.4780598367837045e-06, "loss": 1.1285, "step": 6696 }, { "epoch": 0.55, "grad_norm": 3.046156263768596, "learning_rate": 4.476743308227685e-06, "loss": 0.3902, "step": 6697 }, { "epoch": 0.55, "grad_norm": 2.5052250731095094, "learning_rate": 4.475426816349682e-06, "loss": 0.4513, "step": 6698 }, { "epoch": 0.55, "grad_norm": 5.004491542181198, "learning_rate": 4.4741103612419785e-06, "loss": 1.2451, "step": 6699 }, { "epoch": 0.55, "grad_norm": 4.030912891854148, "learning_rate": 4.472793942996848e-06, "loss": 0.7664, "step": 6700 }, { "epoch": 0.55, "grad_norm": 4.0108380696713715, "learning_rate": 4.471477561706567e-06, "loss": 0.7946, "step": 6701 }, { "epoch": 0.55, "grad_norm": 3.347382738183261, "learning_rate": 4.470161217463409e-06, "loss": 0.7192, "step": 6702 }, { "epoch": 0.55, "grad_norm": 4.086513506690113, "learning_rate": 4.468844910359645e-06, "loss": 0.9626, "step": 6703 }, { "epoch": 0.55, "grad_norm": 2.7659728831313295, "learning_rate": 4.467528640487541e-06, "loss": 0.6089, "step": 6704 }, { "epoch": 0.55, "grad_norm": 3.9804538854072966, "learning_rate": 4.466212407939362e-06, "loss": 0.9291, "step": 6705 }, { "epoch": 0.55, "grad_norm": 1.299522483564469, "learning_rate": 4.464896212807369e-06, "loss": 0.2137, "step": 6706 }, { "epoch": 0.55, "grad_norm": 3.5323175506029703, "learning_rate": 4.463580055183824e-06, "loss": 0.8715, "step": 6707 }, { "epoch": 0.55, "grad_norm": 1.7972727230387882, "learning_rate": 4.462263935160982e-06, "loss": 0.4094, "step": 6708 }, { "epoch": 0.55, "grad_norm": 4.017408192743553, "learning_rate": 4.460947852831097e-06, "loss": 0.8983, "step": 6709 }, { "epoch": 0.55, "grad_norm": 3.943276224413567, "learning_rate": 4.459631808286424e-06, "loss": 0.5783, "step": 6710 }, { "epoch": 0.55, "grad_norm": 2.3353441614559265, "learning_rate": 4.458315801619208e-06, "loss": 0.37, "step": 6711 }, { "epoch": 0.55, "grad_norm": 5.189435494692342, "learning_rate": 4.456999832921697e-06, "loss": 1.2434, "step": 6712 }, { "epoch": 0.55, "grad_norm": 3.3544064433359244, "learning_rate": 4.455683902286134e-06, "loss": 0.9277, "step": 6713 }, { "epoch": 0.55, "grad_norm": 4.212854139587158, "learning_rate": 4.454368009804761e-06, "loss": 0.913, "step": 6714 }, { "epoch": 0.55, "grad_norm": 4.389668485127944, "learning_rate": 4.453052155569816e-06, "loss": 0.9254, "step": 6715 }, { "epoch": 0.55, "grad_norm": 2.041303875057696, "learning_rate": 4.451736339673536e-06, "loss": 0.5614, "step": 6716 }, { "epoch": 0.55, "grad_norm": 3.9140860955081225, "learning_rate": 4.450420562208151e-06, "loss": 0.62, "step": 6717 }, { "epoch": 0.55, "grad_norm": 3.6971464338906084, "learning_rate": 4.449104823265893e-06, "loss": 0.8889, "step": 6718 }, { "epoch": 0.55, "grad_norm": 2.4099851679890616, "learning_rate": 4.4477891229389895e-06, "loss": 0.6155, "step": 6719 }, { "epoch": 0.55, "grad_norm": 4.341321347347397, "learning_rate": 4.446473461319664e-06, "loss": 1.0529, "step": 6720 }, { "epoch": 0.55, "grad_norm": 2.3881489681389207, "learning_rate": 4.445157838500141e-06, "loss": 0.4215, "step": 6721 }, { "epoch": 0.55, "grad_norm": 1.9211033591281976, "learning_rate": 4.44384225457264e-06, "loss": 0.4279, "step": 6722 }, { "epoch": 0.55, "grad_norm": 2.70667426714046, "learning_rate": 4.442526709629376e-06, "loss": 0.4008, "step": 6723 }, { "epoch": 0.55, "grad_norm": 3.1879485480740595, "learning_rate": 4.441211203762562e-06, "loss": 0.7089, "step": 6724 }, { "epoch": 0.55, "grad_norm": 5.074765884183015, "learning_rate": 4.439895737064411e-06, "loss": 0.935, "step": 6725 }, { "epoch": 0.55, "grad_norm": 4.171599425732302, "learning_rate": 4.438580309627132e-06, "loss": 1.009, "step": 6726 }, { "epoch": 0.55, "grad_norm": 3.8459991702099563, "learning_rate": 4.437264921542931e-06, "loss": 0.7308, "step": 6727 }, { "epoch": 0.55, "grad_norm": 3.9044927686931064, "learning_rate": 4.4359495729040095e-06, "loss": 0.8916, "step": 6728 }, { "epoch": 0.55, "grad_norm": 3.7245129839287126, "learning_rate": 4.434634263802567e-06, "loss": 0.484, "step": 6729 }, { "epoch": 0.55, "grad_norm": 4.745818385243437, "learning_rate": 4.433318994330802e-06, "loss": 1.1118, "step": 6730 }, { "epoch": 0.55, "grad_norm": 3.9149526073559415, "learning_rate": 4.43200376458091e-06, "loss": 0.7711, "step": 6731 }, { "epoch": 0.55, "grad_norm": 5.0550435934262605, "learning_rate": 4.430688574645081e-06, "loss": 0.9976, "step": 6732 }, { "epoch": 0.55, "grad_norm": 4.442141227850007, "learning_rate": 4.429373424615509e-06, "loss": 1.1589, "step": 6733 }, { "epoch": 0.55, "grad_norm": 3.4050159968489413, "learning_rate": 4.428058314584373e-06, "loss": 0.7095, "step": 6734 }, { "epoch": 0.55, "grad_norm": 4.473274924192312, "learning_rate": 4.426743244643862e-06, "loss": 1.6246, "step": 6735 }, { "epoch": 0.55, "grad_norm": 4.516636084525008, "learning_rate": 4.425428214886153e-06, "loss": 1.0359, "step": 6736 }, { "epoch": 0.55, "grad_norm": 5.0959314160256275, "learning_rate": 4.424113225403425e-06, "loss": 1.5183, "step": 6737 }, { "epoch": 0.55, "grad_norm": 3.9595000335141557, "learning_rate": 4.422798276287855e-06, "loss": 0.7305, "step": 6738 }, { "epoch": 0.55, "grad_norm": 2.95466063717298, "learning_rate": 4.421483367631616e-06, "loss": 0.6199, "step": 6739 }, { "epoch": 0.55, "grad_norm": 5.506402893224004, "learning_rate": 4.420168499526872e-06, "loss": 1.046, "step": 6740 }, { "epoch": 0.55, "grad_norm": 4.537825050212767, "learning_rate": 4.418853672065794e-06, "loss": 1.3168, "step": 6741 }, { "epoch": 0.55, "grad_norm": 3.8395774802404476, "learning_rate": 4.4175388853405445e-06, "loss": 0.8424, "step": 6742 }, { "epoch": 0.55, "grad_norm": 5.11543314203391, "learning_rate": 4.4162241394432834e-06, "loss": 1.1579, "step": 6743 }, { "epoch": 0.55, "grad_norm": 4.842197311328773, "learning_rate": 4.414909434466172e-06, "loss": 1.0507, "step": 6744 }, { "epoch": 0.55, "grad_norm": 3.917293590730472, "learning_rate": 4.41359477050136e-06, "loss": 0.6057, "step": 6745 }, { "epoch": 0.55, "grad_norm": 4.337147638873848, "learning_rate": 4.412280147641003e-06, "loss": 1.1647, "step": 6746 }, { "epoch": 0.55, "grad_norm": 2.5815547564880315, "learning_rate": 4.410965565977251e-06, "loss": 0.5415, "step": 6747 }, { "epoch": 0.55, "grad_norm": 3.4075359973084156, "learning_rate": 4.409651025602248e-06, "loss": 0.8486, "step": 6748 }, { "epoch": 0.55, "grad_norm": 3.8845777983966663, "learning_rate": 4.40833652660814e-06, "loss": 1.0452, "step": 6749 }, { "epoch": 0.55, "grad_norm": 4.680515425628574, "learning_rate": 4.407022069087067e-06, "loss": 0.7546, "step": 6750 }, { "epoch": 0.55, "grad_norm": 2.7372695630352295, "learning_rate": 4.405707653131166e-06, "loss": 0.2813, "step": 6751 }, { "epoch": 0.55, "grad_norm": 4.607317444290473, "learning_rate": 4.404393278832572e-06, "loss": 0.5272, "step": 6752 }, { "epoch": 0.55, "grad_norm": 4.780700197590161, "learning_rate": 4.403078946283416e-06, "loss": 1.0148, "step": 6753 }, { "epoch": 0.55, "grad_norm": 4.169287698761378, "learning_rate": 4.401764655575828e-06, "loss": 1.033, "step": 6754 }, { "epoch": 0.55, "grad_norm": 3.5874236085275633, "learning_rate": 4.400450406801935e-06, "loss": 0.8101, "step": 6755 }, { "epoch": 0.55, "grad_norm": 3.411895553826789, "learning_rate": 4.39913620005386e-06, "loss": 0.5623, "step": 6756 }, { "epoch": 0.55, "grad_norm": 3.1675888394351133, "learning_rate": 4.3978220354237215e-06, "loss": 0.6919, "step": 6757 }, { "epoch": 0.55, "grad_norm": 2.7486331496396943, "learning_rate": 4.396507913003638e-06, "loss": 0.3992, "step": 6758 }, { "epoch": 0.55, "grad_norm": 4.34685623752115, "learning_rate": 4.395193832885723e-06, "loss": 0.7302, "step": 6759 }, { "epoch": 0.55, "grad_norm": 4.252400234021796, "learning_rate": 4.393879795162088e-06, "loss": 0.9949, "step": 6760 }, { "epoch": 0.55, "grad_norm": 3.978397095087008, "learning_rate": 4.392565799924841e-06, "loss": 0.7132, "step": 6761 }, { "epoch": 0.55, "grad_norm": 4.466544139260184, "learning_rate": 4.391251847266091e-06, "loss": 0.7536, "step": 6762 }, { "epoch": 0.55, "grad_norm": 1.6048107934800868, "learning_rate": 4.389937937277934e-06, "loss": 0.3515, "step": 6763 }, { "epoch": 0.55, "grad_norm": 3.7738442378916877, "learning_rate": 4.388624070052473e-06, "loss": 0.7473, "step": 6764 }, { "epoch": 0.55, "grad_norm": 4.139769946318573, "learning_rate": 4.387310245681805e-06, "loss": 0.6443, "step": 6765 }, { "epoch": 0.55, "grad_norm": 4.117095300850483, "learning_rate": 4.385996464258021e-06, "loss": 0.7387, "step": 6766 }, { "epoch": 0.55, "grad_norm": 4.228842653356213, "learning_rate": 4.384682725873215e-06, "loss": 1.046, "step": 6767 }, { "epoch": 0.55, "grad_norm": 3.4775465019126184, "learning_rate": 4.383369030619471e-06, "loss": 0.7722, "step": 6768 }, { "epoch": 0.55, "grad_norm": 3.1534807109644785, "learning_rate": 4.382055378588873e-06, "loss": 0.5986, "step": 6769 }, { "epoch": 0.55, "grad_norm": 3.5132800394612183, "learning_rate": 4.380741769873504e-06, "loss": 0.5259, "step": 6770 }, { "epoch": 0.55, "grad_norm": 5.468051046678788, "learning_rate": 4.379428204565442e-06, "loss": 1.0535, "step": 6771 }, { "epoch": 0.55, "grad_norm": 4.447498264682462, "learning_rate": 4.378114682756764e-06, "loss": 0.6449, "step": 6772 }, { "epoch": 0.55, "grad_norm": 2.113338299264036, "learning_rate": 4.3768012045395395e-06, "loss": 0.4076, "step": 6773 }, { "epoch": 0.55, "grad_norm": 3.2080865823927436, "learning_rate": 4.375487770005837e-06, "loss": 0.5992, "step": 6774 }, { "epoch": 0.55, "grad_norm": 2.74180423498849, "learning_rate": 4.374174379247726e-06, "loss": 0.5277, "step": 6775 }, { "epoch": 0.55, "grad_norm": 2.7026056758885635, "learning_rate": 4.372861032357265e-06, "loss": 0.5905, "step": 6776 }, { "epoch": 0.55, "grad_norm": 4.380073082270201, "learning_rate": 4.371547729426517e-06, "loss": 1.1251, "step": 6777 }, { "epoch": 0.55, "grad_norm": 4.17091378817613, "learning_rate": 4.370234470547538e-06, "loss": 0.9241, "step": 6778 }, { "epoch": 0.55, "grad_norm": 3.807829620202627, "learning_rate": 4.3689212558123846e-06, "loss": 0.9373, "step": 6779 }, { "epoch": 0.55, "grad_norm": 4.167161769805908, "learning_rate": 4.367608085313102e-06, "loss": 0.9693, "step": 6780 }, { "epoch": 0.55, "grad_norm": 3.0216638364730692, "learning_rate": 4.36629495914174e-06, "loss": 0.467, "step": 6781 }, { "epoch": 0.55, "grad_norm": 4.565712444915998, "learning_rate": 4.364981877390345e-06, "loss": 0.8293, "step": 6782 }, { "epoch": 0.55, "grad_norm": 4.587781430703773, "learning_rate": 4.363668840150956e-06, "loss": 0.8427, "step": 6783 }, { "epoch": 0.55, "grad_norm": 4.102385554796526, "learning_rate": 4.362355847515614e-06, "loss": 0.6292, "step": 6784 }, { "epoch": 0.55, "grad_norm": 2.8173956924667825, "learning_rate": 4.36104289957635e-06, "loss": 0.5476, "step": 6785 }, { "epoch": 0.55, "grad_norm": 3.8232940754466638, "learning_rate": 4.359729996425198e-06, "loss": 0.9364, "step": 6786 }, { "epoch": 0.55, "grad_norm": 2.1896318124601213, "learning_rate": 4.358417138154186e-06, "loss": 0.274, "step": 6787 }, { "epoch": 0.55, "grad_norm": 2.882635089458406, "learning_rate": 4.357104324855342e-06, "loss": 0.6152, "step": 6788 }, { "epoch": 0.55, "grad_norm": 3.9231177114410576, "learning_rate": 4.355791556620686e-06, "loss": 0.6004, "step": 6789 }, { "epoch": 0.55, "grad_norm": 3.074891731812101, "learning_rate": 4.35447883354224e-06, "loss": 0.306, "step": 6790 }, { "epoch": 0.56, "grad_norm": 4.845996157750704, "learning_rate": 4.353166155712018e-06, "loss": 1.2494, "step": 6791 }, { "epoch": 0.56, "grad_norm": 4.174181340098687, "learning_rate": 4.351853523222032e-06, "loss": 0.7239, "step": 6792 }, { "epoch": 0.56, "grad_norm": 4.133578831771037, "learning_rate": 4.350540936164293e-06, "loss": 1.085, "step": 6793 }, { "epoch": 0.56, "grad_norm": 4.31131228805281, "learning_rate": 4.349228394630808e-06, "loss": 0.84, "step": 6794 }, { "epoch": 0.56, "grad_norm": 3.599116948148214, "learning_rate": 4.347915898713581e-06, "loss": 0.6953, "step": 6795 }, { "epoch": 0.56, "grad_norm": 4.989679839429888, "learning_rate": 4.346603448504614e-06, "loss": 1.3605, "step": 6796 }, { "epoch": 0.56, "grad_norm": 3.9755423751338297, "learning_rate": 4.345291044095898e-06, "loss": 0.581, "step": 6797 }, { "epoch": 0.56, "grad_norm": 4.652500888585743, "learning_rate": 4.343978685579433e-06, "loss": 1.1582, "step": 6798 }, { "epoch": 0.56, "grad_norm": 3.579899946824376, "learning_rate": 4.342666373047207e-06, "loss": 0.8965, "step": 6799 }, { "epoch": 0.56, "grad_norm": 4.180070198560346, "learning_rate": 4.341354106591205e-06, "loss": 1.0277, "step": 6800 }, { "epoch": 0.56, "grad_norm": 3.1973548176785074, "learning_rate": 4.340041886303415e-06, "loss": 0.7563, "step": 6801 }, { "epoch": 0.56, "grad_norm": 2.585822185199269, "learning_rate": 4.338729712275818e-06, "loss": 0.4064, "step": 6802 }, { "epoch": 0.56, "grad_norm": 2.914939939545324, "learning_rate": 4.337417584600389e-06, "loss": 0.586, "step": 6803 }, { "epoch": 0.56, "grad_norm": 3.983914984009011, "learning_rate": 4.336105503369104e-06, "loss": 1.0038, "step": 6804 }, { "epoch": 0.56, "grad_norm": 4.850325783736151, "learning_rate": 4.334793468673935e-06, "loss": 0.913, "step": 6805 }, { "epoch": 0.56, "grad_norm": 2.1456873238432825, "learning_rate": 4.333481480606847e-06, "loss": 0.3679, "step": 6806 }, { "epoch": 0.56, "grad_norm": 3.485251348069992, "learning_rate": 4.332169539259809e-06, "loss": 0.8325, "step": 6807 }, { "epoch": 0.56, "grad_norm": 2.295788439384921, "learning_rate": 4.330857644724778e-06, "loss": 0.2682, "step": 6808 }, { "epoch": 0.56, "grad_norm": 2.598856570533403, "learning_rate": 4.329545797093713e-06, "loss": 0.287, "step": 6809 }, { "epoch": 0.56, "grad_norm": 3.7782431799103495, "learning_rate": 4.3282339964585705e-06, "loss": 0.3696, "step": 6810 }, { "epoch": 0.56, "grad_norm": 5.723726663255735, "learning_rate": 4.326922242911302e-06, "loss": 0.9142, "step": 6811 }, { "epoch": 0.56, "grad_norm": 5.014153303089965, "learning_rate": 4.325610536543855e-06, "loss": 1.1812, "step": 6812 }, { "epoch": 0.56, "grad_norm": 4.004017551772608, "learning_rate": 4.324298877448176e-06, "loss": 0.98, "step": 6813 }, { "epoch": 0.56, "grad_norm": 2.3388195160299645, "learning_rate": 4.3229872657162034e-06, "loss": 0.3867, "step": 6814 }, { "epoch": 0.56, "grad_norm": 4.417142475877144, "learning_rate": 4.3216757014398755e-06, "loss": 0.9917, "step": 6815 }, { "epoch": 0.56, "grad_norm": 2.5876620978645044, "learning_rate": 4.320364184711131e-06, "loss": 0.3205, "step": 6816 }, { "epoch": 0.56, "grad_norm": 4.843499879562751, "learning_rate": 4.319052715621898e-06, "loss": 0.7618, "step": 6817 }, { "epoch": 0.56, "grad_norm": 4.722391630062092, "learning_rate": 4.317741294264106e-06, "loss": 0.8201, "step": 6818 }, { "epoch": 0.56, "grad_norm": 3.7302836216449453, "learning_rate": 4.3164299207296824e-06, "loss": 0.6738, "step": 6819 }, { "epoch": 0.56, "grad_norm": 2.365487702840651, "learning_rate": 4.315118595110545e-06, "loss": 0.4978, "step": 6820 }, { "epoch": 0.56, "grad_norm": 3.5141520687520944, "learning_rate": 4.313807317498614e-06, "loss": 0.608, "step": 6821 }, { "epoch": 0.56, "grad_norm": 5.628472427927134, "learning_rate": 4.312496087985802e-06, "loss": 1.4771, "step": 6822 }, { "epoch": 0.56, "grad_norm": 3.272710188604787, "learning_rate": 4.3111849066640234e-06, "loss": 0.6231, "step": 6823 }, { "epoch": 0.56, "grad_norm": 3.9297442148002624, "learning_rate": 4.309873773625187e-06, "loss": 0.8096, "step": 6824 }, { "epoch": 0.56, "grad_norm": 2.0858669492068054, "learning_rate": 4.308562688961193e-06, "loss": 0.448, "step": 6825 }, { "epoch": 0.56, "grad_norm": 3.9865323854928296, "learning_rate": 4.3072516527639456e-06, "loss": 0.874, "step": 6826 }, { "epoch": 0.56, "grad_norm": 2.3713817084126427, "learning_rate": 4.305940665125342e-06, "loss": 0.5426, "step": 6827 }, { "epoch": 0.56, "grad_norm": 3.9105183931123766, "learning_rate": 4.304629726137279e-06, "loss": 0.7191, "step": 6828 }, { "epoch": 0.56, "grad_norm": 2.5341255524071977, "learning_rate": 4.303318835891645e-06, "loss": 0.7492, "step": 6829 }, { "epoch": 0.56, "grad_norm": 1.3118491259015854, "learning_rate": 4.302007994480331e-06, "loss": 0.1992, "step": 6830 }, { "epoch": 0.56, "grad_norm": 3.6402227805381098, "learning_rate": 4.300697201995216e-06, "loss": 0.8198, "step": 6831 }, { "epoch": 0.56, "grad_norm": 2.861804706465785, "learning_rate": 4.299386458528184e-06, "loss": 0.5432, "step": 6832 }, { "epoch": 0.56, "grad_norm": 3.6059084259740115, "learning_rate": 4.298075764171112e-06, "loss": 0.581, "step": 6833 }, { "epoch": 0.56, "grad_norm": 2.4516925600421895, "learning_rate": 4.2967651190158745e-06, "loss": 0.5308, "step": 6834 }, { "epoch": 0.56, "grad_norm": 4.462719347637248, "learning_rate": 4.295454523154342e-06, "loss": 0.7217, "step": 6835 }, { "epoch": 0.56, "grad_norm": 2.8513362667586333, "learning_rate": 4.294143976678382e-06, "loss": 0.4031, "step": 6836 }, { "epoch": 0.56, "grad_norm": 3.175222583089542, "learning_rate": 4.292833479679857e-06, "loss": 1.0811, "step": 6837 }, { "epoch": 0.56, "grad_norm": 5.269094356348749, "learning_rate": 4.291523032250627e-06, "loss": 1.0881, "step": 6838 }, { "epoch": 0.56, "grad_norm": 2.5117461525024996, "learning_rate": 4.290212634482549e-06, "loss": 0.3385, "step": 6839 }, { "epoch": 0.56, "grad_norm": 5.76109821823398, "learning_rate": 4.2889022864674755e-06, "loss": 1.2529, "step": 6840 }, { "epoch": 0.56, "grad_norm": 4.260502714074846, "learning_rate": 4.287591988297257e-06, "loss": 0.9448, "step": 6841 }, { "epoch": 0.56, "grad_norm": 4.6874940118687105, "learning_rate": 4.286281740063743e-06, "loss": 0.8363, "step": 6842 }, { "epoch": 0.56, "grad_norm": 3.32013293811396, "learning_rate": 4.28497154185877e-06, "loss": 0.3973, "step": 6843 }, { "epoch": 0.56, "grad_norm": 3.7190494425481817, "learning_rate": 4.283661393774181e-06, "loss": 0.8758, "step": 6844 }, { "epoch": 0.56, "grad_norm": 3.4261964662966773, "learning_rate": 4.28235129590181e-06, "loss": 1.0139, "step": 6845 }, { "epoch": 0.56, "grad_norm": 5.030158882763015, "learning_rate": 4.28104124833349e-06, "loss": 0.8224, "step": 6846 }, { "epoch": 0.56, "grad_norm": 2.473704631683547, "learning_rate": 4.279731251161051e-06, "loss": 0.5438, "step": 6847 }, { "epoch": 0.56, "grad_norm": 3.1995239724376523, "learning_rate": 4.278421304476316e-06, "loss": 0.574, "step": 6848 }, { "epoch": 0.56, "grad_norm": 4.054270936042657, "learning_rate": 4.277111408371106e-06, "loss": 0.704, "step": 6849 }, { "epoch": 0.56, "grad_norm": 3.0846166069226713, "learning_rate": 4.27580156293724e-06, "loss": 1.0241, "step": 6850 }, { "epoch": 0.56, "grad_norm": 4.395305755295111, "learning_rate": 4.274491768266535e-06, "loss": 1.1476, "step": 6851 }, { "epoch": 0.56, "grad_norm": 2.7818834248217987, "learning_rate": 4.273182024450799e-06, "loss": 0.856, "step": 6852 }, { "epoch": 0.56, "grad_norm": 2.8201990121495544, "learning_rate": 4.271872331581841e-06, "loss": 0.4176, "step": 6853 }, { "epoch": 0.56, "grad_norm": 3.316887402577849, "learning_rate": 4.270562689751461e-06, "loss": 0.7332, "step": 6854 }, { "epoch": 0.56, "grad_norm": 3.173018717309746, "learning_rate": 4.2692530990514625e-06, "loss": 0.5993, "step": 6855 }, { "epoch": 0.56, "grad_norm": 3.8334914810271368, "learning_rate": 4.267943559573642e-06, "loss": 0.7014, "step": 6856 }, { "epoch": 0.56, "grad_norm": 3.8593540784128573, "learning_rate": 4.2666340714097915e-06, "loss": 0.6737, "step": 6857 }, { "epoch": 0.56, "grad_norm": 3.774865952838886, "learning_rate": 4.265324634651703e-06, "loss": 0.4685, "step": 6858 }, { "epoch": 0.56, "grad_norm": 3.985799646092007, "learning_rate": 4.264015249391159e-06, "loss": 0.4988, "step": 6859 }, { "epoch": 0.56, "grad_norm": 4.204955750493154, "learning_rate": 4.2627059157199435e-06, "loss": 0.6423, "step": 6860 }, { "epoch": 0.56, "grad_norm": 4.36506220921179, "learning_rate": 4.261396633729834e-06, "loss": 1.126, "step": 6861 }, { "epoch": 0.56, "grad_norm": 3.383543259218011, "learning_rate": 4.260087403512605e-06, "loss": 0.7018, "step": 6862 }, { "epoch": 0.56, "grad_norm": 2.8830217325549654, "learning_rate": 4.25877822516003e-06, "loss": 0.6265, "step": 6863 }, { "epoch": 0.56, "grad_norm": 4.628668987731295, "learning_rate": 4.2574690987638745e-06, "loss": 0.8785, "step": 6864 }, { "epoch": 0.56, "grad_norm": 3.219833557255795, "learning_rate": 4.2561600244159066e-06, "loss": 0.7595, "step": 6865 }, { "epoch": 0.56, "grad_norm": 2.6218901336310694, "learning_rate": 4.254851002207882e-06, "loss": 0.5658, "step": 6866 }, { "epoch": 0.56, "grad_norm": 4.121018907344332, "learning_rate": 4.253542032231559e-06, "loss": 0.8594, "step": 6867 }, { "epoch": 0.56, "grad_norm": 4.638940263029625, "learning_rate": 4.252233114578691e-06, "loss": 0.5599, "step": 6868 }, { "epoch": 0.56, "grad_norm": 3.9511356064965457, "learning_rate": 4.250924249341028e-06, "loss": 0.7547, "step": 6869 }, { "epoch": 0.56, "grad_norm": 5.170116867479131, "learning_rate": 4.249615436610316e-06, "loss": 1.0425, "step": 6870 }, { "epoch": 0.56, "grad_norm": 3.177039311445076, "learning_rate": 4.248306676478295e-06, "loss": 0.7487, "step": 6871 }, { "epoch": 0.56, "grad_norm": 2.817298145174552, "learning_rate": 4.246997969036703e-06, "loss": 0.3305, "step": 6872 }, { "epoch": 0.56, "grad_norm": 4.12940330283581, "learning_rate": 4.245689314377277e-06, "loss": 0.6051, "step": 6873 }, { "epoch": 0.56, "grad_norm": 3.131631486801754, "learning_rate": 4.244380712591749e-06, "loss": 0.4731, "step": 6874 }, { "epoch": 0.56, "grad_norm": 2.4150405961163743, "learning_rate": 4.243072163771843e-06, "loss": 0.3221, "step": 6875 }, { "epoch": 0.56, "grad_norm": 4.999646529642497, "learning_rate": 4.241763668009286e-06, "loss": 0.9084, "step": 6876 }, { "epoch": 0.56, "grad_norm": 3.358170987405386, "learning_rate": 4.240455225395796e-06, "loss": 0.7714, "step": 6877 }, { "epoch": 0.56, "grad_norm": 2.5208093951344193, "learning_rate": 4.239146836023087e-06, "loss": 0.3809, "step": 6878 }, { "epoch": 0.56, "grad_norm": 4.927345876190294, "learning_rate": 4.237838499982874e-06, "loss": 1.2175, "step": 6879 }, { "epoch": 0.56, "grad_norm": 5.817855918603794, "learning_rate": 4.236530217366865e-06, "loss": 1.1805, "step": 6880 }, { "epoch": 0.56, "grad_norm": 3.6204962988593254, "learning_rate": 4.235221988266766e-06, "loss": 0.547, "step": 6881 }, { "epoch": 0.56, "grad_norm": 4.289319688627827, "learning_rate": 4.233913812774278e-06, "loss": 0.899, "step": 6882 }, { "epoch": 0.56, "grad_norm": 3.9054539055753694, "learning_rate": 4.232605690981096e-06, "loss": 0.9904, "step": 6883 }, { "epoch": 0.56, "grad_norm": 4.786545958744005, "learning_rate": 4.231297622978917e-06, "loss": 0.9344, "step": 6884 }, { "epoch": 0.56, "grad_norm": 4.10221020007079, "learning_rate": 4.229989608859428e-06, "loss": 1.1117, "step": 6885 }, { "epoch": 0.56, "grad_norm": 3.2718030494194648, "learning_rate": 4.228681648714317e-06, "loss": 0.4801, "step": 6886 }, { "epoch": 0.56, "grad_norm": 3.260089357399006, "learning_rate": 4.2273737426352665e-06, "loss": 0.851, "step": 6887 }, { "epoch": 0.56, "grad_norm": 3.5932974222685434, "learning_rate": 4.226065890713953e-06, "loss": 0.781, "step": 6888 }, { "epoch": 0.56, "grad_norm": 3.915015621717631, "learning_rate": 4.224758093042052e-06, "loss": 0.6147, "step": 6889 }, { "epoch": 0.56, "grad_norm": 2.5451163006412014, "learning_rate": 4.223450349711235e-06, "loss": 0.732, "step": 6890 }, { "epoch": 0.56, "grad_norm": 4.196982720790917, "learning_rate": 4.222142660813169e-06, "loss": 1.0102, "step": 6891 }, { "epoch": 0.56, "grad_norm": 3.8062168968693655, "learning_rate": 4.220835026439517e-06, "loss": 0.7792, "step": 6892 }, { "epoch": 0.56, "grad_norm": 4.01107608177263, "learning_rate": 4.219527446681941e-06, "loss": 0.9339, "step": 6893 }, { "epoch": 0.56, "grad_norm": 4.502385615477792, "learning_rate": 4.218219921632093e-06, "loss": 0.9728, "step": 6894 }, { "epoch": 0.56, "grad_norm": 4.103095125130039, "learning_rate": 4.2169124513816245e-06, "loss": 0.9507, "step": 6895 }, { "epoch": 0.56, "grad_norm": 4.0188942558864795, "learning_rate": 4.2156050360221855e-06, "loss": 0.9009, "step": 6896 }, { "epoch": 0.56, "grad_norm": 4.223842439731416, "learning_rate": 4.21429767564542e-06, "loss": 0.9399, "step": 6897 }, { "epoch": 0.56, "grad_norm": 4.502148157216099, "learning_rate": 4.21299037034297e-06, "loss": 0.6637, "step": 6898 }, { "epoch": 0.56, "grad_norm": 5.084211832079616, "learning_rate": 4.211683120206469e-06, "loss": 1.052, "step": 6899 }, { "epoch": 0.56, "grad_norm": 3.043921168635646, "learning_rate": 4.21037592532755e-06, "loss": 0.3334, "step": 6900 }, { "epoch": 0.56, "grad_norm": 3.333588962768206, "learning_rate": 4.209068785797842e-06, "loss": 0.6464, "step": 6901 }, { "epoch": 0.56, "grad_norm": 4.1643242914476115, "learning_rate": 4.20776170170897e-06, "loss": 1.1004, "step": 6902 }, { "epoch": 0.56, "grad_norm": 3.581833199513512, "learning_rate": 4.2064546731525545e-06, "loss": 0.766, "step": 6903 }, { "epoch": 0.56, "grad_norm": 3.16260367601079, "learning_rate": 4.205147700220214e-06, "loss": 0.6797, "step": 6904 }, { "epoch": 0.56, "grad_norm": 3.0079859962392805, "learning_rate": 4.203840783003561e-06, "loss": 0.8811, "step": 6905 }, { "epoch": 0.56, "grad_norm": 3.7685315304775404, "learning_rate": 4.202533921594203e-06, "loss": 0.5685, "step": 6906 }, { "epoch": 0.56, "grad_norm": 3.8447103263989066, "learning_rate": 4.201227116083747e-06, "loss": 0.8439, "step": 6907 }, { "epoch": 0.56, "grad_norm": 1.3527188800025902, "learning_rate": 4.199920366563793e-06, "loss": 0.1809, "step": 6908 }, { "epoch": 0.56, "grad_norm": 2.6407532064114183, "learning_rate": 4.19861367312594e-06, "loss": 0.4898, "step": 6909 }, { "epoch": 0.56, "grad_norm": 4.9041728475041895, "learning_rate": 4.197307035861783e-06, "loss": 0.7451, "step": 6910 }, { "epoch": 0.56, "grad_norm": 3.247228990935387, "learning_rate": 4.196000454862907e-06, "loss": 0.598, "step": 6911 }, { "epoch": 0.56, "grad_norm": 1.7470822297571256, "learning_rate": 4.194693930220899e-06, "loss": 0.3764, "step": 6912 }, { "epoch": 0.57, "grad_norm": 3.8398047777086575, "learning_rate": 4.193387462027343e-06, "loss": 0.8945, "step": 6913 }, { "epoch": 0.57, "grad_norm": 3.47561128374342, "learning_rate": 4.1920810503738165e-06, "loss": 0.6011, "step": 6914 }, { "epoch": 0.57, "grad_norm": 3.356979311294287, "learning_rate": 4.190774695351891e-06, "loss": 0.6791, "step": 6915 }, { "epoch": 0.57, "grad_norm": 1.6033166663574987, "learning_rate": 4.18946839705314e-06, "loss": 0.3343, "step": 6916 }, { "epoch": 0.57, "grad_norm": 4.180106896231907, "learning_rate": 4.188162155569124e-06, "loss": 0.9223, "step": 6917 }, { "epoch": 0.57, "grad_norm": 3.3692075779863893, "learning_rate": 4.186855970991409e-06, "loss": 0.5588, "step": 6918 }, { "epoch": 0.57, "grad_norm": 2.250783680882565, "learning_rate": 4.18554984341155e-06, "loss": 0.2383, "step": 6919 }, { "epoch": 0.57, "grad_norm": 4.012464368258494, "learning_rate": 4.184243772921104e-06, "loss": 0.8274, "step": 6920 }, { "epoch": 0.57, "grad_norm": 4.615573314560767, "learning_rate": 4.18293775961162e-06, "loss": 1.0733, "step": 6921 }, { "epoch": 0.57, "grad_norm": 4.063651310014804, "learning_rate": 4.181631803574643e-06, "loss": 0.7932, "step": 6922 }, { "epoch": 0.57, "grad_norm": 4.69790970871073, "learning_rate": 4.180325904901715e-06, "loss": 0.9528, "step": 6923 }, { "epoch": 0.57, "grad_norm": 2.9197981238712365, "learning_rate": 4.179020063684373e-06, "loss": 0.6375, "step": 6924 }, { "epoch": 0.57, "grad_norm": 3.274650253765275, "learning_rate": 4.177714280014151e-06, "loss": 0.8576, "step": 6925 }, { "epoch": 0.57, "grad_norm": 5.550335842713816, "learning_rate": 4.176408553982581e-06, "loss": 1.2013, "step": 6926 }, { "epoch": 0.57, "grad_norm": 1.934264354807582, "learning_rate": 4.175102885681187e-06, "loss": 0.4646, "step": 6927 }, { "epoch": 0.57, "grad_norm": 3.6260526181564816, "learning_rate": 4.17379727520149e-06, "loss": 0.834, "step": 6928 }, { "epoch": 0.57, "grad_norm": 5.768560143083979, "learning_rate": 4.1724917226350084e-06, "loss": 1.3616, "step": 6929 }, { "epoch": 0.57, "grad_norm": 3.8626606591188324, "learning_rate": 4.171186228073256e-06, "loss": 0.8391, "step": 6930 }, { "epoch": 0.57, "grad_norm": 5.188468437834785, "learning_rate": 4.169880791607741e-06, "loss": 1.2928, "step": 6931 }, { "epoch": 0.57, "grad_norm": 3.109587675048897, "learning_rate": 4.168575413329971e-06, "loss": 0.6591, "step": 6932 }, { "epoch": 0.57, "grad_norm": 3.8197203035624305, "learning_rate": 4.167270093331447e-06, "loss": 0.6995, "step": 6933 }, { "epoch": 0.57, "grad_norm": 2.8125429356691902, "learning_rate": 4.165964831703663e-06, "loss": 0.4956, "step": 6934 }, { "epoch": 0.57, "grad_norm": 5.289835103679579, "learning_rate": 4.164659628538116e-06, "loss": 1.5974, "step": 6935 }, { "epoch": 0.57, "grad_norm": 3.8978445321359225, "learning_rate": 4.163354483926292e-06, "loss": 0.5473, "step": 6936 }, { "epoch": 0.57, "grad_norm": 2.442305017248432, "learning_rate": 4.1620493979596795e-06, "loss": 0.3806, "step": 6937 }, { "epoch": 0.57, "grad_norm": 4.3640488602564815, "learning_rate": 4.160744370729757e-06, "loss": 0.9416, "step": 6938 }, { "epoch": 0.57, "grad_norm": 4.333629385556718, "learning_rate": 4.159439402328003e-06, "loss": 0.8327, "step": 6939 }, { "epoch": 0.57, "grad_norm": 2.066121830157949, "learning_rate": 4.158134492845886e-06, "loss": 0.4121, "step": 6940 }, { "epoch": 0.57, "grad_norm": 3.269603193669817, "learning_rate": 4.1568296423748785e-06, "loss": 0.6631, "step": 6941 }, { "epoch": 0.57, "grad_norm": 1.0426816193321784, "learning_rate": 4.155524851006444e-06, "loss": 0.1427, "step": 6942 }, { "epoch": 0.57, "grad_norm": 3.587387024359568, "learning_rate": 4.154220118832041e-06, "loss": 0.9754, "step": 6943 }, { "epoch": 0.57, "grad_norm": 1.2798933960380874, "learning_rate": 4.1529154459431285e-06, "loss": 0.2721, "step": 6944 }, { "epoch": 0.57, "grad_norm": 4.342283592432994, "learning_rate": 4.151610832431156e-06, "loss": 1.3131, "step": 6945 }, { "epoch": 0.57, "grad_norm": 5.365002485112433, "learning_rate": 4.150306278387573e-06, "loss": 1.095, "step": 6946 }, { "epoch": 0.57, "grad_norm": 2.6816522520739703, "learning_rate": 4.14900178390382e-06, "loss": 0.4537, "step": 6947 }, { "epoch": 0.57, "grad_norm": 2.7082743424961486, "learning_rate": 4.147697349071339e-06, "loss": 0.6203, "step": 6948 }, { "epoch": 0.57, "grad_norm": 3.7347242164312022, "learning_rate": 4.146392973981564e-06, "loss": 0.6242, "step": 6949 }, { "epoch": 0.57, "grad_norm": 3.870684993167908, "learning_rate": 4.14508865872593e-06, "loss": 0.9103, "step": 6950 }, { "epoch": 0.57, "grad_norm": 3.7617659166446287, "learning_rate": 4.143784403395858e-06, "loss": 0.5403, "step": 6951 }, { "epoch": 0.57, "grad_norm": 4.47068227471356, "learning_rate": 4.142480208082771e-06, "loss": 0.9692, "step": 6952 }, { "epoch": 0.57, "grad_norm": 4.2552461800628345, "learning_rate": 4.141176072878093e-06, "loss": 1.0206, "step": 6953 }, { "epoch": 0.57, "grad_norm": 4.822356209708549, "learning_rate": 4.1398719978732324e-06, "loss": 1.2444, "step": 6954 }, { "epoch": 0.57, "grad_norm": 4.07159222227165, "learning_rate": 4.138567983159601e-06, "loss": 0.7202, "step": 6955 }, { "epoch": 0.57, "grad_norm": 3.20873396021626, "learning_rate": 4.137264028828609e-06, "loss": 0.6758, "step": 6956 }, { "epoch": 0.57, "grad_norm": 4.56375038944988, "learning_rate": 4.1359601349716504e-06, "loss": 1.035, "step": 6957 }, { "epoch": 0.57, "grad_norm": 3.0266586675579226, "learning_rate": 4.134656301680126e-06, "loss": 0.8107, "step": 6958 }, { "epoch": 0.57, "grad_norm": 3.6120283958518833, "learning_rate": 4.133352529045429e-06, "loss": 0.6101, "step": 6959 }, { "epoch": 0.57, "grad_norm": 3.9478032953611444, "learning_rate": 4.13204881715895e-06, "loss": 0.8561, "step": 6960 }, { "epoch": 0.57, "grad_norm": 4.541264148590845, "learning_rate": 4.130745166112069e-06, "loss": 0.9062, "step": 6961 }, { "epoch": 0.57, "grad_norm": 3.446684652833886, "learning_rate": 4.129441575996172e-06, "loss": 0.7517, "step": 6962 }, { "epoch": 0.57, "grad_norm": 5.053587922886659, "learning_rate": 4.128138046902629e-06, "loss": 0.9202, "step": 6963 }, { "epoch": 0.57, "grad_norm": 3.1761630302225887, "learning_rate": 4.126834578922816e-06, "loss": 0.836, "step": 6964 }, { "epoch": 0.57, "grad_norm": 3.8479761947356645, "learning_rate": 4.1255311721480975e-06, "loss": 1.0355, "step": 6965 }, { "epoch": 0.57, "grad_norm": 2.74405953449905, "learning_rate": 4.124227826669839e-06, "loss": 0.6668, "step": 6966 }, { "epoch": 0.57, "grad_norm": 3.427789690531807, "learning_rate": 4.1229245425794004e-06, "loss": 0.4646, "step": 6967 }, { "epoch": 0.57, "grad_norm": 5.196806288674009, "learning_rate": 4.121621319968131e-06, "loss": 1.1004, "step": 6968 }, { "epoch": 0.57, "grad_norm": 3.8513313981058475, "learning_rate": 4.120318158927387e-06, "loss": 0.7007, "step": 6969 }, { "epoch": 0.57, "grad_norm": 3.613814344524273, "learning_rate": 4.11901505954851e-06, "loss": 0.6972, "step": 6970 }, { "epoch": 0.57, "grad_norm": 3.802800757327425, "learning_rate": 4.117712021922843e-06, "loss": 0.502, "step": 6971 }, { "epoch": 0.57, "grad_norm": 3.7788402929949156, "learning_rate": 4.116409046141725e-06, "loss": 0.8423, "step": 6972 }, { "epoch": 0.57, "grad_norm": 3.9727719345173997, "learning_rate": 4.115106132296488e-06, "loss": 1.042, "step": 6973 }, { "epoch": 0.57, "grad_norm": 2.137717977015083, "learning_rate": 4.113803280478458e-06, "loss": 0.3862, "step": 6974 }, { "epoch": 0.57, "grad_norm": 4.174766112087384, "learning_rate": 4.112500490778962e-06, "loss": 0.6982, "step": 6975 }, { "epoch": 0.57, "grad_norm": 4.699326857290857, "learning_rate": 4.1111977632893195e-06, "loss": 0.847, "step": 6976 }, { "epoch": 0.57, "grad_norm": 5.016945658519239, "learning_rate": 4.109895098100845e-06, "loss": 0.8596, "step": 6977 }, { "epoch": 0.57, "grad_norm": 4.494855177875274, "learning_rate": 4.108592495304851e-06, "loss": 1.012, "step": 6978 }, { "epoch": 0.57, "grad_norm": 2.9046747025960418, "learning_rate": 4.107289954992646e-06, "loss": 0.5896, "step": 6979 }, { "epoch": 0.57, "grad_norm": 4.042097661435592, "learning_rate": 4.1059874772555265e-06, "loss": 0.8972, "step": 6980 }, { "epoch": 0.57, "grad_norm": 3.9732746412026763, "learning_rate": 4.104685062184795e-06, "loss": 0.8474, "step": 6981 }, { "epoch": 0.57, "grad_norm": 4.6544356724749, "learning_rate": 4.103382709871744e-06, "loss": 1.3651, "step": 6982 }, { "epoch": 0.57, "grad_norm": 3.6851193126074655, "learning_rate": 4.102080420407662e-06, "loss": 0.6084, "step": 6983 }, { "epoch": 0.57, "grad_norm": 4.122859640156609, "learning_rate": 4.100778193883838e-06, "loss": 1.1105, "step": 6984 }, { "epoch": 0.57, "grad_norm": 3.0783988319102744, "learning_rate": 4.099476030391548e-06, "loss": 0.4396, "step": 6985 }, { "epoch": 0.57, "grad_norm": 4.685476561129729, "learning_rate": 4.098173930022069e-06, "loss": 1.0255, "step": 6986 }, { "epoch": 0.57, "grad_norm": 5.006861256635884, "learning_rate": 4.096871892866672e-06, "loss": 1.0051, "step": 6987 }, { "epoch": 0.57, "grad_norm": 3.209919906489276, "learning_rate": 4.095569919016624e-06, "loss": 0.5489, "step": 6988 }, { "epoch": 0.57, "grad_norm": 4.427039188500597, "learning_rate": 4.0942680085631896e-06, "loss": 0.844, "step": 6989 }, { "epoch": 0.57, "grad_norm": 4.688646828015336, "learning_rate": 4.092966161597628e-06, "loss": 1.3874, "step": 6990 }, { "epoch": 0.57, "grad_norm": 3.9209706639075392, "learning_rate": 4.0916643782111885e-06, "loss": 0.7576, "step": 6991 }, { "epoch": 0.57, "grad_norm": 3.602547173452972, "learning_rate": 4.0903626584951235e-06, "loss": 0.7423, "step": 6992 }, { "epoch": 0.57, "grad_norm": 1.218153447306228, "learning_rate": 4.089061002540678e-06, "loss": 0.1575, "step": 6993 }, { "epoch": 0.57, "grad_norm": 3.162180261822791, "learning_rate": 4.087759410439091e-06, "loss": 0.8636, "step": 6994 }, { "epoch": 0.57, "grad_norm": 4.211893565610061, "learning_rate": 4.0864578822815996e-06, "loss": 0.5571, "step": 6995 }, { "epoch": 0.57, "grad_norm": 1.93994923532354, "learning_rate": 4.085156418159436e-06, "loss": 0.255, "step": 6996 }, { "epoch": 0.57, "grad_norm": 4.078531851812658, "learning_rate": 4.083855018163825e-06, "loss": 0.7768, "step": 6997 }, { "epoch": 0.57, "grad_norm": 3.6106329231752055, "learning_rate": 4.0825536823859895e-06, "loss": 0.8278, "step": 6998 }, { "epoch": 0.57, "grad_norm": 3.9039369701823583, "learning_rate": 4.081252410917148e-06, "loss": 0.705, "step": 6999 }, { "epoch": 0.57, "grad_norm": 3.937544893436525, "learning_rate": 4.079951203848515e-06, "loss": 0.8428, "step": 7000 }, { "epoch": 0.57, "grad_norm": 5.265860819313912, "learning_rate": 4.078650061271298e-06, "loss": 1.3106, "step": 7001 }, { "epoch": 0.57, "grad_norm": 1.076213834629379, "learning_rate": 4.077348983276705e-06, "loss": 0.1628, "step": 7002 }, { "epoch": 0.57, "grad_norm": 5.9747004961131625, "learning_rate": 4.0760479699559295e-06, "loss": 1.2271, "step": 7003 }, { "epoch": 0.57, "grad_norm": 3.229486804362292, "learning_rate": 4.074747021400171e-06, "loss": 0.5791, "step": 7004 }, { "epoch": 0.57, "grad_norm": 3.696071073445764, "learning_rate": 4.073446137700619e-06, "loss": 0.9656, "step": 7005 }, { "epoch": 0.57, "grad_norm": 4.023258765103961, "learning_rate": 4.072145318948461e-06, "loss": 0.9494, "step": 7006 }, { "epoch": 0.57, "grad_norm": 2.3544085903462877, "learning_rate": 4.0708445652348795e-06, "loss": 0.3698, "step": 7007 }, { "epoch": 0.57, "grad_norm": 2.5439805146070493, "learning_rate": 4.069543876651048e-06, "loss": 0.4269, "step": 7008 }, { "epoch": 0.57, "grad_norm": 3.0477291576943464, "learning_rate": 4.068243253288143e-06, "loss": 0.491, "step": 7009 }, { "epoch": 0.57, "grad_norm": 5.424843315953494, "learning_rate": 4.0669426952373305e-06, "loss": 0.8975, "step": 7010 }, { "epoch": 0.57, "grad_norm": 4.902101146987203, "learning_rate": 4.065642202589774e-06, "loss": 1.0385, "step": 7011 }, { "epoch": 0.57, "grad_norm": 3.566245633850951, "learning_rate": 4.064341775436632e-06, "loss": 0.6051, "step": 7012 }, { "epoch": 0.57, "grad_norm": 4.341015013701762, "learning_rate": 4.063041413869062e-06, "loss": 0.9417, "step": 7013 }, { "epoch": 0.57, "grad_norm": 4.06159040114982, "learning_rate": 4.061741117978209e-06, "loss": 0.7986, "step": 7014 }, { "epoch": 0.57, "grad_norm": 4.947224703436263, "learning_rate": 4.06044088785522e-06, "loss": 1.0728, "step": 7015 }, { "epoch": 0.57, "grad_norm": 3.263300477571825, "learning_rate": 4.059140723591238e-06, "loss": 0.6382, "step": 7016 }, { "epoch": 0.57, "grad_norm": 4.3658893841201705, "learning_rate": 4.057840625277395e-06, "loss": 0.8812, "step": 7017 }, { "epoch": 0.57, "grad_norm": 4.202865379750988, "learning_rate": 4.056540593004823e-06, "loss": 0.484, "step": 7018 }, { "epoch": 0.57, "grad_norm": 3.352692183555134, "learning_rate": 4.0552406268646524e-06, "loss": 0.6808, "step": 7019 }, { "epoch": 0.57, "grad_norm": 3.1367246178274346, "learning_rate": 4.053940726948001e-06, "loss": 0.6093, "step": 7020 }, { "epoch": 0.57, "grad_norm": 2.8307141323573917, "learning_rate": 4.052640893345986e-06, "loss": 0.3585, "step": 7021 }, { "epoch": 0.57, "grad_norm": 2.9444468720987205, "learning_rate": 4.051341126149722e-06, "loss": 0.4466, "step": 7022 }, { "epoch": 0.57, "grad_norm": 1.089568245849912, "learning_rate": 4.0500414254503174e-06, "loss": 0.1585, "step": 7023 }, { "epoch": 0.57, "grad_norm": 3.5099561653851397, "learning_rate": 4.048741791338874e-06, "loss": 0.7526, "step": 7024 }, { "epoch": 0.57, "grad_norm": 3.843930801874921, "learning_rate": 4.047442223906493e-06, "loss": 0.7592, "step": 7025 }, { "epoch": 0.57, "grad_norm": 3.210986763199321, "learning_rate": 4.046142723244264e-06, "loss": 0.5776, "step": 7026 }, { "epoch": 0.57, "grad_norm": 4.040105628903596, "learning_rate": 4.044843289443279e-06, "loss": 1.2542, "step": 7027 }, { "epoch": 0.57, "grad_norm": 4.132099269451299, "learning_rate": 4.043543922594623e-06, "loss": 0.8952, "step": 7028 }, { "epoch": 0.57, "grad_norm": 6.111833355656927, "learning_rate": 4.042244622789376e-06, "loss": 1.1106, "step": 7029 }, { "epoch": 0.57, "grad_norm": 3.988197594451673, "learning_rate": 4.040945390118614e-06, "loss": 0.6804, "step": 7030 }, { "epoch": 0.57, "grad_norm": 4.509084378635519, "learning_rate": 4.039646224673404e-06, "loss": 1.1252, "step": 7031 }, { "epoch": 0.57, "grad_norm": 3.7563337982585474, "learning_rate": 4.038347126544816e-06, "loss": 0.9299, "step": 7032 }, { "epoch": 0.57, "grad_norm": 3.396042004521007, "learning_rate": 4.037048095823907e-06, "loss": 0.8917, "step": 7033 }, { "epoch": 0.57, "grad_norm": 3.6091353439783886, "learning_rate": 4.035749132601738e-06, "loss": 0.8339, "step": 7034 }, { "epoch": 0.58, "grad_norm": 3.324665985027785, "learning_rate": 4.034450236969357e-06, "loss": 0.8793, "step": 7035 }, { "epoch": 0.58, "grad_norm": 3.889280898988449, "learning_rate": 4.033151409017814e-06, "loss": 0.6727, "step": 7036 }, { "epoch": 0.58, "grad_norm": 4.705194395612646, "learning_rate": 4.031852648838148e-06, "loss": 1.1622, "step": 7037 }, { "epoch": 0.58, "grad_norm": 4.899128719302082, "learning_rate": 4.030553956521397e-06, "loss": 0.9026, "step": 7038 }, { "epoch": 0.58, "grad_norm": 3.4238661464589204, "learning_rate": 4.029255332158597e-06, "loss": 0.641, "step": 7039 }, { "epoch": 0.58, "grad_norm": 3.179772961300419, "learning_rate": 4.0279567758407715e-06, "loss": 0.738, "step": 7040 }, { "epoch": 0.58, "grad_norm": 4.810268008598644, "learning_rate": 4.026658287658947e-06, "loss": 0.7484, "step": 7041 }, { "epoch": 0.58, "grad_norm": 4.924214179817267, "learning_rate": 4.025359867704141e-06, "loss": 0.8885, "step": 7042 }, { "epoch": 0.58, "grad_norm": 4.578335547143393, "learning_rate": 4.024061516067365e-06, "loss": 1.2875, "step": 7043 }, { "epoch": 0.58, "grad_norm": 2.0325952968801584, "learning_rate": 4.02276323283963e-06, "loss": 0.4974, "step": 7044 }, { "epoch": 0.58, "grad_norm": 6.395851544449063, "learning_rate": 4.021465018111939e-06, "loss": 1.2998, "step": 7045 }, { "epoch": 0.58, "grad_norm": 3.706246330303951, "learning_rate": 4.020166871975293e-06, "loss": 0.8945, "step": 7046 }, { "epoch": 0.58, "grad_norm": 4.104683337642188, "learning_rate": 4.0188687945206846e-06, "loss": 1.0408, "step": 7047 }, { "epoch": 0.58, "grad_norm": 1.8628401219992465, "learning_rate": 4.0175707858391035e-06, "loss": 0.4056, "step": 7048 }, { "epoch": 0.58, "grad_norm": 4.183498230905607, "learning_rate": 4.016272846021534e-06, "loss": 0.9974, "step": 7049 }, { "epoch": 0.58, "grad_norm": 3.068505142474466, "learning_rate": 4.014974975158958e-06, "loss": 0.8056, "step": 7050 }, { "epoch": 0.58, "grad_norm": 3.3480882349045986, "learning_rate": 4.013677173342348e-06, "loss": 0.6373, "step": 7051 }, { "epoch": 0.58, "grad_norm": 3.9158743043304876, "learning_rate": 4.012379440662676e-06, "loss": 0.7279, "step": 7052 }, { "epoch": 0.58, "grad_norm": 5.9453187149906, "learning_rate": 4.011081777210909e-06, "loss": 1.505, "step": 7053 }, { "epoch": 0.58, "grad_norm": 1.6622951104295558, "learning_rate": 4.009784183078004e-06, "loss": 0.3531, "step": 7054 }, { "epoch": 0.58, "grad_norm": 2.9462489165130714, "learning_rate": 4.008486658354919e-06, "loss": 0.4783, "step": 7055 }, { "epoch": 0.58, "grad_norm": 4.435618489485107, "learning_rate": 4.007189203132603e-06, "loss": 0.9431, "step": 7056 }, { "epoch": 0.58, "grad_norm": 5.505739082053664, "learning_rate": 4.005891817502004e-06, "loss": 1.2068, "step": 7057 }, { "epoch": 0.58, "grad_norm": 4.567645285380558, "learning_rate": 4.004594501554061e-06, "loss": 0.8364, "step": 7058 }, { "epoch": 0.58, "grad_norm": 3.8855141545565215, "learning_rate": 4.003297255379715e-06, "loss": 0.6913, "step": 7059 }, { "epoch": 0.58, "grad_norm": 3.4254509751515205, "learning_rate": 4.0020000790698895e-06, "loss": 0.5555, "step": 7060 }, { "epoch": 0.58, "grad_norm": 3.424551143787788, "learning_rate": 4.0007029727155165e-06, "loss": 0.5029, "step": 7061 }, { "epoch": 0.58, "grad_norm": 1.8812165633556992, "learning_rate": 3.999405936407517e-06, "loss": 0.3657, "step": 7062 }, { "epoch": 0.58, "grad_norm": 3.6522064217633807, "learning_rate": 3.998108970236807e-06, "loss": 0.6894, "step": 7063 }, { "epoch": 0.58, "grad_norm": 3.6574313938311116, "learning_rate": 3.9968120742942965e-06, "loss": 1.0746, "step": 7064 }, { "epoch": 0.58, "grad_norm": 3.8574976031253674, "learning_rate": 3.995515248670896e-06, "loss": 0.8057, "step": 7065 }, { "epoch": 0.58, "grad_norm": 4.902393834271197, "learning_rate": 3.994218493457503e-06, "loss": 0.8585, "step": 7066 }, { "epoch": 0.58, "grad_norm": 1.7013551577078005, "learning_rate": 3.992921808745016e-06, "loss": 0.3055, "step": 7067 }, { "epoch": 0.58, "grad_norm": 3.1942799397653476, "learning_rate": 3.991625194624328e-06, "loss": 0.7487, "step": 7068 }, { "epoch": 0.58, "grad_norm": 2.478872092128973, "learning_rate": 3.990328651186326e-06, "loss": 0.2751, "step": 7069 }, { "epoch": 0.58, "grad_norm": 5.540871815467057, "learning_rate": 3.989032178521892e-06, "loss": 0.8078, "step": 7070 }, { "epoch": 0.58, "grad_norm": 4.353383629970022, "learning_rate": 3.987735776721902e-06, "loss": 1.0104, "step": 7071 }, { "epoch": 0.58, "grad_norm": 4.733842174247616, "learning_rate": 3.9864394458772275e-06, "loss": 0.9171, "step": 7072 }, { "epoch": 0.58, "grad_norm": 4.91998825679797, "learning_rate": 3.9851431860787376e-06, "loss": 0.9968, "step": 7073 }, { "epoch": 0.58, "grad_norm": 4.401450605560654, "learning_rate": 3.983846997417293e-06, "loss": 0.5211, "step": 7074 }, { "epoch": 0.58, "grad_norm": 3.6189428504676515, "learning_rate": 3.982550879983752e-06, "loss": 0.9904, "step": 7075 }, { "epoch": 0.58, "grad_norm": 2.908054204358741, "learning_rate": 3.981254833868968e-06, "loss": 0.5373, "step": 7076 }, { "epoch": 0.58, "grad_norm": 5.439929750668236, "learning_rate": 3.979958859163785e-06, "loss": 1.0696, "step": 7077 }, { "epoch": 0.58, "grad_norm": 2.936184099302287, "learning_rate": 3.978662955959047e-06, "loss": 0.5715, "step": 7078 }, { "epoch": 0.58, "grad_norm": 3.8749727121931197, "learning_rate": 3.977367124345591e-06, "loss": 0.9434, "step": 7079 }, { "epoch": 0.58, "grad_norm": 3.5969238585770107, "learning_rate": 3.976071364414248e-06, "loss": 0.661, "step": 7080 }, { "epoch": 0.58, "grad_norm": 3.761432159512601, "learning_rate": 3.974775676255847e-06, "loss": 0.8004, "step": 7081 }, { "epoch": 0.58, "grad_norm": 5.105157516199045, "learning_rate": 3.973480059961211e-06, "loss": 1.0297, "step": 7082 }, { "epoch": 0.58, "grad_norm": 3.2195043443638456, "learning_rate": 3.9721845156211535e-06, "loss": 0.6143, "step": 7083 }, { "epoch": 0.58, "grad_norm": 3.3895344869403745, "learning_rate": 3.970889043326488e-06, "loss": 0.9823, "step": 7084 }, { "epoch": 0.58, "grad_norm": 3.4684250527361247, "learning_rate": 3.969593643168022e-06, "loss": 0.4505, "step": 7085 }, { "epoch": 0.58, "grad_norm": 3.388774080285407, "learning_rate": 3.968298315236558e-06, "loss": 0.7791, "step": 7086 }, { "epoch": 0.58, "grad_norm": 3.0887367584929875, "learning_rate": 3.967003059622893e-06, "loss": 0.6912, "step": 7087 }, { "epoch": 0.58, "grad_norm": 4.66271295613892, "learning_rate": 3.965707876417818e-06, "loss": 1.1753, "step": 7088 }, { "epoch": 0.58, "grad_norm": 4.455896539629951, "learning_rate": 3.964412765712118e-06, "loss": 0.811, "step": 7089 }, { "epoch": 0.58, "grad_norm": 3.664757526270815, "learning_rate": 3.963117727596576e-06, "loss": 0.674, "step": 7090 }, { "epoch": 0.58, "grad_norm": 2.662817213759074, "learning_rate": 3.961822762161969e-06, "loss": 0.6596, "step": 7091 }, { "epoch": 0.58, "grad_norm": 3.843816071724626, "learning_rate": 3.960527869499068e-06, "loss": 0.868, "step": 7092 }, { "epoch": 0.58, "grad_norm": 3.971949880221828, "learning_rate": 3.959233049698642e-06, "loss": 0.5862, "step": 7093 }, { "epoch": 0.58, "grad_norm": 4.403184894041416, "learning_rate": 3.957938302851447e-06, "loss": 0.8985, "step": 7094 }, { "epoch": 0.58, "grad_norm": 2.151657750427979, "learning_rate": 3.956643629048244e-06, "loss": 0.3323, "step": 7095 }, { "epoch": 0.58, "grad_norm": 4.326153968617495, "learning_rate": 3.95534902837978e-06, "loss": 0.893, "step": 7096 }, { "epoch": 0.58, "grad_norm": 4.0435757373843595, "learning_rate": 3.954054500936803e-06, "loss": 0.8832, "step": 7097 }, { "epoch": 0.58, "grad_norm": 2.846159514245381, "learning_rate": 3.952760046810054e-06, "loss": 0.4438, "step": 7098 }, { "epoch": 0.58, "grad_norm": 5.182475428073442, "learning_rate": 3.951465666090269e-06, "loss": 0.975, "step": 7099 }, { "epoch": 0.58, "grad_norm": 5.230426826177786, "learning_rate": 3.950171358868177e-06, "loss": 1.4132, "step": 7100 }, { "epoch": 0.58, "grad_norm": 3.8291254257521725, "learning_rate": 3.948877125234502e-06, "loss": 0.6377, "step": 7101 }, { "epoch": 0.58, "grad_norm": 3.216084276924933, "learning_rate": 3.947582965279969e-06, "loss": 0.6111, "step": 7102 }, { "epoch": 0.58, "grad_norm": 1.6706962900628781, "learning_rate": 3.9462888790952885e-06, "loss": 0.2765, "step": 7103 }, { "epoch": 0.58, "grad_norm": 3.138040175328101, "learning_rate": 3.944994866771171e-06, "loss": 0.339, "step": 7104 }, { "epoch": 0.58, "grad_norm": 3.604511535772637, "learning_rate": 3.943700928398325e-06, "loss": 0.7229, "step": 7105 }, { "epoch": 0.58, "grad_norm": 3.5932734228415804, "learning_rate": 3.942407064067444e-06, "loss": 0.5461, "step": 7106 }, { "epoch": 0.58, "grad_norm": 3.790858424347929, "learning_rate": 3.941113273869226e-06, "loss": 0.7984, "step": 7107 }, { "epoch": 0.58, "grad_norm": 3.991797473632268, "learning_rate": 3.939819557894358e-06, "loss": 0.7425, "step": 7108 }, { "epoch": 0.58, "grad_norm": 4.2829158230152045, "learning_rate": 3.938525916233527e-06, "loss": 1.1413, "step": 7109 }, { "epoch": 0.58, "grad_norm": 2.27835612607791, "learning_rate": 3.93723234897741e-06, "loss": 0.3811, "step": 7110 }, { "epoch": 0.58, "grad_norm": 4.0617761846025, "learning_rate": 3.93593885621668e-06, "loss": 0.6642, "step": 7111 }, { "epoch": 0.58, "grad_norm": 4.211114673808272, "learning_rate": 3.934645438042004e-06, "loss": 1.2995, "step": 7112 }, { "epoch": 0.58, "grad_norm": 3.1915693087058945, "learning_rate": 3.933352094544045e-06, "loss": 0.7407, "step": 7113 }, { "epoch": 0.58, "grad_norm": 4.701250053167851, "learning_rate": 3.932058825813464e-06, "loss": 1.1612, "step": 7114 }, { "epoch": 0.58, "grad_norm": 4.970892171852544, "learning_rate": 3.930765631940911e-06, "loss": 0.8714, "step": 7115 }, { "epoch": 0.58, "grad_norm": 3.527673185339682, "learning_rate": 3.929472513017036e-06, "loss": 0.653, "step": 7116 }, { "epoch": 0.58, "grad_norm": 3.623928334585889, "learning_rate": 3.928179469132477e-06, "loss": 0.8409, "step": 7117 }, { "epoch": 0.58, "grad_norm": 4.8302572266053065, "learning_rate": 3.926886500377874e-06, "loss": 1.0464, "step": 7118 }, { "epoch": 0.58, "grad_norm": 3.6459442820124863, "learning_rate": 3.925593606843856e-06, "loss": 0.8235, "step": 7119 }, { "epoch": 0.58, "grad_norm": 3.4826241592611815, "learning_rate": 3.924300788621049e-06, "loss": 0.6283, "step": 7120 }, { "epoch": 0.58, "grad_norm": 2.9437138488386823, "learning_rate": 3.923008045800077e-06, "loss": 0.461, "step": 7121 }, { "epoch": 0.58, "grad_norm": 3.932708836589702, "learning_rate": 3.921715378471555e-06, "loss": 0.9496, "step": 7122 }, { "epoch": 0.58, "grad_norm": 3.194910499503078, "learning_rate": 3.92042278672609e-06, "loss": 0.6686, "step": 7123 }, { "epoch": 0.58, "grad_norm": 3.1726502179347773, "learning_rate": 3.91913027065429e-06, "loss": 0.5938, "step": 7124 }, { "epoch": 0.58, "grad_norm": 1.8286749940870537, "learning_rate": 3.917837830346754e-06, "loss": 0.4451, "step": 7125 }, { "epoch": 0.58, "grad_norm": 3.945794551030212, "learning_rate": 3.916545465894077e-06, "loss": 1.0091, "step": 7126 }, { "epoch": 0.58, "grad_norm": 2.53195597074014, "learning_rate": 3.915253177386849e-06, "loss": 0.6858, "step": 7127 }, { "epoch": 0.58, "grad_norm": 2.440713321478638, "learning_rate": 3.91396096491565e-06, "loss": 0.4462, "step": 7128 }, { "epoch": 0.58, "grad_norm": 3.8823848511405292, "learning_rate": 3.912668828571061e-06, "loss": 0.9204, "step": 7129 }, { "epoch": 0.58, "grad_norm": 3.9367897793515216, "learning_rate": 3.9113767684436555e-06, "loss": 0.73, "step": 7130 }, { "epoch": 0.58, "grad_norm": 4.190586554285153, "learning_rate": 3.910084784624001e-06, "loss": 0.9722, "step": 7131 }, { "epoch": 0.58, "grad_norm": 4.9779115794191515, "learning_rate": 3.90879287720266e-06, "loss": 1.2202, "step": 7132 }, { "epoch": 0.58, "grad_norm": 2.5050948355733933, "learning_rate": 3.907501046270189e-06, "loss": 0.4069, "step": 7133 }, { "epoch": 0.58, "grad_norm": 3.415859284537811, "learning_rate": 3.906209291917141e-06, "loss": 0.8027, "step": 7134 }, { "epoch": 0.58, "grad_norm": 4.9358216022893835, "learning_rate": 3.904917614234061e-06, "loss": 1.2471, "step": 7135 }, { "epoch": 0.58, "grad_norm": 3.9747677120767912, "learning_rate": 3.903626013311489e-06, "loss": 0.7411, "step": 7136 }, { "epoch": 0.58, "grad_norm": 3.988415913963125, "learning_rate": 3.902334489239963e-06, "loss": 0.7372, "step": 7137 }, { "epoch": 0.58, "grad_norm": 5.738282110028436, "learning_rate": 3.901043042110012e-06, "loss": 0.8475, "step": 7138 }, { "epoch": 0.58, "grad_norm": 4.100754157933643, "learning_rate": 3.899751672012163e-06, "loss": 0.8092, "step": 7139 }, { "epoch": 0.58, "grad_norm": 3.5068835161335983, "learning_rate": 3.898460379036931e-06, "loss": 0.7991, "step": 7140 }, { "epoch": 0.58, "grad_norm": 3.933771498849496, "learning_rate": 3.897169163274835e-06, "loss": 0.8676, "step": 7141 }, { "epoch": 0.58, "grad_norm": 4.602783614402582, "learning_rate": 3.895878024816378e-06, "loss": 0.7007, "step": 7142 }, { "epoch": 0.58, "grad_norm": 2.797454968338988, "learning_rate": 3.894586963752068e-06, "loss": 0.5681, "step": 7143 }, { "epoch": 0.58, "grad_norm": 3.1546536385429227, "learning_rate": 3.893295980172401e-06, "loss": 0.7844, "step": 7144 }, { "epoch": 0.58, "grad_norm": 3.454972069711611, "learning_rate": 3.892005074167871e-06, "loss": 0.5962, "step": 7145 }, { "epoch": 0.58, "grad_norm": 4.4058536007061235, "learning_rate": 3.890714245828961e-06, "loss": 0.7945, "step": 7146 }, { "epoch": 0.58, "grad_norm": 4.3281395348638245, "learning_rate": 3.889423495246155e-06, "loss": 0.8408, "step": 7147 }, { "epoch": 0.58, "grad_norm": 2.1678889532437804, "learning_rate": 3.88813282250993e-06, "loss": 0.2202, "step": 7148 }, { "epoch": 0.58, "grad_norm": 3.6827832514151853, "learning_rate": 3.8868422277107536e-06, "loss": 0.7201, "step": 7149 }, { "epoch": 0.58, "grad_norm": 2.427866915998655, "learning_rate": 3.885551710939095e-06, "loss": 0.5189, "step": 7150 }, { "epoch": 0.58, "grad_norm": 3.738846069484021, "learning_rate": 3.884261272285409e-06, "loss": 0.8858, "step": 7151 }, { "epoch": 0.58, "grad_norm": 2.843459236720858, "learning_rate": 3.8829709118401525e-06, "loss": 0.4664, "step": 7152 }, { "epoch": 0.58, "grad_norm": 3.153689597283749, "learning_rate": 3.881680629693774e-06, "loss": 0.4918, "step": 7153 }, { "epoch": 0.58, "grad_norm": 4.088250182020076, "learning_rate": 3.8803904259367156e-06, "loss": 0.6217, "step": 7154 }, { "epoch": 0.58, "grad_norm": 5.021547163364894, "learning_rate": 3.879100300659417e-06, "loss": 1.1175, "step": 7155 }, { "epoch": 0.58, "grad_norm": 4.582998666770339, "learning_rate": 3.87781025395231e-06, "loss": 1.0289, "step": 7156 }, { "epoch": 0.58, "grad_norm": 4.112295157664457, "learning_rate": 3.87652028590582e-06, "loss": 0.6874, "step": 7157 }, { "epoch": 0.59, "grad_norm": 2.8179712349560764, "learning_rate": 3.875230396610367e-06, "loss": 0.3803, "step": 7158 }, { "epoch": 0.59, "grad_norm": 0.9175547172248338, "learning_rate": 3.873940586156368e-06, "loss": 0.1297, "step": 7159 }, { "epoch": 0.59, "grad_norm": 2.2277994379466444, "learning_rate": 3.8726508546342346e-06, "loss": 0.2989, "step": 7160 }, { "epoch": 0.59, "grad_norm": 4.338092565175346, "learning_rate": 3.8713612021343695e-06, "loss": 0.7526, "step": 7161 }, { "epoch": 0.59, "grad_norm": 3.7247563648076056, "learning_rate": 3.870071628747174e-06, "loss": 0.7655, "step": 7162 }, { "epoch": 0.59, "grad_norm": 5.352743472567859, "learning_rate": 3.868782134563038e-06, "loss": 1.3943, "step": 7163 }, { "epoch": 0.59, "grad_norm": 3.3405860367231086, "learning_rate": 3.867492719672352e-06, "loss": 0.4234, "step": 7164 }, { "epoch": 0.59, "grad_norm": 5.123619589734145, "learning_rate": 3.866203384165497e-06, "loss": 0.9548, "step": 7165 }, { "epoch": 0.59, "grad_norm": 1.3149482566165072, "learning_rate": 3.86491412813285e-06, "loss": 0.1749, "step": 7166 }, { "epoch": 0.59, "grad_norm": 2.0442256070497082, "learning_rate": 3.863624951664785e-06, "loss": 0.3793, "step": 7167 }, { "epoch": 0.59, "grad_norm": 3.1812285417600368, "learning_rate": 3.862335854851664e-06, "loss": 0.7994, "step": 7168 }, { "epoch": 0.59, "grad_norm": 3.998063794562161, "learning_rate": 3.861046837783847e-06, "loss": 0.5162, "step": 7169 }, { "epoch": 0.59, "grad_norm": 1.6757687591265147, "learning_rate": 3.859757900551691e-06, "loss": 0.2664, "step": 7170 }, { "epoch": 0.59, "grad_norm": 4.706957271055265, "learning_rate": 3.8584690432455456e-06, "loss": 1.1055, "step": 7171 }, { "epoch": 0.59, "grad_norm": 3.327806797529627, "learning_rate": 3.85718026595575e-06, "loss": 0.6231, "step": 7172 }, { "epoch": 0.59, "grad_norm": 3.6424505231979754, "learning_rate": 3.855891568772646e-06, "loss": 0.8499, "step": 7173 }, { "epoch": 0.59, "grad_norm": 0.8250076554024681, "learning_rate": 3.854602951786562e-06, "loss": 0.1412, "step": 7174 }, { "epoch": 0.59, "grad_norm": 2.9142664387470116, "learning_rate": 3.8533144150878275e-06, "loss": 0.6215, "step": 7175 }, { "epoch": 0.59, "grad_norm": 5.347449923340581, "learning_rate": 3.8520259587667605e-06, "loss": 1.1855, "step": 7176 }, { "epoch": 0.59, "grad_norm": 3.15536023408874, "learning_rate": 3.850737582913679e-06, "loss": 0.7286, "step": 7177 }, { "epoch": 0.59, "grad_norm": 3.8471299159732375, "learning_rate": 3.849449287618892e-06, "loss": 0.9961, "step": 7178 }, { "epoch": 0.59, "grad_norm": 4.839480131416237, "learning_rate": 3.848161072972702e-06, "loss": 1.4947, "step": 7179 }, { "epoch": 0.59, "grad_norm": 3.7682019393758934, "learning_rate": 3.846872939065409e-06, "loss": 0.669, "step": 7180 }, { "epoch": 0.59, "grad_norm": 3.5419341999668967, "learning_rate": 3.8455848859873035e-06, "loss": 0.7145, "step": 7181 }, { "epoch": 0.59, "grad_norm": 4.1367184258575165, "learning_rate": 3.8442969138286726e-06, "loss": 0.6236, "step": 7182 }, { "epoch": 0.59, "grad_norm": 3.862127518357182, "learning_rate": 3.843009022679799e-06, "loss": 1.1014, "step": 7183 }, { "epoch": 0.59, "grad_norm": 2.6911367705490776, "learning_rate": 3.841721212630958e-06, "loss": 0.5209, "step": 7184 }, { "epoch": 0.59, "grad_norm": 3.800166453494574, "learning_rate": 3.8404334837724205e-06, "loss": 0.5094, "step": 7185 }, { "epoch": 0.59, "grad_norm": 2.127442610276738, "learning_rate": 3.8391458361944475e-06, "loss": 0.2214, "step": 7186 }, { "epoch": 0.59, "grad_norm": 3.3877133824762238, "learning_rate": 3.837858269987299e-06, "loss": 0.8476, "step": 7187 }, { "epoch": 0.59, "grad_norm": 4.600332999607474, "learning_rate": 3.836570785241231e-06, "loss": 0.7541, "step": 7188 }, { "epoch": 0.59, "grad_norm": 3.847101954500791, "learning_rate": 3.835283382046484e-06, "loss": 0.6709, "step": 7189 }, { "epoch": 0.59, "grad_norm": 4.822372588945832, "learning_rate": 3.833996060493307e-06, "loss": 1.0825, "step": 7190 }, { "epoch": 0.59, "grad_norm": 5.36848710279312, "learning_rate": 3.832708820671928e-06, "loss": 1.3174, "step": 7191 }, { "epoch": 0.59, "grad_norm": 4.109358838896932, "learning_rate": 3.831421662672582e-06, "loss": 1.0806, "step": 7192 }, { "epoch": 0.59, "grad_norm": 5.408353703345709, "learning_rate": 3.830134586585491e-06, "loss": 0.8491, "step": 7193 }, { "epoch": 0.59, "grad_norm": 4.424293970670651, "learning_rate": 3.828847592500875e-06, "loss": 1.0811, "step": 7194 }, { "epoch": 0.59, "grad_norm": 3.882556551822584, "learning_rate": 3.827560680508946e-06, "loss": 0.8556, "step": 7195 }, { "epoch": 0.59, "grad_norm": 5.2613405765315, "learning_rate": 3.826273850699912e-06, "loss": 0.8493, "step": 7196 }, { "epoch": 0.59, "grad_norm": 4.55056975151563, "learning_rate": 3.824987103163972e-06, "loss": 0.5399, "step": 7197 }, { "epoch": 0.59, "grad_norm": 5.130506068996539, "learning_rate": 3.823700437991321e-06, "loss": 1.0217, "step": 7198 }, { "epoch": 0.59, "grad_norm": 3.646202870650748, "learning_rate": 3.822413855272151e-06, "loss": 0.7791, "step": 7199 }, { "epoch": 0.59, "grad_norm": 3.460344371701214, "learning_rate": 3.821127355096645e-06, "loss": 0.6711, "step": 7200 }, { "epoch": 0.59, "grad_norm": 2.8134014500209763, "learning_rate": 3.81984093755498e-06, "loss": 0.518, "step": 7201 }, { "epoch": 0.59, "grad_norm": 4.253356241796577, "learning_rate": 3.8185546027373325e-06, "loss": 0.807, "step": 7202 }, { "epoch": 0.59, "grad_norm": 3.7265383373452403, "learning_rate": 3.817268350733862e-06, "loss": 0.677, "step": 7203 }, { "epoch": 0.59, "grad_norm": 3.600616746223601, "learning_rate": 3.815982181634735e-06, "loss": 0.6427, "step": 7204 }, { "epoch": 0.59, "grad_norm": 2.1665978297552964, "learning_rate": 3.814696095530103e-06, "loss": 0.3392, "step": 7205 }, { "epoch": 0.59, "grad_norm": 4.16134808595628, "learning_rate": 3.813410092510116e-06, "loss": 1.0075, "step": 7206 }, { "epoch": 0.59, "grad_norm": 1.1975198351294647, "learning_rate": 3.8121241726649195e-06, "loss": 0.2097, "step": 7207 }, { "epoch": 0.59, "grad_norm": 3.6734953521052582, "learning_rate": 3.8108383360846467e-06, "loss": 0.6527, "step": 7208 }, { "epoch": 0.59, "grad_norm": 4.394152793853723, "learning_rate": 3.809552582859432e-06, "loss": 0.9742, "step": 7209 }, { "epoch": 0.59, "grad_norm": 3.63757007236222, "learning_rate": 3.8082669130793998e-06, "loss": 0.4478, "step": 7210 }, { "epoch": 0.59, "grad_norm": 1.644899905912618, "learning_rate": 3.8069813268346717e-06, "loss": 0.3406, "step": 7211 }, { "epoch": 0.59, "grad_norm": 3.91484487437852, "learning_rate": 3.8056958242153598e-06, "loss": 0.7422, "step": 7212 }, { "epoch": 0.59, "grad_norm": 4.109537964323625, "learning_rate": 3.804410405311575e-06, "loss": 0.6795, "step": 7213 }, { "epoch": 0.59, "grad_norm": 4.625631627386881, "learning_rate": 3.8031250702134148e-06, "loss": 0.9754, "step": 7214 }, { "epoch": 0.59, "grad_norm": 3.138406520861325, "learning_rate": 3.801839819010979e-06, "loss": 0.575, "step": 7215 }, { "epoch": 0.59, "grad_norm": 3.9259807505491606, "learning_rate": 3.800554651794357e-06, "loss": 0.9691, "step": 7216 }, { "epoch": 0.59, "grad_norm": 5.172195251924218, "learning_rate": 3.7992695686536345e-06, "loss": 1.3919, "step": 7217 }, { "epoch": 0.59, "grad_norm": 2.575012156658763, "learning_rate": 3.7979845696788903e-06, "loss": 0.3482, "step": 7218 }, { "epoch": 0.59, "grad_norm": 3.898379099253242, "learning_rate": 3.7966996549601968e-06, "loss": 0.6983, "step": 7219 }, { "epoch": 0.59, "grad_norm": 3.230484488211065, "learning_rate": 3.795414824587621e-06, "loss": 0.7996, "step": 7220 }, { "epoch": 0.59, "grad_norm": 3.50312455766705, "learning_rate": 3.794130078651222e-06, "loss": 0.8794, "step": 7221 }, { "epoch": 0.59, "grad_norm": 1.9917695078674407, "learning_rate": 3.7928454172410565e-06, "loss": 0.397, "step": 7222 }, { "epoch": 0.59, "grad_norm": 3.7089917338476215, "learning_rate": 3.7915608404471738e-06, "loss": 0.5728, "step": 7223 }, { "epoch": 0.59, "grad_norm": 3.7570602077823247, "learning_rate": 3.7902763483596173e-06, "loss": 0.6975, "step": 7224 }, { "epoch": 0.59, "grad_norm": 4.485543040083705, "learning_rate": 3.7889919410684262e-06, "loss": 0.6952, "step": 7225 }, { "epoch": 0.59, "grad_norm": 2.7751991440072854, "learning_rate": 3.7877076186636275e-06, "loss": 0.5311, "step": 7226 }, { "epoch": 0.59, "grad_norm": 2.946904427500743, "learning_rate": 3.7864233812352497e-06, "loss": 0.5584, "step": 7227 }, { "epoch": 0.59, "grad_norm": 3.3960138117057532, "learning_rate": 3.78513922887331e-06, "loss": 0.7777, "step": 7228 }, { "epoch": 0.59, "grad_norm": 3.8388110904642248, "learning_rate": 3.783855161667824e-06, "loss": 0.9353, "step": 7229 }, { "epoch": 0.59, "grad_norm": 4.208385056053049, "learning_rate": 3.7825711797088e-06, "loss": 0.7865, "step": 7230 }, { "epoch": 0.59, "grad_norm": 4.654728205483348, "learning_rate": 3.7812872830862363e-06, "loss": 1.0619, "step": 7231 }, { "epoch": 0.59, "grad_norm": 4.943147912524849, "learning_rate": 3.78000347189013e-06, "loss": 0.9678, "step": 7232 }, { "epoch": 0.59, "grad_norm": 1.6386007331503682, "learning_rate": 3.778719746210471e-06, "loss": 0.2088, "step": 7233 }, { "epoch": 0.59, "grad_norm": 4.974326653202862, "learning_rate": 3.777436106137244e-06, "loss": 0.8962, "step": 7234 }, { "epoch": 0.59, "grad_norm": 4.247202635701191, "learning_rate": 3.7761525517604237e-06, "loss": 0.918, "step": 7235 }, { "epoch": 0.59, "grad_norm": 3.920840009975349, "learning_rate": 3.7748690831699858e-06, "loss": 0.6603, "step": 7236 }, { "epoch": 0.59, "grad_norm": 2.024972776872732, "learning_rate": 3.7735857004558913e-06, "loss": 0.3891, "step": 7237 }, { "epoch": 0.59, "grad_norm": 1.5436469189015576, "learning_rate": 3.772302403708102e-06, "loss": 0.2604, "step": 7238 }, { "epoch": 0.59, "grad_norm": 4.024412906807474, "learning_rate": 3.7710191930165705e-06, "loss": 0.8262, "step": 7239 }, { "epoch": 0.59, "grad_norm": 6.155739632668728, "learning_rate": 3.769736068471246e-06, "loss": 1.5707, "step": 7240 }, { "epoch": 0.59, "grad_norm": 2.7645413009827906, "learning_rate": 3.7684530301620693e-06, "loss": 0.3664, "step": 7241 }, { "epoch": 0.59, "grad_norm": 3.8908487487689944, "learning_rate": 3.7671700781789753e-06, "loss": 0.7639, "step": 7242 }, { "epoch": 0.59, "grad_norm": 3.865174935670953, "learning_rate": 3.7658872126118945e-06, "loss": 0.6395, "step": 7243 }, { "epoch": 0.59, "grad_norm": 5.682703256402187, "learning_rate": 3.7646044335507474e-06, "loss": 1.2663, "step": 7244 }, { "epoch": 0.59, "grad_norm": 2.220821504408669, "learning_rate": 3.7633217410854534e-06, "loss": 0.4474, "step": 7245 }, { "epoch": 0.59, "grad_norm": 4.811613429874075, "learning_rate": 3.7620391353059232e-06, "loss": 0.9988, "step": 7246 }, { "epoch": 0.59, "grad_norm": 4.060604468939529, "learning_rate": 3.760756616302064e-06, "loss": 0.7735, "step": 7247 }, { "epoch": 0.59, "grad_norm": 5.596254542379706, "learning_rate": 3.759474184163771e-06, "loss": 0.7325, "step": 7248 }, { "epoch": 0.59, "grad_norm": 4.334206059149519, "learning_rate": 3.7581918389809384e-06, "loss": 0.7668, "step": 7249 }, { "epoch": 0.59, "grad_norm": 3.1112879445181103, "learning_rate": 3.756909580843455e-06, "loss": 0.6829, "step": 7250 }, { "epoch": 0.59, "grad_norm": 5.016626681173837, "learning_rate": 3.7556274098411993e-06, "loss": 0.9484, "step": 7251 }, { "epoch": 0.59, "grad_norm": 3.9305174867410395, "learning_rate": 3.754345326064046e-06, "loss": 0.8028, "step": 7252 }, { "epoch": 0.59, "grad_norm": 4.047174790150774, "learning_rate": 3.7530633296018664e-06, "loss": 0.8824, "step": 7253 }, { "epoch": 0.59, "grad_norm": 2.7846759776594423, "learning_rate": 3.7517814205445187e-06, "loss": 0.5059, "step": 7254 }, { "epoch": 0.59, "grad_norm": 4.173612013099389, "learning_rate": 3.7504995989818615e-06, "loss": 1.1138, "step": 7255 }, { "epoch": 0.59, "grad_norm": 1.5987499473447744, "learning_rate": 3.749217865003744e-06, "loss": 0.179, "step": 7256 }, { "epoch": 0.59, "grad_norm": 4.470430144097223, "learning_rate": 3.747936218700012e-06, "loss": 0.9139, "step": 7257 }, { "epoch": 0.59, "grad_norm": 3.7070475368116615, "learning_rate": 3.7466546601605012e-06, "loss": 0.6481, "step": 7258 }, { "epoch": 0.59, "grad_norm": 4.589366742179254, "learning_rate": 3.745373189475046e-06, "loss": 1.0235, "step": 7259 }, { "epoch": 0.59, "grad_norm": 3.8524322868345173, "learning_rate": 3.744091806733468e-06, "loss": 0.8042, "step": 7260 }, { "epoch": 0.59, "grad_norm": 5.538043867165417, "learning_rate": 3.742810512025589e-06, "loss": 1.122, "step": 7261 }, { "epoch": 0.59, "grad_norm": 4.42242285248518, "learning_rate": 3.7415293054412216e-06, "loss": 1.1899, "step": 7262 }, { "epoch": 0.59, "grad_norm": 5.037165970787136, "learning_rate": 3.7402481870701722e-06, "loss": 1.1023, "step": 7263 }, { "epoch": 0.59, "grad_norm": 3.7586712690069137, "learning_rate": 3.7389671570022445e-06, "loss": 0.6374, "step": 7264 }, { "epoch": 0.59, "grad_norm": 3.649014060677766, "learning_rate": 3.7376862153272307e-06, "loss": 0.6938, "step": 7265 }, { "epoch": 0.59, "grad_norm": 3.3655819352546277, "learning_rate": 3.7364053621349193e-06, "loss": 0.8945, "step": 7266 }, { "epoch": 0.59, "grad_norm": 2.0661749900771826, "learning_rate": 3.7351245975150924e-06, "loss": 0.3209, "step": 7267 }, { "epoch": 0.59, "grad_norm": 3.109721130348778, "learning_rate": 3.733843921557526e-06, "loss": 0.8675, "step": 7268 }, { "epoch": 0.59, "grad_norm": 3.4110127821534957, "learning_rate": 3.7325633343519907e-06, "loss": 0.4387, "step": 7269 }, { "epoch": 0.59, "grad_norm": 1.3270126397093516, "learning_rate": 3.731282835988252e-06, "loss": 0.1857, "step": 7270 }, { "epoch": 0.59, "grad_norm": 3.1982952028614506, "learning_rate": 3.7300024265560623e-06, "loss": 0.6263, "step": 7271 }, { "epoch": 0.59, "grad_norm": 4.054983253419086, "learning_rate": 3.7287221061451763e-06, "loss": 0.6723, "step": 7272 }, { "epoch": 0.59, "grad_norm": 3.006387213108985, "learning_rate": 3.7274418748453378e-06, "loss": 0.5593, "step": 7273 }, { "epoch": 0.59, "grad_norm": 3.177843696204133, "learning_rate": 3.7261617327462857e-06, "loss": 0.4144, "step": 7274 }, { "epoch": 0.59, "grad_norm": 2.154411830595848, "learning_rate": 3.7248816799377517e-06, "loss": 0.2859, "step": 7275 }, { "epoch": 0.59, "grad_norm": 3.7085682586083197, "learning_rate": 3.723601716509465e-06, "loss": 0.7336, "step": 7276 }, { "epoch": 0.59, "grad_norm": 4.532344507880409, "learning_rate": 3.7223218425511416e-06, "loss": 0.743, "step": 7277 }, { "epoch": 0.59, "grad_norm": 4.1145090200213295, "learning_rate": 3.721042058152496e-06, "loss": 1.0092, "step": 7278 }, { "epoch": 0.59, "grad_norm": 2.9824707968840727, "learning_rate": 3.719762363403236e-06, "loss": 0.6897, "step": 7279 }, { "epoch": 0.6, "grad_norm": 4.93318616633958, "learning_rate": 3.718482758393064e-06, "loss": 1.0741, "step": 7280 }, { "epoch": 0.6, "grad_norm": 3.873862285417129, "learning_rate": 3.7172032432116724e-06, "loss": 0.6303, "step": 7281 }, { "epoch": 0.6, "grad_norm": 4.028763878606627, "learning_rate": 3.715923817948752e-06, "loss": 0.952, "step": 7282 }, { "epoch": 0.6, "grad_norm": 4.664191244431255, "learning_rate": 3.7146444826939828e-06, "loss": 0.8097, "step": 7283 }, { "epoch": 0.6, "grad_norm": 5.243476704374192, "learning_rate": 3.7133652375370404e-06, "loss": 0.9209, "step": 7284 }, { "epoch": 0.6, "grad_norm": 3.81678470925151, "learning_rate": 3.7120860825675965e-06, "loss": 0.5091, "step": 7285 }, { "epoch": 0.6, "grad_norm": 4.085855589423921, "learning_rate": 3.710807017875312e-06, "loss": 0.775, "step": 7286 }, { "epoch": 0.6, "grad_norm": 3.6442545721216475, "learning_rate": 3.7095280435498476e-06, "loss": 1.1102, "step": 7287 }, { "epoch": 0.6, "grad_norm": 3.050918337247399, "learning_rate": 3.708249159680849e-06, "loss": 0.4903, "step": 7288 }, { "epoch": 0.6, "grad_norm": 2.5453549381261493, "learning_rate": 3.7069703663579626e-06, "loss": 0.5086, "step": 7289 }, { "epoch": 0.6, "grad_norm": 2.55975029920748, "learning_rate": 3.7056916636708275e-06, "loss": 0.4368, "step": 7290 }, { "epoch": 0.6, "grad_norm": 3.9211923351279503, "learning_rate": 3.7044130517090725e-06, "loss": 0.7221, "step": 7291 }, { "epoch": 0.6, "grad_norm": 3.8695486692822603, "learning_rate": 3.7031345305623247e-06, "loss": 0.5645, "step": 7292 }, { "epoch": 0.6, "grad_norm": 5.0322526274600685, "learning_rate": 3.701856100320205e-06, "loss": 0.993, "step": 7293 }, { "epoch": 0.6, "grad_norm": 3.397881077392223, "learning_rate": 3.700577761072319e-06, "loss": 0.9325, "step": 7294 }, { "epoch": 0.6, "grad_norm": 5.109022269602481, "learning_rate": 3.6992995129082787e-06, "loss": 0.9902, "step": 7295 }, { "epoch": 0.6, "grad_norm": 3.0430714662205114, "learning_rate": 3.6980213559176806e-06, "loss": 0.5342, "step": 7296 }, { "epoch": 0.6, "grad_norm": 2.979238248294013, "learning_rate": 3.6967432901901214e-06, "loss": 0.8024, "step": 7297 }, { "epoch": 0.6, "grad_norm": 3.357106852408541, "learning_rate": 3.695465315815184e-06, "loss": 0.6754, "step": 7298 }, { "epoch": 0.6, "grad_norm": 6.536203245639044, "learning_rate": 3.6941874328824528e-06, "loss": 1.4261, "step": 7299 }, { "epoch": 0.6, "grad_norm": 4.114188179870397, "learning_rate": 3.692909641481498e-06, "loss": 1.0592, "step": 7300 }, { "epoch": 0.6, "grad_norm": 3.7910836878874328, "learning_rate": 3.691631941701889e-06, "loss": 0.7355, "step": 7301 }, { "epoch": 0.6, "grad_norm": 3.512324231847214, "learning_rate": 3.690354333633186e-06, "loss": 0.7389, "step": 7302 }, { "epoch": 0.6, "grad_norm": 4.08733216422255, "learning_rate": 3.689076817364945e-06, "loss": 0.6672, "step": 7303 }, { "epoch": 0.6, "grad_norm": 2.5066420531243563, "learning_rate": 3.6877993929867146e-06, "loss": 0.5613, "step": 7304 }, { "epoch": 0.6, "grad_norm": 4.030634115273332, "learning_rate": 3.6865220605880363e-06, "loss": 1.0986, "step": 7305 }, { "epoch": 0.6, "grad_norm": 5.683643009920432, "learning_rate": 3.6852448202584457e-06, "loss": 0.9423, "step": 7306 }, { "epoch": 0.6, "grad_norm": 2.59612249188272, "learning_rate": 3.6839676720874695e-06, "loss": 0.4942, "step": 7307 }, { "epoch": 0.6, "grad_norm": 4.3689628530978375, "learning_rate": 3.6826906161646325e-06, "loss": 0.9352, "step": 7308 }, { "epoch": 0.6, "grad_norm": 3.9925971887147798, "learning_rate": 3.681413652579451e-06, "loss": 0.4234, "step": 7309 }, { "epoch": 0.6, "grad_norm": 2.259614527439117, "learning_rate": 3.680136781421435e-06, "loss": 0.3901, "step": 7310 }, { "epoch": 0.6, "grad_norm": 3.3481061105666785, "learning_rate": 3.6788600027800847e-06, "loss": 0.8297, "step": 7311 }, { "epoch": 0.6, "grad_norm": 4.272554803335286, "learning_rate": 3.677583316744899e-06, "loss": 1.3087, "step": 7312 }, { "epoch": 0.6, "grad_norm": 2.2133791927491147, "learning_rate": 3.6763067234053686e-06, "loss": 0.3896, "step": 7313 }, { "epoch": 0.6, "grad_norm": 3.4051697569394843, "learning_rate": 3.6750302228509747e-06, "loss": 0.6691, "step": 7314 }, { "epoch": 0.6, "grad_norm": 6.793644442129009, "learning_rate": 3.6737538151711965e-06, "loss": 1.5565, "step": 7315 }, { "epoch": 0.6, "grad_norm": 2.1024584202356036, "learning_rate": 3.6724775004555056e-06, "loss": 0.4786, "step": 7316 }, { "epoch": 0.6, "grad_norm": 3.2672856545005318, "learning_rate": 3.6712012787933627e-06, "loss": 0.8188, "step": 7317 }, { "epoch": 0.6, "grad_norm": 3.534730237228595, "learning_rate": 3.669925150274227e-06, "loss": 0.7064, "step": 7318 }, { "epoch": 0.6, "grad_norm": 4.176559579252146, "learning_rate": 3.66864911498755e-06, "loss": 1.1786, "step": 7319 }, { "epoch": 0.6, "grad_norm": 3.01451385356136, "learning_rate": 3.667373173022777e-06, "loss": 0.6224, "step": 7320 }, { "epoch": 0.6, "grad_norm": 2.702301279569575, "learning_rate": 3.6660973244693443e-06, "loss": 0.4707, "step": 7321 }, { "epoch": 0.6, "grad_norm": 2.6518809111910415, "learning_rate": 3.6648215694166854e-06, "loss": 0.6508, "step": 7322 }, { "epoch": 0.6, "grad_norm": 4.837301252009652, "learning_rate": 3.663545907954222e-06, "loss": 1.0315, "step": 7323 }, { "epoch": 0.6, "grad_norm": 3.467377058353138, "learning_rate": 3.662270340171374e-06, "loss": 0.9296, "step": 7324 }, { "epoch": 0.6, "grad_norm": 2.037556790617754, "learning_rate": 3.660994866157553e-06, "loss": 0.4212, "step": 7325 }, { "epoch": 0.6, "grad_norm": 2.7411227792089052, "learning_rate": 3.659719486002165e-06, "loss": 0.5611, "step": 7326 }, { "epoch": 0.6, "grad_norm": 4.029110782388751, "learning_rate": 3.65844419979461e-06, "loss": 0.7441, "step": 7327 }, { "epoch": 0.6, "grad_norm": 3.598556112905251, "learning_rate": 3.6571690076242762e-06, "loss": 0.5019, "step": 7328 }, { "epoch": 0.6, "grad_norm": 3.103631475632426, "learning_rate": 3.6558939095805524e-06, "loss": 0.669, "step": 7329 }, { "epoch": 0.6, "grad_norm": 4.278943860233236, "learning_rate": 3.654618905752814e-06, "loss": 0.792, "step": 7330 }, { "epoch": 0.6, "grad_norm": 3.379718370226008, "learning_rate": 3.6533439962304363e-06, "loss": 0.6466, "step": 7331 }, { "epoch": 0.6, "grad_norm": 3.2511593392125406, "learning_rate": 3.6520691811027833e-06, "loss": 0.6463, "step": 7332 }, { "epoch": 0.6, "grad_norm": 3.9418092034160632, "learning_rate": 3.650794460459216e-06, "loss": 0.7541, "step": 7333 }, { "epoch": 0.6, "grad_norm": 3.3988686548257045, "learning_rate": 3.6495198343890834e-06, "loss": 0.5703, "step": 7334 }, { "epoch": 0.6, "grad_norm": 2.596962690304631, "learning_rate": 3.6482453029817335e-06, "loss": 0.3518, "step": 7335 }, { "epoch": 0.6, "grad_norm": 3.3513090857832393, "learning_rate": 3.6469708663265058e-06, "loss": 0.3199, "step": 7336 }, { "epoch": 0.6, "grad_norm": 4.101769394719204, "learning_rate": 3.645696524512731e-06, "loss": 0.713, "step": 7337 }, { "epoch": 0.6, "grad_norm": 3.2251311010046115, "learning_rate": 3.6444222776297356e-06, "loss": 0.7979, "step": 7338 }, { "epoch": 0.6, "grad_norm": 1.6622082215963914, "learning_rate": 3.6431481257668417e-06, "loss": 0.2782, "step": 7339 }, { "epoch": 0.6, "grad_norm": 2.8688816543762545, "learning_rate": 3.641874069013357e-06, "loss": 0.4553, "step": 7340 }, { "epoch": 0.6, "grad_norm": 3.884857490036489, "learning_rate": 3.640600107458589e-06, "loss": 0.6496, "step": 7341 }, { "epoch": 0.6, "grad_norm": 2.5034422972171426, "learning_rate": 3.639326241191837e-06, "loss": 0.4517, "step": 7342 }, { "epoch": 0.6, "grad_norm": 4.427570284980379, "learning_rate": 3.6380524703023955e-06, "loss": 1.0919, "step": 7343 }, { "epoch": 0.6, "grad_norm": 3.414248128192576, "learning_rate": 3.636778794879548e-06, "loss": 0.6897, "step": 7344 }, { "epoch": 0.6, "grad_norm": 4.009681478728431, "learning_rate": 3.6355052150125756e-06, "loss": 0.8974, "step": 7345 }, { "epoch": 0.6, "grad_norm": 3.60090679232242, "learning_rate": 3.6342317307907476e-06, "loss": 0.7755, "step": 7346 }, { "epoch": 0.6, "grad_norm": 5.080563646163504, "learning_rate": 3.632958342303331e-06, "loss": 1.1131, "step": 7347 }, { "epoch": 0.6, "grad_norm": 2.6892240634984996, "learning_rate": 3.6316850496395863e-06, "loss": 0.7426, "step": 7348 }, { "epoch": 0.6, "grad_norm": 3.709703790317675, "learning_rate": 3.630411852888763e-06, "loss": 0.776, "step": 7349 }, { "epoch": 0.6, "grad_norm": 2.7744602064414847, "learning_rate": 3.6291387521401116e-06, "loss": 0.3538, "step": 7350 }, { "epoch": 0.6, "grad_norm": 4.682406043099396, "learning_rate": 3.6278657474828655e-06, "loss": 1.0515, "step": 7351 }, { "epoch": 0.6, "grad_norm": 3.959454819170579, "learning_rate": 3.62659283900626e-06, "loss": 0.477, "step": 7352 }, { "epoch": 0.6, "grad_norm": 3.224783213547431, "learning_rate": 3.625320026799518e-06, "loss": 0.7098, "step": 7353 }, { "epoch": 0.6, "grad_norm": 2.6252261600463815, "learning_rate": 3.6240473109518595e-06, "loss": 0.3263, "step": 7354 }, { "epoch": 0.6, "grad_norm": 4.377159100802485, "learning_rate": 3.6227746915524964e-06, "loss": 1.1255, "step": 7355 }, { "epoch": 0.6, "grad_norm": 4.271968038367136, "learning_rate": 3.621502168690636e-06, "loss": 0.6547, "step": 7356 }, { "epoch": 0.6, "grad_norm": 3.449237596352372, "learning_rate": 3.6202297424554723e-06, "loss": 0.7299, "step": 7357 }, { "epoch": 0.6, "grad_norm": 1.6632260784943322, "learning_rate": 3.618957412936199e-06, "loss": 0.23, "step": 7358 }, { "epoch": 0.6, "grad_norm": 2.7017260069150733, "learning_rate": 3.6176851802220015e-06, "loss": 0.4136, "step": 7359 }, { "epoch": 0.6, "grad_norm": 3.1140772619513952, "learning_rate": 3.6164130444020557e-06, "loss": 0.6248, "step": 7360 }, { "epoch": 0.6, "grad_norm": 4.261273199477408, "learning_rate": 3.6151410055655346e-06, "loss": 0.9102, "step": 7361 }, { "epoch": 0.6, "grad_norm": 3.254035443186992, "learning_rate": 3.613869063801604e-06, "loss": 0.5402, "step": 7362 }, { "epoch": 0.6, "grad_norm": 3.826738840401367, "learning_rate": 3.6125972191994167e-06, "loss": 0.8809, "step": 7363 }, { "epoch": 0.6, "grad_norm": 4.627710868115313, "learning_rate": 3.611325471848127e-06, "loss": 1.0343, "step": 7364 }, { "epoch": 0.6, "grad_norm": 4.942724274888949, "learning_rate": 3.6100538218368788e-06, "loss": 1.0607, "step": 7365 }, { "epoch": 0.6, "grad_norm": 2.6652416919731916, "learning_rate": 3.608782269254809e-06, "loss": 0.7414, "step": 7366 }, { "epoch": 0.6, "grad_norm": 6.567687807888815, "learning_rate": 3.6075108141910477e-06, "loss": 1.2659, "step": 7367 }, { "epoch": 0.6, "grad_norm": 5.565997631667151, "learning_rate": 3.606239456734718e-06, "loss": 1.0212, "step": 7368 }, { "epoch": 0.6, "grad_norm": 2.7521879553116104, "learning_rate": 3.604968196974936e-06, "loss": 0.4801, "step": 7369 }, { "epoch": 0.6, "grad_norm": 4.227513652989928, "learning_rate": 3.6036970350008117e-06, "loss": 0.8487, "step": 7370 }, { "epoch": 0.6, "grad_norm": 3.8028013106733445, "learning_rate": 3.6024259709014485e-06, "loss": 0.7161, "step": 7371 }, { "epoch": 0.6, "grad_norm": 4.561632017753492, "learning_rate": 3.601155004765943e-06, "loss": 0.9349, "step": 7372 }, { "epoch": 0.6, "grad_norm": 3.8234288647297845, "learning_rate": 3.599884136683386e-06, "loss": 0.6112, "step": 7373 }, { "epoch": 0.6, "grad_norm": 2.1618931501398477, "learning_rate": 3.5986133667428552e-06, "loss": 0.29, "step": 7374 }, { "epoch": 0.6, "grad_norm": 4.341504592456218, "learning_rate": 3.59734269503343e-06, "loss": 0.9025, "step": 7375 }, { "epoch": 0.6, "grad_norm": 5.567997868746615, "learning_rate": 3.596072121644176e-06, "loss": 1.6976, "step": 7376 }, { "epoch": 0.6, "grad_norm": 3.8884371462644887, "learning_rate": 3.5948016466641565e-06, "loss": 0.9424, "step": 7377 }, { "epoch": 0.6, "grad_norm": 3.3051617998588774, "learning_rate": 3.593531270182426e-06, "loss": 0.6257, "step": 7378 }, { "epoch": 0.6, "grad_norm": 4.1393146329806285, "learning_rate": 3.5922609922880347e-06, "loss": 0.5962, "step": 7379 }, { "epoch": 0.6, "grad_norm": 4.380352244126183, "learning_rate": 3.5909908130700196e-06, "loss": 0.8863, "step": 7380 }, { "epoch": 0.6, "grad_norm": 3.7033105237332506, "learning_rate": 3.5897207326174162e-06, "loss": 0.7133, "step": 7381 }, { "epoch": 0.6, "grad_norm": 4.520638027193085, "learning_rate": 3.5884507510192524e-06, "loss": 0.9772, "step": 7382 }, { "epoch": 0.6, "grad_norm": 6.268886224791158, "learning_rate": 3.5871808683645475e-06, "loss": 1.332, "step": 7383 }, { "epoch": 0.6, "grad_norm": 5.086688366684082, "learning_rate": 3.585911084742315e-06, "loss": 0.7312, "step": 7384 }, { "epoch": 0.6, "grad_norm": 5.756162826036348, "learning_rate": 3.584641400241563e-06, "loss": 1.1717, "step": 7385 }, { "epoch": 0.6, "grad_norm": 5.053332697897676, "learning_rate": 3.5833718149512874e-06, "loss": 0.8567, "step": 7386 }, { "epoch": 0.6, "grad_norm": 3.551526636895038, "learning_rate": 3.582102328960483e-06, "loss": 0.7164, "step": 7387 }, { "epoch": 0.6, "grad_norm": 4.299671804905078, "learning_rate": 3.580832942358134e-06, "loss": 0.7368, "step": 7388 }, { "epoch": 0.6, "grad_norm": 1.5397836578283757, "learning_rate": 3.5795636552332203e-06, "loss": 0.1627, "step": 7389 }, { "epoch": 0.6, "grad_norm": 2.319864688208959, "learning_rate": 3.5782944676747135e-06, "loss": 0.3598, "step": 7390 }, { "epoch": 0.6, "grad_norm": 3.855430350446534, "learning_rate": 3.5770253797715747e-06, "loss": 0.8059, "step": 7391 }, { "epoch": 0.6, "grad_norm": 3.4382552861938604, "learning_rate": 3.5757563916127665e-06, "loss": 0.848, "step": 7392 }, { "epoch": 0.6, "grad_norm": 5.002761732304879, "learning_rate": 3.574487503287235e-06, "loss": 0.9404, "step": 7393 }, { "epoch": 0.6, "grad_norm": 3.8270837023387534, "learning_rate": 3.5732187148839257e-06, "loss": 0.5003, "step": 7394 }, { "epoch": 0.6, "grad_norm": 4.440678145686728, "learning_rate": 3.571950026491776e-06, "loss": 1.0051, "step": 7395 }, { "epoch": 0.6, "grad_norm": 3.452605562559225, "learning_rate": 3.5706814381997157e-06, "loss": 0.6493, "step": 7396 }, { "epoch": 0.6, "grad_norm": 5.001457635681139, "learning_rate": 3.5694129500966645e-06, "loss": 0.7764, "step": 7397 }, { "epoch": 0.6, "grad_norm": 2.664373470123157, "learning_rate": 3.5681445622715396e-06, "loss": 0.5855, "step": 7398 }, { "epoch": 0.6, "grad_norm": 3.8412498830805792, "learning_rate": 3.56687627481325e-06, "loss": 0.6159, "step": 7399 }, { "epoch": 0.6, "grad_norm": 3.8788036431099524, "learning_rate": 3.5656080878106957e-06, "loss": 0.6772, "step": 7400 }, { "epoch": 0.6, "grad_norm": 5.203657324782098, "learning_rate": 3.5643400013527723e-06, "loss": 1.1349, "step": 7401 }, { "epoch": 0.61, "grad_norm": 3.3686955244781855, "learning_rate": 3.5630720155283686e-06, "loss": 0.5233, "step": 7402 }, { "epoch": 0.61, "grad_norm": 1.3251001828269917, "learning_rate": 3.561804130426361e-06, "loss": 0.2024, "step": 7403 }, { "epoch": 0.61, "grad_norm": 3.5517867637136455, "learning_rate": 3.560536346135625e-06, "loss": 0.4885, "step": 7404 }, { "epoch": 0.61, "grad_norm": 3.65419707229043, "learning_rate": 3.559268662745027e-06, "loss": 0.8909, "step": 7405 }, { "epoch": 0.61, "grad_norm": 3.0702596670441102, "learning_rate": 3.5580010803434254e-06, "loss": 0.6835, "step": 7406 }, { "epoch": 0.61, "grad_norm": 2.9999027181662297, "learning_rate": 3.5567335990196725e-06, "loss": 0.4085, "step": 7407 }, { "epoch": 0.61, "grad_norm": 2.6525097086960625, "learning_rate": 3.5554662188626147e-06, "loss": 0.5371, "step": 7408 }, { "epoch": 0.61, "grad_norm": 1.987470859525362, "learning_rate": 3.5541989399610866e-06, "loss": 0.1876, "step": 7409 }, { "epoch": 0.61, "grad_norm": 3.367384021483217, "learning_rate": 3.5529317624039205e-06, "loss": 0.4064, "step": 7410 }, { "epoch": 0.61, "grad_norm": 2.4214064440204153, "learning_rate": 3.5516646862799404e-06, "loss": 0.414, "step": 7411 }, { "epoch": 0.61, "grad_norm": 1.4276191576511317, "learning_rate": 3.5503977116779624e-06, "loss": 0.2154, "step": 7412 }, { "epoch": 0.61, "grad_norm": 4.0645046994455445, "learning_rate": 3.5491308386867983e-06, "loss": 0.9574, "step": 7413 }, { "epoch": 0.61, "grad_norm": 2.187906423787895, "learning_rate": 3.5478640673952456e-06, "loss": 0.3924, "step": 7414 }, { "epoch": 0.61, "grad_norm": 3.386770954240138, "learning_rate": 3.5465973978921042e-06, "loss": 0.9238, "step": 7415 }, { "epoch": 0.61, "grad_norm": 3.7501762987264438, "learning_rate": 3.545330830266158e-06, "loss": 0.5472, "step": 7416 }, { "epoch": 0.61, "grad_norm": 3.476586699802735, "learning_rate": 3.54406436460619e-06, "loss": 0.3458, "step": 7417 }, { "epoch": 0.61, "grad_norm": 3.781993328385959, "learning_rate": 3.5427980010009746e-06, "loss": 0.713, "step": 7418 }, { "epoch": 0.61, "grad_norm": 4.432373455583828, "learning_rate": 3.541531739539279e-06, "loss": 0.9409, "step": 7419 }, { "epoch": 0.61, "grad_norm": 3.6775218937185987, "learning_rate": 3.540265580309859e-06, "loss": 0.8305, "step": 7420 }, { "epoch": 0.61, "grad_norm": 3.84216649186372, "learning_rate": 3.538999523401469e-06, "loss": 0.723, "step": 7421 }, { "epoch": 0.61, "grad_norm": 3.6949795452225156, "learning_rate": 3.5377335689028556e-06, "loss": 0.5788, "step": 7422 }, { "epoch": 0.61, "grad_norm": 3.958017014079015, "learning_rate": 3.536467716902754e-06, "loss": 1.0746, "step": 7423 }, { "epoch": 0.61, "grad_norm": 1.173675695552111, "learning_rate": 3.5352019674898956e-06, "loss": 0.1809, "step": 7424 }, { "epoch": 0.61, "grad_norm": 3.5554370486237157, "learning_rate": 3.533936320753007e-06, "loss": 0.786, "step": 7425 }, { "epoch": 0.61, "grad_norm": 3.5327408285560624, "learning_rate": 3.5326707767808e-06, "loss": 0.5137, "step": 7426 }, { "epoch": 0.61, "grad_norm": 5.520860779424227, "learning_rate": 3.5314053356619852e-06, "loss": 1.2974, "step": 7427 }, { "epoch": 0.61, "grad_norm": 4.882673289960211, "learning_rate": 3.5301399974852656e-06, "loss": 0.9882, "step": 7428 }, { "epoch": 0.61, "grad_norm": 4.430372009046639, "learning_rate": 3.528874762339336e-06, "loss": 0.6721, "step": 7429 }, { "epoch": 0.61, "grad_norm": 2.8215136183775615, "learning_rate": 3.5276096303128837e-06, "loss": 0.5899, "step": 7430 }, { "epoch": 0.61, "grad_norm": 3.9066179246724344, "learning_rate": 3.526344601494588e-06, "loss": 0.8211, "step": 7431 }, { "epoch": 0.61, "grad_norm": 2.62528976844446, "learning_rate": 3.525079675973121e-06, "loss": 0.6035, "step": 7432 }, { "epoch": 0.61, "grad_norm": 4.377924604021585, "learning_rate": 3.5238148538371506e-06, "loss": 0.9099, "step": 7433 }, { "epoch": 0.61, "grad_norm": 3.741153913875198, "learning_rate": 3.5225501351753346e-06, "loss": 0.5172, "step": 7434 }, { "epoch": 0.61, "grad_norm": 3.934678384231139, "learning_rate": 3.5212855200763237e-06, "loss": 0.9718, "step": 7435 }, { "epoch": 0.61, "grad_norm": 4.491095188918565, "learning_rate": 3.5200210086287646e-06, "loss": 1.1595, "step": 7436 }, { "epoch": 0.61, "grad_norm": 5.229394049045571, "learning_rate": 3.51875660092129e-06, "loss": 1.2175, "step": 7437 }, { "epoch": 0.61, "grad_norm": 1.1439429695257528, "learning_rate": 3.5174922970425317e-06, "loss": 0.1642, "step": 7438 }, { "epoch": 0.61, "grad_norm": 2.755070223779588, "learning_rate": 3.5162280970811115e-06, "loss": 0.3445, "step": 7439 }, { "epoch": 0.61, "grad_norm": 2.578523707958233, "learning_rate": 3.5149640011256438e-06, "loss": 0.5381, "step": 7440 }, { "epoch": 0.61, "grad_norm": 4.715438769454317, "learning_rate": 3.5137000092647366e-06, "loss": 0.8441, "step": 7441 }, { "epoch": 0.61, "grad_norm": 2.880341497534685, "learning_rate": 3.512436121586993e-06, "loss": 0.5466, "step": 7442 }, { "epoch": 0.61, "grad_norm": 4.493807636256478, "learning_rate": 3.5111723381810005e-06, "loss": 0.9816, "step": 7443 }, { "epoch": 0.61, "grad_norm": 1.8737191790534322, "learning_rate": 3.509908659135348e-06, "loss": 0.4204, "step": 7444 }, { "epoch": 0.61, "grad_norm": 2.901209827546255, "learning_rate": 3.5086450845386145e-06, "loss": 0.5692, "step": 7445 }, { "epoch": 0.61, "grad_norm": 4.1984061838214535, "learning_rate": 3.5073816144793695e-06, "loss": 0.6392, "step": 7446 }, { "epoch": 0.61, "grad_norm": 6.097471089521399, "learning_rate": 3.5061182490461775e-06, "loss": 1.7763, "step": 7447 }, { "epoch": 0.61, "grad_norm": 4.848732075469702, "learning_rate": 3.5048549883275962e-06, "loss": 0.867, "step": 7448 }, { "epoch": 0.61, "grad_norm": 4.370455763998571, "learning_rate": 3.503591832412172e-06, "loss": 1.0785, "step": 7449 }, { "epoch": 0.61, "grad_norm": 4.194037593480006, "learning_rate": 3.5023287813884476e-06, "loss": 1.0598, "step": 7450 }, { "epoch": 0.61, "grad_norm": 3.4061901521566673, "learning_rate": 3.5010658353449576e-06, "loss": 0.9436, "step": 7451 }, { "epoch": 0.61, "grad_norm": 3.807685581479717, "learning_rate": 3.4998029943702305e-06, "loss": 0.7093, "step": 7452 }, { "epoch": 0.61, "grad_norm": 4.000582686325802, "learning_rate": 3.498540258552785e-06, "loss": 0.7585, "step": 7453 }, { "epoch": 0.61, "grad_norm": 3.807383909205339, "learning_rate": 3.497277627981132e-06, "loss": 0.7692, "step": 7454 }, { "epoch": 0.61, "grad_norm": 4.908451982034939, "learning_rate": 3.496015102743777e-06, "loss": 0.7361, "step": 7455 }, { "epoch": 0.61, "grad_norm": 4.320412268232292, "learning_rate": 3.4947526829292177e-06, "loss": 0.9214, "step": 7456 }, { "epoch": 0.61, "grad_norm": 3.731564301514826, "learning_rate": 3.4934903686259445e-06, "loss": 0.8081, "step": 7457 }, { "epoch": 0.61, "grad_norm": 2.433002279457299, "learning_rate": 3.4922281599224404e-06, "loss": 0.515, "step": 7458 }, { "epoch": 0.61, "grad_norm": 4.935834415095173, "learning_rate": 3.4909660569071823e-06, "loss": 1.3236, "step": 7459 }, { "epoch": 0.61, "grad_norm": 3.0732773788320507, "learning_rate": 3.4897040596686345e-06, "loss": 0.7249, "step": 7460 }, { "epoch": 0.61, "grad_norm": 2.9806807216526545, "learning_rate": 3.4884421682952596e-06, "loss": 0.5422, "step": 7461 }, { "epoch": 0.61, "grad_norm": 3.475308889726864, "learning_rate": 3.4871803828755102e-06, "loss": 0.8781, "step": 7462 }, { "epoch": 0.61, "grad_norm": 4.0605076079031805, "learning_rate": 3.4859187034978315e-06, "loss": 0.8476, "step": 7463 }, { "epoch": 0.61, "grad_norm": 3.7484248994060683, "learning_rate": 3.4846571302506624e-06, "loss": 0.5632, "step": 7464 }, { "epoch": 0.61, "grad_norm": 4.288806473145816, "learning_rate": 3.4833956632224364e-06, "loss": 0.6753, "step": 7465 }, { "epoch": 0.61, "grad_norm": 1.7944051302550061, "learning_rate": 3.482134302501572e-06, "loss": 0.3302, "step": 7466 }, { "epoch": 0.61, "grad_norm": 4.089755641139934, "learning_rate": 3.480873048176486e-06, "loss": 0.8287, "step": 7467 }, { "epoch": 0.61, "grad_norm": 3.201852703903197, "learning_rate": 3.47961190033559e-06, "loss": 0.5108, "step": 7468 }, { "epoch": 0.61, "grad_norm": 5.298715241648617, "learning_rate": 3.478350859067282e-06, "loss": 1.2063, "step": 7469 }, { "epoch": 0.61, "grad_norm": 3.5977723557337526, "learning_rate": 3.477089924459959e-06, "loss": 0.5909, "step": 7470 }, { "epoch": 0.61, "grad_norm": 4.519710106347817, "learning_rate": 3.475829096602002e-06, "loss": 0.8826, "step": 7471 }, { "epoch": 0.61, "grad_norm": 3.543261817142673, "learning_rate": 3.4745683755817917e-06, "loss": 0.6256, "step": 7472 }, { "epoch": 0.61, "grad_norm": 3.2491847333109622, "learning_rate": 3.4733077614877003e-06, "loss": 0.7993, "step": 7473 }, { "epoch": 0.61, "grad_norm": 3.2529523946365866, "learning_rate": 3.472047254408091e-06, "loss": 0.9358, "step": 7474 }, { "epoch": 0.61, "grad_norm": 4.239751609942161, "learning_rate": 3.4707868544313196e-06, "loss": 0.9912, "step": 7475 }, { "epoch": 0.61, "grad_norm": 3.688500912592552, "learning_rate": 3.469526561645735e-06, "loss": 0.8995, "step": 7476 }, { "epoch": 0.61, "grad_norm": 4.623117656250236, "learning_rate": 3.4682663761396773e-06, "loss": 0.8724, "step": 7477 }, { "epoch": 0.61, "grad_norm": 3.9838805874911793, "learning_rate": 3.4670062980014795e-06, "loss": 0.9372, "step": 7478 }, { "epoch": 0.61, "grad_norm": 4.245121124665357, "learning_rate": 3.465746327319469e-06, "loss": 1.4008, "step": 7479 }, { "epoch": 0.61, "grad_norm": 4.453068472780212, "learning_rate": 3.4644864641819635e-06, "loss": 1.1487, "step": 7480 }, { "epoch": 0.61, "grad_norm": 1.5552816233293498, "learning_rate": 3.463226708677275e-06, "loss": 0.3086, "step": 7481 }, { "epoch": 0.61, "grad_norm": 5.190192010096277, "learning_rate": 3.4619670608937074e-06, "loss": 1.0783, "step": 7482 }, { "epoch": 0.61, "grad_norm": 3.978473378064943, "learning_rate": 3.460707520919554e-06, "loss": 1.2116, "step": 7483 }, { "epoch": 0.61, "grad_norm": 4.974293050204034, "learning_rate": 3.4594480888431046e-06, "loss": 0.9243, "step": 7484 }, { "epoch": 0.61, "grad_norm": 4.470372717599689, "learning_rate": 3.4581887647526393e-06, "loss": 0.7718, "step": 7485 }, { "epoch": 0.61, "grad_norm": 3.1744644582526265, "learning_rate": 3.456929548736431e-06, "loss": 0.8462, "step": 7486 }, { "epoch": 0.61, "grad_norm": 3.8233489871206396, "learning_rate": 3.455670440882746e-06, "loss": 0.5939, "step": 7487 }, { "epoch": 0.61, "grad_norm": 3.1547175216729215, "learning_rate": 3.4544114412798447e-06, "loss": 0.5356, "step": 7488 }, { "epoch": 0.61, "grad_norm": 3.0007252413208176, "learning_rate": 3.4531525500159724e-06, "loss": 0.6602, "step": 7489 }, { "epoch": 0.61, "grad_norm": 2.97396173912392, "learning_rate": 3.451893767179375e-06, "loss": 0.4509, "step": 7490 }, { "epoch": 0.61, "grad_norm": 4.641610811802933, "learning_rate": 3.4506350928582878e-06, "loss": 1.0176, "step": 7491 }, { "epoch": 0.61, "grad_norm": 4.148555565788872, "learning_rate": 3.449376527140936e-06, "loss": 0.8331, "step": 7492 }, { "epoch": 0.61, "grad_norm": 4.6461205122890075, "learning_rate": 3.4481180701155435e-06, "loss": 0.7634, "step": 7493 }, { "epoch": 0.61, "grad_norm": 3.602022670249473, "learning_rate": 3.4468597218703203e-06, "loss": 0.6074, "step": 7494 }, { "epoch": 0.61, "grad_norm": 4.776633567319659, "learning_rate": 3.44560148249347e-06, "loss": 1.1803, "step": 7495 }, { "epoch": 0.61, "grad_norm": 3.1504778031021816, "learning_rate": 3.4443433520731908e-06, "loss": 0.5468, "step": 7496 }, { "epoch": 0.61, "grad_norm": 3.802423189114528, "learning_rate": 3.443085330697673e-06, "loss": 0.7454, "step": 7497 }, { "epoch": 0.61, "grad_norm": 3.1758347245807057, "learning_rate": 3.441827418455098e-06, "loss": 0.7281, "step": 7498 }, { "epoch": 0.61, "grad_norm": 2.841314650957273, "learning_rate": 3.44056961543364e-06, "loss": 0.6456, "step": 7499 }, { "epoch": 0.61, "grad_norm": 5.301548555979058, "learning_rate": 3.4393119217214643e-06, "loss": 1.1869, "step": 7500 }, { "epoch": 0.61, "grad_norm": 3.4204896042815256, "learning_rate": 3.438054337406732e-06, "loss": 0.551, "step": 7501 }, { "epoch": 0.61, "grad_norm": 3.2855473071915275, "learning_rate": 3.4367968625775923e-06, "loss": 0.793, "step": 7502 }, { "epoch": 0.61, "grad_norm": 4.264205516013644, "learning_rate": 3.435539497322189e-06, "loss": 0.9784, "step": 7503 }, { "epoch": 0.61, "grad_norm": 5.189963158184375, "learning_rate": 3.4342822417286586e-06, "loss": 0.9427, "step": 7504 }, { "epoch": 0.61, "grad_norm": 4.31453330800363, "learning_rate": 3.433025095885131e-06, "loss": 0.922, "step": 7505 }, { "epoch": 0.61, "grad_norm": 3.0832892899562063, "learning_rate": 3.4317680598797227e-06, "loss": 0.4771, "step": 7506 }, { "epoch": 0.61, "grad_norm": 3.9636349208458657, "learning_rate": 3.4305111338005483e-06, "loss": 0.931, "step": 7507 }, { "epoch": 0.61, "grad_norm": 2.7181152910442608, "learning_rate": 3.429254317735714e-06, "loss": 0.4166, "step": 7508 }, { "epoch": 0.61, "grad_norm": 5.991948585753611, "learning_rate": 3.4279976117733148e-06, "loss": 1.1894, "step": 7509 }, { "epoch": 0.61, "grad_norm": 2.5817551031001433, "learning_rate": 3.426741016001444e-06, "loss": 0.5615, "step": 7510 }, { "epoch": 0.61, "grad_norm": 2.5481343706597617, "learning_rate": 3.4254845305081796e-06, "loss": 0.5636, "step": 7511 }, { "epoch": 0.61, "grad_norm": 1.2740628976561783, "learning_rate": 3.4242281553815963e-06, "loss": 0.2066, "step": 7512 }, { "epoch": 0.61, "grad_norm": 3.854818236466965, "learning_rate": 3.422971890709762e-06, "loss": 1.035, "step": 7513 }, { "epoch": 0.61, "grad_norm": 1.9314099983526627, "learning_rate": 3.4217157365807352e-06, "loss": 0.435, "step": 7514 }, { "epoch": 0.61, "grad_norm": 3.1837886110940596, "learning_rate": 3.4204596930825674e-06, "loss": 0.547, "step": 7515 }, { "epoch": 0.61, "grad_norm": 3.7329810664237026, "learning_rate": 3.419203760303301e-06, "loss": 0.7076, "step": 7516 }, { "epoch": 0.61, "grad_norm": 3.96816950610144, "learning_rate": 3.417947938330971e-06, "loss": 0.6511, "step": 7517 }, { "epoch": 0.61, "grad_norm": 3.106187483351225, "learning_rate": 3.416692227253604e-06, "loss": 0.6246, "step": 7518 }, { "epoch": 0.61, "grad_norm": 2.4304170799997538, "learning_rate": 3.4154366271592222e-06, "loss": 0.2782, "step": 7519 }, { "epoch": 0.61, "grad_norm": 4.7879286041422535, "learning_rate": 3.4141811381358364e-06, "loss": 1.486, "step": 7520 }, { "epoch": 0.61, "grad_norm": 3.3751720368744564, "learning_rate": 3.4129257602714514e-06, "loss": 0.6315, "step": 7521 }, { "epoch": 0.61, "grad_norm": 4.683938628475828, "learning_rate": 3.4116704936540656e-06, "loss": 0.7881, "step": 7522 }, { "epoch": 0.61, "grad_norm": 3.1457861012787807, "learning_rate": 3.4104153383716644e-06, "loss": 0.5855, "step": 7523 }, { "epoch": 0.61, "grad_norm": 3.8336870542509214, "learning_rate": 3.4091602945122305e-06, "loss": 0.5423, "step": 7524 }, { "epoch": 0.62, "grad_norm": 3.984032714223586, "learning_rate": 3.4079053621637346e-06, "loss": 0.7277, "step": 7525 }, { "epoch": 0.62, "grad_norm": 4.337185833290463, "learning_rate": 3.4066505414141453e-06, "loss": 1.0906, "step": 7526 }, { "epoch": 0.62, "grad_norm": 2.3899786036667607, "learning_rate": 3.4053958323514185e-06, "loss": 0.3782, "step": 7527 }, { "epoch": 0.62, "grad_norm": 2.017552575570357, "learning_rate": 3.404141235063506e-06, "loss": 0.377, "step": 7528 }, { "epoch": 0.62, "grad_norm": 4.178231000840538, "learning_rate": 3.4028867496383454e-06, "loss": 0.6578, "step": 7529 }, { "epoch": 0.62, "grad_norm": 4.787115596496799, "learning_rate": 3.4016323761638737e-06, "loss": 0.8246, "step": 7530 }, { "epoch": 0.62, "grad_norm": 3.1835348520912077, "learning_rate": 3.400378114728017e-06, "loss": 0.4493, "step": 7531 }, { "epoch": 0.62, "grad_norm": 3.0554416692959383, "learning_rate": 3.399123965418692e-06, "loss": 0.7489, "step": 7532 }, { "epoch": 0.62, "grad_norm": 4.752411187164596, "learning_rate": 3.3978699283238117e-06, "loss": 0.7197, "step": 7533 }, { "epoch": 0.62, "grad_norm": 3.4033966339527217, "learning_rate": 3.396616003531275e-06, "loss": 0.4122, "step": 7534 }, { "epoch": 0.62, "grad_norm": 1.9913020044422074, "learning_rate": 3.3953621911289784e-06, "loss": 0.3394, "step": 7535 }, { "epoch": 0.62, "grad_norm": 4.767458762737945, "learning_rate": 3.3941084912048094e-06, "loss": 1.2963, "step": 7536 }, { "epoch": 0.62, "grad_norm": 3.405917623453279, "learning_rate": 3.392854903846645e-06, "loss": 0.8285, "step": 7537 }, { "epoch": 0.62, "grad_norm": 4.751814868947692, "learning_rate": 3.39160142914236e-06, "loss": 0.8188, "step": 7538 }, { "epoch": 0.62, "grad_norm": 5.227358384881683, "learning_rate": 3.3903480671798145e-06, "loss": 1.3352, "step": 7539 }, { "epoch": 0.62, "grad_norm": 3.904301672916866, "learning_rate": 3.389094818046864e-06, "loss": 0.5046, "step": 7540 }, { "epoch": 0.62, "grad_norm": 4.168266207804141, "learning_rate": 3.3878416818313555e-06, "loss": 0.9672, "step": 7541 }, { "epoch": 0.62, "grad_norm": 2.752994067863077, "learning_rate": 3.3865886586211285e-06, "loss": 0.6885, "step": 7542 }, { "epoch": 0.62, "grad_norm": 4.105697614699745, "learning_rate": 3.385335748504015e-06, "loss": 0.9677, "step": 7543 }, { "epoch": 0.62, "grad_norm": 2.653123686297841, "learning_rate": 3.3840829515678386e-06, "loss": 0.7451, "step": 7544 }, { "epoch": 0.62, "grad_norm": 1.8318854848639077, "learning_rate": 3.382830267900417e-06, "loss": 0.3442, "step": 7545 }, { "epoch": 0.62, "grad_norm": 5.125126939314708, "learning_rate": 3.381577697589554e-06, "loss": 1.1589, "step": 7546 }, { "epoch": 0.62, "grad_norm": 3.436915972131372, "learning_rate": 3.380325240723051e-06, "loss": 0.5953, "step": 7547 }, { "epoch": 0.62, "grad_norm": 4.305223721072696, "learning_rate": 3.3790728973886994e-06, "loss": 0.8719, "step": 7548 }, { "epoch": 0.62, "grad_norm": 4.119188080328333, "learning_rate": 3.377820667674283e-06, "loss": 0.8093, "step": 7549 }, { "epoch": 0.62, "grad_norm": 3.495245249192985, "learning_rate": 3.3765685516675805e-06, "loss": 0.7286, "step": 7550 }, { "epoch": 0.62, "grad_norm": 3.3455194062619977, "learning_rate": 3.3753165494563554e-06, "loss": 0.7161, "step": 7551 }, { "epoch": 0.62, "grad_norm": 4.926704792998524, "learning_rate": 3.3740646611283687e-06, "loss": 0.9703, "step": 7552 }, { "epoch": 0.62, "grad_norm": 4.408141112156821, "learning_rate": 3.3728128867713743e-06, "loss": 0.8305, "step": 7553 }, { "epoch": 0.62, "grad_norm": 4.8160753357356905, "learning_rate": 3.3715612264731155e-06, "loss": 1.3461, "step": 7554 }, { "epoch": 0.62, "grad_norm": 4.103611211003639, "learning_rate": 3.3703096803213263e-06, "loss": 1.0364, "step": 7555 }, { "epoch": 0.62, "grad_norm": 4.048244769376617, "learning_rate": 3.3690582484037393e-06, "loss": 0.8223, "step": 7556 }, { "epoch": 0.62, "grad_norm": 4.33130685895782, "learning_rate": 3.367806930808068e-06, "loss": 0.9185, "step": 7557 }, { "epoch": 0.62, "grad_norm": 3.541873494179832, "learning_rate": 3.366555727622028e-06, "loss": 0.6951, "step": 7558 }, { "epoch": 0.62, "grad_norm": 2.521856016581727, "learning_rate": 3.365304638933322e-06, "loss": 0.2948, "step": 7559 }, { "epoch": 0.62, "grad_norm": 3.633589064668164, "learning_rate": 3.3640536648296473e-06, "loss": 1.1136, "step": 7560 }, { "epoch": 0.62, "grad_norm": 2.977653755333384, "learning_rate": 3.362802805398692e-06, "loss": 0.4834, "step": 7561 }, { "epoch": 0.62, "grad_norm": 3.62936734920231, "learning_rate": 3.361552060728135e-06, "loss": 0.9729, "step": 7562 }, { "epoch": 0.62, "grad_norm": 4.7147232100280645, "learning_rate": 3.360301430905648e-06, "loss": 1.0255, "step": 7563 }, { "epoch": 0.62, "grad_norm": 4.422361900806105, "learning_rate": 3.3590509160188935e-06, "loss": 0.7291, "step": 7564 }, { "epoch": 0.62, "grad_norm": 3.136258418736888, "learning_rate": 3.3578005161555284e-06, "loss": 0.6473, "step": 7565 }, { "epoch": 0.62, "grad_norm": 5.260504074934148, "learning_rate": 3.3565502314032006e-06, "loss": 1.5827, "step": 7566 }, { "epoch": 0.62, "grad_norm": 3.064556639812398, "learning_rate": 3.355300061849549e-06, "loss": 0.7217, "step": 7567 }, { "epoch": 0.62, "grad_norm": 3.6113118562312545, "learning_rate": 3.354050007582207e-06, "loss": 0.5611, "step": 7568 }, { "epoch": 0.62, "grad_norm": 3.3387791111899765, "learning_rate": 3.3528000686887946e-06, "loss": 0.6625, "step": 7569 }, { "epoch": 0.62, "grad_norm": 3.446240931359145, "learning_rate": 3.3515502452569293e-06, "loss": 0.7795, "step": 7570 }, { "epoch": 0.62, "grad_norm": 1.9553336893169155, "learning_rate": 3.350300537374217e-06, "loss": 0.3551, "step": 7571 }, { "epoch": 0.62, "grad_norm": 3.8888905259600195, "learning_rate": 3.3490509451282582e-06, "loss": 0.6538, "step": 7572 }, { "epoch": 0.62, "grad_norm": 4.248790654389525, "learning_rate": 3.3478014686066448e-06, "loss": 0.8886, "step": 7573 }, { "epoch": 0.62, "grad_norm": 4.075163711164652, "learning_rate": 3.3465521078969552e-06, "loss": 0.4495, "step": 7574 }, { "epoch": 0.62, "grad_norm": 2.875334794198349, "learning_rate": 3.345302863086768e-06, "loss": 0.6683, "step": 7575 }, { "epoch": 0.62, "grad_norm": 3.0236885132985227, "learning_rate": 3.3440537342636483e-06, "loss": 0.4801, "step": 7576 }, { "epoch": 0.62, "grad_norm": 5.222717550677892, "learning_rate": 3.3428047215151566e-06, "loss": 1.1999, "step": 7577 }, { "epoch": 0.62, "grad_norm": 4.616336755588772, "learning_rate": 3.3415558249288404e-06, "loss": 0.9043, "step": 7578 }, { "epoch": 0.62, "grad_norm": 4.039906669102835, "learning_rate": 3.340307044592245e-06, "loss": 1.0051, "step": 7579 }, { "epoch": 0.62, "grad_norm": 2.1416453078958284, "learning_rate": 3.3390583805929016e-06, "loss": 0.5973, "step": 7580 }, { "epoch": 0.62, "grad_norm": 3.480569071267736, "learning_rate": 3.3378098330183366e-06, "loss": 0.6455, "step": 7581 }, { "epoch": 0.62, "grad_norm": 2.8108546519330186, "learning_rate": 3.336561401956069e-06, "loss": 0.568, "step": 7582 }, { "epoch": 0.62, "grad_norm": 3.7071940548697038, "learning_rate": 3.3353130874936074e-06, "loss": 1.0497, "step": 7583 }, { "epoch": 0.62, "grad_norm": 4.649638239315057, "learning_rate": 3.3340648897184546e-06, "loss": 0.6986, "step": 7584 }, { "epoch": 0.62, "grad_norm": 4.0271069934779264, "learning_rate": 3.3328168087181036e-06, "loss": 0.744, "step": 7585 }, { "epoch": 0.62, "grad_norm": 3.646900193708152, "learning_rate": 3.3315688445800376e-06, "loss": 0.9999, "step": 7586 }, { "epoch": 0.62, "grad_norm": 4.4062328965541, "learning_rate": 3.330320997391734e-06, "loss": 1.0649, "step": 7587 }, { "epoch": 0.62, "grad_norm": 1.968267573998116, "learning_rate": 3.329073267240662e-06, "loss": 0.3551, "step": 7588 }, { "epoch": 0.62, "grad_norm": 3.2727032722386675, "learning_rate": 3.3278256542142818e-06, "loss": 0.5636, "step": 7589 }, { "epoch": 0.62, "grad_norm": 2.485911056813762, "learning_rate": 3.326578158400049e-06, "loss": 0.3504, "step": 7590 }, { "epoch": 0.62, "grad_norm": 4.810087278594246, "learning_rate": 3.325330779885401e-06, "loss": 0.8325, "step": 7591 }, { "epoch": 0.62, "grad_norm": 4.5155577878663395, "learning_rate": 3.324083518757778e-06, "loss": 0.7222, "step": 7592 }, { "epoch": 0.62, "grad_norm": 4.226243750137132, "learning_rate": 3.322836375104608e-06, "loss": 0.8349, "step": 7593 }, { "epoch": 0.62, "grad_norm": 4.579907909260929, "learning_rate": 3.3215893490133076e-06, "loss": 0.8511, "step": 7594 }, { "epoch": 0.62, "grad_norm": 2.839432439727437, "learning_rate": 3.320342440571289e-06, "loss": 0.4744, "step": 7595 }, { "epoch": 0.62, "grad_norm": 5.348776624156341, "learning_rate": 3.319095649865958e-06, "loss": 1.0481, "step": 7596 }, { "epoch": 0.62, "grad_norm": 2.9393705216616834, "learning_rate": 3.3178489769847046e-06, "loss": 0.7586, "step": 7597 }, { "epoch": 0.62, "grad_norm": 3.455380150704934, "learning_rate": 3.3166024220149173e-06, "loss": 0.5484, "step": 7598 }, { "epoch": 0.62, "grad_norm": 5.055728788130167, "learning_rate": 3.3153559850439737e-06, "loss": 1.0842, "step": 7599 }, { "epoch": 0.62, "grad_norm": 2.7015896855427592, "learning_rate": 3.3141096661592455e-06, "loss": 0.5129, "step": 7600 }, { "epoch": 0.62, "grad_norm": 5.337951165458357, "learning_rate": 3.3128634654480906e-06, "loss": 0.9046, "step": 7601 }, { "epoch": 0.62, "grad_norm": 4.379378514832218, "learning_rate": 3.3116173829978666e-06, "loss": 0.8637, "step": 7602 }, { "epoch": 0.62, "grad_norm": 3.7526183219145404, "learning_rate": 3.3103714188959156e-06, "loss": 0.6631, "step": 7603 }, { "epoch": 0.62, "grad_norm": 3.788574484025864, "learning_rate": 3.3091255732295736e-06, "loss": 0.9459, "step": 7604 }, { "epoch": 0.62, "grad_norm": 3.6683996965418135, "learning_rate": 3.3078798460861704e-06, "loss": 0.5804, "step": 7605 }, { "epoch": 0.62, "grad_norm": 3.916929575285865, "learning_rate": 3.306634237553026e-06, "loss": 0.9159, "step": 7606 }, { "epoch": 0.62, "grad_norm": 4.091661616270836, "learning_rate": 3.305388747717453e-06, "loss": 0.7485, "step": 7607 }, { "epoch": 0.62, "grad_norm": 2.253125457820861, "learning_rate": 3.3041433766667535e-06, "loss": 0.5195, "step": 7608 }, { "epoch": 0.62, "grad_norm": 3.9477457676776226, "learning_rate": 3.302898124488222e-06, "loss": 0.8162, "step": 7609 }, { "epoch": 0.62, "grad_norm": 3.8355738202745426, "learning_rate": 3.3016529912691476e-06, "loss": 0.9991, "step": 7610 }, { "epoch": 0.62, "grad_norm": 4.310163980369584, "learning_rate": 3.3004079770968055e-06, "loss": 0.6773, "step": 7611 }, { "epoch": 0.62, "grad_norm": 2.6459081353394605, "learning_rate": 3.299163082058468e-06, "loss": 0.5445, "step": 7612 }, { "epoch": 0.62, "grad_norm": 3.2563150817337876, "learning_rate": 3.297918306241399e-06, "loss": 0.745, "step": 7613 }, { "epoch": 0.62, "grad_norm": 3.496579311994659, "learning_rate": 3.2966736497328463e-06, "loss": 0.3865, "step": 7614 }, { "epoch": 0.62, "grad_norm": 4.477069154674302, "learning_rate": 3.2954291126200577e-06, "loss": 0.8421, "step": 7615 }, { "epoch": 0.62, "grad_norm": 4.45013467679716, "learning_rate": 3.294184694990271e-06, "loss": 1.1175, "step": 7616 }, { "epoch": 0.62, "grad_norm": 3.9947369308971163, "learning_rate": 3.2929403969307137e-06, "loss": 0.7213, "step": 7617 }, { "epoch": 0.62, "grad_norm": 4.387494139290766, "learning_rate": 3.291696218528605e-06, "loss": 1.2012, "step": 7618 }, { "epoch": 0.62, "grad_norm": 4.169223623551574, "learning_rate": 3.290452159871158e-06, "loss": 1.0328, "step": 7619 }, { "epoch": 0.62, "grad_norm": 2.866520823535412, "learning_rate": 3.289208221045573e-06, "loss": 0.5849, "step": 7620 }, { "epoch": 0.62, "grad_norm": 1.9425109571369379, "learning_rate": 3.2879644021390468e-06, "loss": 0.3425, "step": 7621 }, { "epoch": 0.62, "grad_norm": 3.468745940756694, "learning_rate": 3.286720703238765e-06, "loss": 0.7145, "step": 7622 }, { "epoch": 0.62, "grad_norm": 3.913128123597565, "learning_rate": 3.2854771244319052e-06, "loss": 0.7324, "step": 7623 }, { "epoch": 0.62, "grad_norm": 4.211279066072754, "learning_rate": 3.2842336658056383e-06, "loss": 0.6421, "step": 7624 }, { "epoch": 0.62, "grad_norm": 3.2310589376655137, "learning_rate": 3.2829903274471253e-06, "loss": 0.5612, "step": 7625 }, { "epoch": 0.62, "grad_norm": 5.2109699398316724, "learning_rate": 3.281747109443517e-06, "loss": 1.3728, "step": 7626 }, { "epoch": 0.62, "grad_norm": 4.4741011651583875, "learning_rate": 3.2805040118819574e-06, "loss": 0.9257, "step": 7627 }, { "epoch": 0.62, "grad_norm": 2.96943882155435, "learning_rate": 3.279261034849584e-06, "loss": 0.7778, "step": 7628 }, { "epoch": 0.62, "grad_norm": 2.230742837335636, "learning_rate": 3.278018178433523e-06, "loss": 0.5448, "step": 7629 }, { "epoch": 0.62, "grad_norm": 3.6382394352891763, "learning_rate": 3.276775442720896e-06, "loss": 0.7383, "step": 7630 }, { "epoch": 0.62, "grad_norm": 5.943406264757444, "learning_rate": 3.2755328277988084e-06, "loss": 1.0892, "step": 7631 }, { "epoch": 0.62, "grad_norm": 4.364702175304437, "learning_rate": 3.274290333754365e-06, "loss": 1.0101, "step": 7632 }, { "epoch": 0.62, "grad_norm": 3.5276837531138274, "learning_rate": 3.2730479606746594e-06, "loss": 0.5491, "step": 7633 }, { "epoch": 0.62, "grad_norm": 3.3067665969011113, "learning_rate": 3.271805708646776e-06, "loss": 0.7095, "step": 7634 }, { "epoch": 0.62, "grad_norm": 3.819460150118888, "learning_rate": 3.2705635777577904e-06, "loss": 0.5795, "step": 7635 }, { "epoch": 0.62, "grad_norm": 4.61967776942314, "learning_rate": 3.2693215680947737e-06, "loss": 0.6914, "step": 7636 }, { "epoch": 0.62, "grad_norm": 2.6878796081754066, "learning_rate": 3.268079679744781e-06, "loss": 0.5596, "step": 7637 }, { "epoch": 0.62, "grad_norm": 2.0924427201373974, "learning_rate": 3.2668379127948656e-06, "loss": 0.3269, "step": 7638 }, { "epoch": 0.62, "grad_norm": 4.884712377878976, "learning_rate": 3.26559626733207e-06, "loss": 0.8669, "step": 7639 }, { "epoch": 0.62, "grad_norm": 4.37203318965418, "learning_rate": 3.264354743443429e-06, "loss": 0.8481, "step": 7640 }, { "epoch": 0.62, "grad_norm": 3.399144750533389, "learning_rate": 3.2631133412159656e-06, "loss": 0.8149, "step": 7641 }, { "epoch": 0.62, "grad_norm": 4.265972866984133, "learning_rate": 3.2618720607367e-06, "loss": 0.6379, "step": 7642 }, { "epoch": 0.62, "grad_norm": 5.488346454035597, "learning_rate": 3.2606309020926364e-06, "loss": 1.2761, "step": 7643 }, { "epoch": 0.62, "grad_norm": 3.0438213532347667, "learning_rate": 3.2593898653707773e-06, "loss": 0.6676, "step": 7644 }, { "epoch": 0.62, "grad_norm": 5.061186580438532, "learning_rate": 3.2581489506581134e-06, "loss": 1.1957, "step": 7645 }, { "epoch": 0.62, "grad_norm": 3.092072730761351, "learning_rate": 3.2569081580416273e-06, "loss": 0.562, "step": 7646 }, { "epoch": 0.63, "grad_norm": 2.5801424147558554, "learning_rate": 3.2556674876082937e-06, "loss": 0.527, "step": 7647 }, { "epoch": 0.63, "grad_norm": 3.061391757672997, "learning_rate": 3.254426939445079e-06, "loss": 0.7016, "step": 7648 }, { "epoch": 0.63, "grad_norm": 3.898385610961387, "learning_rate": 3.2531865136389383e-06, "loss": 0.7654, "step": 7649 }, { "epoch": 0.63, "grad_norm": 4.1875028295410495, "learning_rate": 3.251946210276821e-06, "loss": 1.0348, "step": 7650 }, { "epoch": 0.63, "grad_norm": 3.034755909633507, "learning_rate": 3.2507060294456653e-06, "loss": 0.5311, "step": 7651 }, { "epoch": 0.63, "grad_norm": 2.705049010180583, "learning_rate": 3.249465971232405e-06, "loss": 0.5876, "step": 7652 }, { "epoch": 0.63, "grad_norm": 2.3703704410665343, "learning_rate": 3.248226035723963e-06, "loss": 0.4805, "step": 7653 }, { "epoch": 0.63, "grad_norm": 1.7354395150446371, "learning_rate": 3.2469862230072507e-06, "loss": 0.2601, "step": 7654 }, { "epoch": 0.63, "grad_norm": 4.281087596216457, "learning_rate": 3.245746533169175e-06, "loss": 0.8449, "step": 7655 }, { "epoch": 0.63, "grad_norm": 3.8102663809612016, "learning_rate": 3.244506966296633e-06, "loss": 0.9758, "step": 7656 }, { "epoch": 0.63, "grad_norm": 2.0974875755818427, "learning_rate": 3.2432675224765133e-06, "loss": 0.2948, "step": 7657 }, { "epoch": 0.63, "grad_norm": 2.934298887973439, "learning_rate": 3.242028201795694e-06, "loss": 0.8, "step": 7658 }, { "epoch": 0.63, "grad_norm": 3.9409185012799575, "learning_rate": 3.240789004341049e-06, "loss": 0.846, "step": 7659 }, { "epoch": 0.63, "grad_norm": 2.956801842865033, "learning_rate": 3.2395499301994366e-06, "loss": 0.272, "step": 7660 }, { "epoch": 0.63, "grad_norm": 3.100245111197015, "learning_rate": 3.238310979457713e-06, "loss": 0.5009, "step": 7661 }, { "epoch": 0.63, "grad_norm": 2.908941130615876, "learning_rate": 3.2370721522027226e-06, "loss": 0.8223, "step": 7662 }, { "epoch": 0.63, "grad_norm": 3.062481040532317, "learning_rate": 3.235833448521303e-06, "loss": 0.5634, "step": 7663 }, { "epoch": 0.63, "grad_norm": 4.42252314474069, "learning_rate": 3.2345948685002796e-06, "loss": 1.076, "step": 7664 }, { "epoch": 0.63, "grad_norm": 3.611775133421519, "learning_rate": 3.2333564122264755e-06, "loss": 0.5999, "step": 7665 }, { "epoch": 0.63, "grad_norm": 3.5563105544668465, "learning_rate": 3.2321180797866962e-06, "loss": 0.7487, "step": 7666 }, { "epoch": 0.63, "grad_norm": 6.936191030571098, "learning_rate": 3.2308798712677456e-06, "loss": 1.237, "step": 7667 }, { "epoch": 0.63, "grad_norm": 4.145405830437945, "learning_rate": 3.2296417867564166e-06, "loss": 0.5232, "step": 7668 }, { "epoch": 0.63, "grad_norm": 3.453801351461035, "learning_rate": 3.2284038263394946e-06, "loss": 0.4514, "step": 7669 }, { "epoch": 0.63, "grad_norm": 3.517791485006161, "learning_rate": 3.2271659901037555e-06, "loss": 0.6854, "step": 7670 }, { "epoch": 0.63, "grad_norm": 3.6261401874406745, "learning_rate": 3.2259282781359634e-06, "loss": 0.763, "step": 7671 }, { "epoch": 0.63, "grad_norm": 4.000540569718012, "learning_rate": 3.224690690522879e-06, "loss": 0.693, "step": 7672 }, { "epoch": 0.63, "grad_norm": 4.120716139586882, "learning_rate": 3.22345322735125e-06, "loss": 1.1604, "step": 7673 }, { "epoch": 0.63, "grad_norm": 1.383118903590275, "learning_rate": 3.2222158887078187e-06, "loss": 0.2229, "step": 7674 }, { "epoch": 0.63, "grad_norm": 4.976290591456961, "learning_rate": 3.2209786746793163e-06, "loss": 0.9455, "step": 7675 }, { "epoch": 0.63, "grad_norm": 1.5314161645389535, "learning_rate": 3.219741585352469e-06, "loss": 0.2351, "step": 7676 }, { "epoch": 0.63, "grad_norm": 2.191337605130976, "learning_rate": 3.218504620813986e-06, "loss": 0.442, "step": 7677 }, { "epoch": 0.63, "grad_norm": 3.561383212432562, "learning_rate": 3.2172677811505766e-06, "loss": 0.5061, "step": 7678 }, { "epoch": 0.63, "grad_norm": 3.391575981241691, "learning_rate": 3.216031066448938e-06, "loss": 0.6082, "step": 7679 }, { "epoch": 0.63, "grad_norm": 5.234858555107159, "learning_rate": 3.2147944767957565e-06, "loss": 1.1577, "step": 7680 }, { "epoch": 0.63, "grad_norm": 1.6846154268784963, "learning_rate": 3.213558012277713e-06, "loss": 0.3, "step": 7681 }, { "epoch": 0.63, "grad_norm": 5.810587396979411, "learning_rate": 3.212321672981481e-06, "loss": 1.0758, "step": 7682 }, { "epoch": 0.63, "grad_norm": 2.22152061579591, "learning_rate": 3.2110854589937166e-06, "loss": 0.259, "step": 7683 }, { "epoch": 0.63, "grad_norm": 4.48217190544856, "learning_rate": 3.2098493704010768e-06, "loss": 0.741, "step": 7684 }, { "epoch": 0.63, "grad_norm": 4.013228565717424, "learning_rate": 3.208613407290206e-06, "loss": 0.9747, "step": 7685 }, { "epoch": 0.63, "grad_norm": 3.6159848966723724, "learning_rate": 3.2073775697477393e-06, "loss": 0.8999, "step": 7686 }, { "epoch": 0.63, "grad_norm": 2.7380397280342086, "learning_rate": 3.2061418578603028e-06, "loss": 0.5477, "step": 7687 }, { "epoch": 0.63, "grad_norm": 2.539704720378294, "learning_rate": 3.2049062717145168e-06, "loss": 0.4176, "step": 7688 }, { "epoch": 0.63, "grad_norm": 4.274538445434723, "learning_rate": 3.203670811396987e-06, "loss": 0.975, "step": 7689 }, { "epoch": 0.63, "grad_norm": 4.67172399250756, "learning_rate": 3.2024354769943163e-06, "loss": 0.9177, "step": 7690 }, { "epoch": 0.63, "grad_norm": 3.475114485121228, "learning_rate": 3.2012002685930947e-06, "loss": 0.8625, "step": 7691 }, { "epoch": 0.63, "grad_norm": 3.3914588561677315, "learning_rate": 3.1999651862799063e-06, "loss": 0.5683, "step": 7692 }, { "epoch": 0.63, "grad_norm": 2.081062531847189, "learning_rate": 3.198730230141327e-06, "loss": 0.4778, "step": 7693 }, { "epoch": 0.63, "grad_norm": 5.118738109466712, "learning_rate": 3.197495400263917e-06, "loss": 0.9559, "step": 7694 }, { "epoch": 0.63, "grad_norm": 2.8691710480218955, "learning_rate": 3.1962606967342356e-06, "loss": 0.7172, "step": 7695 }, { "epoch": 0.63, "grad_norm": 4.794101928588226, "learning_rate": 3.1950261196388287e-06, "loss": 0.974, "step": 7696 }, { "epoch": 0.63, "grad_norm": 4.493952052033231, "learning_rate": 3.1937916690642356e-06, "loss": 0.9769, "step": 7697 }, { "epoch": 0.63, "grad_norm": 3.172623602492804, "learning_rate": 3.192557345096986e-06, "loss": 0.4682, "step": 7698 }, { "epoch": 0.63, "grad_norm": 2.5391023927997174, "learning_rate": 3.191323147823602e-06, "loss": 0.3686, "step": 7699 }, { "epoch": 0.63, "grad_norm": 3.2908197921338616, "learning_rate": 3.1900890773305926e-06, "loss": 0.613, "step": 7700 }, { "epoch": 0.63, "grad_norm": 0.957444384242329, "learning_rate": 3.1888551337044615e-06, "loss": 0.1528, "step": 7701 }, { "epoch": 0.63, "grad_norm": 1.736174740514147, "learning_rate": 3.1876213170317048e-06, "loss": 0.2971, "step": 7702 }, { "epoch": 0.63, "grad_norm": 3.9588233427147275, "learning_rate": 3.186387627398805e-06, "loss": 0.7388, "step": 7703 }, { "epoch": 0.63, "grad_norm": 2.5151779126495444, "learning_rate": 3.1851540648922398e-06, "loss": 0.5993, "step": 7704 }, { "epoch": 0.63, "grad_norm": 2.3530189237922574, "learning_rate": 3.1839206295984786e-06, "loss": 0.3692, "step": 7705 }, { "epoch": 0.63, "grad_norm": 3.095440207688822, "learning_rate": 3.1826873216039757e-06, "loss": 0.3849, "step": 7706 }, { "epoch": 0.63, "grad_norm": 3.292342872790543, "learning_rate": 3.181454140995182e-06, "loss": 0.6714, "step": 7707 }, { "epoch": 0.63, "grad_norm": 3.1720375945195083, "learning_rate": 3.1802210878585395e-06, "loss": 0.5537, "step": 7708 }, { "epoch": 0.63, "grad_norm": 4.183990698007337, "learning_rate": 3.1789881622804797e-06, "loss": 0.8163, "step": 7709 }, { "epoch": 0.63, "grad_norm": 3.5311093273622, "learning_rate": 3.1777553643474247e-06, "loss": 0.6283, "step": 7710 }, { "epoch": 0.63, "grad_norm": 3.442566058851378, "learning_rate": 3.1765226941457866e-06, "loss": 0.8416, "step": 7711 }, { "epoch": 0.63, "grad_norm": 4.1627473864282765, "learning_rate": 3.1752901517619733e-06, "loss": 0.6516, "step": 7712 }, { "epoch": 0.63, "grad_norm": 3.5284453351576794, "learning_rate": 3.1740577372823785e-06, "loss": 0.6561, "step": 7713 }, { "epoch": 0.63, "grad_norm": 2.3797594442551833, "learning_rate": 3.1728254507933892e-06, "loss": 0.5099, "step": 7714 }, { "epoch": 0.63, "grad_norm": 4.576113825725778, "learning_rate": 3.1715932923813843e-06, "loss": 1.1334, "step": 7715 }, { "epoch": 0.63, "grad_norm": 4.286629317778869, "learning_rate": 3.170361262132734e-06, "loss": 0.845, "step": 7716 }, { "epoch": 0.63, "grad_norm": 4.009390554053943, "learning_rate": 3.1691293601337953e-06, "loss": 0.8019, "step": 7717 }, { "epoch": 0.63, "grad_norm": 3.370703724590661, "learning_rate": 3.16789758647092e-06, "loss": 0.7708, "step": 7718 }, { "epoch": 0.63, "grad_norm": 5.200191529868398, "learning_rate": 3.166665941230451e-06, "loss": 1.1286, "step": 7719 }, { "epoch": 0.63, "grad_norm": 4.135015759148786, "learning_rate": 3.1654344244987213e-06, "loss": 1.1156, "step": 7720 }, { "epoch": 0.63, "grad_norm": 2.4909081881591635, "learning_rate": 3.1642030363620534e-06, "loss": 0.5092, "step": 7721 }, { "epoch": 0.63, "grad_norm": 5.492886760434505, "learning_rate": 3.1629717769067654e-06, "loss": 1.2737, "step": 7722 }, { "epoch": 0.63, "grad_norm": 4.733333203752356, "learning_rate": 3.161740646219159e-06, "loss": 0.7632, "step": 7723 }, { "epoch": 0.63, "grad_norm": 3.695872923314846, "learning_rate": 3.1605096443855333e-06, "loss": 0.853, "step": 7724 }, { "epoch": 0.63, "grad_norm": 3.2332939949985526, "learning_rate": 3.159278771492176e-06, "loss": 0.4684, "step": 7725 }, { "epoch": 0.63, "grad_norm": 4.055708716202285, "learning_rate": 3.1580480276253665e-06, "loss": 1.2795, "step": 7726 }, { "epoch": 0.63, "grad_norm": 2.895087699370973, "learning_rate": 3.1568174128713738e-06, "loss": 0.5558, "step": 7727 }, { "epoch": 0.63, "grad_norm": 4.45024502271313, "learning_rate": 3.15558692731646e-06, "loss": 0.7325, "step": 7728 }, { "epoch": 0.63, "grad_norm": 2.3016898771741525, "learning_rate": 3.1543565710468743e-06, "loss": 0.4324, "step": 7729 }, { "epoch": 0.63, "grad_norm": 3.6489388052277896, "learning_rate": 3.1531263441488607e-06, "loss": 0.7163, "step": 7730 }, { "epoch": 0.63, "grad_norm": 5.193545400934648, "learning_rate": 3.1518962467086527e-06, "loss": 1.1451, "step": 7731 }, { "epoch": 0.63, "grad_norm": 4.479125338088091, "learning_rate": 3.150666278812475e-06, "loss": 1.0333, "step": 7732 }, { "epoch": 0.63, "grad_norm": 3.0994094237191834, "learning_rate": 3.149436440546545e-06, "loss": 0.7642, "step": 7733 }, { "epoch": 0.63, "grad_norm": 5.9580754736867, "learning_rate": 3.1482067319970642e-06, "loss": 1.2643, "step": 7734 }, { "epoch": 0.63, "grad_norm": 4.753015333630123, "learning_rate": 3.1469771532502336e-06, "loss": 0.8311, "step": 7735 }, { "epoch": 0.63, "grad_norm": 3.4290167032293177, "learning_rate": 3.145747704392239e-06, "loss": 0.8285, "step": 7736 }, { "epoch": 0.63, "grad_norm": 3.3129908290943595, "learning_rate": 3.144518385509261e-06, "loss": 0.5139, "step": 7737 }, { "epoch": 0.63, "grad_norm": 4.672563867413507, "learning_rate": 3.143289196687469e-06, "loss": 1.1612, "step": 7738 }, { "epoch": 0.63, "grad_norm": 6.207846649305042, "learning_rate": 3.142060138013026e-06, "loss": 1.1907, "step": 7739 }, { "epoch": 0.63, "grad_norm": 2.780734791010286, "learning_rate": 3.1408312095720794e-06, "loss": 0.508, "step": 7740 }, { "epoch": 0.63, "grad_norm": 3.380561111288435, "learning_rate": 3.139602411450774e-06, "loss": 0.6048, "step": 7741 }, { "epoch": 0.63, "grad_norm": 2.586262330486288, "learning_rate": 3.138373743735244e-06, "loss": 0.4451, "step": 7742 }, { "epoch": 0.63, "grad_norm": 2.8144155997734766, "learning_rate": 3.1371452065116116e-06, "loss": 0.476, "step": 7743 }, { "epoch": 0.63, "grad_norm": 4.15381259807162, "learning_rate": 3.1359167998659933e-06, "loss": 0.752, "step": 7744 }, { "epoch": 0.63, "grad_norm": 4.023697205289873, "learning_rate": 3.134688523884497e-06, "loss": 0.9533, "step": 7745 }, { "epoch": 0.63, "grad_norm": 3.9939570834213014, "learning_rate": 3.1334603786532147e-06, "loss": 1.0225, "step": 7746 }, { "epoch": 0.63, "grad_norm": 1.9617810999492078, "learning_rate": 3.1322323642582374e-06, "loss": 0.4235, "step": 7747 }, { "epoch": 0.63, "grad_norm": 4.096482581655145, "learning_rate": 3.131004480785642e-06, "loss": 0.7987, "step": 7748 }, { "epoch": 0.63, "grad_norm": 3.7038563822744615, "learning_rate": 3.1297767283214998e-06, "loss": 0.9277, "step": 7749 }, { "epoch": 0.63, "grad_norm": 3.025907679321576, "learning_rate": 3.1285491069518705e-06, "loss": 0.5833, "step": 7750 }, { "epoch": 0.63, "grad_norm": 3.7108010699875966, "learning_rate": 3.127321616762803e-06, "loss": 0.6736, "step": 7751 }, { "epoch": 0.63, "grad_norm": 4.533908261684587, "learning_rate": 3.1260942578403395e-06, "loss": 0.9325, "step": 7752 }, { "epoch": 0.63, "grad_norm": 3.5851016859848137, "learning_rate": 3.1248670302705143e-06, "loss": 0.614, "step": 7753 }, { "epoch": 0.63, "grad_norm": 4.291879126802277, "learning_rate": 3.1236399341393486e-06, "loss": 0.8511, "step": 7754 }, { "epoch": 0.63, "grad_norm": 3.166298720756765, "learning_rate": 3.122412969532858e-06, "loss": 0.413, "step": 7755 }, { "epoch": 0.63, "grad_norm": 2.7107254456828316, "learning_rate": 3.121186136537049e-06, "loss": 0.2174, "step": 7756 }, { "epoch": 0.63, "grad_norm": 2.405980873573781, "learning_rate": 3.119959435237913e-06, "loss": 0.4214, "step": 7757 }, { "epoch": 0.63, "grad_norm": 5.297894197921948, "learning_rate": 3.11873286572144e-06, "loss": 1.178, "step": 7758 }, { "epoch": 0.63, "grad_norm": 1.4532413310352328, "learning_rate": 3.1175064280736044e-06, "loss": 0.2372, "step": 7759 }, { "epoch": 0.63, "grad_norm": 4.050043333659418, "learning_rate": 3.1162801223803756e-06, "loss": 0.8248, "step": 7760 }, { "epoch": 0.63, "grad_norm": 4.781108835281411, "learning_rate": 3.1150539487277125e-06, "loss": 1.0726, "step": 7761 }, { "epoch": 0.63, "grad_norm": 3.1836495197151167, "learning_rate": 3.1138279072015666e-06, "loss": 0.8683, "step": 7762 }, { "epoch": 0.63, "grad_norm": 4.032521770289908, "learning_rate": 3.112601997887873e-06, "loss": 0.7148, "step": 7763 }, { "epoch": 0.63, "grad_norm": 3.2801631855651623, "learning_rate": 3.111376220872565e-06, "loss": 0.567, "step": 7764 }, { "epoch": 0.63, "grad_norm": 1.9504549299211302, "learning_rate": 3.1101505762415668e-06, "loss": 0.3778, "step": 7765 }, { "epoch": 0.63, "grad_norm": 4.444505613029491, "learning_rate": 3.1089250640807865e-06, "loss": 1.1581, "step": 7766 }, { "epoch": 0.63, "grad_norm": 4.876983373389087, "learning_rate": 3.10769968447613e-06, "loss": 0.9147, "step": 7767 }, { "epoch": 0.63, "grad_norm": 3.9787989385327642, "learning_rate": 3.106474437513492e-06, "loss": 0.5907, "step": 7768 }, { "epoch": 0.64, "grad_norm": 4.775997969258128, "learning_rate": 3.1052493232787533e-06, "loss": 0.9217, "step": 7769 }, { "epoch": 0.64, "grad_norm": 4.468930199595321, "learning_rate": 3.104024341857791e-06, "loss": 0.9899, "step": 7770 }, { "epoch": 0.64, "grad_norm": 4.359483541220007, "learning_rate": 3.1027994933364715e-06, "loss": 0.9143, "step": 7771 }, { "epoch": 0.64, "grad_norm": 3.138079374083297, "learning_rate": 3.101574777800651e-06, "loss": 0.5309, "step": 7772 }, { "epoch": 0.64, "grad_norm": 4.473590000027721, "learning_rate": 3.100350195336177e-06, "loss": 0.7457, "step": 7773 }, { "epoch": 0.64, "grad_norm": 2.1993030318420637, "learning_rate": 3.099125746028887e-06, "loss": 0.3841, "step": 7774 }, { "epoch": 0.64, "grad_norm": 2.940398928712938, "learning_rate": 3.0979014299646088e-06, "loss": 0.4528, "step": 7775 }, { "epoch": 0.64, "grad_norm": 1.1397946602839084, "learning_rate": 3.0966772472291623e-06, "loss": 0.1403, "step": 7776 }, { "epoch": 0.64, "grad_norm": 3.985916223616124, "learning_rate": 3.0954531979083575e-06, "loss": 0.7692, "step": 7777 }, { "epoch": 0.64, "grad_norm": 4.298616697163275, "learning_rate": 3.094229282087995e-06, "loss": 1.0578, "step": 7778 }, { "epoch": 0.64, "grad_norm": 4.393406375595787, "learning_rate": 3.0930054998538672e-06, "loss": 0.7686, "step": 7779 }, { "epoch": 0.64, "grad_norm": 2.8192625380118126, "learning_rate": 3.091781851291753e-06, "loss": 0.5322, "step": 7780 }, { "epoch": 0.64, "grad_norm": 5.437817787193546, "learning_rate": 3.0905583364874282e-06, "loss": 1.3211, "step": 7781 }, { "epoch": 0.64, "grad_norm": 4.235151842126615, "learning_rate": 3.0893349555266517e-06, "loss": 1.1217, "step": 7782 }, { "epoch": 0.64, "grad_norm": 3.0124361892118876, "learning_rate": 3.088111708495181e-06, "loss": 0.5699, "step": 7783 }, { "epoch": 0.64, "grad_norm": 3.278913011180968, "learning_rate": 3.0868885954787577e-06, "loss": 0.6601, "step": 7784 }, { "epoch": 0.64, "grad_norm": 2.3234343380067917, "learning_rate": 3.0856656165631204e-06, "loss": 0.4015, "step": 7785 }, { "epoch": 0.64, "grad_norm": 3.225939320324209, "learning_rate": 3.084442771833991e-06, "loss": 0.4539, "step": 7786 }, { "epoch": 0.64, "grad_norm": 3.555604538407607, "learning_rate": 3.0832200613770857e-06, "loss": 0.5867, "step": 7787 }, { "epoch": 0.64, "grad_norm": 2.8838307134062102, "learning_rate": 3.081997485278113e-06, "loss": 0.3765, "step": 7788 }, { "epoch": 0.64, "grad_norm": 3.6069289209766198, "learning_rate": 3.0807750436227695e-06, "loss": 0.7507, "step": 7789 }, { "epoch": 0.64, "grad_norm": 2.254856263935714, "learning_rate": 3.079552736496745e-06, "loss": 0.3731, "step": 7790 }, { "epoch": 0.64, "grad_norm": 3.8397696175368514, "learning_rate": 3.0783305639857132e-06, "loss": 1.0205, "step": 7791 }, { "epoch": 0.64, "grad_norm": 6.492414526493002, "learning_rate": 3.077108526175345e-06, "loss": 1.1767, "step": 7792 }, { "epoch": 0.64, "grad_norm": 4.120289953683984, "learning_rate": 3.075886623151302e-06, "loss": 0.856, "step": 7793 }, { "epoch": 0.64, "grad_norm": 3.1113371636214087, "learning_rate": 3.074664854999232e-06, "loss": 0.6099, "step": 7794 }, { "epoch": 0.64, "grad_norm": 3.380585449987341, "learning_rate": 3.0734432218047783e-06, "loss": 0.6947, "step": 7795 }, { "epoch": 0.64, "grad_norm": 3.683271562483727, "learning_rate": 3.07222172365357e-06, "loss": 0.6046, "step": 7796 }, { "epoch": 0.64, "grad_norm": 3.127171353450437, "learning_rate": 3.0710003606312292e-06, "loss": 0.6197, "step": 7797 }, { "epoch": 0.64, "grad_norm": 5.421346023231678, "learning_rate": 3.069779132823367e-06, "loss": 0.9477, "step": 7798 }, { "epoch": 0.64, "grad_norm": 5.286625588838192, "learning_rate": 3.068558040315588e-06, "loss": 1.1859, "step": 7799 }, { "epoch": 0.64, "grad_norm": 2.930364597159306, "learning_rate": 3.0673370831934833e-06, "loss": 0.6226, "step": 7800 }, { "epoch": 0.64, "grad_norm": 2.8869195032304655, "learning_rate": 3.066116261542639e-06, "loss": 0.5016, "step": 7801 }, { "epoch": 0.64, "grad_norm": 4.3190297058512295, "learning_rate": 3.064895575448631e-06, "loss": 0.9508, "step": 7802 }, { "epoch": 0.64, "grad_norm": 4.201891244728968, "learning_rate": 3.0636750249970184e-06, "loss": 0.6445, "step": 7803 }, { "epoch": 0.64, "grad_norm": 1.9073656928455973, "learning_rate": 3.062454610273361e-06, "loss": 0.3428, "step": 7804 }, { "epoch": 0.64, "grad_norm": 3.1274350126950825, "learning_rate": 3.061234331363203e-06, "loss": 0.5776, "step": 7805 }, { "epoch": 0.64, "grad_norm": 3.757643213513995, "learning_rate": 3.0600141883520796e-06, "loss": 0.8018, "step": 7806 }, { "epoch": 0.64, "grad_norm": 3.816896954660318, "learning_rate": 3.0587941813255196e-06, "loss": 0.6786, "step": 7807 }, { "epoch": 0.64, "grad_norm": 4.991835587302752, "learning_rate": 3.0575743103690408e-06, "loss": 1.0708, "step": 7808 }, { "epoch": 0.64, "grad_norm": 4.723297776792715, "learning_rate": 3.056354575568148e-06, "loss": 1.0296, "step": 7809 }, { "epoch": 0.64, "grad_norm": 6.138433420633675, "learning_rate": 3.05513497700834e-06, "loss": 1.1589, "step": 7810 }, { "epoch": 0.64, "grad_norm": 3.7835659165003084, "learning_rate": 3.0539155147751074e-06, "loss": 0.3927, "step": 7811 }, { "epoch": 0.64, "grad_norm": 4.000530651589436, "learning_rate": 3.0526961889539265e-06, "loss": 0.642, "step": 7812 }, { "epoch": 0.64, "grad_norm": 5.289083728058848, "learning_rate": 3.0514769996302696e-06, "loss": 1.0834, "step": 7813 }, { "epoch": 0.64, "grad_norm": 4.179387690922487, "learning_rate": 3.050257946889594e-06, "loss": 0.7185, "step": 7814 }, { "epoch": 0.64, "grad_norm": 3.614143591546221, "learning_rate": 3.049039030817351e-06, "loss": 0.636, "step": 7815 }, { "epoch": 0.64, "grad_norm": 2.0175919934837028, "learning_rate": 3.0478202514989813e-06, "loss": 0.3539, "step": 7816 }, { "epoch": 0.64, "grad_norm": 5.425152565734742, "learning_rate": 3.046601609019916e-06, "loss": 1.0657, "step": 7817 }, { "epoch": 0.64, "grad_norm": 1.7070207030060107, "learning_rate": 3.0453831034655766e-06, "loss": 0.2859, "step": 7818 }, { "epoch": 0.64, "grad_norm": 5.551122283558936, "learning_rate": 3.0441647349213764e-06, "loss": 1.3125, "step": 7819 }, { "epoch": 0.64, "grad_norm": 4.221502422957849, "learning_rate": 3.042946503472716e-06, "loss": 0.9485, "step": 7820 }, { "epoch": 0.64, "grad_norm": 2.348367201920117, "learning_rate": 3.041728409204988e-06, "loss": 0.3886, "step": 7821 }, { "epoch": 0.64, "grad_norm": 3.970306040011278, "learning_rate": 3.040510452203576e-06, "loss": 1.1752, "step": 7822 }, { "epoch": 0.64, "grad_norm": 4.290881887789476, "learning_rate": 3.039292632553853e-06, "loss": 0.9586, "step": 7823 }, { "epoch": 0.64, "grad_norm": 5.038877725597535, "learning_rate": 3.038074950341184e-06, "loss": 0.799, "step": 7824 }, { "epoch": 0.64, "grad_norm": 1.033598891945941, "learning_rate": 3.036857405650925e-06, "loss": 0.1455, "step": 7825 }, { "epoch": 0.64, "grad_norm": 3.3779552426693624, "learning_rate": 3.0356399985684153e-06, "loss": 0.7625, "step": 7826 }, { "epoch": 0.64, "grad_norm": 4.673095220192702, "learning_rate": 3.0344227291789928e-06, "loss": 0.9734, "step": 7827 }, { "epoch": 0.64, "grad_norm": 3.6096029345475955, "learning_rate": 3.033205597567984e-06, "loss": 0.4377, "step": 7828 }, { "epoch": 0.64, "grad_norm": 4.324901287473498, "learning_rate": 3.0319886038207023e-06, "loss": 0.9661, "step": 7829 }, { "epoch": 0.64, "grad_norm": 2.727965111416931, "learning_rate": 3.0307717480224572e-06, "loss": 0.3719, "step": 7830 }, { "epoch": 0.64, "grad_norm": 4.456816251346994, "learning_rate": 3.02955503025854e-06, "loss": 0.8164, "step": 7831 }, { "epoch": 0.64, "grad_norm": 3.2486302261587734, "learning_rate": 3.0283384506142397e-06, "loss": 0.572, "step": 7832 }, { "epoch": 0.64, "grad_norm": 1.0676076427367587, "learning_rate": 3.027122009174834e-06, "loss": 0.1451, "step": 7833 }, { "epoch": 0.64, "grad_norm": 4.957496653333703, "learning_rate": 3.0259057060255887e-06, "loss": 0.7304, "step": 7834 }, { "epoch": 0.64, "grad_norm": 4.362065522951962, "learning_rate": 3.024689541251763e-06, "loss": 0.9303, "step": 7835 }, { "epoch": 0.64, "grad_norm": 4.788726112158268, "learning_rate": 3.023473514938604e-06, "loss": 1.4238, "step": 7836 }, { "epoch": 0.64, "grad_norm": 3.3604292834068765, "learning_rate": 3.02225762717135e-06, "loss": 0.5941, "step": 7837 }, { "epoch": 0.64, "grad_norm": 4.26210210267571, "learning_rate": 3.021041878035228e-06, "loss": 0.8583, "step": 7838 }, { "epoch": 0.64, "grad_norm": 3.0775307873462916, "learning_rate": 3.0198262676154583e-06, "loss": 0.573, "step": 7839 }, { "epoch": 0.64, "grad_norm": 2.0254216863292878, "learning_rate": 3.018610795997249e-06, "loss": 0.4853, "step": 7840 }, { "epoch": 0.64, "grad_norm": 2.7059526815793205, "learning_rate": 3.0173954632657996e-06, "loss": 0.3955, "step": 7841 }, { "epoch": 0.64, "grad_norm": 5.240168978264894, "learning_rate": 3.0161802695063024e-06, "loss": 0.8435, "step": 7842 }, { "epoch": 0.64, "grad_norm": 5.0858055095332695, "learning_rate": 3.014965214803933e-06, "loss": 1.332, "step": 7843 }, { "epoch": 0.64, "grad_norm": 3.2462707758145446, "learning_rate": 3.013750299243864e-06, "loss": 0.5558, "step": 7844 }, { "epoch": 0.64, "grad_norm": 3.650193179594442, "learning_rate": 3.0125355229112536e-06, "loss": 1.0065, "step": 7845 }, { "epoch": 0.64, "grad_norm": 3.7676518700399604, "learning_rate": 3.0113208858912533e-06, "loss": 1.1567, "step": 7846 }, { "epoch": 0.64, "grad_norm": 1.9609440709283663, "learning_rate": 3.0101063882690046e-06, "loss": 0.3932, "step": 7847 }, { "epoch": 0.64, "grad_norm": 4.373511093110997, "learning_rate": 3.00889203012964e-06, "loss": 0.6438, "step": 7848 }, { "epoch": 0.64, "grad_norm": 2.3333735327867577, "learning_rate": 3.007677811558276e-06, "loss": 0.3598, "step": 7849 }, { "epoch": 0.64, "grad_norm": 4.5025719551068635, "learning_rate": 3.006463732640028e-06, "loss": 0.8174, "step": 7850 }, { "epoch": 0.64, "grad_norm": 3.132660480178307, "learning_rate": 3.0052497934599966e-06, "loss": 0.771, "step": 7851 }, { "epoch": 0.64, "grad_norm": 5.51776375842111, "learning_rate": 3.0040359941032727e-06, "loss": 1.3559, "step": 7852 }, { "epoch": 0.64, "grad_norm": 2.977461470056396, "learning_rate": 3.0028223346549413e-06, "loss": 0.6852, "step": 7853 }, { "epoch": 0.64, "grad_norm": 3.09231298367694, "learning_rate": 3.00160881520007e-06, "loss": 0.5279, "step": 7854 }, { "epoch": 0.64, "grad_norm": 2.180758071905827, "learning_rate": 3.000395435823724e-06, "loss": 0.3719, "step": 7855 }, { "epoch": 0.64, "grad_norm": 4.916943911528527, "learning_rate": 2.9991821966109558e-06, "loss": 0.4142, "step": 7856 }, { "epoch": 0.64, "grad_norm": 5.163191128066598, "learning_rate": 2.9979690976468083e-06, "loss": 1.2684, "step": 7857 }, { "epoch": 0.64, "grad_norm": 5.612341928275986, "learning_rate": 2.9967561390163148e-06, "loss": 1.0136, "step": 7858 }, { "epoch": 0.64, "grad_norm": 1.9913312384897335, "learning_rate": 2.9955433208044983e-06, "loss": 0.2951, "step": 7859 }, { "epoch": 0.64, "grad_norm": 4.2750060420121265, "learning_rate": 2.994330643096371e-06, "loss": 0.6136, "step": 7860 }, { "epoch": 0.64, "grad_norm": 2.1301120622750145, "learning_rate": 2.993118105976936e-06, "loss": 0.3387, "step": 7861 }, { "epoch": 0.64, "grad_norm": 3.1105291461179583, "learning_rate": 2.9919057095311874e-06, "loss": 0.6402, "step": 7862 }, { "epoch": 0.64, "grad_norm": 3.6144575613625025, "learning_rate": 2.99069345384411e-06, "loss": 0.6512, "step": 7863 }, { "epoch": 0.64, "grad_norm": 4.065422619060882, "learning_rate": 2.9894813390006773e-06, "loss": 0.6632, "step": 7864 }, { "epoch": 0.64, "grad_norm": 2.9735292436274023, "learning_rate": 2.988269365085854e-06, "loss": 0.7177, "step": 7865 }, { "epoch": 0.64, "grad_norm": 3.2401111070258053, "learning_rate": 2.9870575321845916e-06, "loss": 0.5627, "step": 7866 }, { "epoch": 0.64, "grad_norm": 4.697499884583697, "learning_rate": 2.985845840381837e-06, "loss": 1.6977, "step": 7867 }, { "epoch": 0.64, "grad_norm": 5.468404310057439, "learning_rate": 2.9846342897625215e-06, "loss": 1.0907, "step": 7868 }, { "epoch": 0.64, "grad_norm": 3.3973242492973816, "learning_rate": 2.983422880411572e-06, "loss": 0.5834, "step": 7869 }, { "epoch": 0.64, "grad_norm": 4.437769930198844, "learning_rate": 2.9822116124139045e-06, "loss": 0.9437, "step": 7870 }, { "epoch": 0.64, "grad_norm": 6.109339166692898, "learning_rate": 2.9810004858544194e-06, "loss": 1.3969, "step": 7871 }, { "epoch": 0.64, "grad_norm": 2.323342271677376, "learning_rate": 2.9797895008180135e-06, "loss": 0.3465, "step": 7872 }, { "epoch": 0.64, "grad_norm": 5.320943319032637, "learning_rate": 2.9785786573895713e-06, "loss": 0.9443, "step": 7873 }, { "epoch": 0.64, "grad_norm": 4.368512254444603, "learning_rate": 2.9773679556539696e-06, "loss": 1.0702, "step": 7874 }, { "epoch": 0.64, "grad_norm": 4.100965499627617, "learning_rate": 2.9761573956960706e-06, "loss": 0.424, "step": 7875 }, { "epoch": 0.64, "grad_norm": 4.743011989969749, "learning_rate": 2.9749469776007324e-06, "loss": 0.9294, "step": 7876 }, { "epoch": 0.64, "grad_norm": 5.097132767859566, "learning_rate": 2.973736701452795e-06, "loss": 1.146, "step": 7877 }, { "epoch": 0.64, "grad_norm": 3.7628372451622654, "learning_rate": 2.9725265673370973e-06, "loss": 0.8206, "step": 7878 }, { "epoch": 0.64, "grad_norm": 5.387763699050225, "learning_rate": 2.971316575338464e-06, "loss": 1.0001, "step": 7879 }, { "epoch": 0.64, "grad_norm": 4.202384180316912, "learning_rate": 2.9701067255417092e-06, "loss": 0.9383, "step": 7880 }, { "epoch": 0.64, "grad_norm": 2.36968601017581, "learning_rate": 2.96889701803164e-06, "loss": 0.4805, "step": 7881 }, { "epoch": 0.64, "grad_norm": 3.9970162535064464, "learning_rate": 2.967687452893051e-06, "loss": 0.4226, "step": 7882 }, { "epoch": 0.64, "grad_norm": 3.5169857613363527, "learning_rate": 2.9664780302107266e-06, "loss": 0.9481, "step": 7883 }, { "epoch": 0.64, "grad_norm": 2.560675619392749, "learning_rate": 2.965268750069441e-06, "loss": 0.5625, "step": 7884 }, { "epoch": 0.64, "grad_norm": 3.6906020056209887, "learning_rate": 2.964059612553961e-06, "loss": 0.5511, "step": 7885 }, { "epoch": 0.64, "grad_norm": 5.3707827503539995, "learning_rate": 2.962850617749042e-06, "loss": 0.9854, "step": 7886 }, { "epoch": 0.64, "grad_norm": 3.169952648374127, "learning_rate": 2.961641765739429e-06, "loss": 0.6199, "step": 7887 }, { "epoch": 0.64, "grad_norm": 4.658972696363917, "learning_rate": 2.9604330566098588e-06, "loss": 1.0271, "step": 7888 }, { "epoch": 0.64, "grad_norm": 2.5783860166921713, "learning_rate": 2.9592244904450536e-06, "loss": 0.6164, "step": 7889 }, { "epoch": 0.64, "grad_norm": 2.6210062336360935, "learning_rate": 2.9580160673297307e-06, "loss": 0.4776, "step": 7890 }, { "epoch": 0.64, "grad_norm": 5.656576655258052, "learning_rate": 2.956807787348594e-06, "loss": 1.2903, "step": 7891 }, { "epoch": 0.65, "grad_norm": 4.450910711609922, "learning_rate": 2.9555996505863394e-06, "loss": 0.779, "step": 7892 }, { "epoch": 0.65, "grad_norm": 4.602170961498176, "learning_rate": 2.954391657127654e-06, "loss": 0.9086, "step": 7893 }, { "epoch": 0.65, "grad_norm": 4.2815459596395264, "learning_rate": 2.9531838070572084e-06, "loss": 1.129, "step": 7894 }, { "epoch": 0.65, "grad_norm": 3.8505282976510635, "learning_rate": 2.9519761004596708e-06, "loss": 0.8004, "step": 7895 }, { "epoch": 0.65, "grad_norm": 3.8476698443942707, "learning_rate": 2.9507685374196954e-06, "loss": 0.7421, "step": 7896 }, { "epoch": 0.65, "grad_norm": 3.2051223760366647, "learning_rate": 2.9495611180219287e-06, "loss": 0.4676, "step": 7897 }, { "epoch": 0.65, "grad_norm": 4.059341674196565, "learning_rate": 2.948353842351002e-06, "loss": 0.7832, "step": 7898 }, { "epoch": 0.65, "grad_norm": 4.376156592708591, "learning_rate": 2.947146710491545e-06, "loss": 0.9234, "step": 7899 }, { "epoch": 0.65, "grad_norm": 4.525761659169275, "learning_rate": 2.9459397225281673e-06, "loss": 0.8849, "step": 7900 }, { "epoch": 0.65, "grad_norm": 4.880761815644048, "learning_rate": 2.9447328785454752e-06, "loss": 0.9877, "step": 7901 }, { "epoch": 0.65, "grad_norm": 2.1428597004178456, "learning_rate": 2.9435261786280645e-06, "loss": 0.509, "step": 7902 }, { "epoch": 0.65, "grad_norm": 3.394815004925326, "learning_rate": 2.942319622860519e-06, "loss": 0.363, "step": 7903 }, { "epoch": 0.65, "grad_norm": 4.4582482783216015, "learning_rate": 2.9411132113274132e-06, "loss": 0.8826, "step": 7904 }, { "epoch": 0.65, "grad_norm": 3.753784692097969, "learning_rate": 2.9399069441133116e-06, "loss": 0.5, "step": 7905 }, { "epoch": 0.65, "grad_norm": 3.0731841609712225, "learning_rate": 2.9387008213027675e-06, "loss": 0.6792, "step": 7906 }, { "epoch": 0.65, "grad_norm": 3.9450796550415745, "learning_rate": 2.937494842980324e-06, "loss": 0.7015, "step": 7907 }, { "epoch": 0.65, "grad_norm": 1.1214717249721415, "learning_rate": 2.9362890092305158e-06, "loss": 0.1444, "step": 7908 }, { "epoch": 0.65, "grad_norm": 3.938371891994917, "learning_rate": 2.935083320137867e-06, "loss": 0.9034, "step": 7909 }, { "epoch": 0.65, "grad_norm": 3.9877577235309896, "learning_rate": 2.9338777757868923e-06, "loss": 0.8414, "step": 7910 }, { "epoch": 0.65, "grad_norm": 2.6328141567571284, "learning_rate": 2.9326723762620924e-06, "loss": 0.4078, "step": 7911 }, { "epoch": 0.65, "grad_norm": 3.990145345446904, "learning_rate": 2.931467121647962e-06, "loss": 0.7359, "step": 7912 }, { "epoch": 0.65, "grad_norm": 5.807798979886192, "learning_rate": 2.930262012028984e-06, "loss": 0.8907, "step": 7913 }, { "epoch": 0.65, "grad_norm": 3.8619260709390737, "learning_rate": 2.929057047489632e-06, "loss": 0.7976, "step": 7914 }, { "epoch": 0.65, "grad_norm": 4.245285559043202, "learning_rate": 2.9278522281143667e-06, "loss": 0.681, "step": 7915 }, { "epoch": 0.65, "grad_norm": 4.452101166075296, "learning_rate": 2.9266475539876447e-06, "loss": 0.5975, "step": 7916 }, { "epoch": 0.65, "grad_norm": 5.729503388721803, "learning_rate": 2.9254430251939046e-06, "loss": 1.127, "step": 7917 }, { "epoch": 0.65, "grad_norm": 4.718684875541518, "learning_rate": 2.9242386418175793e-06, "loss": 0.9964, "step": 7918 }, { "epoch": 0.65, "grad_norm": 3.6566710096133983, "learning_rate": 2.9230344039430913e-06, "loss": 0.8345, "step": 7919 }, { "epoch": 0.65, "grad_norm": 2.0860241466730605, "learning_rate": 2.921830311654853e-06, "loss": 0.332, "step": 7920 }, { "epoch": 0.65, "grad_norm": 3.050894560130508, "learning_rate": 2.9206263650372668e-06, "loss": 0.5202, "step": 7921 }, { "epoch": 0.65, "grad_norm": 3.6740888860709515, "learning_rate": 2.919422564174722e-06, "loss": 0.7684, "step": 7922 }, { "epoch": 0.65, "grad_norm": 5.539719103044569, "learning_rate": 2.9182189091516017e-06, "loss": 1.2597, "step": 7923 }, { "epoch": 0.65, "grad_norm": 2.534927915504683, "learning_rate": 2.9170154000522744e-06, "loss": 0.3663, "step": 7924 }, { "epoch": 0.65, "grad_norm": 3.5698272821477635, "learning_rate": 2.915812036961103e-06, "loss": 1.0274, "step": 7925 }, { "epoch": 0.65, "grad_norm": 3.478769023476825, "learning_rate": 2.914608819962437e-06, "loss": 0.6595, "step": 7926 }, { "epoch": 0.65, "grad_norm": 4.068271121802564, "learning_rate": 2.9134057491406163e-06, "loss": 0.662, "step": 7927 }, { "epoch": 0.65, "grad_norm": 6.19661409649374, "learning_rate": 2.912202824579975e-06, "loss": 1.6517, "step": 7928 }, { "epoch": 0.65, "grad_norm": 3.4816319057866587, "learning_rate": 2.911000046364827e-06, "loss": 0.4724, "step": 7929 }, { "epoch": 0.65, "grad_norm": 5.149261359307092, "learning_rate": 2.9097974145794843e-06, "loss": 0.6429, "step": 7930 }, { "epoch": 0.65, "grad_norm": 2.424747973768166, "learning_rate": 2.908594929308246e-06, "loss": 0.45, "step": 7931 }, { "epoch": 0.65, "grad_norm": 2.8112624800692414, "learning_rate": 2.907392590635404e-06, "loss": 0.3496, "step": 7932 }, { "epoch": 0.65, "grad_norm": 3.0302301469921433, "learning_rate": 2.9061903986452323e-06, "loss": 0.6052, "step": 7933 }, { "epoch": 0.65, "grad_norm": 2.9909120510204357, "learning_rate": 2.904988353422003e-06, "loss": 0.6163, "step": 7934 }, { "epoch": 0.65, "grad_norm": 4.088865606963298, "learning_rate": 2.9037864550499704e-06, "loss": 0.9871, "step": 7935 }, { "epoch": 0.65, "grad_norm": 4.7197420214941594, "learning_rate": 2.902584703613385e-06, "loss": 1.0631, "step": 7936 }, { "epoch": 0.65, "grad_norm": 3.4037252525132544, "learning_rate": 2.9013830991964838e-06, "loss": 0.5726, "step": 7937 }, { "epoch": 0.65, "grad_norm": 4.86857257087212, "learning_rate": 2.900181641883494e-06, "loss": 1.2704, "step": 7938 }, { "epoch": 0.65, "grad_norm": 2.9548905996573103, "learning_rate": 2.8989803317586353e-06, "loss": 0.4708, "step": 7939 }, { "epoch": 0.65, "grad_norm": 3.606607452653022, "learning_rate": 2.8977791689061087e-06, "loss": 0.7239, "step": 7940 }, { "epoch": 0.65, "grad_norm": 5.19923764561386, "learning_rate": 2.8965781534101132e-06, "loss": 1.0878, "step": 7941 }, { "epoch": 0.65, "grad_norm": 3.4083004770293286, "learning_rate": 2.895377285354836e-06, "loss": 0.5902, "step": 7942 }, { "epoch": 0.65, "grad_norm": 3.7862425947863994, "learning_rate": 2.8941765648244513e-06, "loss": 0.7949, "step": 7943 }, { "epoch": 0.65, "grad_norm": 5.819961564630001, "learning_rate": 2.892975991903125e-06, "loss": 1.3285, "step": 7944 }, { "epoch": 0.65, "grad_norm": 5.505179163497323, "learning_rate": 2.891775566675014e-06, "loss": 1.2131, "step": 7945 }, { "epoch": 0.65, "grad_norm": 3.800564826454519, "learning_rate": 2.8905752892242587e-06, "loss": 1.0754, "step": 7946 }, { "epoch": 0.65, "grad_norm": 2.6659245553942985, "learning_rate": 2.889375159634995e-06, "loss": 0.2595, "step": 7947 }, { "epoch": 0.65, "grad_norm": 4.867679753509104, "learning_rate": 2.8881751779913498e-06, "loss": 0.8299, "step": 7948 }, { "epoch": 0.65, "grad_norm": 3.1805636058524747, "learning_rate": 2.886975344377432e-06, "loss": 0.7716, "step": 7949 }, { "epoch": 0.65, "grad_norm": 2.9457803081893306, "learning_rate": 2.8857756588773457e-06, "loss": 0.673, "step": 7950 }, { "epoch": 0.65, "grad_norm": 3.854688609728728, "learning_rate": 2.884576121575187e-06, "loss": 0.8473, "step": 7951 }, { "epoch": 0.65, "grad_norm": 3.55813344986194, "learning_rate": 2.8833767325550345e-06, "loss": 0.8929, "step": 7952 }, { "epoch": 0.65, "grad_norm": 4.062536876817694, "learning_rate": 2.8821774919009605e-06, "loss": 1.2235, "step": 7953 }, { "epoch": 0.65, "grad_norm": 4.91308594382761, "learning_rate": 2.8809783996970274e-06, "loss": 0.9671, "step": 7954 }, { "epoch": 0.65, "grad_norm": 4.705628943410485, "learning_rate": 2.8797794560272875e-06, "loss": 1.1877, "step": 7955 }, { "epoch": 0.65, "grad_norm": 4.095020489564123, "learning_rate": 2.8785806609757815e-06, "loss": 0.8418, "step": 7956 }, { "epoch": 0.65, "grad_norm": 4.12676106657002, "learning_rate": 2.8773820146265375e-06, "loss": 1.25, "step": 7957 }, { "epoch": 0.65, "grad_norm": 4.389999860397088, "learning_rate": 2.8761835170635765e-06, "loss": 0.8575, "step": 7958 }, { "epoch": 0.65, "grad_norm": 3.4720159253229315, "learning_rate": 2.8749851683709072e-06, "loss": 0.6071, "step": 7959 }, { "epoch": 0.65, "grad_norm": 2.980189054467754, "learning_rate": 2.8737869686325304e-06, "loss": 0.4888, "step": 7960 }, { "epoch": 0.65, "grad_norm": 3.750422703747019, "learning_rate": 2.872588917932434e-06, "loss": 0.8012, "step": 7961 }, { "epoch": 0.65, "grad_norm": 4.50027378894247, "learning_rate": 2.871391016354597e-06, "loss": 0.9064, "step": 7962 }, { "epoch": 0.65, "grad_norm": 3.299368076828362, "learning_rate": 2.8701932639829846e-06, "loss": 0.5323, "step": 7963 }, { "epoch": 0.65, "grad_norm": 3.9160283537957787, "learning_rate": 2.868995660901557e-06, "loss": 1.0933, "step": 7964 }, { "epoch": 0.65, "grad_norm": 2.6159973575585713, "learning_rate": 2.867798207194258e-06, "loss": 0.4785, "step": 7965 }, { "epoch": 0.65, "grad_norm": 2.854649565232945, "learning_rate": 2.866600902945025e-06, "loss": 0.2629, "step": 7966 }, { "epoch": 0.65, "grad_norm": 3.442187897459728, "learning_rate": 2.865403748237784e-06, "loss": 0.6322, "step": 7967 }, { "epoch": 0.65, "grad_norm": 5.0672308602635106, "learning_rate": 2.864206743156453e-06, "loss": 0.5079, "step": 7968 }, { "epoch": 0.65, "grad_norm": 4.447223811256291, "learning_rate": 2.8630098877849322e-06, "loss": 0.6896, "step": 7969 }, { "epoch": 0.65, "grad_norm": 3.772683191649797, "learning_rate": 2.861813182207117e-06, "loss": 1.1045, "step": 7970 }, { "epoch": 0.65, "grad_norm": 4.90869334797886, "learning_rate": 2.8606166265068935e-06, "loss": 1.2193, "step": 7971 }, { "epoch": 0.65, "grad_norm": 3.66447128756878, "learning_rate": 2.8594202207681333e-06, "loss": 0.9918, "step": 7972 }, { "epoch": 0.65, "grad_norm": 2.1709142790837173, "learning_rate": 2.8582239650747024e-06, "loss": 0.3579, "step": 7973 }, { "epoch": 0.65, "grad_norm": 3.542718894700696, "learning_rate": 2.8570278595104478e-06, "loss": 0.4313, "step": 7974 }, { "epoch": 0.65, "grad_norm": 5.470565752435574, "learning_rate": 2.855831904159214e-06, "loss": 1.3507, "step": 7975 }, { "epoch": 0.65, "grad_norm": 3.017187077630708, "learning_rate": 2.8546360991048325e-06, "loss": 0.4235, "step": 7976 }, { "epoch": 0.65, "grad_norm": 3.411440865234151, "learning_rate": 2.8534404444311235e-06, "loss": 0.6309, "step": 7977 }, { "epoch": 0.65, "grad_norm": 5.200528672997894, "learning_rate": 2.8522449402218984e-06, "loss": 1.0034, "step": 7978 }, { "epoch": 0.65, "grad_norm": 2.373192243595398, "learning_rate": 2.8510495865609573e-06, "loss": 0.6623, "step": 7979 }, { "epoch": 0.65, "grad_norm": 4.968360904913381, "learning_rate": 2.8498543835320856e-06, "loss": 1.2938, "step": 7980 }, { "epoch": 0.65, "grad_norm": 3.224467989703306, "learning_rate": 2.8486593312190668e-06, "loss": 0.5887, "step": 7981 }, { "epoch": 0.65, "grad_norm": 2.654242604119693, "learning_rate": 2.8474644297056643e-06, "loss": 0.4886, "step": 7982 }, { "epoch": 0.65, "grad_norm": 3.7602370743626894, "learning_rate": 2.8462696790756362e-06, "loss": 0.6524, "step": 7983 }, { "epoch": 0.65, "grad_norm": 4.902133042146245, "learning_rate": 2.845075079412731e-06, "loss": 0.9172, "step": 7984 }, { "epoch": 0.65, "grad_norm": 5.455886719214672, "learning_rate": 2.8438806308006874e-06, "loss": 1.2014, "step": 7985 }, { "epoch": 0.65, "grad_norm": 2.8729653038084955, "learning_rate": 2.842686333323226e-06, "loss": 0.5934, "step": 7986 }, { "epoch": 0.65, "grad_norm": 2.189609924684086, "learning_rate": 2.841492187064063e-06, "loss": 0.3625, "step": 7987 }, { "epoch": 0.65, "grad_norm": 3.3526536316346363, "learning_rate": 2.8402981921069044e-06, "loss": 0.4967, "step": 7988 }, { "epoch": 0.65, "grad_norm": 2.623895560107847, "learning_rate": 2.8391043485354436e-06, "loss": 0.5459, "step": 7989 }, { "epoch": 0.65, "grad_norm": 3.154512611660718, "learning_rate": 2.8379106564333637e-06, "loss": 0.4984, "step": 7990 }, { "epoch": 0.65, "grad_norm": 1.224795514043236, "learning_rate": 2.8367171158843386e-06, "loss": 0.1345, "step": 7991 }, { "epoch": 0.65, "grad_norm": 3.1610715773663984, "learning_rate": 2.835523726972028e-06, "loss": 0.6905, "step": 7992 }, { "epoch": 0.65, "grad_norm": 3.3696818259978287, "learning_rate": 2.834330489780084e-06, "loss": 0.7145, "step": 7993 }, { "epoch": 0.65, "grad_norm": 3.2641499570271626, "learning_rate": 2.8331374043921472e-06, "loss": 0.7458, "step": 7994 }, { "epoch": 0.65, "grad_norm": 3.941652968343014, "learning_rate": 2.831944470891851e-06, "loss": 1.0364, "step": 7995 }, { "epoch": 0.65, "grad_norm": 4.5782653321947455, "learning_rate": 2.8307516893628097e-06, "loss": 1.2017, "step": 7996 }, { "epoch": 0.65, "grad_norm": 3.768539344559609, "learning_rate": 2.8295590598886356e-06, "loss": 0.9792, "step": 7997 }, { "epoch": 0.65, "grad_norm": 3.882226709861375, "learning_rate": 2.828366582552924e-06, "loss": 0.7185, "step": 7998 }, { "epoch": 0.65, "grad_norm": 5.30969061938397, "learning_rate": 2.827174257439265e-06, "loss": 0.8603, "step": 7999 }, { "epoch": 0.65, "grad_norm": 3.865148421794005, "learning_rate": 2.8259820846312326e-06, "loss": 0.6394, "step": 8000 }, { "epoch": 0.65, "grad_norm": 3.216494413050757, "learning_rate": 2.824790064212396e-06, "loss": 0.4633, "step": 8001 }, { "epoch": 0.65, "grad_norm": 5.186243312675972, "learning_rate": 2.8235981962663107e-06, "loss": 0.5483, "step": 8002 }, { "epoch": 0.65, "grad_norm": 5.440202878375771, "learning_rate": 2.8224064808765182e-06, "loss": 0.9243, "step": 8003 }, { "epoch": 0.65, "grad_norm": 4.79307107805714, "learning_rate": 2.8212149181265547e-06, "loss": 1.046, "step": 8004 }, { "epoch": 0.65, "grad_norm": 3.234343446088011, "learning_rate": 2.820023508099944e-06, "loss": 0.5725, "step": 8005 }, { "epoch": 0.65, "grad_norm": 3.7712916691735443, "learning_rate": 2.8188322508801967e-06, "loss": 0.9586, "step": 8006 }, { "epoch": 0.65, "grad_norm": 5.110195433030602, "learning_rate": 2.817641146550817e-06, "loss": 0.8975, "step": 8007 }, { "epoch": 0.65, "grad_norm": 3.564456190651159, "learning_rate": 2.8164501951952973e-06, "loss": 0.7762, "step": 8008 }, { "epoch": 0.65, "grad_norm": 3.222844540856964, "learning_rate": 2.8152593968971143e-06, "loss": 0.7783, "step": 8009 }, { "epoch": 0.65, "grad_norm": 3.2829279961044495, "learning_rate": 2.814068751739739e-06, "loss": 0.8104, "step": 8010 }, { "epoch": 0.65, "grad_norm": 2.6984117927456697, "learning_rate": 2.8128782598066327e-06, "loss": 0.4785, "step": 8011 }, { "epoch": 0.65, "grad_norm": 3.0633281365883747, "learning_rate": 2.8116879211812407e-06, "loss": 0.7631, "step": 8012 }, { "epoch": 0.65, "grad_norm": 4.632535475645752, "learning_rate": 2.810497735947003e-06, "loss": 0.5679, "step": 8013 }, { "epoch": 0.66, "grad_norm": 3.8235228300560817, "learning_rate": 2.809307704187344e-06, "loss": 0.3668, "step": 8014 }, { "epoch": 0.66, "grad_norm": 5.161518638795988, "learning_rate": 2.8081178259856813e-06, "loss": 1.2164, "step": 8015 }, { "epoch": 0.66, "grad_norm": 3.788168760095346, "learning_rate": 2.806928101425419e-06, "loss": 0.748, "step": 8016 }, { "epoch": 0.66, "grad_norm": 3.595830211979231, "learning_rate": 2.8057385305899533e-06, "loss": 0.7062, "step": 8017 }, { "epoch": 0.66, "grad_norm": 3.2491092651746767, "learning_rate": 2.804549113562667e-06, "loss": 0.7872, "step": 8018 }, { "epoch": 0.66, "grad_norm": 2.167759757082825, "learning_rate": 2.803359850426935e-06, "loss": 0.296, "step": 8019 }, { "epoch": 0.66, "grad_norm": 3.6383704732156836, "learning_rate": 2.802170741266116e-06, "loss": 0.8412, "step": 8020 }, { "epoch": 0.66, "grad_norm": 2.4452165564431327, "learning_rate": 2.8009817861635622e-06, "loss": 0.4422, "step": 8021 }, { "epoch": 0.66, "grad_norm": 4.406342192462903, "learning_rate": 2.7997929852026164e-06, "loss": 1.0347, "step": 8022 }, { "epoch": 0.66, "grad_norm": 2.505017668203887, "learning_rate": 2.7986043384666055e-06, "loss": 0.4232, "step": 8023 }, { "epoch": 0.66, "grad_norm": 2.8697517932358667, "learning_rate": 2.797415846038851e-06, "loss": 0.4334, "step": 8024 }, { "epoch": 0.66, "grad_norm": 3.7275342691111857, "learning_rate": 2.7962275080026612e-06, "loss": 0.6212, "step": 8025 }, { "epoch": 0.66, "grad_norm": 4.908361652822283, "learning_rate": 2.795039324441331e-06, "loss": 0.8908, "step": 8026 }, { "epoch": 0.66, "grad_norm": 4.727079111363858, "learning_rate": 2.7938512954381503e-06, "loss": 1.0258, "step": 8027 }, { "epoch": 0.66, "grad_norm": 3.144923134552206, "learning_rate": 2.79266342107639e-06, "loss": 0.6403, "step": 8028 }, { "epoch": 0.66, "grad_norm": 2.8843594757838744, "learning_rate": 2.791475701439317e-06, "loss": 0.3594, "step": 8029 }, { "epoch": 0.66, "grad_norm": 4.49582032249646, "learning_rate": 2.790288136610187e-06, "loss": 1.0168, "step": 8030 }, { "epoch": 0.66, "grad_norm": 3.0150535405758636, "learning_rate": 2.7891007266722435e-06, "loss": 0.7466, "step": 8031 }, { "epoch": 0.66, "grad_norm": 5.821039224362037, "learning_rate": 2.787913471708715e-06, "loss": 0.7878, "step": 8032 }, { "epoch": 0.66, "grad_norm": 3.5964597099929008, "learning_rate": 2.7867263718028246e-06, "loss": 0.8349, "step": 8033 }, { "epoch": 0.66, "grad_norm": 5.215248501926799, "learning_rate": 2.7855394270377843e-06, "loss": 0.7862, "step": 8034 }, { "epoch": 0.66, "grad_norm": 5.023413160171227, "learning_rate": 2.784352637496792e-06, "loss": 1.2161, "step": 8035 }, { "epoch": 0.66, "grad_norm": 3.616951739827295, "learning_rate": 2.7831660032630405e-06, "loss": 0.7639, "step": 8036 }, { "epoch": 0.66, "grad_norm": 4.343166757027107, "learning_rate": 2.781979524419701e-06, "loss": 0.9321, "step": 8037 }, { "epoch": 0.66, "grad_norm": 4.542165060427963, "learning_rate": 2.780793201049945e-06, "loss": 0.6174, "step": 8038 }, { "epoch": 0.66, "grad_norm": 5.00732678304251, "learning_rate": 2.7796070332369274e-06, "loss": 1.2496, "step": 8039 }, { "epoch": 0.66, "grad_norm": 6.386606258896781, "learning_rate": 2.7784210210637937e-06, "loss": 1.5798, "step": 8040 }, { "epoch": 0.66, "grad_norm": 4.456299499160534, "learning_rate": 2.7772351646136795e-06, "loss": 0.6512, "step": 8041 }, { "epoch": 0.66, "grad_norm": 4.750388118818402, "learning_rate": 2.776049463969705e-06, "loss": 1.2437, "step": 8042 }, { "epoch": 0.66, "grad_norm": 2.1606977279478032, "learning_rate": 2.7748639192149863e-06, "loss": 0.3467, "step": 8043 }, { "epoch": 0.66, "grad_norm": 1.8466243448013244, "learning_rate": 2.7736785304326217e-06, "loss": 0.3452, "step": 8044 }, { "epoch": 0.66, "grad_norm": 3.14618056656971, "learning_rate": 2.772493297705703e-06, "loss": 0.7901, "step": 8045 }, { "epoch": 0.66, "grad_norm": 4.886642021842415, "learning_rate": 2.771308221117309e-06, "loss": 1.0205, "step": 8046 }, { "epoch": 0.66, "grad_norm": 2.977361372003828, "learning_rate": 2.7701233007505104e-06, "loss": 0.6876, "step": 8047 }, { "epoch": 0.66, "grad_norm": 2.668384948430321, "learning_rate": 2.7689385366883654e-06, "loss": 0.3277, "step": 8048 }, { "epoch": 0.66, "grad_norm": 4.426996210231428, "learning_rate": 2.7677539290139177e-06, "loss": 1.0532, "step": 8049 }, { "epoch": 0.66, "grad_norm": 4.458992091406994, "learning_rate": 2.766569477810205e-06, "loss": 0.9114, "step": 8050 }, { "epoch": 0.66, "grad_norm": 4.174900594278151, "learning_rate": 2.7653851831602514e-06, "loss": 0.7051, "step": 8051 }, { "epoch": 0.66, "grad_norm": 3.8040453815644266, "learning_rate": 2.764201045147071e-06, "loss": 0.8327, "step": 8052 }, { "epoch": 0.66, "grad_norm": 4.293419797368659, "learning_rate": 2.76301706385367e-06, "loss": 1.0003, "step": 8053 }, { "epoch": 0.66, "grad_norm": 3.393621564583542, "learning_rate": 2.7618332393630353e-06, "loss": 0.6693, "step": 8054 }, { "epoch": 0.66, "grad_norm": 4.725986573141455, "learning_rate": 2.7606495717581498e-06, "loss": 0.7114, "step": 8055 }, { "epoch": 0.66, "grad_norm": 4.669401274710586, "learning_rate": 2.7594660611219838e-06, "loss": 1.4499, "step": 8056 }, { "epoch": 0.66, "grad_norm": 4.61047399172766, "learning_rate": 2.7582827075374987e-06, "loss": 0.9271, "step": 8057 }, { "epoch": 0.66, "grad_norm": 5.410563440010257, "learning_rate": 2.7570995110876364e-06, "loss": 1.7803, "step": 8058 }, { "epoch": 0.66, "grad_norm": 3.106155063712242, "learning_rate": 2.75591647185534e-06, "loss": 0.3749, "step": 8059 }, { "epoch": 0.66, "grad_norm": 3.6487211232728236, "learning_rate": 2.7547335899235304e-06, "loss": 0.6993, "step": 8060 }, { "epoch": 0.66, "grad_norm": 3.1968521220751, "learning_rate": 2.7535508653751252e-06, "loss": 0.5003, "step": 8061 }, { "epoch": 0.66, "grad_norm": 2.1849582457043737, "learning_rate": 2.7523682982930278e-06, "loss": 0.4475, "step": 8062 }, { "epoch": 0.66, "grad_norm": 5.046657932174976, "learning_rate": 2.7511858887601304e-06, "loss": 0.7241, "step": 8063 }, { "epoch": 0.66, "grad_norm": 5.054131823721945, "learning_rate": 2.7500036368593153e-06, "loss": 1.1251, "step": 8064 }, { "epoch": 0.66, "grad_norm": 3.0832769446657418, "learning_rate": 2.7488215426734554e-06, "loss": 0.6398, "step": 8065 }, { "epoch": 0.66, "grad_norm": 3.3105219144880476, "learning_rate": 2.7476396062854065e-06, "loss": 0.5005, "step": 8066 }, { "epoch": 0.66, "grad_norm": 2.8711836856018884, "learning_rate": 2.7464578277780187e-06, "loss": 0.5241, "step": 8067 }, { "epoch": 0.66, "grad_norm": 4.941614254848235, "learning_rate": 2.74527620723413e-06, "loss": 0.7623, "step": 8068 }, { "epoch": 0.66, "grad_norm": 5.47330668948666, "learning_rate": 2.7440947447365664e-06, "loss": 0.9966, "step": 8069 }, { "epoch": 0.66, "grad_norm": 3.6291212023548796, "learning_rate": 2.7429134403681435e-06, "loss": 0.5474, "step": 8070 }, { "epoch": 0.66, "grad_norm": 4.157885256218511, "learning_rate": 2.741732294211667e-06, "loss": 1.1754, "step": 8071 }, { "epoch": 0.66, "grad_norm": 3.6000847904713003, "learning_rate": 2.740551306349927e-06, "loss": 0.9091, "step": 8072 }, { "epoch": 0.66, "grad_norm": 0.8847891185750434, "learning_rate": 2.739370476865707e-06, "loss": 0.1354, "step": 8073 }, { "epoch": 0.66, "grad_norm": 2.179574267260189, "learning_rate": 2.738189805841781e-06, "loss": 0.4367, "step": 8074 }, { "epoch": 0.66, "grad_norm": 4.54789867852281, "learning_rate": 2.7370092933609037e-06, "loss": 1.2261, "step": 8075 }, { "epoch": 0.66, "grad_norm": 3.0687699554112644, "learning_rate": 2.7358289395058284e-06, "loss": 0.5557, "step": 8076 }, { "epoch": 0.66, "grad_norm": 3.7615195258812313, "learning_rate": 2.7346487443592888e-06, "loss": 0.8794, "step": 8077 }, { "epoch": 0.66, "grad_norm": 3.876920388057226, "learning_rate": 2.7334687080040134e-06, "loss": 1.0369, "step": 8078 }, { "epoch": 0.66, "grad_norm": 4.344281397286893, "learning_rate": 2.732288830522718e-06, "loss": 0.9742, "step": 8079 }, { "epoch": 0.66, "grad_norm": 3.4912933701112414, "learning_rate": 2.731109111998106e-06, "loss": 0.6292, "step": 8080 }, { "epoch": 0.66, "grad_norm": 5.110955242715626, "learning_rate": 2.729929552512871e-06, "loss": 1.0262, "step": 8081 }, { "epoch": 0.66, "grad_norm": 3.7977212752673126, "learning_rate": 2.7287501521496966e-06, "loss": 0.7897, "step": 8082 }, { "epoch": 0.66, "grad_norm": 3.5688064240040456, "learning_rate": 2.7275709109912506e-06, "loss": 0.8668, "step": 8083 }, { "epoch": 0.66, "grad_norm": 3.3072778372473155, "learning_rate": 2.726391829120194e-06, "loss": 0.579, "step": 8084 }, { "epoch": 0.66, "grad_norm": 4.019471690935791, "learning_rate": 2.7252129066191758e-06, "loss": 0.6334, "step": 8085 }, { "epoch": 0.66, "grad_norm": 1.1563780617412596, "learning_rate": 2.7240341435708316e-06, "loss": 0.1597, "step": 8086 }, { "epoch": 0.66, "grad_norm": 3.7132075006676404, "learning_rate": 2.7228555400577904e-06, "loss": 0.5548, "step": 8087 }, { "epoch": 0.66, "grad_norm": 4.412152437555208, "learning_rate": 2.7216770961626672e-06, "loss": 0.8515, "step": 8088 }, { "epoch": 0.66, "grad_norm": 3.37663916419196, "learning_rate": 2.720498811968062e-06, "loss": 0.6956, "step": 8089 }, { "epoch": 0.66, "grad_norm": 2.9745511389014134, "learning_rate": 2.7193206875565715e-06, "loss": 0.764, "step": 8090 }, { "epoch": 0.66, "grad_norm": 4.1630440192612665, "learning_rate": 2.7181427230107738e-06, "loss": 0.8398, "step": 8091 }, { "epoch": 0.66, "grad_norm": 1.285493124951717, "learning_rate": 2.7169649184132403e-06, "loss": 0.1758, "step": 8092 }, { "epoch": 0.66, "grad_norm": 2.303442121768274, "learning_rate": 2.7157872738465317e-06, "loss": 0.4663, "step": 8093 }, { "epoch": 0.66, "grad_norm": 2.4441901971587376, "learning_rate": 2.714609789393193e-06, "loss": 0.5931, "step": 8094 }, { "epoch": 0.66, "grad_norm": 4.705057991518503, "learning_rate": 2.7134324651357625e-06, "loss": 0.9862, "step": 8095 }, { "epoch": 0.66, "grad_norm": 4.168949543449048, "learning_rate": 2.7122553011567636e-06, "loss": 0.5788, "step": 8096 }, { "epoch": 0.66, "grad_norm": 4.597665183472927, "learning_rate": 2.711078297538713e-06, "loss": 0.8572, "step": 8097 }, { "epoch": 0.66, "grad_norm": 3.510929055765163, "learning_rate": 2.7099014543641116e-06, "loss": 0.8463, "step": 8098 }, { "epoch": 0.66, "grad_norm": 2.4589615554959883, "learning_rate": 2.708724771715454e-06, "loss": 0.3028, "step": 8099 }, { "epoch": 0.66, "grad_norm": 1.1060126440401574, "learning_rate": 2.707548249675216e-06, "loss": 0.155, "step": 8100 }, { "epoch": 0.66, "grad_norm": 2.7680275308789666, "learning_rate": 2.7063718883258694e-06, "loss": 0.4032, "step": 8101 }, { "epoch": 0.66, "grad_norm": 2.683328697382099, "learning_rate": 2.7051956877498707e-06, "loss": 0.2433, "step": 8102 }, { "epoch": 0.66, "grad_norm": 3.528927015598015, "learning_rate": 2.7040196480296677e-06, "loss": 0.6996, "step": 8103 }, { "epoch": 0.66, "grad_norm": 1.9414316471303987, "learning_rate": 2.702843769247698e-06, "loss": 0.2005, "step": 8104 }, { "epoch": 0.66, "grad_norm": 2.1785227594638092, "learning_rate": 2.7016680514863796e-06, "loss": 0.3247, "step": 8105 }, { "epoch": 0.66, "grad_norm": 4.559538548866543, "learning_rate": 2.700492494828131e-06, "loss": 0.8933, "step": 8106 }, { "epoch": 0.66, "grad_norm": 2.3675280676526933, "learning_rate": 2.699317099355349e-06, "loss": 0.5503, "step": 8107 }, { "epoch": 0.66, "grad_norm": 3.46769595659413, "learning_rate": 2.6981418651504256e-06, "loss": 0.6654, "step": 8108 }, { "epoch": 0.66, "grad_norm": 1.3525915329662892, "learning_rate": 2.69696679229574e-06, "loss": 0.1927, "step": 8109 }, { "epoch": 0.66, "grad_norm": 3.8039875762312847, "learning_rate": 2.6957918808736593e-06, "loss": 0.7026, "step": 8110 }, { "epoch": 0.66, "grad_norm": 3.980857406008913, "learning_rate": 2.6946171309665413e-06, "loss": 0.7007, "step": 8111 }, { "epoch": 0.66, "grad_norm": 2.9225626645846536, "learning_rate": 2.693442542656728e-06, "loss": 0.6474, "step": 8112 }, { "epoch": 0.66, "grad_norm": 6.054818429499179, "learning_rate": 2.692268116026554e-06, "loss": 1.148, "step": 8113 }, { "epoch": 0.66, "grad_norm": 3.626438590520175, "learning_rate": 2.6910938511583424e-06, "loss": 0.7244, "step": 8114 }, { "epoch": 0.66, "grad_norm": 4.309158002175827, "learning_rate": 2.689919748134403e-06, "loss": 1.201, "step": 8115 }, { "epoch": 0.66, "grad_norm": 3.8702321853862456, "learning_rate": 2.6887458070370374e-06, "loss": 0.726, "step": 8116 }, { "epoch": 0.66, "grad_norm": 3.6861780001976747, "learning_rate": 2.6875720279485305e-06, "loss": 0.8203, "step": 8117 }, { "epoch": 0.66, "grad_norm": 4.616881851475864, "learning_rate": 2.686398410951161e-06, "loss": 0.5582, "step": 8118 }, { "epoch": 0.66, "grad_norm": 2.726228925977059, "learning_rate": 2.685224956127194e-06, "loss": 0.4601, "step": 8119 }, { "epoch": 0.66, "grad_norm": 3.5189700331903744, "learning_rate": 2.684051663558884e-06, "loss": 0.7785, "step": 8120 }, { "epoch": 0.66, "grad_norm": 3.1994821182522797, "learning_rate": 2.6828785333284736e-06, "loss": 0.5161, "step": 8121 }, { "epoch": 0.66, "grad_norm": 1.233334262619315, "learning_rate": 2.6817055655181947e-06, "loss": 0.1662, "step": 8122 }, { "epoch": 0.66, "grad_norm": 4.166189713882294, "learning_rate": 2.6805327602102647e-06, "loss": 1.0081, "step": 8123 }, { "epoch": 0.66, "grad_norm": 4.427378529153368, "learning_rate": 2.6793601174868934e-06, "loss": 0.8678, "step": 8124 }, { "epoch": 0.66, "grad_norm": 3.995741956743334, "learning_rate": 2.678187637430279e-06, "loss": 0.6496, "step": 8125 }, { "epoch": 0.66, "grad_norm": 2.462789535181405, "learning_rate": 2.677015320122607e-06, "loss": 0.4996, "step": 8126 }, { "epoch": 0.66, "grad_norm": 4.286132345182877, "learning_rate": 2.6758431656460503e-06, "loss": 1.2132, "step": 8127 }, { "epoch": 0.66, "grad_norm": 4.582607649074299, "learning_rate": 2.6746711740827757e-06, "loss": 0.9266, "step": 8128 }, { "epoch": 0.66, "grad_norm": 2.279407521043276, "learning_rate": 2.6734993455149295e-06, "loss": 0.2849, "step": 8129 }, { "epoch": 0.66, "grad_norm": 3.293848497282886, "learning_rate": 2.6723276800246544e-06, "loss": 0.7391, "step": 8130 }, { "epoch": 0.66, "grad_norm": 4.5846162274642515, "learning_rate": 2.671156177694079e-06, "loss": 0.8377, "step": 8131 }, { "epoch": 0.66, "grad_norm": 4.437207797111822, "learning_rate": 2.6699848386053208e-06, "loss": 1.0667, "step": 8132 }, { "epoch": 0.66, "grad_norm": 3.6627445024534078, "learning_rate": 2.6688136628404858e-06, "loss": 0.9905, "step": 8133 }, { "epoch": 0.66, "grad_norm": 3.202182827673672, "learning_rate": 2.6676426504816666e-06, "loss": 0.5172, "step": 8134 }, { "epoch": 0.66, "grad_norm": 4.655701150778445, "learning_rate": 2.666471801610947e-06, "loss": 0.7972, "step": 8135 }, { "epoch": 0.67, "grad_norm": 5.265227474862339, "learning_rate": 2.665301116310401e-06, "loss": 1.0035, "step": 8136 }, { "epoch": 0.67, "grad_norm": 2.2479087704532916, "learning_rate": 2.664130594662083e-06, "loss": 0.3387, "step": 8137 }, { "epoch": 0.67, "grad_norm": 4.905996753323291, "learning_rate": 2.6629602367480456e-06, "loss": 1.3322, "step": 8138 }, { "epoch": 0.67, "grad_norm": 2.489019225321688, "learning_rate": 2.6617900426503267e-06, "loss": 0.4995, "step": 8139 }, { "epoch": 0.67, "grad_norm": 3.9753243013472632, "learning_rate": 2.6606200124509474e-06, "loss": 0.9357, "step": 8140 }, { "epoch": 0.67, "grad_norm": 3.2508496008821526, "learning_rate": 2.6594501462319243e-06, "loss": 0.4631, "step": 8141 }, { "epoch": 0.67, "grad_norm": 3.6037522115894522, "learning_rate": 2.658280444075261e-06, "loss": 0.8874, "step": 8142 }, { "epoch": 0.67, "grad_norm": 2.9317164559359346, "learning_rate": 2.657110906062946e-06, "loss": 0.6426, "step": 8143 }, { "epoch": 0.67, "grad_norm": 6.017729186475728, "learning_rate": 2.6559415322769604e-06, "loss": 1.4262, "step": 8144 }, { "epoch": 0.67, "grad_norm": 2.8748166235299153, "learning_rate": 2.654772322799274e-06, "loss": 0.509, "step": 8145 }, { "epoch": 0.67, "grad_norm": 3.122497071149187, "learning_rate": 2.65360327771184e-06, "loss": 0.7123, "step": 8146 }, { "epoch": 0.67, "grad_norm": 1.6791406594043676, "learning_rate": 2.6524343970966036e-06, "loss": 0.1717, "step": 8147 }, { "epoch": 0.67, "grad_norm": 1.7615678659571652, "learning_rate": 2.6512656810355e-06, "loss": 0.3311, "step": 8148 }, { "epoch": 0.67, "grad_norm": 3.697669974612375, "learning_rate": 2.6500971296104506e-06, "loss": 0.84, "step": 8149 }, { "epoch": 0.67, "grad_norm": 2.7332577255496306, "learning_rate": 2.648928742903367e-06, "loss": 0.5106, "step": 8150 }, { "epoch": 0.67, "grad_norm": 4.330105445552753, "learning_rate": 2.6477605209961453e-06, "loss": 0.7977, "step": 8151 }, { "epoch": 0.67, "grad_norm": 4.589037364480364, "learning_rate": 2.6465924639706753e-06, "loss": 1.2426, "step": 8152 }, { "epoch": 0.67, "grad_norm": 1.6558531337873237, "learning_rate": 2.64542457190883e-06, "loss": 0.1925, "step": 8153 }, { "epoch": 0.67, "grad_norm": 3.077278142721715, "learning_rate": 2.6442568448924754e-06, "loss": 0.4792, "step": 8154 }, { "epoch": 0.67, "grad_norm": 5.43354957949788, "learning_rate": 2.6430892830034634e-06, "loss": 0.8561, "step": 8155 }, { "epoch": 0.67, "grad_norm": 3.8153529256250165, "learning_rate": 2.6419218863236374e-06, "loss": 0.4848, "step": 8156 }, { "epoch": 0.67, "grad_norm": 2.555375178166355, "learning_rate": 2.640754654934823e-06, "loss": 0.4334, "step": 8157 }, { "epoch": 0.67, "grad_norm": 5.515588811786006, "learning_rate": 2.6395875889188393e-06, "loss": 1.1924, "step": 8158 }, { "epoch": 0.67, "grad_norm": 3.011717132887081, "learning_rate": 2.638420688357493e-06, "loss": 0.5709, "step": 8159 }, { "epoch": 0.67, "grad_norm": 2.4878893153708304, "learning_rate": 2.6372539533325793e-06, "loss": 0.546, "step": 8160 }, { "epoch": 0.67, "grad_norm": 4.902388199732581, "learning_rate": 2.6360873839258804e-06, "loss": 1.0235, "step": 8161 }, { "epoch": 0.67, "grad_norm": 4.194007613848852, "learning_rate": 2.63492098021917e-06, "loss": 0.7716, "step": 8162 }, { "epoch": 0.67, "grad_norm": 3.464355787218569, "learning_rate": 2.6337547422942046e-06, "loss": 0.4862, "step": 8163 }, { "epoch": 0.67, "grad_norm": 4.964077826087806, "learning_rate": 2.6325886702327335e-06, "loss": 1.1743, "step": 8164 }, { "epoch": 0.67, "grad_norm": 4.992992442416339, "learning_rate": 2.6314227641164936e-06, "loss": 1.2173, "step": 8165 }, { "epoch": 0.67, "grad_norm": 4.2534889852906685, "learning_rate": 2.6302570240272118e-06, "loss": 1.0937, "step": 8166 }, { "epoch": 0.67, "grad_norm": 2.036733498750401, "learning_rate": 2.629091450046598e-06, "loss": 0.3887, "step": 8167 }, { "epoch": 0.67, "grad_norm": 3.7368829772271814, "learning_rate": 2.6279260422563567e-06, "loss": 0.7874, "step": 8168 }, { "epoch": 0.67, "grad_norm": 2.411251795911712, "learning_rate": 2.6267608007381745e-06, "loss": 0.4679, "step": 8169 }, { "epoch": 0.67, "grad_norm": 3.112337241361299, "learning_rate": 2.625595725573732e-06, "loss": 0.5023, "step": 8170 }, { "epoch": 0.67, "grad_norm": 2.904542380357021, "learning_rate": 2.6244308168446958e-06, "loss": 0.6493, "step": 8171 }, { "epoch": 0.67, "grad_norm": 2.8234626865452395, "learning_rate": 2.623266074632721e-06, "loss": 0.5458, "step": 8172 }, { "epoch": 0.67, "grad_norm": 5.07468524187268, "learning_rate": 2.622101499019453e-06, "loss": 1.0779, "step": 8173 }, { "epoch": 0.67, "grad_norm": 3.548773180308887, "learning_rate": 2.6209370900865183e-06, "loss": 0.4463, "step": 8174 }, { "epoch": 0.67, "grad_norm": 3.957265224808206, "learning_rate": 2.6197728479155403e-06, "loss": 0.79, "step": 8175 }, { "epoch": 0.67, "grad_norm": 1.9613969150310653, "learning_rate": 2.618608772588127e-06, "loss": 0.3682, "step": 8176 }, { "epoch": 0.67, "grad_norm": 4.452334592025262, "learning_rate": 2.6174448641858744e-06, "loss": 0.4594, "step": 8177 }, { "epoch": 0.67, "grad_norm": 5.301706376374276, "learning_rate": 2.6162811227903683e-06, "loss": 0.8597, "step": 8178 }, { "epoch": 0.67, "grad_norm": 5.402394813245053, "learning_rate": 2.6151175484831835e-06, "loss": 1.2955, "step": 8179 }, { "epoch": 0.67, "grad_norm": 4.030558375461062, "learning_rate": 2.613954141345878e-06, "loss": 0.63, "step": 8180 }, { "epoch": 0.67, "grad_norm": 4.090141052480777, "learning_rate": 2.612790901460003e-06, "loss": 0.8791, "step": 8181 }, { "epoch": 0.67, "grad_norm": 2.8795763058173005, "learning_rate": 2.6116278289070963e-06, "loss": 0.6026, "step": 8182 }, { "epoch": 0.67, "grad_norm": 5.105086730112901, "learning_rate": 2.6104649237686864e-06, "loss": 1.0118, "step": 8183 }, { "epoch": 0.67, "grad_norm": 2.711704806278075, "learning_rate": 2.609302186126284e-06, "loss": 0.324, "step": 8184 }, { "epoch": 0.67, "grad_norm": 5.544883390181114, "learning_rate": 2.6081396160613957e-06, "loss": 1.4639, "step": 8185 }, { "epoch": 0.67, "grad_norm": 3.220550620883011, "learning_rate": 2.60697721365551e-06, "loss": 0.5899, "step": 8186 }, { "epoch": 0.67, "grad_norm": 2.0355312131320495, "learning_rate": 2.6058149789901066e-06, "loss": 0.2735, "step": 8187 }, { "epoch": 0.67, "grad_norm": 3.8453055950699087, "learning_rate": 2.6046529121466537e-06, "loss": 0.6375, "step": 8188 }, { "epoch": 0.67, "grad_norm": 4.551515254155337, "learning_rate": 2.6034910132066066e-06, "loss": 0.675, "step": 8189 }, { "epoch": 0.67, "grad_norm": 3.613874741705259, "learning_rate": 2.60232928225141e-06, "loss": 0.5582, "step": 8190 }, { "epoch": 0.67, "grad_norm": 4.874853243116013, "learning_rate": 2.6011677193624984e-06, "loss": 1.1525, "step": 8191 }, { "epoch": 0.67, "grad_norm": 4.102358534071514, "learning_rate": 2.6000063246212882e-06, "loss": 0.68, "step": 8192 }, { "epoch": 0.67, "grad_norm": 4.009976735808661, "learning_rate": 2.598845098109189e-06, "loss": 1.2542, "step": 8193 }, { "epoch": 0.67, "grad_norm": 3.4149214680031434, "learning_rate": 2.5976840399075987e-06, "loss": 0.8226, "step": 8194 }, { "epoch": 0.67, "grad_norm": 2.9917850748077006, "learning_rate": 2.5965231500979026e-06, "loss": 0.5335, "step": 8195 }, { "epoch": 0.67, "grad_norm": 4.042737043474195, "learning_rate": 2.595362428761476e-06, "loss": 0.8692, "step": 8196 }, { "epoch": 0.67, "grad_norm": 4.079629908302849, "learning_rate": 2.5942018759796756e-06, "loss": 1.0781, "step": 8197 }, { "epoch": 0.67, "grad_norm": 6.623417473132643, "learning_rate": 2.5930414918338542e-06, "loss": 1.107, "step": 8198 }, { "epoch": 0.67, "grad_norm": 4.6628269852169915, "learning_rate": 2.59188127640535e-06, "loss": 1.1345, "step": 8199 }, { "epoch": 0.67, "grad_norm": 3.9365790682542925, "learning_rate": 2.590721229775487e-06, "loss": 0.7339, "step": 8200 }, { "epoch": 0.67, "grad_norm": 3.549101199921481, "learning_rate": 2.589561352025581e-06, "loss": 0.5565, "step": 8201 }, { "epoch": 0.67, "grad_norm": 3.8907557400194692, "learning_rate": 2.5884016432369352e-06, "loss": 0.827, "step": 8202 }, { "epoch": 0.67, "grad_norm": 5.923777107347025, "learning_rate": 2.587242103490837e-06, "loss": 0.958, "step": 8203 }, { "epoch": 0.67, "grad_norm": 3.0709012492772443, "learning_rate": 2.586082732868567e-06, "loss": 0.5223, "step": 8204 }, { "epoch": 0.67, "grad_norm": 4.845501551249635, "learning_rate": 2.5849235314513923e-06, "loss": 0.9612, "step": 8205 }, { "epoch": 0.67, "grad_norm": 3.085875781413251, "learning_rate": 2.583764499320567e-06, "loss": 0.64, "step": 8206 }, { "epoch": 0.67, "grad_norm": 1.994995047525389, "learning_rate": 2.5826056365573356e-06, "loss": 0.421, "step": 8207 }, { "epoch": 0.67, "grad_norm": 3.005993934062253, "learning_rate": 2.58144694324293e-06, "loss": 0.7486, "step": 8208 }, { "epoch": 0.67, "grad_norm": 3.7070171019910383, "learning_rate": 2.5802884194585664e-06, "loss": 0.4059, "step": 8209 }, { "epoch": 0.67, "grad_norm": 3.6311262154020905, "learning_rate": 2.5791300652854536e-06, "loss": 0.421, "step": 8210 }, { "epoch": 0.67, "grad_norm": 3.126244190997835, "learning_rate": 2.5779718808047882e-06, "loss": 0.8678, "step": 8211 }, { "epoch": 0.67, "grad_norm": 4.054898585744836, "learning_rate": 2.576813866097753e-06, "loss": 0.7063, "step": 8212 }, { "epoch": 0.67, "grad_norm": 5.269210773538143, "learning_rate": 2.5756560212455216e-06, "loss": 1.0215, "step": 8213 }, { "epoch": 0.67, "grad_norm": 5.403454994734814, "learning_rate": 2.5744983463292504e-06, "loss": 0.9039, "step": 8214 }, { "epoch": 0.67, "grad_norm": 2.943293988287031, "learning_rate": 2.5733408414300914e-06, "loss": 0.5114, "step": 8215 }, { "epoch": 0.67, "grad_norm": 2.0435797640128968, "learning_rate": 2.5721835066291767e-06, "loss": 0.2742, "step": 8216 }, { "epoch": 0.67, "grad_norm": 3.8488265573426546, "learning_rate": 2.571026342007632e-06, "loss": 0.9684, "step": 8217 }, { "epoch": 0.67, "grad_norm": 2.298377465252303, "learning_rate": 2.5698693476465704e-06, "loss": 0.3515, "step": 8218 }, { "epoch": 0.67, "grad_norm": 3.391490206326064, "learning_rate": 2.568712523627093e-06, "loss": 0.6765, "step": 8219 }, { "epoch": 0.67, "grad_norm": 4.595083739065445, "learning_rate": 2.567555870030285e-06, "loss": 1.0814, "step": 8220 }, { "epoch": 0.67, "grad_norm": 5.22699420619472, "learning_rate": 2.5663993869372244e-06, "loss": 0.8324, "step": 8221 }, { "epoch": 0.67, "grad_norm": 3.8398058751542443, "learning_rate": 2.565243074428976e-06, "loss": 0.7015, "step": 8222 }, { "epoch": 0.67, "grad_norm": 2.710784195102657, "learning_rate": 2.5640869325865912e-06, "loss": 0.4557, "step": 8223 }, { "epoch": 0.67, "grad_norm": 3.9712039669774226, "learning_rate": 2.5629309614911123e-06, "loss": 0.6181, "step": 8224 }, { "epoch": 0.67, "grad_norm": 4.573667204632561, "learning_rate": 2.561775161223568e-06, "loss": 1.0692, "step": 8225 }, { "epoch": 0.67, "grad_norm": 3.3745376434268177, "learning_rate": 2.560619531864972e-06, "loss": 0.6221, "step": 8226 }, { "epoch": 0.67, "grad_norm": 3.8847605733345594, "learning_rate": 2.5594640734963306e-06, "loss": 0.7908, "step": 8227 }, { "epoch": 0.67, "grad_norm": 4.16579864794284, "learning_rate": 2.5583087861986365e-06, "loss": 0.7152, "step": 8228 }, { "epoch": 0.67, "grad_norm": 1.332205382754697, "learning_rate": 2.557153670052872e-06, "loss": 0.1777, "step": 8229 }, { "epoch": 0.67, "grad_norm": 3.4438633903948213, "learning_rate": 2.5559987251400024e-06, "loss": 0.5551, "step": 8230 }, { "epoch": 0.67, "grad_norm": 1.765037699508793, "learning_rate": 2.554843951540987e-06, "loss": 0.1958, "step": 8231 }, { "epoch": 0.67, "grad_norm": 4.59795873464682, "learning_rate": 2.553689349336769e-06, "loss": 0.8692, "step": 8232 }, { "epoch": 0.67, "grad_norm": 4.002247911399786, "learning_rate": 2.5525349186082793e-06, "loss": 0.8691, "step": 8233 }, { "epoch": 0.67, "grad_norm": 3.9778972950174603, "learning_rate": 2.551380659436441e-06, "loss": 0.5852, "step": 8234 }, { "epoch": 0.67, "grad_norm": 3.141822415205915, "learning_rate": 2.550226571902162e-06, "loss": 0.4145, "step": 8235 }, { "epoch": 0.67, "grad_norm": 3.8667126450551237, "learning_rate": 2.549072656086341e-06, "loss": 0.6026, "step": 8236 }, { "epoch": 0.67, "grad_norm": 4.996300879810245, "learning_rate": 2.547918912069859e-06, "loss": 0.9623, "step": 8237 }, { "epoch": 0.67, "grad_norm": 3.6904477810105543, "learning_rate": 2.546765339933589e-06, "loss": 0.7617, "step": 8238 }, { "epoch": 0.67, "grad_norm": 4.581868726425581, "learning_rate": 2.5456119397583923e-06, "loss": 0.853, "step": 8239 }, { "epoch": 0.67, "grad_norm": 6.0197529089066615, "learning_rate": 2.544458711625117e-06, "loss": 1.4534, "step": 8240 }, { "epoch": 0.67, "grad_norm": 3.83866500070881, "learning_rate": 2.5433056556145996e-06, "loss": 0.9129, "step": 8241 }, { "epoch": 0.67, "grad_norm": 3.852122294853424, "learning_rate": 2.5421527718076657e-06, "loss": 0.7918, "step": 8242 }, { "epoch": 0.67, "grad_norm": 4.895027520673276, "learning_rate": 2.541000060285125e-06, "loss": 0.7276, "step": 8243 }, { "epoch": 0.67, "grad_norm": 3.2427021641886995, "learning_rate": 2.539847521127777e-06, "loss": 0.5534, "step": 8244 }, { "epoch": 0.67, "grad_norm": 1.890394714776982, "learning_rate": 2.538695154416414e-06, "loss": 0.2905, "step": 8245 }, { "epoch": 0.67, "grad_norm": 5.208652062064113, "learning_rate": 2.537542960231807e-06, "loss": 1.1838, "step": 8246 }, { "epoch": 0.67, "grad_norm": 5.599313100308787, "learning_rate": 2.536390938654722e-06, "loss": 1.379, "step": 8247 }, { "epoch": 0.67, "grad_norm": 2.5905100343439336, "learning_rate": 2.535239089765912e-06, "loss": 0.6203, "step": 8248 }, { "epoch": 0.67, "grad_norm": 3.955544128632737, "learning_rate": 2.5340874136461138e-06, "loss": 0.8615, "step": 8249 }, { "epoch": 0.67, "grad_norm": 2.1298616750007944, "learning_rate": 2.5329359103760555e-06, "loss": 0.5261, "step": 8250 }, { "epoch": 0.67, "grad_norm": 4.328799199804836, "learning_rate": 2.5317845800364538e-06, "loss": 1.198, "step": 8251 }, { "epoch": 0.67, "grad_norm": 1.3281557786608587, "learning_rate": 2.530633422708011e-06, "loss": 0.1665, "step": 8252 }, { "epoch": 0.67, "grad_norm": 3.7688107112634257, "learning_rate": 2.529482438471421e-06, "loss": 0.6611, "step": 8253 }, { "epoch": 0.67, "grad_norm": 6.398786157877805, "learning_rate": 2.5283316274073577e-06, "loss": 1.1927, "step": 8254 }, { "epoch": 0.67, "grad_norm": 4.653815342421929, "learning_rate": 2.527180989596491e-06, "loss": 1.0186, "step": 8255 }, { "epoch": 0.67, "grad_norm": 4.075590632175973, "learning_rate": 2.526030525119475e-06, "loss": 0.6921, "step": 8256 }, { "epoch": 0.67, "grad_norm": 4.855568441119522, "learning_rate": 2.524880234056952e-06, "loss": 1.2602, "step": 8257 }, { "epoch": 0.67, "grad_norm": 4.687147851686595, "learning_rate": 2.5237301164895538e-06, "loss": 0.6533, "step": 8258 }, { "epoch": 0.68, "grad_norm": 4.554934045264841, "learning_rate": 2.5225801724978994e-06, "loss": 0.7608, "step": 8259 }, { "epoch": 0.68, "grad_norm": 5.047030105038087, "learning_rate": 2.5214304021625906e-06, "loss": 0.8476, "step": 8260 }, { "epoch": 0.68, "grad_norm": 2.319276269283469, "learning_rate": 2.5202808055642264e-06, "loss": 0.2347, "step": 8261 }, { "epoch": 0.68, "grad_norm": 4.030062028737353, "learning_rate": 2.5191313827833834e-06, "loss": 0.6413, "step": 8262 }, { "epoch": 0.68, "grad_norm": 4.86433153661269, "learning_rate": 2.517982133900634e-06, "loss": 0.6122, "step": 8263 }, { "epoch": 0.68, "grad_norm": 4.360572337027347, "learning_rate": 2.5168330589965356e-06, "loss": 0.8149, "step": 8264 }, { "epoch": 0.68, "grad_norm": 4.517862354534247, "learning_rate": 2.5156841581516344e-06, "loss": 0.7741, "step": 8265 }, { "epoch": 0.68, "grad_norm": 5.048209300032961, "learning_rate": 2.5145354314464606e-06, "loss": 1.0705, "step": 8266 }, { "epoch": 0.68, "grad_norm": 4.9624911465341786, "learning_rate": 2.5133868789615357e-06, "loss": 1.0833, "step": 8267 }, { "epoch": 0.68, "grad_norm": 2.4665013356509604, "learning_rate": 2.5122385007773685e-06, "loss": 0.337, "step": 8268 }, { "epoch": 0.68, "grad_norm": 3.2481453941790934, "learning_rate": 2.5110902969744567e-06, "loss": 0.4088, "step": 8269 }, { "epoch": 0.68, "grad_norm": 2.853603954957759, "learning_rate": 2.5099422676332825e-06, "loss": 0.3283, "step": 8270 }, { "epoch": 0.68, "grad_norm": 5.159783771639473, "learning_rate": 2.508794412834321e-06, "loss": 1.1678, "step": 8271 }, { "epoch": 0.68, "grad_norm": 5.484698844000725, "learning_rate": 2.507646732658027e-06, "loss": 0.9776, "step": 8272 }, { "epoch": 0.68, "grad_norm": 4.201518136585807, "learning_rate": 2.5064992271848504e-06, "loss": 0.5837, "step": 8273 }, { "epoch": 0.68, "grad_norm": 2.9111991079769775, "learning_rate": 2.505351896495226e-06, "loss": 0.7384, "step": 8274 }, { "epoch": 0.68, "grad_norm": 4.718743486607333, "learning_rate": 2.504204740669579e-06, "loss": 0.7954, "step": 8275 }, { "epoch": 0.68, "grad_norm": 3.008261239270777, "learning_rate": 2.5030577597883166e-06, "loss": 0.6152, "step": 8276 }, { "epoch": 0.68, "grad_norm": 7.10932136638636, "learning_rate": 2.5019109539318374e-06, "loss": 1.3207, "step": 8277 }, { "epoch": 0.68, "grad_norm": 3.8281334738196775, "learning_rate": 2.5007643231805316e-06, "loss": 0.5473, "step": 8278 }, { "epoch": 0.68, "grad_norm": 2.88466449570412, "learning_rate": 2.499617867614768e-06, "loss": 0.5475, "step": 8279 }, { "epoch": 0.68, "grad_norm": 4.86738131822349, "learning_rate": 2.4984715873149097e-06, "loss": 0.9485, "step": 8280 }, { "epoch": 0.68, "grad_norm": 4.210014976007869, "learning_rate": 2.497325482361307e-06, "loss": 0.8218, "step": 8281 }, { "epoch": 0.68, "grad_norm": 3.485578773064971, "learning_rate": 2.4961795528342977e-06, "loss": 0.6658, "step": 8282 }, { "epoch": 0.68, "grad_norm": 5.424818209340035, "learning_rate": 2.495033798814203e-06, "loss": 1.1699, "step": 8283 }, { "epoch": 0.68, "grad_norm": 2.1328923324244364, "learning_rate": 2.493888220381338e-06, "loss": 0.3901, "step": 8284 }, { "epoch": 0.68, "grad_norm": 4.107847943479869, "learning_rate": 2.492742817616002e-06, "loss": 1.0158, "step": 8285 }, { "epoch": 0.68, "grad_norm": 3.08409785382572, "learning_rate": 2.4915975905984825e-06, "loss": 0.5241, "step": 8286 }, { "epoch": 0.68, "grad_norm": 2.605708211617868, "learning_rate": 2.490452539409055e-06, "loss": 0.6845, "step": 8287 }, { "epoch": 0.68, "grad_norm": 3.7541933540948054, "learning_rate": 2.4893076641279857e-06, "loss": 0.7459, "step": 8288 }, { "epoch": 0.68, "grad_norm": 4.325334689993187, "learning_rate": 2.4881629648355197e-06, "loss": 0.7375, "step": 8289 }, { "epoch": 0.68, "grad_norm": 4.571401693590738, "learning_rate": 2.487018441611899e-06, "loss": 1.1225, "step": 8290 }, { "epoch": 0.68, "grad_norm": 4.972920543433598, "learning_rate": 2.485874094537349e-06, "loss": 1.2411, "step": 8291 }, { "epoch": 0.68, "grad_norm": 2.8754292288507934, "learning_rate": 2.484729923692085e-06, "loss": 0.6286, "step": 8292 }, { "epoch": 0.68, "grad_norm": 1.3322231779253642, "learning_rate": 2.4835859291563054e-06, "loss": 0.2827, "step": 8293 }, { "epoch": 0.68, "grad_norm": 4.1227954724856115, "learning_rate": 2.4824421110102022e-06, "loss": 0.7501, "step": 8294 }, { "epoch": 0.68, "grad_norm": 3.5513355488999294, "learning_rate": 2.481298469333949e-06, "loss": 0.5861, "step": 8295 }, { "epoch": 0.68, "grad_norm": 3.4934041611030273, "learning_rate": 2.4801550042077118e-06, "loss": 0.8104, "step": 8296 }, { "epoch": 0.68, "grad_norm": 4.914640469913183, "learning_rate": 2.4790117157116417e-06, "loss": 1.4283, "step": 8297 }, { "epoch": 0.68, "grad_norm": 5.321212069371395, "learning_rate": 2.477868603925879e-06, "loss": 0.7995, "step": 8298 }, { "epoch": 0.68, "grad_norm": 2.9954868489166917, "learning_rate": 2.4767256689305537e-06, "loss": 0.8571, "step": 8299 }, { "epoch": 0.68, "grad_norm": 2.999989814980641, "learning_rate": 2.475582910805775e-06, "loss": 0.6888, "step": 8300 }, { "epoch": 0.68, "grad_norm": 3.4585531621945518, "learning_rate": 2.474440329631648e-06, "loss": 0.7443, "step": 8301 }, { "epoch": 0.68, "grad_norm": 3.4130655659528393, "learning_rate": 2.473297925488263e-06, "loss": 0.8947, "step": 8302 }, { "epoch": 0.68, "grad_norm": 7.689007018264186, "learning_rate": 2.4721556984556968e-06, "loss": 1.1999, "step": 8303 }, { "epoch": 0.68, "grad_norm": 4.470032069770875, "learning_rate": 2.471013648614015e-06, "loss": 1.0237, "step": 8304 }, { "epoch": 0.68, "grad_norm": 2.3840771568384413, "learning_rate": 2.4698717760432723e-06, "loss": 0.3854, "step": 8305 }, { "epoch": 0.68, "grad_norm": 3.5984289964252936, "learning_rate": 2.468730080823505e-06, "loss": 0.5884, "step": 8306 }, { "epoch": 0.68, "grad_norm": 2.580650107432294, "learning_rate": 2.4675885630347423e-06, "loss": 0.4019, "step": 8307 }, { "epoch": 0.68, "grad_norm": 3.814392672951054, "learning_rate": 2.466447222757003e-06, "loss": 1.1511, "step": 8308 }, { "epoch": 0.68, "grad_norm": 3.8923529634070535, "learning_rate": 2.465306060070285e-06, "loss": 0.7966, "step": 8309 }, { "epoch": 0.68, "grad_norm": 2.927392623000533, "learning_rate": 2.4641650750545816e-06, "loss": 0.4876, "step": 8310 }, { "epoch": 0.68, "grad_norm": 2.713209401249808, "learning_rate": 2.4630242677898718e-06, "loss": 0.5281, "step": 8311 }, { "epoch": 0.68, "grad_norm": 4.069931845338178, "learning_rate": 2.461883638356118e-06, "loss": 1.0768, "step": 8312 }, { "epoch": 0.68, "grad_norm": 3.043007353870255, "learning_rate": 2.4607431868332756e-06, "loss": 0.7361, "step": 8313 }, { "epoch": 0.68, "grad_norm": 4.543555884846014, "learning_rate": 2.4596029133012845e-06, "loss": 0.5821, "step": 8314 }, { "epoch": 0.68, "grad_norm": 3.6930194005161634, "learning_rate": 2.4584628178400737e-06, "loss": 0.4279, "step": 8315 }, { "epoch": 0.68, "grad_norm": 3.3378103611275263, "learning_rate": 2.45732290052956e-06, "loss": 0.591, "step": 8316 }, { "epoch": 0.68, "grad_norm": 3.6515986206695095, "learning_rate": 2.456183161449644e-06, "loss": 0.696, "step": 8317 }, { "epoch": 0.68, "grad_norm": 2.368809991491645, "learning_rate": 2.455043600680217e-06, "loss": 0.237, "step": 8318 }, { "epoch": 0.68, "grad_norm": 4.278257713890669, "learning_rate": 2.4539042183011585e-06, "loss": 1.0608, "step": 8319 }, { "epoch": 0.68, "grad_norm": 3.390456938320225, "learning_rate": 2.4527650143923334e-06, "loss": 0.5158, "step": 8320 }, { "epoch": 0.68, "grad_norm": 2.320303803659349, "learning_rate": 2.4516259890335947e-06, "loss": 0.322, "step": 8321 }, { "epoch": 0.68, "grad_norm": 3.9903038975532277, "learning_rate": 2.450487142304786e-06, "loss": 0.8123, "step": 8322 }, { "epoch": 0.68, "grad_norm": 3.1891243338684485, "learning_rate": 2.4493484742857316e-06, "loss": 0.4653, "step": 8323 }, { "epoch": 0.68, "grad_norm": 3.5908340128294984, "learning_rate": 2.4482099850562496e-06, "loss": 0.6931, "step": 8324 }, { "epoch": 0.68, "grad_norm": 3.3660223577689057, "learning_rate": 2.447071674696141e-06, "loss": 0.4745, "step": 8325 }, { "epoch": 0.68, "grad_norm": 3.8100675807839512, "learning_rate": 2.4459335432851977e-06, "loss": 0.9717, "step": 8326 }, { "epoch": 0.68, "grad_norm": 4.339816506479468, "learning_rate": 2.4447955909031973e-06, "loss": 1.1936, "step": 8327 }, { "epoch": 0.68, "grad_norm": 4.270165734853613, "learning_rate": 2.443657817629908e-06, "loss": 1.266, "step": 8328 }, { "epoch": 0.68, "grad_norm": 1.1624670879267303, "learning_rate": 2.442520223545078e-06, "loss": 0.1972, "step": 8329 }, { "epoch": 0.68, "grad_norm": 3.0678233836269317, "learning_rate": 2.4413828087284504e-06, "loss": 0.2616, "step": 8330 }, { "epoch": 0.68, "grad_norm": 4.583266213174848, "learning_rate": 2.440245573259753e-06, "loss": 0.9658, "step": 8331 }, { "epoch": 0.68, "grad_norm": 4.120895013381585, "learning_rate": 2.4391085172187005e-06, "loss": 0.6695, "step": 8332 }, { "epoch": 0.68, "grad_norm": 2.680942057669382, "learning_rate": 2.437971640684998e-06, "loss": 0.5309, "step": 8333 }, { "epoch": 0.68, "grad_norm": 5.200500902511551, "learning_rate": 2.4368349437383314e-06, "loss": 1.583, "step": 8334 }, { "epoch": 0.68, "grad_norm": 4.202678182159394, "learning_rate": 2.4356984264583806e-06, "loss": 0.8447, "step": 8335 }, { "epoch": 0.68, "grad_norm": 2.9494206946055037, "learning_rate": 2.43456208892481e-06, "loss": 0.5021, "step": 8336 }, { "epoch": 0.68, "grad_norm": 1.2127148178284841, "learning_rate": 2.433425931217272e-06, "loss": 0.1644, "step": 8337 }, { "epoch": 0.68, "grad_norm": 4.0608201347588615, "learning_rate": 2.4322899534154085e-06, "loss": 0.9022, "step": 8338 }, { "epoch": 0.68, "grad_norm": 4.657162564141952, "learning_rate": 2.4311541555988433e-06, "loss": 0.8073, "step": 8339 }, { "epoch": 0.68, "grad_norm": 3.011262415440675, "learning_rate": 2.430018537847193e-06, "loss": 0.4338, "step": 8340 }, { "epoch": 0.68, "grad_norm": 4.61578686555714, "learning_rate": 2.4288831002400574e-06, "loss": 1.118, "step": 8341 }, { "epoch": 0.68, "grad_norm": 4.3072127189866585, "learning_rate": 2.427747842857027e-06, "loss": 0.9043, "step": 8342 }, { "epoch": 0.68, "grad_norm": 2.891045801602025, "learning_rate": 2.4266127657776777e-06, "loss": 0.6071, "step": 8343 }, { "epoch": 0.68, "grad_norm": 4.199359033982037, "learning_rate": 2.4254778690815743e-06, "loss": 0.8963, "step": 8344 }, { "epoch": 0.68, "grad_norm": 4.821403285387405, "learning_rate": 2.42434315284827e-06, "loss": 0.7843, "step": 8345 }, { "epoch": 0.68, "grad_norm": 2.576793763179655, "learning_rate": 2.4232086171572993e-06, "loss": 0.3895, "step": 8346 }, { "epoch": 0.68, "grad_norm": 4.1108494663998565, "learning_rate": 2.4220742620881906e-06, "loss": 0.9387, "step": 8347 }, { "epoch": 0.68, "grad_norm": 4.403348312465952, "learning_rate": 2.420940087720457e-06, "loss": 0.6194, "step": 8348 }, { "epoch": 0.68, "grad_norm": 2.4689509241681535, "learning_rate": 2.4198060941335987e-06, "loss": 0.5313, "step": 8349 }, { "epoch": 0.68, "grad_norm": 2.4975703045065374, "learning_rate": 2.4186722814071043e-06, "loss": 0.5038, "step": 8350 }, { "epoch": 0.68, "grad_norm": 1.4524371048512315, "learning_rate": 2.4175386496204513e-06, "loss": 0.3769, "step": 8351 }, { "epoch": 0.68, "grad_norm": 5.33869852814967, "learning_rate": 2.416405198853098e-06, "loss": 0.9853, "step": 8352 }, { "epoch": 0.68, "grad_norm": 2.479560559718521, "learning_rate": 2.415271929184496e-06, "loss": 0.3689, "step": 8353 }, { "epoch": 0.68, "grad_norm": 3.584706802703382, "learning_rate": 2.4141388406940852e-06, "loss": 0.5739, "step": 8354 }, { "epoch": 0.68, "grad_norm": 3.314608982252052, "learning_rate": 2.413005933461286e-06, "loss": 0.6743, "step": 8355 }, { "epoch": 0.68, "grad_norm": 3.9993676635570075, "learning_rate": 2.4118732075655144e-06, "loss": 0.7207, "step": 8356 }, { "epoch": 0.68, "grad_norm": 5.742565487556265, "learning_rate": 2.410740663086165e-06, "loss": 0.722, "step": 8357 }, { "epoch": 0.68, "grad_norm": 2.9454738545193697, "learning_rate": 2.409608300102627e-06, "loss": 0.6875, "step": 8358 }, { "epoch": 0.68, "grad_norm": 1.9855888175733343, "learning_rate": 2.4084761186942734e-06, "loss": 0.3532, "step": 8359 }, { "epoch": 0.68, "grad_norm": 3.011871695896224, "learning_rate": 2.4073441189404657e-06, "loss": 0.5849, "step": 8360 }, { "epoch": 0.68, "grad_norm": 3.309430061951973, "learning_rate": 2.4062123009205525e-06, "loss": 0.5575, "step": 8361 }, { "epoch": 0.68, "grad_norm": 2.5880549181074146, "learning_rate": 2.40508066471387e-06, "loss": 0.3133, "step": 8362 }, { "epoch": 0.68, "grad_norm": 3.3269320174595007, "learning_rate": 2.403949210399738e-06, "loss": 0.7437, "step": 8363 }, { "epoch": 0.68, "grad_norm": 4.732629529579054, "learning_rate": 2.4028179380574684e-06, "loss": 0.7449, "step": 8364 }, { "epoch": 0.68, "grad_norm": 4.433683946189046, "learning_rate": 2.4016868477663586e-06, "loss": 0.3557, "step": 8365 }, { "epoch": 0.68, "grad_norm": 4.778321708777118, "learning_rate": 2.400555939605693e-06, "loss": 0.8214, "step": 8366 }, { "epoch": 0.68, "grad_norm": 3.4557327901594586, "learning_rate": 2.3994252136547426e-06, "loss": 0.6301, "step": 8367 }, { "epoch": 0.68, "grad_norm": 4.922938761690179, "learning_rate": 2.398294669992769e-06, "loss": 1.1046, "step": 8368 }, { "epoch": 0.68, "grad_norm": 3.402524175059253, "learning_rate": 2.397164308699014e-06, "loss": 0.6317, "step": 8369 }, { "epoch": 0.68, "grad_norm": 3.8519980969718537, "learning_rate": 2.396034129852716e-06, "loss": 0.9654, "step": 8370 }, { "epoch": 0.68, "grad_norm": 3.287095492080058, "learning_rate": 2.3949041335330914e-06, "loss": 0.5373, "step": 8371 }, { "epoch": 0.68, "grad_norm": 2.4762121347365547, "learning_rate": 2.3937743198193493e-06, "loss": 0.3418, "step": 8372 }, { "epoch": 0.68, "grad_norm": 4.290139824225999, "learning_rate": 2.3926446887906867e-06, "loss": 0.6132, "step": 8373 }, { "epoch": 0.68, "grad_norm": 3.6184754356381212, "learning_rate": 2.3915152405262824e-06, "loss": 0.5967, "step": 8374 }, { "epoch": 0.68, "grad_norm": 4.263744342832122, "learning_rate": 2.390385975105308e-06, "loss": 0.6789, "step": 8375 }, { "epoch": 0.68, "grad_norm": 3.8193430996779605, "learning_rate": 2.3892568926069186e-06, "loss": 0.8379, "step": 8376 }, { "epoch": 0.68, "grad_norm": 3.210336423817697, "learning_rate": 2.3881279931102602e-06, "loss": 0.5075, "step": 8377 }, { "epoch": 0.68, "grad_norm": 4.762238365586828, "learning_rate": 2.386999276694462e-06, "loss": 1.466, "step": 8378 }, { "epoch": 0.68, "grad_norm": 5.179692768554767, "learning_rate": 2.3858707434386447e-06, "loss": 1.0722, "step": 8379 }, { "epoch": 0.68, "grad_norm": 1.3857918978716115, "learning_rate": 2.3847423934219094e-06, "loss": 0.2436, "step": 8380 }, { "epoch": 0.69, "grad_norm": 4.555509662623433, "learning_rate": 2.383614226723351e-06, "loss": 0.7331, "step": 8381 }, { "epoch": 0.69, "grad_norm": 3.9332803872256683, "learning_rate": 2.382486243422049e-06, "loss": 0.5333, "step": 8382 }, { "epoch": 0.69, "grad_norm": 3.0154124204619657, "learning_rate": 2.381358443597069e-06, "loss": 0.5797, "step": 8383 }, { "epoch": 0.69, "grad_norm": 3.465284294569563, "learning_rate": 2.3802308273274682e-06, "loss": 0.7175, "step": 8384 }, { "epoch": 0.69, "grad_norm": 3.100309840782991, "learning_rate": 2.379103394692284e-06, "loss": 0.4776, "step": 8385 }, { "epoch": 0.69, "grad_norm": 3.2263436218775166, "learning_rate": 2.3779761457705443e-06, "loss": 0.635, "step": 8386 }, { "epoch": 0.69, "grad_norm": 2.996779188540018, "learning_rate": 2.376849080641268e-06, "loss": 0.578, "step": 8387 }, { "epoch": 0.69, "grad_norm": 2.8123089400031707, "learning_rate": 2.375722199383454e-06, "loss": 0.5775, "step": 8388 }, { "epoch": 0.69, "grad_norm": 2.928601130154547, "learning_rate": 2.374595502076092e-06, "loss": 0.6623, "step": 8389 }, { "epoch": 0.69, "grad_norm": 2.828229489008046, "learning_rate": 2.37346898879816e-06, "loss": 0.3584, "step": 8390 }, { "epoch": 0.69, "grad_norm": 6.349399585739406, "learning_rate": 2.372342659628623e-06, "loss": 1.202, "step": 8391 }, { "epoch": 0.69, "grad_norm": 4.303782777829738, "learning_rate": 2.371216514646428e-06, "loss": 0.6508, "step": 8392 }, { "epoch": 0.69, "grad_norm": 3.7366792714066372, "learning_rate": 2.3700905539305147e-06, "loss": 0.6159, "step": 8393 }, { "epoch": 0.69, "grad_norm": 4.456788330418907, "learning_rate": 2.3689647775598084e-06, "loss": 0.9043, "step": 8394 }, { "epoch": 0.69, "grad_norm": 4.601094095713435, "learning_rate": 2.3678391856132203e-06, "loss": 0.8011, "step": 8395 }, { "epoch": 0.69, "grad_norm": 5.441912768334063, "learning_rate": 2.366713778169653e-06, "loss": 1.1761, "step": 8396 }, { "epoch": 0.69, "grad_norm": 5.56436476136244, "learning_rate": 2.365588555307987e-06, "loss": 1.0629, "step": 8397 }, { "epoch": 0.69, "grad_norm": 4.026481870763221, "learning_rate": 2.364463517107099e-06, "loss": 0.7102, "step": 8398 }, { "epoch": 0.69, "grad_norm": 3.1710057109812704, "learning_rate": 2.363338663645848e-06, "loss": 0.5157, "step": 8399 }, { "epoch": 0.69, "grad_norm": 2.685673201066071, "learning_rate": 2.362213995003082e-06, "loss": 0.6991, "step": 8400 }, { "epoch": 0.69, "grad_norm": 3.694032386329679, "learning_rate": 2.3610895112576372e-06, "loss": 0.7543, "step": 8401 }, { "epoch": 0.69, "grad_norm": 3.799422054752237, "learning_rate": 2.359965212488331e-06, "loss": 0.7221, "step": 8402 }, { "epoch": 0.69, "grad_norm": 3.511747862169461, "learning_rate": 2.3588410987739763e-06, "loss": 0.6048, "step": 8403 }, { "epoch": 0.69, "grad_norm": 2.7636626894282883, "learning_rate": 2.3577171701933638e-06, "loss": 0.517, "step": 8404 }, { "epoch": 0.69, "grad_norm": 3.0276506358452417, "learning_rate": 2.3565934268252787e-06, "loss": 0.5485, "step": 8405 }, { "epoch": 0.69, "grad_norm": 4.492551641659161, "learning_rate": 2.35546986874849e-06, "loss": 1.0428, "step": 8406 }, { "epoch": 0.69, "grad_norm": 2.7812895303791634, "learning_rate": 2.354346496041755e-06, "loss": 0.5173, "step": 8407 }, { "epoch": 0.69, "grad_norm": 5.335214488177956, "learning_rate": 2.353223308783818e-06, "loss": 1.0085, "step": 8408 }, { "epoch": 0.69, "grad_norm": 4.818332111917985, "learning_rate": 2.3521003070534065e-06, "loss": 0.9629, "step": 8409 }, { "epoch": 0.69, "grad_norm": 4.515678266661508, "learning_rate": 2.35097749092924e-06, "loss": 0.9099, "step": 8410 }, { "epoch": 0.69, "grad_norm": 5.35139416211975, "learning_rate": 2.349854860490023e-06, "loss": 1.1736, "step": 8411 }, { "epoch": 0.69, "grad_norm": 3.887872785684858, "learning_rate": 2.3487324158144463e-06, "loss": 0.8037, "step": 8412 }, { "epoch": 0.69, "grad_norm": 4.827747082554949, "learning_rate": 2.347610156981191e-06, "loss": 1.2159, "step": 8413 }, { "epoch": 0.69, "grad_norm": 4.086398257738493, "learning_rate": 2.3464880840689187e-06, "loss": 0.692, "step": 8414 }, { "epoch": 0.69, "grad_norm": 4.767076008447161, "learning_rate": 2.345366197156283e-06, "loss": 0.7561, "step": 8415 }, { "epoch": 0.69, "grad_norm": 4.47313845423047, "learning_rate": 2.3442444963219246e-06, "loss": 0.6902, "step": 8416 }, { "epoch": 0.69, "grad_norm": 4.653822729716385, "learning_rate": 2.3431229816444704e-06, "loss": 1.0065, "step": 8417 }, { "epoch": 0.69, "grad_norm": 5.483539108307993, "learning_rate": 2.3420016532025312e-06, "loss": 1.0273, "step": 8418 }, { "epoch": 0.69, "grad_norm": 5.6041543053271745, "learning_rate": 2.34088051107471e-06, "loss": 0.7899, "step": 8419 }, { "epoch": 0.69, "grad_norm": 3.3378761203948524, "learning_rate": 2.3397595553395903e-06, "loss": 0.5621, "step": 8420 }, { "epoch": 0.69, "grad_norm": 3.6709496478302652, "learning_rate": 2.3386387860757487e-06, "loss": 0.4824, "step": 8421 }, { "epoch": 0.69, "grad_norm": 3.699658076987211, "learning_rate": 2.337518203361746e-06, "loss": 0.8225, "step": 8422 }, { "epoch": 0.69, "grad_norm": 4.009785215675382, "learning_rate": 2.33639780727613e-06, "loss": 1.0486, "step": 8423 }, { "epoch": 0.69, "grad_norm": 5.150383172250053, "learning_rate": 2.3352775978974355e-06, "loss": 0.9872, "step": 8424 }, { "epoch": 0.69, "grad_norm": 2.66804436038213, "learning_rate": 2.334157575304186e-06, "loss": 0.5993, "step": 8425 }, { "epoch": 0.69, "grad_norm": 3.642409328570551, "learning_rate": 2.3330377395748878e-06, "loss": 0.5431, "step": 8426 }, { "epoch": 0.69, "grad_norm": 4.604401620902554, "learning_rate": 2.331918090788037e-06, "loss": 0.9804, "step": 8427 }, { "epoch": 0.69, "grad_norm": 3.9247541734623117, "learning_rate": 2.3307986290221162e-06, "loss": 0.8486, "step": 8428 }, { "epoch": 0.69, "grad_norm": 3.126764203684734, "learning_rate": 2.329679354355595e-06, "loss": 0.5038, "step": 8429 }, { "epoch": 0.69, "grad_norm": 4.196092083621251, "learning_rate": 2.32856026686693e-06, "loss": 0.882, "step": 8430 }, { "epoch": 0.69, "grad_norm": 2.9754652441676246, "learning_rate": 2.3274413666345665e-06, "loss": 0.2893, "step": 8431 }, { "epoch": 0.69, "grad_norm": 3.1589207779363084, "learning_rate": 2.32632265373693e-06, "loss": 0.4983, "step": 8432 }, { "epoch": 0.69, "grad_norm": 3.651300803151679, "learning_rate": 2.325204128252441e-06, "loss": 0.8112, "step": 8433 }, { "epoch": 0.69, "grad_norm": 3.402444537328381, "learning_rate": 2.3240857902595002e-06, "loss": 0.4586, "step": 8434 }, { "epoch": 0.69, "grad_norm": 4.410746347629286, "learning_rate": 2.3229676398365e-06, "loss": 1.0668, "step": 8435 }, { "epoch": 0.69, "grad_norm": 4.030115535323992, "learning_rate": 2.32184967706182e-06, "loss": 0.9424, "step": 8436 }, { "epoch": 0.69, "grad_norm": 3.7400689708001287, "learning_rate": 2.3207319020138197e-06, "loss": 0.5923, "step": 8437 }, { "epoch": 0.69, "grad_norm": 3.384599097966671, "learning_rate": 2.319614314770853e-06, "loss": 0.8099, "step": 8438 }, { "epoch": 0.69, "grad_norm": 4.172882423162879, "learning_rate": 2.3184969154112585e-06, "loss": 0.4573, "step": 8439 }, { "epoch": 0.69, "grad_norm": 2.6910280218831772, "learning_rate": 2.3173797040133595e-06, "loss": 0.4993, "step": 8440 }, { "epoch": 0.69, "grad_norm": 3.786782603564133, "learning_rate": 2.3162626806554687e-06, "loss": 0.746, "step": 8441 }, { "epoch": 0.69, "grad_norm": 4.511630429143293, "learning_rate": 2.3151458454158867e-06, "loss": 0.656, "step": 8442 }, { "epoch": 0.69, "grad_norm": 4.738657960078739, "learning_rate": 2.3140291983728936e-06, "loss": 0.9872, "step": 8443 }, { "epoch": 0.69, "grad_norm": 2.782023982418744, "learning_rate": 2.312912739604765e-06, "loss": 0.3056, "step": 8444 }, { "epoch": 0.69, "grad_norm": 2.234880690556221, "learning_rate": 2.311796469189759e-06, "loss": 0.469, "step": 8445 }, { "epoch": 0.69, "grad_norm": 2.070585301608283, "learning_rate": 2.310680387206121e-06, "loss": 0.3329, "step": 8446 }, { "epoch": 0.69, "grad_norm": 5.966473252835259, "learning_rate": 2.309564493732086e-06, "loss": 1.1336, "step": 8447 }, { "epoch": 0.69, "grad_norm": 2.5164535722361734, "learning_rate": 2.3084487888458697e-06, "loss": 0.5251, "step": 8448 }, { "epoch": 0.69, "grad_norm": 4.947278176717033, "learning_rate": 2.3073332726256807e-06, "loss": 1.2562, "step": 8449 }, { "epoch": 0.69, "grad_norm": 4.970368894447125, "learning_rate": 2.3062179451497095e-06, "loss": 0.9964, "step": 8450 }, { "epoch": 0.69, "grad_norm": 4.754486466883372, "learning_rate": 2.305102806496137e-06, "loss": 1.2073, "step": 8451 }, { "epoch": 0.69, "grad_norm": 2.720551063877111, "learning_rate": 2.303987856743129e-06, "loss": 0.5452, "step": 8452 }, { "epoch": 0.69, "grad_norm": 4.287684194272693, "learning_rate": 2.3028730959688417e-06, "loss": 0.7301, "step": 8453 }, { "epoch": 0.69, "grad_norm": 5.782316811576312, "learning_rate": 2.30175852425141e-06, "loss": 0.9407, "step": 8454 }, { "epoch": 0.69, "grad_norm": 2.008913663948316, "learning_rate": 2.3006441416689633e-06, "loss": 0.3661, "step": 8455 }, { "epoch": 0.69, "grad_norm": 3.318025086232537, "learning_rate": 2.2995299482996146e-06, "loss": 0.6932, "step": 8456 }, { "epoch": 0.69, "grad_norm": 3.502260191231009, "learning_rate": 2.2984159442214637e-06, "loss": 0.625, "step": 8457 }, { "epoch": 0.69, "grad_norm": 3.8011882692816124, "learning_rate": 2.2973021295125985e-06, "loss": 0.607, "step": 8458 }, { "epoch": 0.69, "grad_norm": 4.626678714352333, "learning_rate": 2.2961885042510927e-06, "loss": 1.0223, "step": 8459 }, { "epoch": 0.69, "grad_norm": 4.755927228048318, "learning_rate": 2.2950750685150045e-06, "loss": 0.8516, "step": 8460 }, { "epoch": 0.69, "grad_norm": 3.896744191915756, "learning_rate": 2.293961822382382e-06, "loss": 0.6381, "step": 8461 }, { "epoch": 0.69, "grad_norm": 3.604139263103291, "learning_rate": 2.2928487659312594e-06, "loss": 0.5576, "step": 8462 }, { "epoch": 0.69, "grad_norm": 4.739531463436227, "learning_rate": 2.291735899239658e-06, "loss": 0.9477, "step": 8463 }, { "epoch": 0.69, "grad_norm": 4.074405910792501, "learning_rate": 2.2906232223855824e-06, "loss": 0.7331, "step": 8464 }, { "epoch": 0.69, "grad_norm": 3.6067943074762, "learning_rate": 2.289510735447029e-06, "loss": 0.8256, "step": 8465 }, { "epoch": 0.69, "grad_norm": 5.256104320093683, "learning_rate": 2.288398438501976e-06, "loss": 1.0537, "step": 8466 }, { "epoch": 0.69, "grad_norm": 2.9481643168142435, "learning_rate": 2.2872863316283906e-06, "loss": 0.5068, "step": 8467 }, { "epoch": 0.69, "grad_norm": 3.7919291921357177, "learning_rate": 2.2861744149042275e-06, "loss": 0.7032, "step": 8468 }, { "epoch": 0.69, "grad_norm": 2.4314916854245245, "learning_rate": 2.285062688407428e-06, "loss": 0.4225, "step": 8469 }, { "epoch": 0.69, "grad_norm": 2.039513891459796, "learning_rate": 2.283951152215918e-06, "loss": 0.3852, "step": 8470 }, { "epoch": 0.69, "grad_norm": 2.839655352129458, "learning_rate": 2.282839806407614e-06, "loss": 0.5821, "step": 8471 }, { "epoch": 0.69, "grad_norm": 4.067054657714496, "learning_rate": 2.2817286510604125e-06, "loss": 0.7038, "step": 8472 }, { "epoch": 0.69, "grad_norm": 3.5436743447807757, "learning_rate": 2.280617686252203e-06, "loss": 0.7465, "step": 8473 }, { "epoch": 0.69, "grad_norm": 2.627480036880374, "learning_rate": 2.279506912060859e-06, "loss": 0.5592, "step": 8474 }, { "epoch": 0.69, "grad_norm": 3.5964562108123093, "learning_rate": 2.2783963285642403e-06, "loss": 0.6405, "step": 8475 }, { "epoch": 0.69, "grad_norm": 4.097271718922358, "learning_rate": 2.2772859358401962e-06, "loss": 1.1977, "step": 8476 }, { "epoch": 0.69, "grad_norm": 5.390702689594146, "learning_rate": 2.2761757339665576e-06, "loss": 0.6897, "step": 8477 }, { "epoch": 0.69, "grad_norm": 3.4958214177554683, "learning_rate": 2.2750657230211452e-06, "loss": 0.8866, "step": 8478 }, { "epoch": 0.69, "grad_norm": 4.150754538967326, "learning_rate": 2.2739559030817687e-06, "loss": 0.811, "step": 8479 }, { "epoch": 0.69, "grad_norm": 4.187699471097217, "learning_rate": 2.272846274226218e-06, "loss": 1.2004, "step": 8480 }, { "epoch": 0.69, "grad_norm": 2.077357679192823, "learning_rate": 2.2717368365322747e-06, "loss": 0.3078, "step": 8481 }, { "epoch": 0.69, "grad_norm": 5.514300456812758, "learning_rate": 2.2706275900777075e-06, "loss": 0.936, "step": 8482 }, { "epoch": 0.69, "grad_norm": 4.7356245830003685, "learning_rate": 2.2695185349402664e-06, "loss": 0.9696, "step": 8483 }, { "epoch": 0.69, "grad_norm": 4.098094550770222, "learning_rate": 2.2684096711976926e-06, "loss": 0.8091, "step": 8484 }, { "epoch": 0.69, "grad_norm": 4.791551196522658, "learning_rate": 2.2673009989277136e-06, "loss": 0.6636, "step": 8485 }, { "epoch": 0.69, "grad_norm": 4.060922227655955, "learning_rate": 2.266192518208041e-06, "loss": 0.5593, "step": 8486 }, { "epoch": 0.69, "grad_norm": 3.8403922186175374, "learning_rate": 2.2650842291163755e-06, "loss": 0.7785, "step": 8487 }, { "epoch": 0.69, "grad_norm": 2.6763855490718926, "learning_rate": 2.2639761317304047e-06, "loss": 0.4132, "step": 8488 }, { "epoch": 0.69, "grad_norm": 4.600935939884642, "learning_rate": 2.262868226127799e-06, "loss": 1.0364, "step": 8489 }, { "epoch": 0.69, "grad_norm": 3.982098126888399, "learning_rate": 2.261760512386218e-06, "loss": 0.9001, "step": 8490 }, { "epoch": 0.69, "grad_norm": 2.4049285672036613, "learning_rate": 2.260652990583308e-06, "loss": 0.4007, "step": 8491 }, { "epoch": 0.69, "grad_norm": 1.9316133492785437, "learning_rate": 2.259545660796702e-06, "loss": 0.4114, "step": 8492 }, { "epoch": 0.69, "grad_norm": 3.183810875278495, "learning_rate": 2.2584385231040202e-06, "loss": 0.5691, "step": 8493 }, { "epoch": 0.69, "grad_norm": 4.435946658191863, "learning_rate": 2.2573315775828655e-06, "loss": 0.9812, "step": 8494 }, { "epoch": 0.69, "grad_norm": 4.831926589661101, "learning_rate": 2.2562248243108305e-06, "loss": 0.8021, "step": 8495 }, { "epoch": 0.69, "grad_norm": 3.4545849790556784, "learning_rate": 2.255118263365496e-06, "loss": 0.7481, "step": 8496 }, { "epoch": 0.69, "grad_norm": 3.1328991820797314, "learning_rate": 2.254011894824424e-06, "loss": 0.5929, "step": 8497 }, { "epoch": 0.69, "grad_norm": 3.2939912257282327, "learning_rate": 2.2529057187651675e-06, "loss": 0.7333, "step": 8498 }, { "epoch": 0.69, "grad_norm": 4.835804670368205, "learning_rate": 2.2517997352652663e-06, "loss": 1.0314, "step": 8499 }, { "epoch": 0.69, "grad_norm": 3.942105641728355, "learning_rate": 2.2506939444022423e-06, "loss": 1.1971, "step": 8500 }, { "epoch": 0.69, "grad_norm": 4.051373609684334, "learning_rate": 2.249588346253607e-06, "loss": 0.9748, "step": 8501 }, { "epoch": 0.69, "grad_norm": 3.5037761957391838, "learning_rate": 2.2484829408968593e-06, "loss": 0.3577, "step": 8502 }, { "epoch": 0.7, "grad_norm": 4.378439313967647, "learning_rate": 2.247377728409483e-06, "loss": 0.9042, "step": 8503 }, { "epoch": 0.7, "grad_norm": 4.074400812627169, "learning_rate": 2.246272708868948e-06, "loss": 0.647, "step": 8504 }, { "epoch": 0.7, "grad_norm": 2.423967160613771, "learning_rate": 2.245167882352714e-06, "loss": 0.513, "step": 8505 }, { "epoch": 0.7, "grad_norm": 3.1295114828424264, "learning_rate": 2.24406324893822e-06, "loss": 0.6978, "step": 8506 }, { "epoch": 0.7, "grad_norm": 4.43587445844676, "learning_rate": 2.2429588087028993e-06, "loss": 0.6342, "step": 8507 }, { "epoch": 0.7, "grad_norm": 3.0636716675478004, "learning_rate": 2.2418545617241665e-06, "loss": 0.5572, "step": 8508 }, { "epoch": 0.7, "grad_norm": 4.740701769536277, "learning_rate": 2.2407505080794257e-06, "loss": 1.0533, "step": 8509 }, { "epoch": 0.7, "grad_norm": 3.052240351497302, "learning_rate": 2.239646647846068e-06, "loss": 0.6739, "step": 8510 }, { "epoch": 0.7, "grad_norm": 3.4173514237523195, "learning_rate": 2.2385429811014654e-06, "loss": 0.7716, "step": 8511 }, { "epoch": 0.7, "grad_norm": 3.8918025523546453, "learning_rate": 2.2374395079229837e-06, "loss": 0.7011, "step": 8512 }, { "epoch": 0.7, "grad_norm": 3.4332955544209383, "learning_rate": 2.236336228387968e-06, "loss": 0.5024, "step": 8513 }, { "epoch": 0.7, "grad_norm": 5.512813254636162, "learning_rate": 2.235233142573755e-06, "loss": 0.8418, "step": 8514 }, { "epoch": 0.7, "grad_norm": 3.401873909504076, "learning_rate": 2.2341302505576663e-06, "loss": 0.5164, "step": 8515 }, { "epoch": 0.7, "grad_norm": 2.8503631153374727, "learning_rate": 2.233027552417012e-06, "loss": 0.3192, "step": 8516 }, { "epoch": 0.7, "grad_norm": 3.240333467526782, "learning_rate": 2.2319250482290826e-06, "loss": 0.7147, "step": 8517 }, { "epoch": 0.7, "grad_norm": 4.160602406918777, "learning_rate": 2.2308227380711605e-06, "loss": 0.9787, "step": 8518 }, { "epoch": 0.7, "grad_norm": 5.500392377011563, "learning_rate": 2.229720622020513e-06, "loss": 1.2103, "step": 8519 }, { "epoch": 0.7, "grad_norm": 2.6966145907371657, "learning_rate": 2.2286187001543936e-06, "loss": 0.6251, "step": 8520 }, { "epoch": 0.7, "grad_norm": 3.258155138649007, "learning_rate": 2.2275169725500424e-06, "loss": 0.471, "step": 8521 }, { "epoch": 0.7, "grad_norm": 4.217948330809415, "learning_rate": 2.226415439284687e-06, "loss": 1.0303, "step": 8522 }, { "epoch": 0.7, "grad_norm": 5.27214185715932, "learning_rate": 2.2253141004355367e-06, "loss": 1.1734, "step": 8523 }, { "epoch": 0.7, "grad_norm": 3.254554209891187, "learning_rate": 2.2242129560797933e-06, "loss": 0.5383, "step": 8524 }, { "epoch": 0.7, "grad_norm": 3.3820214523269243, "learning_rate": 2.2231120062946405e-06, "loss": 0.5971, "step": 8525 }, { "epoch": 0.7, "grad_norm": 3.635763140895818, "learning_rate": 2.2220112511572533e-06, "loss": 0.6923, "step": 8526 }, { "epoch": 0.7, "grad_norm": 5.1862781005744125, "learning_rate": 2.2209106907447853e-06, "loss": 0.8966, "step": 8527 }, { "epoch": 0.7, "grad_norm": 5.845679683065618, "learning_rate": 2.2198103251343856e-06, "loss": 1.4883, "step": 8528 }, { "epoch": 0.7, "grad_norm": 2.715864144147195, "learning_rate": 2.2187101544031807e-06, "loss": 0.3922, "step": 8529 }, { "epoch": 0.7, "grad_norm": 3.7776553147772858, "learning_rate": 2.2176101786282893e-06, "loss": 0.994, "step": 8530 }, { "epoch": 0.7, "grad_norm": 4.880908010643266, "learning_rate": 2.216510397886816e-06, "loss": 0.57, "step": 8531 }, { "epoch": 0.7, "grad_norm": 3.80791939885431, "learning_rate": 2.21541081225585e-06, "loss": 0.5118, "step": 8532 }, { "epoch": 0.7, "grad_norm": 4.443131290143332, "learning_rate": 2.214311421812467e-06, "loss": 1.2995, "step": 8533 }, { "epoch": 0.7, "grad_norm": 4.320553504408982, "learning_rate": 2.2132122266337326e-06, "loss": 1.0689, "step": 8534 }, { "epoch": 0.7, "grad_norm": 4.136281392446387, "learning_rate": 2.2121132267966907e-06, "loss": 0.6286, "step": 8535 }, { "epoch": 0.7, "grad_norm": 4.513037998640322, "learning_rate": 2.211014422378378e-06, "loss": 0.8584, "step": 8536 }, { "epoch": 0.7, "grad_norm": 4.308923289937153, "learning_rate": 2.2099158134558175e-06, "loss": 0.9038, "step": 8537 }, { "epoch": 0.7, "grad_norm": 2.8295110576552265, "learning_rate": 2.2088174001060154e-06, "loss": 0.6404, "step": 8538 }, { "epoch": 0.7, "grad_norm": 3.0955056702896093, "learning_rate": 2.2077191824059685e-06, "loss": 0.2939, "step": 8539 }, { "epoch": 0.7, "grad_norm": 4.883527411650362, "learning_rate": 2.2066211604326533e-06, "loss": 1.0983, "step": 8540 }, { "epoch": 0.7, "grad_norm": 3.306017936599753, "learning_rate": 2.2055233342630372e-06, "loss": 0.7268, "step": 8541 }, { "epoch": 0.7, "grad_norm": 4.035332985908827, "learning_rate": 2.204425703974076e-06, "loss": 0.5606, "step": 8542 }, { "epoch": 0.7, "grad_norm": 3.3892802423350403, "learning_rate": 2.2033282696427046e-06, "loss": 0.5014, "step": 8543 }, { "epoch": 0.7, "grad_norm": 5.318912314596869, "learning_rate": 2.2022310313458506e-06, "loss": 0.9304, "step": 8544 }, { "epoch": 0.7, "grad_norm": 4.10509095348596, "learning_rate": 2.201133989160427e-06, "loss": 0.8174, "step": 8545 }, { "epoch": 0.7, "grad_norm": 3.525349725727181, "learning_rate": 2.200037143163328e-06, "loss": 0.6178, "step": 8546 }, { "epoch": 0.7, "grad_norm": 3.3115879393343763, "learning_rate": 2.19894049343144e-06, "loss": 0.7725, "step": 8547 }, { "epoch": 0.7, "grad_norm": 2.3625063552278305, "learning_rate": 2.1978440400416334e-06, "loss": 0.5882, "step": 8548 }, { "epoch": 0.7, "grad_norm": 3.257631859288316, "learning_rate": 2.1967477830707644e-06, "loss": 0.4958, "step": 8549 }, { "epoch": 0.7, "grad_norm": 3.1902414133312607, "learning_rate": 2.195651722595676e-06, "loss": 0.4216, "step": 8550 }, { "epoch": 0.7, "grad_norm": 3.37316639130754, "learning_rate": 2.1945558586931994e-06, "loss": 0.8959, "step": 8551 }, { "epoch": 0.7, "grad_norm": 2.787099690497614, "learning_rate": 2.1934601914401454e-06, "loss": 0.4605, "step": 8552 }, { "epoch": 0.7, "grad_norm": 4.080559088720089, "learning_rate": 2.1923647209133182e-06, "loss": 1.084, "step": 8553 }, { "epoch": 0.7, "grad_norm": 4.811972776542819, "learning_rate": 2.1912694471895053e-06, "loss": 1.3616, "step": 8554 }, { "epoch": 0.7, "grad_norm": 2.9511910177455705, "learning_rate": 2.1901743703454804e-06, "loss": 0.7359, "step": 8555 }, { "epoch": 0.7, "grad_norm": 2.457071580024648, "learning_rate": 2.1890794904580054e-06, "loss": 0.2913, "step": 8556 }, { "epoch": 0.7, "grad_norm": 6.205523943575567, "learning_rate": 2.187984807603823e-06, "loss": 0.9544, "step": 8557 }, { "epoch": 0.7, "grad_norm": 2.9482181278145814, "learning_rate": 2.18689032185967e-06, "loss": 0.4539, "step": 8558 }, { "epoch": 0.7, "grad_norm": 4.634360609800492, "learning_rate": 2.1857960333022605e-06, "loss": 0.7313, "step": 8559 }, { "epoch": 0.7, "grad_norm": 3.2260508333855866, "learning_rate": 2.1847019420083014e-06, "loss": 0.7196, "step": 8560 }, { "epoch": 0.7, "grad_norm": 4.2922376954795105, "learning_rate": 2.1836080480544847e-06, "loss": 1.0641, "step": 8561 }, { "epoch": 0.7, "grad_norm": 4.930928441831621, "learning_rate": 2.182514351517488e-06, "loss": 0.9905, "step": 8562 }, { "epoch": 0.7, "grad_norm": 4.876421767273682, "learning_rate": 2.1814208524739723e-06, "loss": 1.2121, "step": 8563 }, { "epoch": 0.7, "grad_norm": 5.4454066148723985, "learning_rate": 2.1803275510005876e-06, "loss": 0.9244, "step": 8564 }, { "epoch": 0.7, "grad_norm": 4.20130506058506, "learning_rate": 2.1792344471739708e-06, "loss": 0.4858, "step": 8565 }, { "epoch": 0.7, "grad_norm": 2.7595268897264678, "learning_rate": 2.178141541070743e-06, "loss": 0.5205, "step": 8566 }, { "epoch": 0.7, "grad_norm": 3.694964593190989, "learning_rate": 2.177048832767513e-06, "loss": 0.8947, "step": 8567 }, { "epoch": 0.7, "grad_norm": 5.112295196327415, "learning_rate": 2.1759563223408754e-06, "loss": 1.0051, "step": 8568 }, { "epoch": 0.7, "grad_norm": 3.065277813148473, "learning_rate": 2.174864009867408e-06, "loss": 0.3969, "step": 8569 }, { "epoch": 0.7, "grad_norm": 4.397957121363607, "learning_rate": 2.173771895423678e-06, "loss": 1.0383, "step": 8570 }, { "epoch": 0.7, "grad_norm": 3.3720889266708243, "learning_rate": 2.1726799790862384e-06, "loss": 0.5068, "step": 8571 }, { "epoch": 0.7, "grad_norm": 1.143698773057102, "learning_rate": 2.17158826093163e-06, "loss": 0.1789, "step": 8572 }, { "epoch": 0.7, "grad_norm": 4.673864608620568, "learning_rate": 2.170496741036373e-06, "loss": 1.0408, "step": 8573 }, { "epoch": 0.7, "grad_norm": 3.5684299500075745, "learning_rate": 2.1694054194769827e-06, "loss": 0.3384, "step": 8574 }, { "epoch": 0.7, "grad_norm": 5.626045869543701, "learning_rate": 2.1683142963299513e-06, "loss": 0.9893, "step": 8575 }, { "epoch": 0.7, "grad_norm": 2.8760043551483974, "learning_rate": 2.1672233716717644e-06, "loss": 0.7937, "step": 8576 }, { "epoch": 0.7, "grad_norm": 4.966860606458185, "learning_rate": 2.166132645578891e-06, "loss": 1.2491, "step": 8577 }, { "epoch": 0.7, "grad_norm": 3.7472715777648595, "learning_rate": 2.165042118127786e-06, "loss": 0.5268, "step": 8578 }, { "epoch": 0.7, "grad_norm": 5.278882749479071, "learning_rate": 2.1639517893948926e-06, "loss": 1.8395, "step": 8579 }, { "epoch": 0.7, "grad_norm": 3.4903472991909483, "learning_rate": 2.162861659456634e-06, "loss": 0.6644, "step": 8580 }, { "epoch": 0.7, "grad_norm": 2.1172260213709335, "learning_rate": 2.161771728389427e-06, "loss": 0.3231, "step": 8581 }, { "epoch": 0.7, "grad_norm": 4.556372738087387, "learning_rate": 2.1606819962696684e-06, "loss": 0.7001, "step": 8582 }, { "epoch": 0.7, "grad_norm": 3.651860880286598, "learning_rate": 2.159592463173746e-06, "loss": 0.7687, "step": 8583 }, { "epoch": 0.7, "grad_norm": 3.119051509401893, "learning_rate": 2.1585031291780302e-06, "loss": 0.4643, "step": 8584 }, { "epoch": 0.7, "grad_norm": 4.4095767126412575, "learning_rate": 2.1574139943588807e-06, "loss": 0.7777, "step": 8585 }, { "epoch": 0.7, "grad_norm": 2.109160471129687, "learning_rate": 2.156325058792637e-06, "loss": 0.4316, "step": 8586 }, { "epoch": 0.7, "grad_norm": 5.725993390309628, "learning_rate": 2.1552363225556316e-06, "loss": 0.9986, "step": 8587 }, { "epoch": 0.7, "grad_norm": 3.528936179212356, "learning_rate": 2.154147785724181e-06, "loss": 0.6542, "step": 8588 }, { "epoch": 0.7, "grad_norm": 2.940343252172774, "learning_rate": 2.153059448374584e-06, "loss": 0.5965, "step": 8589 }, { "epoch": 0.7, "grad_norm": 2.614609999316483, "learning_rate": 2.151971310583129e-06, "loss": 0.5886, "step": 8590 }, { "epoch": 0.7, "grad_norm": 3.928864239744701, "learning_rate": 2.150883372426093e-06, "loss": 0.7739, "step": 8591 }, { "epoch": 0.7, "grad_norm": 3.89137734702575, "learning_rate": 2.149795633979731e-06, "loss": 0.7796, "step": 8592 }, { "epoch": 0.7, "grad_norm": 4.166206859131393, "learning_rate": 2.1487080953202912e-06, "loss": 0.6869, "step": 8593 }, { "epoch": 0.7, "grad_norm": 3.716597618134034, "learning_rate": 2.147620756524004e-06, "loss": 0.5369, "step": 8594 }, { "epoch": 0.7, "grad_norm": 3.704012501500497, "learning_rate": 2.1465336176670893e-06, "loss": 0.804, "step": 8595 }, { "epoch": 0.7, "grad_norm": 3.8171949102624865, "learning_rate": 2.145446678825751e-06, "loss": 0.8484, "step": 8596 }, { "epoch": 0.7, "grad_norm": 4.5497017487230424, "learning_rate": 2.144359940076176e-06, "loss": 0.7923, "step": 8597 }, { "epoch": 0.7, "grad_norm": 4.219090379332379, "learning_rate": 2.1432734014945417e-06, "loss": 0.6083, "step": 8598 }, { "epoch": 0.7, "grad_norm": 4.0059478905592725, "learning_rate": 2.1421870631570083e-06, "loss": 0.8369, "step": 8599 }, { "epoch": 0.7, "grad_norm": 4.82593726098331, "learning_rate": 2.1411009251397257e-06, "loss": 0.7378, "step": 8600 }, { "epoch": 0.7, "grad_norm": 4.2473968851947275, "learning_rate": 2.140014987518826e-06, "loss": 0.8269, "step": 8601 }, { "epoch": 0.7, "grad_norm": 2.875191924636826, "learning_rate": 2.13892925037043e-06, "loss": 0.694, "step": 8602 }, { "epoch": 0.7, "grad_norm": 3.177039573557034, "learning_rate": 2.1378437137706413e-06, "loss": 0.4679, "step": 8603 }, { "epoch": 0.7, "grad_norm": 4.005304744198496, "learning_rate": 2.136758377795552e-06, "loss": 0.8098, "step": 8604 }, { "epoch": 0.7, "grad_norm": 4.245893912971883, "learning_rate": 2.1356732425212406e-06, "loss": 1.0079, "step": 8605 }, { "epoch": 0.7, "grad_norm": 4.2508512678952, "learning_rate": 2.1345883080237684e-06, "loss": 0.7945, "step": 8606 }, { "epoch": 0.7, "grad_norm": 3.246922879484139, "learning_rate": 2.133503574379185e-06, "loss": 0.465, "step": 8607 }, { "epoch": 0.7, "grad_norm": 2.7239696989996944, "learning_rate": 2.1324190416635275e-06, "loss": 0.6172, "step": 8608 }, { "epoch": 0.7, "grad_norm": 4.058882977329405, "learning_rate": 2.131334709952814e-06, "loss": 0.8411, "step": 8609 }, { "epoch": 0.7, "grad_norm": 6.055853011000352, "learning_rate": 2.1302505793230534e-06, "loss": 1.2077, "step": 8610 }, { "epoch": 0.7, "grad_norm": 3.238300224717042, "learning_rate": 2.129166649850237e-06, "loss": 0.5948, "step": 8611 }, { "epoch": 0.7, "grad_norm": 2.8779211339993243, "learning_rate": 2.128082921610345e-06, "loss": 0.3973, "step": 8612 }, { "epoch": 0.7, "grad_norm": 4.3118160894168165, "learning_rate": 2.1269993946793414e-06, "loss": 0.6056, "step": 8613 }, { "epoch": 0.7, "grad_norm": 4.916006967340428, "learning_rate": 2.1259160691331794e-06, "loss": 0.5635, "step": 8614 }, { "epoch": 0.7, "grad_norm": 3.3416903112367136, "learning_rate": 2.1248329450477904e-06, "loss": 0.8378, "step": 8615 }, { "epoch": 0.7, "grad_norm": 4.3951543935934, "learning_rate": 2.1237500224990994e-06, "loss": 0.5687, "step": 8616 }, { "epoch": 0.7, "grad_norm": 4.875284908893516, "learning_rate": 2.122667301563014e-06, "loss": 0.8145, "step": 8617 }, { "epoch": 0.7, "grad_norm": 3.8377233678186777, "learning_rate": 2.121584782315429e-06, "loss": 0.3168, "step": 8618 }, { "epoch": 0.7, "grad_norm": 3.5394341484148724, "learning_rate": 2.1205024648322254e-06, "loss": 0.5774, "step": 8619 }, { "epoch": 0.7, "grad_norm": 4.066940806716527, "learning_rate": 2.1194203491892657e-06, "loss": 0.6829, "step": 8620 }, { "epoch": 0.7, "grad_norm": 3.0735806068333074, "learning_rate": 2.1183384354624053e-06, "loss": 0.5733, "step": 8621 }, { "epoch": 0.7, "grad_norm": 4.357778850248498, "learning_rate": 2.117256723727477e-06, "loss": 0.5016, "step": 8622 }, { "epoch": 0.7, "grad_norm": 5.96812977430919, "learning_rate": 2.1161752140603077e-06, "loss": 1.1085, "step": 8623 }, { "epoch": 0.7, "grad_norm": 3.0458089164222693, "learning_rate": 2.1150939065367042e-06, "loss": 0.8446, "step": 8624 }, { "epoch": 0.7, "grad_norm": 3.750234262423207, "learning_rate": 2.114012801232465e-06, "loss": 0.7071, "step": 8625 }, { "epoch": 0.71, "grad_norm": 3.686081707197256, "learning_rate": 2.1129318982233673e-06, "loss": 0.7895, "step": 8626 }, { "epoch": 0.71, "grad_norm": 4.589439961308198, "learning_rate": 2.1118511975851786e-06, "loss": 0.8295, "step": 8627 }, { "epoch": 0.71, "grad_norm": 4.381214266267079, "learning_rate": 2.1107706993936517e-06, "loss": 1.0563, "step": 8628 }, { "epoch": 0.71, "grad_norm": 3.7484594029638356, "learning_rate": 2.109690403724525e-06, "loss": 0.7031, "step": 8629 }, { "epoch": 0.71, "grad_norm": 3.878346462910791, "learning_rate": 2.1086103106535214e-06, "loss": 0.5772, "step": 8630 }, { "epoch": 0.71, "grad_norm": 3.6799841612222877, "learning_rate": 2.1075304202563545e-06, "loss": 0.4902, "step": 8631 }, { "epoch": 0.71, "grad_norm": 3.3065349872163217, "learning_rate": 2.106450732608715e-06, "loss": 0.5272, "step": 8632 }, { "epoch": 0.71, "grad_norm": 4.312670386870131, "learning_rate": 2.105371247786286e-06, "loss": 0.6805, "step": 8633 }, { "epoch": 0.71, "grad_norm": 4.36764524941005, "learning_rate": 2.1042919658647354e-06, "loss": 0.6347, "step": 8634 }, { "epoch": 0.71, "grad_norm": 1.0413073985924002, "learning_rate": 2.1032128869197177e-06, "loss": 0.14, "step": 8635 }, { "epoch": 0.71, "grad_norm": 5.05262620919622, "learning_rate": 2.102134011026868e-06, "loss": 1.2925, "step": 8636 }, { "epoch": 0.71, "grad_norm": 3.59306747824626, "learning_rate": 2.1010553382618137e-06, "loss": 0.6344, "step": 8637 }, { "epoch": 0.71, "grad_norm": 5.6625150732439735, "learning_rate": 2.099976868700163e-06, "loss": 1.0499, "step": 8638 }, { "epoch": 0.71, "grad_norm": 3.1795958342969723, "learning_rate": 2.0988986024175124e-06, "loss": 0.5804, "step": 8639 }, { "epoch": 0.71, "grad_norm": 5.363379536622431, "learning_rate": 2.097820539489444e-06, "loss": 1.1703, "step": 8640 }, { "epoch": 0.71, "grad_norm": 3.0096364022705266, "learning_rate": 2.096742679991526e-06, "loss": 0.7554, "step": 8641 }, { "epoch": 0.71, "grad_norm": 3.455715040513336, "learning_rate": 2.0956650239993125e-06, "loss": 0.8611, "step": 8642 }, { "epoch": 0.71, "grad_norm": 1.35198840252164, "learning_rate": 2.0945875715883395e-06, "loss": 0.1979, "step": 8643 }, { "epoch": 0.71, "grad_norm": 4.093523913388497, "learning_rate": 2.0935103228341334e-06, "loss": 0.7536, "step": 8644 }, { "epoch": 0.71, "grad_norm": 3.354438838369588, "learning_rate": 2.092433277812204e-06, "loss": 0.3875, "step": 8645 }, { "epoch": 0.71, "grad_norm": 3.097243062777404, "learning_rate": 2.091356436598049e-06, "loss": 0.555, "step": 8646 }, { "epoch": 0.71, "grad_norm": 4.154097071235236, "learning_rate": 2.0902797992671485e-06, "loss": 0.6429, "step": 8647 }, { "epoch": 0.71, "grad_norm": 3.794055154080764, "learning_rate": 2.0892033658949734e-06, "loss": 1.0275, "step": 8648 }, { "epoch": 0.71, "grad_norm": 3.1779714128589682, "learning_rate": 2.088127136556972e-06, "loss": 0.8594, "step": 8649 }, { "epoch": 0.71, "grad_norm": 2.656850932064562, "learning_rate": 2.087051111328586e-06, "loss": 0.3271, "step": 8650 }, { "epoch": 0.71, "grad_norm": 4.103798675756965, "learning_rate": 2.0859752902852425e-06, "loss": 0.6619, "step": 8651 }, { "epoch": 0.71, "grad_norm": 2.213857090780219, "learning_rate": 2.084899673502347e-06, "loss": 0.4175, "step": 8652 }, { "epoch": 0.71, "grad_norm": 2.799983173929279, "learning_rate": 2.0838242610552974e-06, "loss": 0.3823, "step": 8653 }, { "epoch": 0.71, "grad_norm": 2.879700441990109, "learning_rate": 2.082749053019478e-06, "loss": 0.217, "step": 8654 }, { "epoch": 0.71, "grad_norm": 2.2683120918792414, "learning_rate": 2.081674049470252e-06, "loss": 0.2636, "step": 8655 }, { "epoch": 0.71, "grad_norm": 3.2220936779577714, "learning_rate": 2.080599250482975e-06, "loss": 0.6549, "step": 8656 }, { "epoch": 0.71, "grad_norm": 3.4491755965852686, "learning_rate": 2.0795246561329853e-06, "loss": 0.7324, "step": 8657 }, { "epoch": 0.71, "grad_norm": 5.355232166552895, "learning_rate": 2.078450266495607e-06, "loss": 1.1377, "step": 8658 }, { "epoch": 0.71, "grad_norm": 3.7165940900652568, "learning_rate": 2.077376081646152e-06, "loss": 0.4958, "step": 8659 }, { "epoch": 0.71, "grad_norm": 4.523644529564438, "learning_rate": 2.0763021016599126e-06, "loss": 0.8052, "step": 8660 }, { "epoch": 0.71, "grad_norm": 3.658025097991605, "learning_rate": 2.075228326612172e-06, "loss": 0.6866, "step": 8661 }, { "epoch": 0.71, "grad_norm": 4.67246750953226, "learning_rate": 2.074154756578197e-06, "loss": 1.0907, "step": 8662 }, { "epoch": 0.71, "grad_norm": 2.4601611546585773, "learning_rate": 2.0730813916332406e-06, "loss": 0.4387, "step": 8663 }, { "epoch": 0.71, "grad_norm": 5.437873537315912, "learning_rate": 2.0720082318525405e-06, "loss": 1.1664, "step": 8664 }, { "epoch": 0.71, "grad_norm": 4.804471556316076, "learning_rate": 2.070935277311322e-06, "loss": 1.148, "step": 8665 }, { "epoch": 0.71, "grad_norm": 2.3163855581907216, "learning_rate": 2.0698625280847917e-06, "loss": 0.3145, "step": 8666 }, { "epoch": 0.71, "grad_norm": 4.303632904484169, "learning_rate": 2.0687899842481486e-06, "loss": 1.0898, "step": 8667 }, { "epoch": 0.71, "grad_norm": 5.116890375410322, "learning_rate": 2.0677176458765686e-06, "loss": 0.9163, "step": 8668 }, { "epoch": 0.71, "grad_norm": 3.764200063004666, "learning_rate": 2.06664551304522e-06, "loss": 0.4481, "step": 8669 }, { "epoch": 0.71, "grad_norm": 4.14798796756444, "learning_rate": 2.0655735858292554e-06, "loss": 0.4932, "step": 8670 }, { "epoch": 0.71, "grad_norm": 7.033735479867001, "learning_rate": 2.0645018643038132e-06, "loss": 1.2896, "step": 8671 }, { "epoch": 0.71, "grad_norm": 2.9940314904487093, "learning_rate": 2.0634303485440133e-06, "loss": 0.7398, "step": 8672 }, { "epoch": 0.71, "grad_norm": 2.5589449579387455, "learning_rate": 2.0623590386249665e-06, "loss": 0.3774, "step": 8673 }, { "epoch": 0.71, "grad_norm": 5.08627383768658, "learning_rate": 2.0612879346217655e-06, "loss": 1.1062, "step": 8674 }, { "epoch": 0.71, "grad_norm": 4.217803109246731, "learning_rate": 2.0602170366094916e-06, "loss": 0.7815, "step": 8675 }, { "epoch": 0.71, "grad_norm": 3.417057549848958, "learning_rate": 2.059146344663211e-06, "loss": 0.6506, "step": 8676 }, { "epoch": 0.71, "grad_norm": 4.1929784125578, "learning_rate": 2.0580758588579712e-06, "loss": 1.117, "step": 8677 }, { "epoch": 0.71, "grad_norm": 2.326294409846503, "learning_rate": 2.057005579268811e-06, "loss": 0.432, "step": 8678 }, { "epoch": 0.71, "grad_norm": 2.8798573621145542, "learning_rate": 2.055935505970751e-06, "loss": 0.5126, "step": 8679 }, { "epoch": 0.71, "grad_norm": 3.567847232897861, "learning_rate": 2.0548656390388e-06, "loss": 0.579, "step": 8680 }, { "epoch": 0.71, "grad_norm": 6.5674547113868655, "learning_rate": 2.0537959785479517e-06, "loss": 1.1413, "step": 8681 }, { "epoch": 0.71, "grad_norm": 5.061943075033028, "learning_rate": 2.052726524573182e-06, "loss": 1.0565, "step": 8682 }, { "epoch": 0.71, "grad_norm": 4.504154197985221, "learning_rate": 2.0516572771894577e-06, "loss": 0.7166, "step": 8683 }, { "epoch": 0.71, "grad_norm": 4.522642446726566, "learning_rate": 2.0505882364717254e-06, "loss": 0.9991, "step": 8684 }, { "epoch": 0.71, "grad_norm": 4.329741163430596, "learning_rate": 2.049519402494922e-06, "loss": 0.8487, "step": 8685 }, { "epoch": 0.71, "grad_norm": 4.969283958897433, "learning_rate": 2.048450775333968e-06, "loss": 0.7496, "step": 8686 }, { "epoch": 0.71, "grad_norm": 5.608275774674261, "learning_rate": 2.0473823550637694e-06, "loss": 1.44, "step": 8687 }, { "epoch": 0.71, "grad_norm": 4.140075850975716, "learning_rate": 2.04631414175922e-06, "loss": 0.7435, "step": 8688 }, { "epoch": 0.71, "grad_norm": 3.7000203667267555, "learning_rate": 2.045246135495192e-06, "loss": 0.7559, "step": 8689 }, { "epoch": 0.71, "grad_norm": 5.45115455835716, "learning_rate": 2.0441783363465517e-06, "loss": 1.0826, "step": 8690 }, { "epoch": 0.71, "grad_norm": 5.183676669097272, "learning_rate": 2.043110744388146e-06, "loss": 0.8767, "step": 8691 }, { "epoch": 0.71, "grad_norm": 4.424393510654391, "learning_rate": 2.042043359694808e-06, "loss": 0.876, "step": 8692 }, { "epoch": 0.71, "grad_norm": 3.366657168875679, "learning_rate": 2.0409761823413583e-06, "loss": 0.5922, "step": 8693 }, { "epoch": 0.71, "grad_norm": 4.664891129493486, "learning_rate": 2.039909212402602e-06, "loss": 0.9714, "step": 8694 }, { "epoch": 0.71, "grad_norm": 5.585462203946919, "learning_rate": 2.038842449953326e-06, "loss": 0.7847, "step": 8695 }, { "epoch": 0.71, "grad_norm": 4.767976704969888, "learning_rate": 2.037775895068307e-06, "loss": 1.002, "step": 8696 }, { "epoch": 0.71, "grad_norm": 4.176040644881519, "learning_rate": 2.0367095478223076e-06, "loss": 0.6875, "step": 8697 }, { "epoch": 0.71, "grad_norm": 2.6062123742969523, "learning_rate": 2.035643408290071e-06, "loss": 0.3599, "step": 8698 }, { "epoch": 0.71, "grad_norm": 4.103258869530679, "learning_rate": 2.034577476546331e-06, "loss": 0.6648, "step": 8699 }, { "epoch": 0.71, "grad_norm": 5.065527963652389, "learning_rate": 2.033511752665806e-06, "loss": 1.0606, "step": 8700 }, { "epoch": 0.71, "grad_norm": 4.89701876913943, "learning_rate": 2.0324462367231953e-06, "loss": 0.9328, "step": 8701 }, { "epoch": 0.71, "grad_norm": 4.282913577894604, "learning_rate": 2.031380928793188e-06, "loss": 1.1449, "step": 8702 }, { "epoch": 0.71, "grad_norm": 2.093768652859012, "learning_rate": 2.0303158289504583e-06, "loss": 0.5894, "step": 8703 }, { "epoch": 0.71, "grad_norm": 3.5779395972063974, "learning_rate": 2.0292509372696652e-06, "loss": 0.5017, "step": 8704 }, { "epoch": 0.71, "grad_norm": 2.4919811415807835, "learning_rate": 2.028186253825454e-06, "loss": 0.5298, "step": 8705 }, { "epoch": 0.71, "grad_norm": 3.553309452899958, "learning_rate": 2.027121778692451e-06, "loss": 0.6475, "step": 8706 }, { "epoch": 0.71, "grad_norm": 2.8409447438725413, "learning_rate": 2.026057511945274e-06, "loss": 0.5548, "step": 8707 }, { "epoch": 0.71, "grad_norm": 4.901430709220455, "learning_rate": 2.0249934536585223e-06, "loss": 1.1477, "step": 8708 }, { "epoch": 0.71, "grad_norm": 4.586139984985449, "learning_rate": 2.023929603906783e-06, "loss": 1.0993, "step": 8709 }, { "epoch": 0.71, "grad_norm": 5.95086982419721, "learning_rate": 2.0228659627646257e-06, "loss": 1.1894, "step": 8710 }, { "epoch": 0.71, "grad_norm": 3.8891510456627842, "learning_rate": 2.02180253030661e-06, "loss": 0.5169, "step": 8711 }, { "epoch": 0.71, "grad_norm": 3.984012817968478, "learning_rate": 2.020739306607274e-06, "loss": 0.7946, "step": 8712 }, { "epoch": 0.71, "grad_norm": 4.3204634258703845, "learning_rate": 2.0196762917411466e-06, "loss": 1.0686, "step": 8713 }, { "epoch": 0.71, "grad_norm": 3.26589177246894, "learning_rate": 2.018613485782743e-06, "loss": 0.9131, "step": 8714 }, { "epoch": 0.71, "grad_norm": 4.39516109327882, "learning_rate": 2.0175508888065563e-06, "loss": 0.7271, "step": 8715 }, { "epoch": 0.71, "grad_norm": 4.154450605412165, "learning_rate": 2.0164885008870755e-06, "loss": 0.7811, "step": 8716 }, { "epoch": 0.71, "grad_norm": 3.72573839412881, "learning_rate": 2.0154263220987642e-06, "loss": 0.7865, "step": 8717 }, { "epoch": 0.71, "grad_norm": 2.212752989508091, "learning_rate": 2.014364352516079e-06, "loss": 0.3108, "step": 8718 }, { "epoch": 0.71, "grad_norm": 4.321230885821851, "learning_rate": 2.013302592213459e-06, "loss": 0.8986, "step": 8719 }, { "epoch": 0.71, "grad_norm": 4.524244018089491, "learning_rate": 2.0122410412653294e-06, "loss": 0.5415, "step": 8720 }, { "epoch": 0.71, "grad_norm": 4.248701771187896, "learning_rate": 2.0111796997460997e-06, "loss": 1.0274, "step": 8721 }, { "epoch": 0.71, "grad_norm": 3.8017975856320105, "learning_rate": 2.010118567730167e-06, "loss": 0.7047, "step": 8722 }, { "epoch": 0.71, "grad_norm": 3.6514091243692848, "learning_rate": 2.0090576452919095e-06, "loss": 0.5102, "step": 8723 }, { "epoch": 0.71, "grad_norm": 5.198097127062312, "learning_rate": 2.0079969325056947e-06, "loss": 0.9692, "step": 8724 }, { "epoch": 0.71, "grad_norm": 3.892927176413908, "learning_rate": 2.006936429445873e-06, "loss": 0.4655, "step": 8725 }, { "epoch": 0.71, "grad_norm": 4.656582107206415, "learning_rate": 2.005876136186782e-06, "loss": 1.165, "step": 8726 }, { "epoch": 0.71, "grad_norm": 1.7809918320027742, "learning_rate": 2.0048160528027438e-06, "loss": 0.4113, "step": 8727 }, { "epoch": 0.71, "grad_norm": 4.950507539669394, "learning_rate": 2.003756179368067e-06, "loss": 1.0302, "step": 8728 }, { "epoch": 0.71, "grad_norm": 2.698170971533982, "learning_rate": 2.00269651595704e-06, "loss": 0.5472, "step": 8729 }, { "epoch": 0.71, "grad_norm": 4.94494712874036, "learning_rate": 2.0016370626439454e-06, "loss": 0.7062, "step": 8730 }, { "epoch": 0.71, "grad_norm": 4.128425975096584, "learning_rate": 2.000577819503041e-06, "loss": 0.5132, "step": 8731 }, { "epoch": 0.71, "grad_norm": 4.225973200281443, "learning_rate": 1.9995187866085786e-06, "loss": 0.4715, "step": 8732 }, { "epoch": 0.71, "grad_norm": 3.4534934469698375, "learning_rate": 1.998459964034791e-06, "loss": 0.5686, "step": 8733 }, { "epoch": 0.71, "grad_norm": 4.407785171635663, "learning_rate": 1.9974013518558993e-06, "loss": 0.615, "step": 8734 }, { "epoch": 0.71, "grad_norm": 3.2206323327255437, "learning_rate": 1.996342950146103e-06, "loss": 0.4733, "step": 8735 }, { "epoch": 0.71, "grad_norm": 2.6897605026812093, "learning_rate": 1.995284758979594e-06, "loss": 0.3708, "step": 8736 }, { "epoch": 0.71, "grad_norm": 3.7424303998891046, "learning_rate": 1.9942267784305475e-06, "loss": 0.6507, "step": 8737 }, { "epoch": 0.71, "grad_norm": 3.741602852999754, "learning_rate": 1.9931690085731225e-06, "loss": 0.5439, "step": 8738 }, { "epoch": 0.71, "grad_norm": 4.083622347875406, "learning_rate": 1.9921114494814657e-06, "loss": 0.8254, "step": 8739 }, { "epoch": 0.71, "grad_norm": 2.4516868864992776, "learning_rate": 1.991054101229704e-06, "loss": 0.5842, "step": 8740 }, { "epoch": 0.71, "grad_norm": 2.7441013518235735, "learning_rate": 1.9899969638919554e-06, "loss": 0.2529, "step": 8741 }, { "epoch": 0.71, "grad_norm": 4.86480139464636, "learning_rate": 1.9889400375423196e-06, "loss": 0.9784, "step": 8742 }, { "epoch": 0.71, "grad_norm": 4.910379725924154, "learning_rate": 1.987883322254883e-06, "loss": 0.7598, "step": 8743 }, { "epoch": 0.71, "grad_norm": 4.881084209076858, "learning_rate": 1.9868268181037186e-06, "loss": 0.6126, "step": 8744 }, { "epoch": 0.71, "grad_norm": 3.692959313734585, "learning_rate": 1.9857705251628796e-06, "loss": 0.8387, "step": 8745 }, { "epoch": 0.71, "grad_norm": 4.09280388117034, "learning_rate": 1.98471444350641e-06, "loss": 0.764, "step": 8746 }, { "epoch": 0.71, "grad_norm": 4.425010778688737, "learning_rate": 1.9836585732083334e-06, "loss": 0.9115, "step": 8747 }, { "epoch": 0.72, "grad_norm": 1.6966358191954547, "learning_rate": 1.982602914342664e-06, "loss": 0.3736, "step": 8748 }, { "epoch": 0.72, "grad_norm": 3.9070587122298943, "learning_rate": 1.9815474669833985e-06, "loss": 0.7047, "step": 8749 }, { "epoch": 0.72, "grad_norm": 2.9935935008073384, "learning_rate": 1.9804922312045193e-06, "loss": 0.3165, "step": 8750 }, { "epoch": 0.72, "grad_norm": 3.733577568711758, "learning_rate": 1.9794372070799955e-06, "loss": 0.7414, "step": 8751 }, { "epoch": 0.72, "grad_norm": 4.562508336536102, "learning_rate": 1.978382394683776e-06, "loss": 0.8799, "step": 8752 }, { "epoch": 0.72, "grad_norm": 4.489307752013866, "learning_rate": 1.9773277940898007e-06, "loss": 0.9381, "step": 8753 }, { "epoch": 0.72, "grad_norm": 5.075482943719641, "learning_rate": 1.9762734053719923e-06, "loss": 0.9186, "step": 8754 }, { "epoch": 0.72, "grad_norm": 4.720913936362509, "learning_rate": 1.975219228604259e-06, "loss": 1.1617, "step": 8755 }, { "epoch": 0.72, "grad_norm": 5.289513198260462, "learning_rate": 1.9741652638604952e-06, "loss": 0.9923, "step": 8756 }, { "epoch": 0.72, "grad_norm": 5.238267357501408, "learning_rate": 1.9731115112145765e-06, "loss": 1.1233, "step": 8757 }, { "epoch": 0.72, "grad_norm": 4.7919538037923735, "learning_rate": 1.9720579707403677e-06, "loss": 1.1525, "step": 8758 }, { "epoch": 0.72, "grad_norm": 3.342877462199351, "learning_rate": 1.9710046425117175e-06, "loss": 0.6129, "step": 8759 }, { "epoch": 0.72, "grad_norm": 5.035221423131665, "learning_rate": 1.9699515266024614e-06, "loss": 0.9605, "step": 8760 }, { "epoch": 0.72, "grad_norm": 5.81820395917385, "learning_rate": 1.968898623086415e-06, "loss": 1.3903, "step": 8761 }, { "epoch": 0.72, "grad_norm": 4.519836837579079, "learning_rate": 1.967845932037385e-06, "loss": 0.8829, "step": 8762 }, { "epoch": 0.72, "grad_norm": 3.4543995131134193, "learning_rate": 1.966793453529158e-06, "loss": 0.7544, "step": 8763 }, { "epoch": 0.72, "grad_norm": 1.9214886394824024, "learning_rate": 1.9657411876355086e-06, "loss": 0.2512, "step": 8764 }, { "epoch": 0.72, "grad_norm": 3.9448899170277545, "learning_rate": 1.9646891344301972e-06, "loss": 0.9378, "step": 8765 }, { "epoch": 0.72, "grad_norm": 3.2417350766028163, "learning_rate": 1.9636372939869677e-06, "loss": 0.4702, "step": 8766 }, { "epoch": 0.72, "grad_norm": 5.483562613077437, "learning_rate": 1.9625856663795495e-06, "loss": 1.1118, "step": 8767 }, { "epoch": 0.72, "grad_norm": 3.59874945966871, "learning_rate": 1.9615342516816595e-06, "loss": 0.5948, "step": 8768 }, { "epoch": 0.72, "grad_norm": 5.248160818585475, "learning_rate": 1.9604830499669927e-06, "loss": 1.2771, "step": 8769 }, { "epoch": 0.72, "grad_norm": 3.0465365777007856, "learning_rate": 1.959432061309236e-06, "loss": 0.6711, "step": 8770 }, { "epoch": 0.72, "grad_norm": 4.113986014902147, "learning_rate": 1.9583812857820595e-06, "loss": 0.8002, "step": 8771 }, { "epoch": 0.72, "grad_norm": 2.5081778890051227, "learning_rate": 1.9573307234591177e-06, "loss": 0.4122, "step": 8772 }, { "epoch": 0.72, "grad_norm": 2.12267022546443, "learning_rate": 1.956280374414051e-06, "loss": 0.1497, "step": 8773 }, { "epoch": 0.72, "grad_norm": 2.569631610718901, "learning_rate": 1.9552302387204847e-06, "loss": 0.4071, "step": 8774 }, { "epoch": 0.72, "grad_norm": 3.095527411898248, "learning_rate": 1.9541803164520264e-06, "loss": 0.6379, "step": 8775 }, { "epoch": 0.72, "grad_norm": 3.0478573325617098, "learning_rate": 1.9531306076822738e-06, "loss": 0.723, "step": 8776 }, { "epoch": 0.72, "grad_norm": 3.275622302591984, "learning_rate": 1.952081112484804e-06, "loss": 0.5652, "step": 8777 }, { "epoch": 0.72, "grad_norm": 2.985750730985547, "learning_rate": 1.951031830933184e-06, "loss": 0.5842, "step": 8778 }, { "epoch": 0.72, "grad_norm": 3.4450346943209933, "learning_rate": 1.9499827631009644e-06, "loss": 0.4955, "step": 8779 }, { "epoch": 0.72, "grad_norm": 3.2878789755454196, "learning_rate": 1.948933909061678e-06, "loss": 0.4982, "step": 8780 }, { "epoch": 0.72, "grad_norm": 2.9108891559935954, "learning_rate": 1.9478852688888467e-06, "loss": 0.4906, "step": 8781 }, { "epoch": 0.72, "grad_norm": 3.1479115211412, "learning_rate": 1.946836842655975e-06, "loss": 0.2868, "step": 8782 }, { "epoch": 0.72, "grad_norm": 3.778667943474665, "learning_rate": 1.9457886304365533e-06, "loss": 0.7543, "step": 8783 }, { "epoch": 0.72, "grad_norm": 4.356567930598804, "learning_rate": 1.9447406323040562e-06, "loss": 0.7398, "step": 8784 }, { "epoch": 0.72, "grad_norm": 2.540616076380403, "learning_rate": 1.9436928483319467e-06, "loss": 0.7212, "step": 8785 }, { "epoch": 0.72, "grad_norm": 5.11103211585045, "learning_rate": 1.942645278593665e-06, "loss": 0.8136, "step": 8786 }, { "epoch": 0.72, "grad_norm": 3.5630316440371184, "learning_rate": 1.9415979231626443e-06, "loss": 0.6716, "step": 8787 }, { "epoch": 0.72, "grad_norm": 3.1048582060855607, "learning_rate": 1.940550782112299e-06, "loss": 0.7179, "step": 8788 }, { "epoch": 0.72, "grad_norm": 4.122237785674999, "learning_rate": 1.9395038555160285e-06, "loss": 0.7376, "step": 8789 }, { "epoch": 0.72, "grad_norm": 2.965063265640028, "learning_rate": 1.93845714344722e-06, "loss": 0.3742, "step": 8790 }, { "epoch": 0.72, "grad_norm": 4.4587164331302445, "learning_rate": 1.9374106459792406e-06, "loss": 0.8145, "step": 8791 }, { "epoch": 0.72, "grad_norm": 3.4020388320522774, "learning_rate": 1.9363643631854483e-06, "loss": 0.6054, "step": 8792 }, { "epoch": 0.72, "grad_norm": 4.382568314360286, "learning_rate": 1.9353182951391793e-06, "loss": 0.8411, "step": 8793 }, { "epoch": 0.72, "grad_norm": 3.2978674612559438, "learning_rate": 1.93427244191376e-06, "loss": 0.5739, "step": 8794 }, { "epoch": 0.72, "grad_norm": 3.3205900275988025, "learning_rate": 1.9332268035825006e-06, "loss": 0.4264, "step": 8795 }, { "epoch": 0.72, "grad_norm": 4.372332606382506, "learning_rate": 1.9321813802186972e-06, "loss": 1.0462, "step": 8796 }, { "epoch": 0.72, "grad_norm": 4.77716927624133, "learning_rate": 1.931136171895627e-06, "loss": 1.1175, "step": 8797 }, { "epoch": 0.72, "grad_norm": 3.4186599091483756, "learning_rate": 1.9300911786865544e-06, "loss": 0.6359, "step": 8798 }, { "epoch": 0.72, "grad_norm": 5.371971117921947, "learning_rate": 1.92904640066473e-06, "loss": 1.0489, "step": 8799 }, { "epoch": 0.72, "grad_norm": 4.125585132588605, "learning_rate": 1.9280018379033884e-06, "loss": 0.8945, "step": 8800 }, { "epoch": 0.72, "grad_norm": 4.69143808714922, "learning_rate": 1.926957490475748e-06, "loss": 0.5875, "step": 8801 }, { "epoch": 0.72, "grad_norm": 5.550472159142619, "learning_rate": 1.925913358455016e-06, "loss": 0.8768, "step": 8802 }, { "epoch": 0.72, "grad_norm": 3.636115589262085, "learning_rate": 1.9248694419143776e-06, "loss": 0.741, "step": 8803 }, { "epoch": 0.72, "grad_norm": 3.041781653491779, "learning_rate": 1.923825740927008e-06, "loss": 0.507, "step": 8804 }, { "epoch": 0.72, "grad_norm": 3.4530873656903327, "learning_rate": 1.922782255566066e-06, "loss": 0.7788, "step": 8805 }, { "epoch": 0.72, "grad_norm": 4.45092229345249, "learning_rate": 1.921738985904696e-06, "loss": 0.9431, "step": 8806 }, { "epoch": 0.72, "grad_norm": 5.465998075197892, "learning_rate": 1.9206959320160286e-06, "loss": 1.1356, "step": 8807 }, { "epoch": 0.72, "grad_norm": 4.004891858662268, "learning_rate": 1.9196530939731727e-06, "loss": 1.044, "step": 8808 }, { "epoch": 0.72, "grad_norm": 4.106135688800069, "learning_rate": 1.9186104718492315e-06, "loss": 1.1551, "step": 8809 }, { "epoch": 0.72, "grad_norm": 7.0940084033406805, "learning_rate": 1.917568065717284e-06, "loss": 1.6003, "step": 8810 }, { "epoch": 0.72, "grad_norm": 5.167711096702554, "learning_rate": 1.9165258756504003e-06, "loss": 0.9334, "step": 8811 }, { "epoch": 0.72, "grad_norm": 2.4616970813945356, "learning_rate": 1.9154839017216336e-06, "loss": 0.4413, "step": 8812 }, { "epoch": 0.72, "grad_norm": 2.3795637550189124, "learning_rate": 1.914442144004021e-06, "loss": 0.3302, "step": 8813 }, { "epoch": 0.72, "grad_norm": 3.510062021432281, "learning_rate": 1.913400602570588e-06, "loss": 0.8562, "step": 8814 }, { "epoch": 0.72, "grad_norm": 3.57704329704265, "learning_rate": 1.9123592774943383e-06, "loss": 0.6834, "step": 8815 }, { "epoch": 0.72, "grad_norm": 3.4098023225599374, "learning_rate": 1.911318168848265e-06, "loss": 0.5838, "step": 8816 }, { "epoch": 0.72, "grad_norm": 4.032834390685839, "learning_rate": 1.9102772767053467e-06, "loss": 0.9763, "step": 8817 }, { "epoch": 0.72, "grad_norm": 3.883315323065235, "learning_rate": 1.909236601138545e-06, "loss": 0.8635, "step": 8818 }, { "epoch": 0.72, "grad_norm": 3.9993275066765177, "learning_rate": 1.908196142220808e-06, "loss": 0.3683, "step": 8819 }, { "epoch": 0.72, "grad_norm": 4.358880564580176, "learning_rate": 1.9071559000250633e-06, "loss": 0.8507, "step": 8820 }, { "epoch": 0.72, "grad_norm": 3.209064044779201, "learning_rate": 1.906115874624231e-06, "loss": 0.548, "step": 8821 }, { "epoch": 0.72, "grad_norm": 2.7333119421762047, "learning_rate": 1.905076066091211e-06, "loss": 0.6069, "step": 8822 }, { "epoch": 0.72, "grad_norm": 3.60706101308045, "learning_rate": 1.904036474498891e-06, "loss": 0.6224, "step": 8823 }, { "epoch": 0.72, "grad_norm": 4.546860315142606, "learning_rate": 1.9029970999201387e-06, "loss": 0.8429, "step": 8824 }, { "epoch": 0.72, "grad_norm": 3.8835937404593306, "learning_rate": 1.9019579424278133e-06, "loss": 0.6663, "step": 8825 }, { "epoch": 0.72, "grad_norm": 3.967240733651116, "learning_rate": 1.900919002094752e-06, "loss": 0.4519, "step": 8826 }, { "epoch": 0.72, "grad_norm": 2.90936097733532, "learning_rate": 1.8998802789937815e-06, "loss": 0.5688, "step": 8827 }, { "epoch": 0.72, "grad_norm": 4.5351784039807725, "learning_rate": 1.898841773197711e-06, "loss": 0.5924, "step": 8828 }, { "epoch": 0.72, "grad_norm": 2.772796655438025, "learning_rate": 1.8978034847793364e-06, "loss": 0.2402, "step": 8829 }, { "epoch": 0.72, "grad_norm": 3.2992077415812853, "learning_rate": 1.8967654138114366e-06, "loss": 0.5012, "step": 8830 }, { "epoch": 0.72, "grad_norm": 2.9005704020665144, "learning_rate": 1.895727560366778e-06, "loss": 0.4381, "step": 8831 }, { "epoch": 0.72, "grad_norm": 3.394153183451265, "learning_rate": 1.8946899245181056e-06, "loss": 0.7508, "step": 8832 }, { "epoch": 0.72, "grad_norm": 2.9745202934259174, "learning_rate": 1.893652506338155e-06, "loss": 0.6123, "step": 8833 }, { "epoch": 0.72, "grad_norm": 4.3302935435473335, "learning_rate": 1.892615305899645e-06, "loss": 1.2427, "step": 8834 }, { "epoch": 0.72, "grad_norm": 3.408078375774764, "learning_rate": 1.8915783232752788e-06, "loss": 0.557, "step": 8835 }, { "epoch": 0.72, "grad_norm": 4.289875698847576, "learning_rate": 1.8905415585377458e-06, "loss": 0.7968, "step": 8836 }, { "epoch": 0.72, "grad_norm": 5.004776925266335, "learning_rate": 1.8895050117597152e-06, "loss": 1.0907, "step": 8837 }, { "epoch": 0.72, "grad_norm": 3.6106359538734822, "learning_rate": 1.8884686830138465e-06, "loss": 0.7138, "step": 8838 }, { "epoch": 0.72, "grad_norm": 4.4332554602896295, "learning_rate": 1.8874325723727831e-06, "loss": 0.9122, "step": 8839 }, { "epoch": 0.72, "grad_norm": 3.869664636302528, "learning_rate": 1.8863966799091492e-06, "loss": 0.7228, "step": 8840 }, { "epoch": 0.72, "grad_norm": 3.979299170617078, "learning_rate": 1.885361005695558e-06, "loss": 0.653, "step": 8841 }, { "epoch": 0.72, "grad_norm": 3.048090927646827, "learning_rate": 1.8843255498046065e-06, "loss": 0.6582, "step": 8842 }, { "epoch": 0.72, "grad_norm": 3.7555027623016293, "learning_rate": 1.8832903123088725e-06, "loss": 0.7517, "step": 8843 }, { "epoch": 0.72, "grad_norm": 4.254266635051504, "learning_rate": 1.882255293280924e-06, "loss": 0.9816, "step": 8844 }, { "epoch": 0.72, "grad_norm": 3.299368885423742, "learning_rate": 1.8812204927933108e-06, "loss": 0.7299, "step": 8845 }, { "epoch": 0.72, "grad_norm": 3.2000511906844875, "learning_rate": 1.8801859109185682e-06, "loss": 0.5642, "step": 8846 }, { "epoch": 0.72, "grad_norm": 4.618016929122934, "learning_rate": 1.879151547729216e-06, "loss": 0.7709, "step": 8847 }, { "epoch": 0.72, "grad_norm": 5.2500007244119935, "learning_rate": 1.87811740329776e-06, "loss": 1.1265, "step": 8848 }, { "epoch": 0.72, "grad_norm": 3.7510578134177135, "learning_rate": 1.8770834776966855e-06, "loss": 0.682, "step": 8849 }, { "epoch": 0.72, "grad_norm": 2.617896038858609, "learning_rate": 1.8760497709984683e-06, "loss": 0.4952, "step": 8850 }, { "epoch": 0.72, "grad_norm": 3.9926633184568883, "learning_rate": 1.8750162832755669e-06, "loss": 0.6637, "step": 8851 }, { "epoch": 0.72, "grad_norm": 2.8388230579044857, "learning_rate": 1.873983014600424e-06, "loss": 0.4729, "step": 8852 }, { "epoch": 0.72, "grad_norm": 4.664516812490497, "learning_rate": 1.8729499650454691e-06, "loss": 0.9084, "step": 8853 }, { "epoch": 0.72, "grad_norm": 3.0321434265716807, "learning_rate": 1.8719171346831106e-06, "loss": 0.656, "step": 8854 }, { "epoch": 0.72, "grad_norm": 3.8632003393609278, "learning_rate": 1.8708845235857498e-06, "loss": 0.6406, "step": 8855 }, { "epoch": 0.72, "grad_norm": 2.9673550677903306, "learning_rate": 1.8698521318257635e-06, "loss": 0.5056, "step": 8856 }, { "epoch": 0.72, "grad_norm": 4.9194441145925065, "learning_rate": 1.8688199594755208e-06, "loss": 0.853, "step": 8857 }, { "epoch": 0.72, "grad_norm": 3.0424142971342847, "learning_rate": 1.8677880066073718e-06, "loss": 0.5882, "step": 8858 }, { "epoch": 0.72, "grad_norm": 3.4533604590139966, "learning_rate": 1.866756273293654e-06, "loss": 0.6266, "step": 8859 }, { "epoch": 0.72, "grad_norm": 1.1540424959588524, "learning_rate": 1.8657247596066834e-06, "loss": 0.1113, "step": 8860 }, { "epoch": 0.72, "grad_norm": 2.937330664401966, "learning_rate": 1.8646934656187671e-06, "loss": 0.5732, "step": 8861 }, { "epoch": 0.72, "grad_norm": 5.884979876373652, "learning_rate": 1.863662391402194e-06, "loss": 0.6385, "step": 8862 }, { "epoch": 0.72, "grad_norm": 4.175052027188275, "learning_rate": 1.862631537029238e-06, "loss": 0.9419, "step": 8863 }, { "epoch": 0.72, "grad_norm": 4.946973992101543, "learning_rate": 1.8616009025721572e-06, "loss": 0.8249, "step": 8864 }, { "epoch": 0.72, "grad_norm": 3.1444148879339875, "learning_rate": 1.860570488103196e-06, "loss": 0.4323, "step": 8865 }, { "epoch": 0.72, "grad_norm": 5.590713657852092, "learning_rate": 1.85954029369458e-06, "loss": 0.7788, "step": 8866 }, { "epoch": 0.72, "grad_norm": 1.320357196050719, "learning_rate": 1.858510319418521e-06, "loss": 0.1759, "step": 8867 }, { "epoch": 0.72, "grad_norm": 4.33373464431892, "learning_rate": 1.8574805653472178e-06, "loss": 0.7957, "step": 8868 }, { "epoch": 0.72, "grad_norm": 3.852412842501961, "learning_rate": 1.8564510315528517e-06, "loss": 0.6909, "step": 8869 }, { "epoch": 0.72, "grad_norm": 2.0505451876407883, "learning_rate": 1.8554217181075862e-06, "loss": 0.3888, "step": 8870 }, { "epoch": 0.73, "grad_norm": 3.758074011530697, "learning_rate": 1.8543926250835749e-06, "loss": 0.7105, "step": 8871 }, { "epoch": 0.73, "grad_norm": 4.977808151424315, "learning_rate": 1.8533637525529485e-06, "loss": 1.0953, "step": 8872 }, { "epoch": 0.73, "grad_norm": 5.328100722581435, "learning_rate": 1.8523351005878293e-06, "loss": 1.1907, "step": 8873 }, { "epoch": 0.73, "grad_norm": 2.510754729106961, "learning_rate": 1.8513066692603204e-06, "loss": 0.4291, "step": 8874 }, { "epoch": 0.73, "grad_norm": 2.8439070213468214, "learning_rate": 1.8502784586425116e-06, "loss": 0.5486, "step": 8875 }, { "epoch": 0.73, "grad_norm": 4.4516253149991885, "learning_rate": 1.849250468806476e-06, "loss": 0.8449, "step": 8876 }, { "epoch": 0.73, "grad_norm": 4.592603633187335, "learning_rate": 1.8482226998242692e-06, "loss": 0.8437, "step": 8877 }, { "epoch": 0.73, "grad_norm": 3.0296036545205443, "learning_rate": 1.8471951517679348e-06, "loss": 0.4635, "step": 8878 }, { "epoch": 0.73, "grad_norm": 1.8423645483049949, "learning_rate": 1.846167824709499e-06, "loss": 0.2186, "step": 8879 }, { "epoch": 0.73, "grad_norm": 4.848669674508421, "learning_rate": 1.845140718720973e-06, "loss": 1.1526, "step": 8880 }, { "epoch": 0.73, "grad_norm": 4.037018099967917, "learning_rate": 1.844113833874353e-06, "loss": 0.8278, "step": 8881 }, { "epoch": 0.73, "grad_norm": 4.3710378600880455, "learning_rate": 1.8430871702416198e-06, "loss": 0.8264, "step": 8882 }, { "epoch": 0.73, "grad_norm": 4.301263634150496, "learning_rate": 1.8420607278947362e-06, "loss": 0.791, "step": 8883 }, { "epoch": 0.73, "grad_norm": 2.0243568108169714, "learning_rate": 1.8410345069056517e-06, "loss": 0.3202, "step": 8884 }, { "epoch": 0.73, "grad_norm": 2.9360226309033695, "learning_rate": 1.840008507346302e-06, "loss": 0.4884, "step": 8885 }, { "epoch": 0.73, "grad_norm": 4.49497390112113, "learning_rate": 1.838982729288602e-06, "loss": 0.9013, "step": 8886 }, { "epoch": 0.73, "grad_norm": 2.954276049008714, "learning_rate": 1.8379571728044559e-06, "loss": 0.6735, "step": 8887 }, { "epoch": 0.73, "grad_norm": 2.947073723266408, "learning_rate": 1.8369318379657526e-06, "loss": 0.4891, "step": 8888 }, { "epoch": 0.73, "grad_norm": 3.085480599916667, "learning_rate": 1.8359067248443602e-06, "loss": 0.3471, "step": 8889 }, { "epoch": 0.73, "grad_norm": 3.0392365817723133, "learning_rate": 1.8348818335121355e-06, "loss": 0.6283, "step": 8890 }, { "epoch": 0.73, "grad_norm": 5.6103342856232805, "learning_rate": 1.8338571640409203e-06, "loss": 0.9192, "step": 8891 }, { "epoch": 0.73, "grad_norm": 3.587916812618091, "learning_rate": 1.8328327165025384e-06, "loss": 0.7832, "step": 8892 }, { "epoch": 0.73, "grad_norm": 4.671478660931107, "learning_rate": 1.8318084909687995e-06, "loss": 0.8664, "step": 8893 }, { "epoch": 0.73, "grad_norm": 3.428929659668447, "learning_rate": 1.8307844875114993e-06, "loss": 0.3476, "step": 8894 }, { "epoch": 0.73, "grad_norm": 3.1948707359979056, "learning_rate": 1.8297607062024125e-06, "loss": 0.717, "step": 8895 }, { "epoch": 0.73, "grad_norm": 4.149110950463409, "learning_rate": 1.828737147113303e-06, "loss": 0.7536, "step": 8896 }, { "epoch": 0.73, "grad_norm": 2.8963364537499205, "learning_rate": 1.827713810315918e-06, "loss": 0.4259, "step": 8897 }, { "epoch": 0.73, "grad_norm": 3.3710584652754862, "learning_rate": 1.8266906958819892e-06, "loss": 0.5665, "step": 8898 }, { "epoch": 0.73, "grad_norm": 4.246601455389945, "learning_rate": 1.8256678038832342e-06, "loss": 0.7157, "step": 8899 }, { "epoch": 0.73, "grad_norm": 2.7252475510620577, "learning_rate": 1.8246451343913497e-06, "loss": 0.4024, "step": 8900 }, { "epoch": 0.73, "grad_norm": 2.2154462309831513, "learning_rate": 1.8236226874780233e-06, "loss": 0.3761, "step": 8901 }, { "epoch": 0.73, "grad_norm": 4.463399088426534, "learning_rate": 1.822600463214922e-06, "loss": 0.7889, "step": 8902 }, { "epoch": 0.73, "grad_norm": 4.617610782224787, "learning_rate": 1.8215784616736993e-06, "loss": 1.2148, "step": 8903 }, { "epoch": 0.73, "grad_norm": 4.454984366704652, "learning_rate": 1.8205566829259942e-06, "loss": 0.813, "step": 8904 }, { "epoch": 0.73, "grad_norm": 5.205670702293653, "learning_rate": 1.8195351270434303e-06, "loss": 1.1278, "step": 8905 }, { "epoch": 0.73, "grad_norm": 2.285411479543468, "learning_rate": 1.8185137940976111e-06, "loss": 0.5392, "step": 8906 }, { "epoch": 0.73, "grad_norm": 3.7137924721743736, "learning_rate": 1.8174926841601294e-06, "loss": 0.6425, "step": 8907 }, { "epoch": 0.73, "grad_norm": 5.725552627995343, "learning_rate": 1.816471797302559e-06, "loss": 1.1603, "step": 8908 }, { "epoch": 0.73, "grad_norm": 3.415861053345675, "learning_rate": 1.8154511335964619e-06, "loss": 0.5348, "step": 8909 }, { "epoch": 0.73, "grad_norm": 3.1246213158237968, "learning_rate": 1.8144306931133809e-06, "loss": 0.2999, "step": 8910 }, { "epoch": 0.73, "grad_norm": 3.7435419776111294, "learning_rate": 1.8134104759248461e-06, "loss": 0.369, "step": 8911 }, { "epoch": 0.73, "grad_norm": 3.9848728694727753, "learning_rate": 1.8123904821023675e-06, "loss": 0.8606, "step": 8912 }, { "epoch": 0.73, "grad_norm": 2.2749438745423083, "learning_rate": 1.8113707117174433e-06, "loss": 0.6244, "step": 8913 }, { "epoch": 0.73, "grad_norm": 5.182642877270864, "learning_rate": 1.8103511648415556e-06, "loss": 1.3483, "step": 8914 }, { "epoch": 0.73, "grad_norm": 5.0969152987592725, "learning_rate": 1.8093318415461698e-06, "loss": 0.8401, "step": 8915 }, { "epoch": 0.73, "grad_norm": 5.307504013633494, "learning_rate": 1.8083127419027375e-06, "loss": 1.1576, "step": 8916 }, { "epoch": 0.73, "grad_norm": 3.888400058596468, "learning_rate": 1.80729386598269e-06, "loss": 1.08, "step": 8917 }, { "epoch": 0.73, "grad_norm": 4.491074432520151, "learning_rate": 1.8062752138574497e-06, "loss": 1.6848, "step": 8918 }, { "epoch": 0.73, "grad_norm": 4.445135276899701, "learning_rate": 1.805256785598416e-06, "loss": 0.8892, "step": 8919 }, { "epoch": 0.73, "grad_norm": 4.234531444079373, "learning_rate": 1.804238581276978e-06, "loss": 0.8545, "step": 8920 }, { "epoch": 0.73, "grad_norm": 5.143084059745439, "learning_rate": 1.8032206009645077e-06, "loss": 0.7199, "step": 8921 }, { "epoch": 0.73, "grad_norm": 4.042827522492736, "learning_rate": 1.8022028447323619e-06, "loss": 0.6258, "step": 8922 }, { "epoch": 0.73, "grad_norm": 2.826884504598852, "learning_rate": 1.8011853126518786e-06, "loss": 0.5258, "step": 8923 }, { "epoch": 0.73, "grad_norm": 5.623346055293667, "learning_rate": 1.8001680047943836e-06, "loss": 1.0562, "step": 8924 }, { "epoch": 0.73, "grad_norm": 2.9842091905897403, "learning_rate": 1.7991509212311858e-06, "loss": 0.219, "step": 8925 }, { "epoch": 0.73, "grad_norm": 4.48664500069927, "learning_rate": 1.798134062033578e-06, "loss": 0.7726, "step": 8926 }, { "epoch": 0.73, "grad_norm": 2.1405945631380128, "learning_rate": 1.7971174272728381e-06, "loss": 0.3926, "step": 8927 }, { "epoch": 0.73, "grad_norm": 4.0999316996457, "learning_rate": 1.7961010170202293e-06, "loss": 0.9603, "step": 8928 }, { "epoch": 0.73, "grad_norm": 3.7755183810759707, "learning_rate": 1.7950848313469944e-06, "loss": 0.7153, "step": 8929 }, { "epoch": 0.73, "grad_norm": 3.424073871148489, "learning_rate": 1.7940688703243641e-06, "loss": 0.4287, "step": 8930 }, { "epoch": 0.73, "grad_norm": 2.195063690892858, "learning_rate": 1.7930531340235546e-06, "loss": 0.3746, "step": 8931 }, { "epoch": 0.73, "grad_norm": 5.073234184739771, "learning_rate": 1.7920376225157648e-06, "loss": 0.801, "step": 8932 }, { "epoch": 0.73, "grad_norm": 3.9950993521702953, "learning_rate": 1.7910223358721751e-06, "loss": 1.2285, "step": 8933 }, { "epoch": 0.73, "grad_norm": 2.8398240441967366, "learning_rate": 1.7900072741639557e-06, "loss": 0.4967, "step": 8934 }, { "epoch": 0.73, "grad_norm": 3.9808438863197506, "learning_rate": 1.7889924374622552e-06, "loss": 0.7187, "step": 8935 }, { "epoch": 0.73, "grad_norm": 3.3805642712079598, "learning_rate": 1.7879778258382103e-06, "loss": 0.7522, "step": 8936 }, { "epoch": 0.73, "grad_norm": 5.326232272044018, "learning_rate": 1.786963439362941e-06, "loss": 1.0033, "step": 8937 }, { "epoch": 0.73, "grad_norm": 2.0476375259141353, "learning_rate": 1.7859492781075511e-06, "loss": 0.3842, "step": 8938 }, { "epoch": 0.73, "grad_norm": 4.872031597441457, "learning_rate": 1.7849353421431316e-06, "loss": 0.7099, "step": 8939 }, { "epoch": 0.73, "grad_norm": 3.912564714887852, "learning_rate": 1.7839216315407498e-06, "loss": 0.8623, "step": 8940 }, { "epoch": 0.73, "grad_norm": 4.953381229946058, "learning_rate": 1.782908146371466e-06, "loss": 0.7803, "step": 8941 }, { "epoch": 0.73, "grad_norm": 2.460200700473453, "learning_rate": 1.7818948867063201e-06, "loss": 0.7107, "step": 8942 }, { "epoch": 0.73, "grad_norm": 5.176029544680021, "learning_rate": 1.780881852616338e-06, "loss": 0.9858, "step": 8943 }, { "epoch": 0.73, "grad_norm": 2.753092611167436, "learning_rate": 1.7798690441725275e-06, "loss": 0.3707, "step": 8944 }, { "epoch": 0.73, "grad_norm": 3.948615487037133, "learning_rate": 1.7788564614458853e-06, "loss": 0.9159, "step": 8945 }, { "epoch": 0.73, "grad_norm": 2.8964014081785283, "learning_rate": 1.7778441045073846e-06, "loss": 0.5617, "step": 8946 }, { "epoch": 0.73, "grad_norm": 2.4158829649665705, "learning_rate": 1.7768319734279894e-06, "loss": 0.5745, "step": 8947 }, { "epoch": 0.73, "grad_norm": 3.9495655938363736, "learning_rate": 1.775820068278647e-06, "loss": 0.8235, "step": 8948 }, { "epoch": 0.73, "grad_norm": 2.3442309344872614, "learning_rate": 1.7748083891302847e-06, "loss": 0.2646, "step": 8949 }, { "epoch": 0.73, "grad_norm": 3.7840108594247086, "learning_rate": 1.7737969360538187e-06, "loss": 0.5071, "step": 8950 }, { "epoch": 0.73, "grad_norm": 4.488174423512263, "learning_rate": 1.7727857091201477e-06, "loss": 0.986, "step": 8951 }, { "epoch": 0.73, "grad_norm": 3.4000460221903386, "learning_rate": 1.771774708400153e-06, "loss": 0.6213, "step": 8952 }, { "epoch": 0.73, "grad_norm": 3.7478045033289273, "learning_rate": 1.7707639339647015e-06, "loss": 0.8226, "step": 8953 }, { "epoch": 0.73, "grad_norm": 4.02873919600009, "learning_rate": 1.7697533858846444e-06, "loss": 0.6703, "step": 8954 }, { "epoch": 0.73, "grad_norm": 3.461477367730992, "learning_rate": 1.7687430642308167e-06, "loss": 0.5836, "step": 8955 }, { "epoch": 0.73, "grad_norm": 3.9165398389129913, "learning_rate": 1.7677329690740397e-06, "loss": 0.7629, "step": 8956 }, { "epoch": 0.73, "grad_norm": 4.06870891643253, "learning_rate": 1.7667231004851132e-06, "loss": 0.8262, "step": 8957 }, { "epoch": 0.73, "grad_norm": 2.8948921154450318, "learning_rate": 1.7657134585348257e-06, "loss": 0.3847, "step": 8958 }, { "epoch": 0.73, "grad_norm": 4.278096040896495, "learning_rate": 1.7647040432939494e-06, "loss": 1.0448, "step": 8959 }, { "epoch": 0.73, "grad_norm": 2.350081697197041, "learning_rate": 1.7636948548332394e-06, "loss": 0.447, "step": 8960 }, { "epoch": 0.73, "grad_norm": 4.743675239557907, "learning_rate": 1.762685893223436e-06, "loss": 0.8835, "step": 8961 }, { "epoch": 0.73, "grad_norm": 5.1219408922093415, "learning_rate": 1.7616771585352638e-06, "loss": 0.6029, "step": 8962 }, { "epoch": 0.73, "grad_norm": 2.932424093664737, "learning_rate": 1.7606686508394278e-06, "loss": 0.4565, "step": 8963 }, { "epoch": 0.73, "grad_norm": 4.350284755062462, "learning_rate": 1.759660370206624e-06, "loss": 0.8739, "step": 8964 }, { "epoch": 0.73, "grad_norm": 1.8392207856522242, "learning_rate": 1.7586523167075243e-06, "loss": 0.3384, "step": 8965 }, { "epoch": 0.73, "grad_norm": 2.047881033882093, "learning_rate": 1.7576444904127909e-06, "loss": 0.3368, "step": 8966 }, { "epoch": 0.73, "grad_norm": 3.5023817371920622, "learning_rate": 1.7566368913930677e-06, "loss": 0.5931, "step": 8967 }, { "epoch": 0.73, "grad_norm": 3.0312836485738424, "learning_rate": 1.7556295197189849e-06, "loss": 0.4801, "step": 8968 }, { "epoch": 0.73, "grad_norm": 4.760566696496184, "learning_rate": 1.754622375461152e-06, "loss": 0.8811, "step": 8969 }, { "epoch": 0.73, "grad_norm": 3.2345251307797587, "learning_rate": 1.753615458690166e-06, "loss": 0.5812, "step": 8970 }, { "epoch": 0.73, "grad_norm": 6.045578240916088, "learning_rate": 1.7526087694766086e-06, "loss": 1.1997, "step": 8971 }, { "epoch": 0.73, "grad_norm": 3.481839428642073, "learning_rate": 1.7516023078910438e-06, "loss": 0.9402, "step": 8972 }, { "epoch": 0.73, "grad_norm": 3.04590137193262, "learning_rate": 1.7505960740040196e-06, "loss": 0.7429, "step": 8973 }, { "epoch": 0.73, "grad_norm": 1.2468634629602602, "learning_rate": 1.749590067886071e-06, "loss": 0.1524, "step": 8974 }, { "epoch": 0.73, "grad_norm": 4.637452769824487, "learning_rate": 1.7485842896077116e-06, "loss": 0.8302, "step": 8975 }, { "epoch": 0.73, "grad_norm": 3.7895625674392375, "learning_rate": 1.7475787392394427e-06, "loss": 0.6474, "step": 8976 }, { "epoch": 0.73, "grad_norm": 2.601124604564186, "learning_rate": 1.7465734168517501e-06, "loss": 0.2956, "step": 8977 }, { "epoch": 0.73, "grad_norm": 3.4072381897503887, "learning_rate": 1.7455683225151037e-06, "loss": 0.9623, "step": 8978 }, { "epoch": 0.73, "grad_norm": 3.9706115422857633, "learning_rate": 1.7445634562999526e-06, "loss": 0.6604, "step": 8979 }, { "epoch": 0.73, "grad_norm": 3.308798596479263, "learning_rate": 1.7435588182767371e-06, "loss": 0.5646, "step": 8980 }, { "epoch": 0.73, "grad_norm": 2.188892167150614, "learning_rate": 1.7425544085158747e-06, "loss": 0.3848, "step": 8981 }, { "epoch": 0.73, "grad_norm": 5.398272377558863, "learning_rate": 1.741550227087772e-06, "loss": 0.703, "step": 8982 }, { "epoch": 0.73, "grad_norm": 4.811819056468078, "learning_rate": 1.7405462740628177e-06, "loss": 1.0272, "step": 8983 }, { "epoch": 0.73, "grad_norm": 3.4139978187096194, "learning_rate": 1.7395425495113838e-06, "loss": 0.3504, "step": 8984 }, { "epoch": 0.73, "grad_norm": 3.703841869876057, "learning_rate": 1.7385390535038299e-06, "loss": 0.5092, "step": 8985 }, { "epoch": 0.73, "grad_norm": 5.341951956627426, "learning_rate": 1.7375357861104924e-06, "loss": 0.9809, "step": 8986 }, { "epoch": 0.73, "grad_norm": 3.687216573165631, "learning_rate": 1.7365327474016979e-06, "loss": 0.5291, "step": 8987 }, { "epoch": 0.73, "grad_norm": 3.2601727656097803, "learning_rate": 1.7355299374477558e-06, "loss": 0.6321, "step": 8988 }, { "epoch": 0.73, "grad_norm": 5.022852657978894, "learning_rate": 1.7345273563189575e-06, "loss": 0.9331, "step": 8989 }, { "epoch": 0.73, "grad_norm": 3.4543615795409, "learning_rate": 1.7335250040855805e-06, "loss": 0.5367, "step": 8990 }, { "epoch": 0.73, "grad_norm": 5.405105561209966, "learning_rate": 1.7325228808178862e-06, "loss": 0.9613, "step": 8991 }, { "epoch": 0.73, "grad_norm": 3.899093810149087, "learning_rate": 1.7315209865861165e-06, "loss": 0.7388, "step": 8992 }, { "epoch": 0.74, "grad_norm": 5.012980749679616, "learning_rate": 1.730519321460501e-06, "loss": 0.9296, "step": 8993 }, { "epoch": 0.74, "grad_norm": 4.868724219782896, "learning_rate": 1.7295178855112537e-06, "loss": 1.0129, "step": 8994 }, { "epoch": 0.74, "grad_norm": 3.921950322463082, "learning_rate": 1.7285166788085683e-06, "loss": 0.7011, "step": 8995 }, { "epoch": 0.74, "grad_norm": 3.467594249950734, "learning_rate": 1.7275157014226274e-06, "loss": 0.6228, "step": 8996 }, { "epoch": 0.74, "grad_norm": 5.208537067500457, "learning_rate": 1.7265149534235925e-06, "loss": 0.8779, "step": 8997 }, { "epoch": 0.74, "grad_norm": 4.587388261769327, "learning_rate": 1.7255144348816134e-06, "loss": 0.7871, "step": 8998 }, { "epoch": 0.74, "grad_norm": 3.725501053167962, "learning_rate": 1.7245141458668213e-06, "loss": 0.6805, "step": 8999 }, { "epoch": 0.74, "grad_norm": 4.741787604017313, "learning_rate": 1.7235140864493327e-06, "loss": 1.0489, "step": 9000 }, { "epoch": 0.74, "grad_norm": 3.2071984650447534, "learning_rate": 1.7225142566992476e-06, "loss": 0.8655, "step": 9001 }, { "epoch": 0.74, "grad_norm": 4.985035641680996, "learning_rate": 1.7215146566866508e-06, "loss": 0.8706, "step": 9002 }, { "epoch": 0.74, "grad_norm": 4.47775958261579, "learning_rate": 1.7205152864816071e-06, "loss": 0.828, "step": 9003 }, { "epoch": 0.74, "grad_norm": 3.7970700653958174, "learning_rate": 1.7195161461541692e-06, "loss": 0.3933, "step": 9004 }, { "epoch": 0.74, "grad_norm": 5.013903958013123, "learning_rate": 1.7185172357743729e-06, "loss": 0.7536, "step": 9005 }, { "epoch": 0.74, "grad_norm": 3.2442239307638188, "learning_rate": 1.7175185554122375e-06, "loss": 0.6861, "step": 9006 }, { "epoch": 0.74, "grad_norm": 2.2467894939410833, "learning_rate": 1.7165201051377657e-06, "loss": 0.3923, "step": 9007 }, { "epoch": 0.74, "grad_norm": 4.5533531296390075, "learning_rate": 1.7155218850209465e-06, "loss": 0.92, "step": 9008 }, { "epoch": 0.74, "grad_norm": 4.097363179597376, "learning_rate": 1.7145238951317473e-06, "loss": 0.688, "step": 9009 }, { "epoch": 0.74, "grad_norm": 1.6374955130177864, "learning_rate": 1.7135261355401246e-06, "loss": 0.2497, "step": 9010 }, { "epoch": 0.74, "grad_norm": 4.751931732962066, "learning_rate": 1.712528606316019e-06, "loss": 1.0268, "step": 9011 }, { "epoch": 0.74, "grad_norm": 1.3351512488767607, "learning_rate": 1.7115313075293488e-06, "loss": 0.2148, "step": 9012 }, { "epoch": 0.74, "grad_norm": 4.28499191945056, "learning_rate": 1.710534239250023e-06, "loss": 0.6547, "step": 9013 }, { "epoch": 0.74, "grad_norm": 3.7785812157892913, "learning_rate": 1.7095374015479326e-06, "loss": 0.6309, "step": 9014 }, { "epoch": 0.74, "grad_norm": 4.166814066008341, "learning_rate": 1.7085407944929488e-06, "loss": 0.8211, "step": 9015 }, { "epoch": 0.74, "grad_norm": 4.734193097427948, "learning_rate": 1.7075444181549305e-06, "loss": 0.92, "step": 9016 }, { "epoch": 0.74, "grad_norm": 3.9215893062088756, "learning_rate": 1.7065482726037196e-06, "loss": 0.4628, "step": 9017 }, { "epoch": 0.74, "grad_norm": 6.093119807275033, "learning_rate": 1.7055523579091422e-06, "loss": 1.0065, "step": 9018 }, { "epoch": 0.74, "grad_norm": 4.323869904369412, "learning_rate": 1.704556674141008e-06, "loss": 0.7332, "step": 9019 }, { "epoch": 0.74, "grad_norm": 1.5696067383535437, "learning_rate": 1.7035612213691083e-06, "loss": 0.3485, "step": 9020 }, { "epoch": 0.74, "grad_norm": 2.530204008821355, "learning_rate": 1.7025659996632198e-06, "loss": 0.4463, "step": 9021 }, { "epoch": 0.74, "grad_norm": 3.2497473771022154, "learning_rate": 1.7015710090931047e-06, "loss": 0.5451, "step": 9022 }, { "epoch": 0.74, "grad_norm": 1.9079734640877182, "learning_rate": 1.7005762497285078e-06, "loss": 0.3221, "step": 9023 }, { "epoch": 0.74, "grad_norm": 2.956229764069996, "learning_rate": 1.6995817216391559e-06, "loss": 0.7575, "step": 9024 }, { "epoch": 0.74, "grad_norm": 3.8665180173730653, "learning_rate": 1.698587424894763e-06, "loss": 0.8057, "step": 9025 }, { "epoch": 0.74, "grad_norm": 3.7188981255366276, "learning_rate": 1.6975933595650229e-06, "loss": 0.6104, "step": 9026 }, { "epoch": 0.74, "grad_norm": 1.9620310746669432, "learning_rate": 1.6965995257196177e-06, "loss": 0.3023, "step": 9027 }, { "epoch": 0.74, "grad_norm": 3.13494334777588, "learning_rate": 1.6956059234282079e-06, "loss": 0.4358, "step": 9028 }, { "epoch": 0.74, "grad_norm": 3.4396347140855688, "learning_rate": 1.6946125527604419e-06, "loss": 0.4067, "step": 9029 }, { "epoch": 0.74, "grad_norm": 2.7671237129759287, "learning_rate": 1.6936194137859508e-06, "loss": 0.7057, "step": 9030 }, { "epoch": 0.74, "grad_norm": 3.9593379420039243, "learning_rate": 1.6926265065743507e-06, "loss": 0.47, "step": 9031 }, { "epoch": 0.74, "grad_norm": 3.659273991675345, "learning_rate": 1.6916338311952373e-06, "loss": 0.6953, "step": 9032 }, { "epoch": 0.74, "grad_norm": 4.132124704254727, "learning_rate": 1.6906413877181948e-06, "loss": 0.7218, "step": 9033 }, { "epoch": 0.74, "grad_norm": 4.13717100686943, "learning_rate": 1.6896491762127882e-06, "loss": 0.6954, "step": 9034 }, { "epoch": 0.74, "grad_norm": 4.821745647616559, "learning_rate": 1.6886571967485677e-06, "loss": 0.7649, "step": 9035 }, { "epoch": 0.74, "grad_norm": 3.6486101795414934, "learning_rate": 1.6876654493950666e-06, "loss": 0.7128, "step": 9036 }, { "epoch": 0.74, "grad_norm": 4.829340506404127, "learning_rate": 1.6866739342218042e-06, "loss": 0.7762, "step": 9037 }, { "epoch": 0.74, "grad_norm": 5.418065515804254, "learning_rate": 1.6856826512982772e-06, "loss": 1.0962, "step": 9038 }, { "epoch": 0.74, "grad_norm": 1.282175126871541, "learning_rate": 1.6846916006939724e-06, "loss": 0.1717, "step": 9039 }, { "epoch": 0.74, "grad_norm": 4.689916488916796, "learning_rate": 1.6837007824783586e-06, "loss": 0.6698, "step": 9040 }, { "epoch": 0.74, "grad_norm": 5.710826508344211, "learning_rate": 1.6827101967208887e-06, "loss": 1.0579, "step": 9041 }, { "epoch": 0.74, "grad_norm": 3.8698950145246562, "learning_rate": 1.6817198434909954e-06, "loss": 0.7306, "step": 9042 }, { "epoch": 0.74, "grad_norm": 4.822068675912592, "learning_rate": 1.6807297228581016e-06, "loss": 0.9391, "step": 9043 }, { "epoch": 0.74, "grad_norm": 5.440054907992409, "learning_rate": 1.6797398348916073e-06, "loss": 0.8555, "step": 9044 }, { "epoch": 0.74, "grad_norm": 2.6654135564767585, "learning_rate": 1.6787501796609001e-06, "loss": 0.3411, "step": 9045 }, { "epoch": 0.74, "grad_norm": 3.0646110765873495, "learning_rate": 1.6777607572353516e-06, "loss": 0.3515, "step": 9046 }, { "epoch": 0.74, "grad_norm": 3.809911256440361, "learning_rate": 1.676771567684316e-06, "loss": 0.9619, "step": 9047 }, { "epoch": 0.74, "grad_norm": 5.248089390765532, "learning_rate": 1.675782611077132e-06, "loss": 0.9298, "step": 9048 }, { "epoch": 0.74, "grad_norm": 5.046653300658506, "learning_rate": 1.6747938874831182e-06, "loss": 1.0072, "step": 9049 }, { "epoch": 0.74, "grad_norm": 3.9210787418348643, "learning_rate": 1.6738053969715818e-06, "loss": 0.6771, "step": 9050 }, { "epoch": 0.74, "grad_norm": 5.7392379153531135, "learning_rate": 1.672817139611811e-06, "loss": 1.1881, "step": 9051 }, { "epoch": 0.74, "grad_norm": 4.729634737365383, "learning_rate": 1.6718291154730792e-06, "loss": 0.8296, "step": 9052 }, { "epoch": 0.74, "grad_norm": 7.292886378742193, "learning_rate": 1.6708413246246418e-06, "loss": 1.7863, "step": 9053 }, { "epoch": 0.74, "grad_norm": 4.091853622035211, "learning_rate": 1.6698537671357406e-06, "loss": 0.5248, "step": 9054 }, { "epoch": 0.74, "grad_norm": 2.034897188619175, "learning_rate": 1.6688664430755964e-06, "loss": 0.3249, "step": 9055 }, { "epoch": 0.74, "grad_norm": 4.212051184388152, "learning_rate": 1.6678793525134167e-06, "loss": 0.5452, "step": 9056 }, { "epoch": 0.74, "grad_norm": 5.587073637312639, "learning_rate": 1.6668924955183952e-06, "loss": 1.2495, "step": 9057 }, { "epoch": 0.74, "grad_norm": 3.4326452585038636, "learning_rate": 1.665905872159702e-06, "loss": 0.6913, "step": 9058 }, { "epoch": 0.74, "grad_norm": 5.64344670227028, "learning_rate": 1.6649194825064991e-06, "loss": 1.0694, "step": 9059 }, { "epoch": 0.74, "grad_norm": 2.987460457256301, "learning_rate": 1.6639333266279244e-06, "loss": 0.6845, "step": 9060 }, { "epoch": 0.74, "grad_norm": 3.392028734809809, "learning_rate": 1.662947404593105e-06, "loss": 0.7027, "step": 9061 }, { "epoch": 0.74, "grad_norm": 3.08408362057497, "learning_rate": 1.6619617164711493e-06, "loss": 0.6966, "step": 9062 }, { "epoch": 0.74, "grad_norm": 4.3109251101137485, "learning_rate": 1.6609762623311504e-06, "loss": 0.7128, "step": 9063 }, { "epoch": 0.74, "grad_norm": 4.243086851835061, "learning_rate": 1.659991042242184e-06, "loss": 1.1725, "step": 9064 }, { "epoch": 0.74, "grad_norm": 4.204599777941349, "learning_rate": 1.6590060562733111e-06, "loss": 0.5851, "step": 9065 }, { "epoch": 0.74, "grad_norm": 3.9517819510821637, "learning_rate": 1.6580213044935723e-06, "loss": 0.5402, "step": 9066 }, { "epoch": 0.74, "grad_norm": 5.053050927242831, "learning_rate": 1.6570367869719955e-06, "loss": 0.5968, "step": 9067 }, { "epoch": 0.74, "grad_norm": 4.239009288234037, "learning_rate": 1.656052503777591e-06, "loss": 0.7625, "step": 9068 }, { "epoch": 0.74, "grad_norm": 2.87110788114826, "learning_rate": 1.6550684549793539e-06, "loss": 0.5221, "step": 9069 }, { "epoch": 0.74, "grad_norm": 3.766413422379634, "learning_rate": 1.6540846406462602e-06, "loss": 0.4895, "step": 9070 }, { "epoch": 0.74, "grad_norm": 4.844991604320573, "learning_rate": 1.6531010608472736e-06, "loss": 1.3181, "step": 9071 }, { "epoch": 0.74, "grad_norm": 4.687473886208534, "learning_rate": 1.6521177156513351e-06, "loss": 0.9449, "step": 9072 }, { "epoch": 0.74, "grad_norm": 2.255207412004284, "learning_rate": 1.6511346051273768e-06, "loss": 0.3351, "step": 9073 }, { "epoch": 0.74, "grad_norm": 4.047676731344719, "learning_rate": 1.6501517293443064e-06, "loss": 0.8867, "step": 9074 }, { "epoch": 0.74, "grad_norm": 4.065878701202547, "learning_rate": 1.6491690883710209e-06, "loss": 0.6451, "step": 9075 }, { "epoch": 0.74, "grad_norm": 3.5443967112541754, "learning_rate": 1.6481866822763997e-06, "loss": 0.4899, "step": 9076 }, { "epoch": 0.74, "grad_norm": 3.9996127676580624, "learning_rate": 1.6472045111293072e-06, "loss": 0.6043, "step": 9077 }, { "epoch": 0.74, "grad_norm": 3.0559692397812004, "learning_rate": 1.6462225749985845e-06, "loss": 0.8296, "step": 9078 }, { "epoch": 0.74, "grad_norm": 4.266144230809973, "learning_rate": 1.645240873953064e-06, "loss": 0.8095, "step": 9079 }, { "epoch": 0.74, "grad_norm": 3.841030181663696, "learning_rate": 1.6442594080615581e-06, "loss": 0.6333, "step": 9080 }, { "epoch": 0.74, "grad_norm": 3.7709648439262544, "learning_rate": 1.6432781773928636e-06, "loss": 0.8523, "step": 9081 }, { "epoch": 0.74, "grad_norm": 4.394573663876398, "learning_rate": 1.6422971820157623e-06, "loss": 0.791, "step": 9082 }, { "epoch": 0.74, "grad_norm": 4.9444633502066395, "learning_rate": 1.6413164219990136e-06, "loss": 0.8428, "step": 9083 }, { "epoch": 0.74, "grad_norm": 1.9488231025805063, "learning_rate": 1.640335897411367e-06, "loss": 0.3488, "step": 9084 }, { "epoch": 0.74, "grad_norm": 1.9429043504182903, "learning_rate": 1.6393556083215528e-06, "loss": 0.3321, "step": 9085 }, { "epoch": 0.74, "grad_norm": 4.459417834868013, "learning_rate": 1.6383755547982844e-06, "loss": 0.5809, "step": 9086 }, { "epoch": 0.74, "grad_norm": 3.792858688546865, "learning_rate": 1.6373957369102616e-06, "loss": 0.8116, "step": 9087 }, { "epoch": 0.74, "grad_norm": 3.1192732924982023, "learning_rate": 1.636416154726162e-06, "loss": 0.4053, "step": 9088 }, { "epoch": 0.74, "grad_norm": 3.621300593467316, "learning_rate": 1.6354368083146532e-06, "loss": 0.574, "step": 9089 }, { "epoch": 0.74, "grad_norm": 3.7491013378790123, "learning_rate": 1.63445769774438e-06, "loss": 0.5252, "step": 9090 }, { "epoch": 0.74, "grad_norm": 4.417582731462544, "learning_rate": 1.6334788230839753e-06, "loss": 0.9197, "step": 9091 }, { "epoch": 0.74, "grad_norm": 5.077667303709065, "learning_rate": 1.6325001844020538e-06, "loss": 1.2808, "step": 9092 }, { "epoch": 0.74, "grad_norm": 2.9993091285089273, "learning_rate": 1.6315217817672142e-06, "loss": 0.558, "step": 9093 }, { "epoch": 0.74, "grad_norm": 4.99726956482455, "learning_rate": 1.6305436152480392e-06, "loss": 0.8917, "step": 9094 }, { "epoch": 0.74, "grad_norm": 1.970521738608599, "learning_rate": 1.6295656849130914e-06, "loss": 0.3274, "step": 9095 }, { "epoch": 0.74, "grad_norm": 4.221441371096554, "learning_rate": 1.628587990830921e-06, "loss": 0.6971, "step": 9096 }, { "epoch": 0.74, "grad_norm": 3.870364589941543, "learning_rate": 1.6276105330700599e-06, "loss": 0.5622, "step": 9097 }, { "epoch": 0.74, "grad_norm": 4.1773326866114715, "learning_rate": 1.6266333116990242e-06, "loss": 0.6384, "step": 9098 }, { "epoch": 0.74, "grad_norm": 2.2829276113017065, "learning_rate": 1.6256563267863135e-06, "loss": 0.348, "step": 9099 }, { "epoch": 0.74, "grad_norm": 3.420048364811174, "learning_rate": 1.6246795784004076e-06, "loss": 0.5856, "step": 9100 }, { "epoch": 0.74, "grad_norm": 3.7505987876192113, "learning_rate": 1.6237030666097736e-06, "loss": 0.7085, "step": 9101 }, { "epoch": 0.74, "grad_norm": 3.8063053381010703, "learning_rate": 1.6227267914828615e-06, "loss": 1.0319, "step": 9102 }, { "epoch": 0.74, "grad_norm": 3.804431995865752, "learning_rate": 1.6217507530881048e-06, "loss": 0.5913, "step": 9103 }, { "epoch": 0.74, "grad_norm": 3.687685126129331, "learning_rate": 1.6207749514939164e-06, "loss": 0.7277, "step": 9104 }, { "epoch": 0.74, "grad_norm": 4.062244202441285, "learning_rate": 1.6197993867686973e-06, "loss": 0.7456, "step": 9105 }, { "epoch": 0.74, "grad_norm": 2.3369190904442183, "learning_rate": 1.6188240589808325e-06, "loss": 0.4212, "step": 9106 }, { "epoch": 0.74, "grad_norm": 4.1709524686338915, "learning_rate": 1.6178489681986842e-06, "loss": 0.9092, "step": 9107 }, { "epoch": 0.74, "grad_norm": 3.3744359543461533, "learning_rate": 1.616874114490604e-06, "loss": 0.6864, "step": 9108 }, { "epoch": 0.74, "grad_norm": 2.58724493813051, "learning_rate": 1.6158994979249255e-06, "loss": 0.4519, "step": 9109 }, { "epoch": 0.74, "grad_norm": 2.6247163893013767, "learning_rate": 1.6149251185699643e-06, "loss": 0.237, "step": 9110 }, { "epoch": 0.74, "grad_norm": 3.723515891720296, "learning_rate": 1.613950976494022e-06, "loss": 0.655, "step": 9111 }, { "epoch": 0.74, "grad_norm": 2.9683408496964163, "learning_rate": 1.6129770717653781e-06, "loss": 0.4724, "step": 9112 }, { "epoch": 0.74, "grad_norm": 4.158516849075152, "learning_rate": 1.6120034044523015e-06, "loss": 0.9067, "step": 9113 }, { "epoch": 0.74, "grad_norm": 4.8198622674689116, "learning_rate": 1.6110299746230419e-06, "loss": 1.0292, "step": 9114 }, { "epoch": 0.75, "grad_norm": 3.7482896303080935, "learning_rate": 1.6100567823458319e-06, "loss": 0.5969, "step": 9115 }, { "epoch": 0.75, "grad_norm": 4.44067565129291, "learning_rate": 1.6090838276888882e-06, "loss": 0.9371, "step": 9116 }, { "epoch": 0.75, "grad_norm": 3.450233594803811, "learning_rate": 1.6081111107204127e-06, "loss": 0.4965, "step": 9117 }, { "epoch": 0.75, "grad_norm": 2.5899593889008092, "learning_rate": 1.6071386315085851e-06, "loss": 0.5021, "step": 9118 }, { "epoch": 0.75, "grad_norm": 1.9734439489250912, "learning_rate": 1.606166390121574e-06, "loss": 0.3827, "step": 9119 }, { "epoch": 0.75, "grad_norm": 2.34945668400547, "learning_rate": 1.60519438662753e-06, "loss": 0.4631, "step": 9120 }, { "epoch": 0.75, "grad_norm": 4.239324985671788, "learning_rate": 1.6042226210945838e-06, "loss": 0.5508, "step": 9121 }, { "epoch": 0.75, "grad_norm": 3.7334163443070567, "learning_rate": 1.6032510935908551e-06, "loss": 0.5746, "step": 9122 }, { "epoch": 0.75, "grad_norm": 4.6674181829994215, "learning_rate": 1.6022798041844407e-06, "loss": 0.7384, "step": 9123 }, { "epoch": 0.75, "grad_norm": 4.929516148507866, "learning_rate": 1.6013087529434247e-06, "loss": 1.1649, "step": 9124 }, { "epoch": 0.75, "grad_norm": 2.765302163788124, "learning_rate": 1.6003379399358742e-06, "loss": 0.3319, "step": 9125 }, { "epoch": 0.75, "grad_norm": 4.751803140552971, "learning_rate": 1.5993673652298386e-06, "loss": 0.8956, "step": 9126 }, { "epoch": 0.75, "grad_norm": 5.303559736955207, "learning_rate": 1.5983970288933509e-06, "loss": 1.1144, "step": 9127 }, { "epoch": 0.75, "grad_norm": 4.0912639283894245, "learning_rate": 1.5974269309944296e-06, "loss": 0.6042, "step": 9128 }, { "epoch": 0.75, "grad_norm": 4.236076518778019, "learning_rate": 1.5964570716010708e-06, "loss": 0.7398, "step": 9129 }, { "epoch": 0.75, "grad_norm": 4.891067061234153, "learning_rate": 1.595487450781259e-06, "loss": 0.8603, "step": 9130 }, { "epoch": 0.75, "grad_norm": 4.186974074359808, "learning_rate": 1.5945180686029598e-06, "loss": 0.8253, "step": 9131 }, { "epoch": 0.75, "grad_norm": 3.4969354792471243, "learning_rate": 1.593548925134124e-06, "loss": 0.3732, "step": 9132 }, { "epoch": 0.75, "grad_norm": 5.246678029315557, "learning_rate": 1.5925800204426833e-06, "loss": 0.9658, "step": 9133 }, { "epoch": 0.75, "grad_norm": 5.004737255489296, "learning_rate": 1.5916113545965562e-06, "loss": 0.6139, "step": 9134 }, { "epoch": 0.75, "grad_norm": 2.4217889762322553, "learning_rate": 1.5906429276636376e-06, "loss": 0.3275, "step": 9135 }, { "epoch": 0.75, "grad_norm": 3.798173938948157, "learning_rate": 1.589674739711814e-06, "loss": 0.9051, "step": 9136 }, { "epoch": 0.75, "grad_norm": 2.621642259837425, "learning_rate": 1.5887067908089472e-06, "loss": 0.3025, "step": 9137 }, { "epoch": 0.75, "grad_norm": 3.7705583656212167, "learning_rate": 1.5877390810228888e-06, "loss": 0.5332, "step": 9138 }, { "epoch": 0.75, "grad_norm": 1.1907773558344406, "learning_rate": 1.5867716104214725e-06, "loss": 0.1737, "step": 9139 }, { "epoch": 0.75, "grad_norm": 3.354378021368676, "learning_rate": 1.5858043790725096e-06, "loss": 0.6442, "step": 9140 }, { "epoch": 0.75, "grad_norm": 3.819461979920989, "learning_rate": 1.5848373870438016e-06, "loss": 0.2835, "step": 9141 }, { "epoch": 0.75, "grad_norm": 2.5942531966284172, "learning_rate": 1.58387063440313e-06, "loss": 0.4373, "step": 9142 }, { "epoch": 0.75, "grad_norm": 4.81101872617202, "learning_rate": 1.58290412121826e-06, "loss": 0.8678, "step": 9143 }, { "epoch": 0.75, "grad_norm": 3.625409574628458, "learning_rate": 1.5819378475569396e-06, "loss": 0.6234, "step": 9144 }, { "epoch": 0.75, "grad_norm": 4.659023434089425, "learning_rate": 1.5809718134869024e-06, "loss": 0.7718, "step": 9145 }, { "epoch": 0.75, "grad_norm": 4.435425429651767, "learning_rate": 1.5800060190758592e-06, "loss": 0.9598, "step": 9146 }, { "epoch": 0.75, "grad_norm": 2.1947682057148135, "learning_rate": 1.5790404643915108e-06, "loss": 0.3132, "step": 9147 }, { "epoch": 0.75, "grad_norm": 3.460252436859948, "learning_rate": 1.5780751495015379e-06, "loss": 0.5249, "step": 9148 }, { "epoch": 0.75, "grad_norm": 3.794571630750243, "learning_rate": 1.5771100744736039e-06, "loss": 0.7865, "step": 9149 }, { "epoch": 0.75, "grad_norm": 2.601783932317756, "learning_rate": 1.5761452393753596e-06, "loss": 0.5184, "step": 9150 }, { "epoch": 0.75, "grad_norm": 4.265490982404797, "learning_rate": 1.5751806442744315e-06, "loss": 1.082, "step": 9151 }, { "epoch": 0.75, "grad_norm": 6.026421806841435, "learning_rate": 1.5742162892384372e-06, "loss": 1.2085, "step": 9152 }, { "epoch": 0.75, "grad_norm": 3.885104571684317, "learning_rate": 1.57325217433497e-06, "loss": 0.9064, "step": 9153 }, { "epoch": 0.75, "grad_norm": 2.5114201265328964, "learning_rate": 1.5722882996316125e-06, "loss": 0.4936, "step": 9154 }, { "epoch": 0.75, "grad_norm": 3.8762422337914098, "learning_rate": 1.5713246651959275e-06, "loss": 0.735, "step": 9155 }, { "epoch": 0.75, "grad_norm": 5.451621781366753, "learning_rate": 1.570361271095462e-06, "loss": 1.0967, "step": 9156 }, { "epoch": 0.75, "grad_norm": 4.57069746566801, "learning_rate": 1.5693981173977468e-06, "loss": 0.6759, "step": 9157 }, { "epoch": 0.75, "grad_norm": 4.530243213795102, "learning_rate": 1.568435204170292e-06, "loss": 0.8214, "step": 9158 }, { "epoch": 0.75, "grad_norm": 4.486699756452967, "learning_rate": 1.5674725314805955e-06, "loss": 1.3609, "step": 9159 }, { "epoch": 0.75, "grad_norm": 4.165946902347504, "learning_rate": 1.5665100993961358e-06, "loss": 0.9671, "step": 9160 }, { "epoch": 0.75, "grad_norm": 5.834807390806405, "learning_rate": 1.565547907984376e-06, "loss": 1.0393, "step": 9161 }, { "epoch": 0.75, "grad_norm": 5.4886776264223816, "learning_rate": 1.564585957312762e-06, "loss": 1.4576, "step": 9162 }, { "epoch": 0.75, "grad_norm": 4.392651734475036, "learning_rate": 1.5636242474487207e-06, "loss": 0.5364, "step": 9163 }, { "epoch": 0.75, "grad_norm": 5.270991036499043, "learning_rate": 1.5626627784596638e-06, "loss": 1.3793, "step": 9164 }, { "epoch": 0.75, "grad_norm": 2.2656349137900924, "learning_rate": 1.5617015504129867e-06, "loss": 0.3967, "step": 9165 }, { "epoch": 0.75, "grad_norm": 4.42978305101965, "learning_rate": 1.560740563376069e-06, "loss": 0.7694, "step": 9166 }, { "epoch": 0.75, "grad_norm": 6.110916215287073, "learning_rate": 1.5597798174162693e-06, "loss": 0.8729, "step": 9167 }, { "epoch": 0.75, "grad_norm": 3.143223280894732, "learning_rate": 1.5588193126009332e-06, "loss": 0.587, "step": 9168 }, { "epoch": 0.75, "grad_norm": 4.703192537288563, "learning_rate": 1.557859048997386e-06, "loss": 1.0459, "step": 9169 }, { "epoch": 0.75, "grad_norm": 3.171765841636195, "learning_rate": 1.5568990266729394e-06, "loss": 0.4245, "step": 9170 }, { "epoch": 0.75, "grad_norm": 4.923832562043842, "learning_rate": 1.5559392456948863e-06, "loss": 1.0203, "step": 9171 }, { "epoch": 0.75, "grad_norm": 3.2433702013284265, "learning_rate": 1.5549797061305039e-06, "loss": 0.7897, "step": 9172 }, { "epoch": 0.75, "grad_norm": 4.151892778553473, "learning_rate": 1.5540204080470512e-06, "loss": 1.1794, "step": 9173 }, { "epoch": 0.75, "grad_norm": 3.868456145903217, "learning_rate": 1.5530613515117721e-06, "loss": 0.4656, "step": 9174 }, { "epoch": 0.75, "grad_norm": 2.6448226324506305, "learning_rate": 1.5521025365918895e-06, "loss": 0.4984, "step": 9175 }, { "epoch": 0.75, "grad_norm": 4.23419139243431, "learning_rate": 1.5511439633546143e-06, "loss": 0.5805, "step": 9176 }, { "epoch": 0.75, "grad_norm": 3.0798081955034866, "learning_rate": 1.5501856318671376e-06, "loss": 0.6739, "step": 9177 }, { "epoch": 0.75, "grad_norm": 4.303341304803665, "learning_rate": 1.5492275421966346e-06, "loss": 0.9649, "step": 9178 }, { "epoch": 0.75, "grad_norm": 3.2423816550283253, "learning_rate": 1.5482696944102643e-06, "loss": 0.7856, "step": 9179 }, { "epoch": 0.75, "grad_norm": 3.566822051382423, "learning_rate": 1.5473120885751652e-06, "loss": 0.7349, "step": 9180 }, { "epoch": 0.75, "grad_norm": 4.187762857595358, "learning_rate": 1.5463547247584621e-06, "loss": 0.6959, "step": 9181 }, { "epoch": 0.75, "grad_norm": 2.9031055664051526, "learning_rate": 1.5453976030272645e-06, "loss": 0.7711, "step": 9182 }, { "epoch": 0.75, "grad_norm": 3.1449051622626314, "learning_rate": 1.5444407234486585e-06, "loss": 0.5387, "step": 9183 }, { "epoch": 0.75, "grad_norm": 4.46496397396142, "learning_rate": 1.5434840860897194e-06, "loss": 1.0059, "step": 9184 }, { "epoch": 0.75, "grad_norm": 4.073251346906123, "learning_rate": 1.5425276910175046e-06, "loss": 0.9449, "step": 9185 }, { "epoch": 0.75, "grad_norm": 2.155958714244286, "learning_rate": 1.5415715382990504e-06, "loss": 0.427, "step": 9186 }, { "epoch": 0.75, "grad_norm": 2.203303357750263, "learning_rate": 1.54061562800138e-06, "loss": 0.3593, "step": 9187 }, { "epoch": 0.75, "grad_norm": 4.2183995353328285, "learning_rate": 1.5396599601914986e-06, "loss": 0.5619, "step": 9188 }, { "epoch": 0.75, "grad_norm": 3.199122158242305, "learning_rate": 1.5387045349363948e-06, "loss": 0.6847, "step": 9189 }, { "epoch": 0.75, "grad_norm": 3.7695706975246277, "learning_rate": 1.537749352303039e-06, "loss": 0.7021, "step": 9190 }, { "epoch": 0.75, "grad_norm": 3.6356070296975225, "learning_rate": 1.5367944123583884e-06, "loss": 0.466, "step": 9191 }, { "epoch": 0.75, "grad_norm": 4.199184907548104, "learning_rate": 1.535839715169375e-06, "loss": 0.7087, "step": 9192 }, { "epoch": 0.75, "grad_norm": 4.531727412732683, "learning_rate": 1.5348852608029218e-06, "loss": 0.841, "step": 9193 }, { "epoch": 0.75, "grad_norm": 3.345802142439073, "learning_rate": 1.5339310493259318e-06, "loss": 0.5765, "step": 9194 }, { "epoch": 0.75, "grad_norm": 3.1223332428130117, "learning_rate": 1.5329770808052908e-06, "loss": 0.603, "step": 9195 }, { "epoch": 0.75, "grad_norm": 2.8725909950301562, "learning_rate": 1.5320233553078694e-06, "loss": 0.2948, "step": 9196 }, { "epoch": 0.75, "grad_norm": 2.6803796890064913, "learning_rate": 1.5310698729005163e-06, "loss": 0.3243, "step": 9197 }, { "epoch": 0.75, "grad_norm": 3.310145402547282, "learning_rate": 1.5301166336500701e-06, "loss": 0.7332, "step": 9198 }, { "epoch": 0.75, "grad_norm": 3.252048882124109, "learning_rate": 1.5291636376233453e-06, "loss": 0.5481, "step": 9199 }, { "epoch": 0.75, "grad_norm": 2.9315753599609495, "learning_rate": 1.5282108848871445e-06, "loss": 0.614, "step": 9200 }, { "epoch": 0.75, "grad_norm": 3.032979560741048, "learning_rate": 1.5272583755082516e-06, "loss": 0.7907, "step": 9201 }, { "epoch": 0.75, "grad_norm": 1.2516871167184147, "learning_rate": 1.5263061095534343e-06, "loss": 0.1925, "step": 9202 }, { "epoch": 0.75, "grad_norm": 3.881963340392816, "learning_rate": 1.5253540870894395e-06, "loss": 0.7426, "step": 9203 }, { "epoch": 0.75, "grad_norm": 3.977219673175557, "learning_rate": 1.5244023081830018e-06, "loss": 0.7551, "step": 9204 }, { "epoch": 0.75, "grad_norm": 2.988781431535191, "learning_rate": 1.5234507729008363e-06, "loss": 0.4727, "step": 9205 }, { "epoch": 0.75, "grad_norm": 3.142425806367947, "learning_rate": 1.5224994813096417e-06, "loss": 0.6021, "step": 9206 }, { "epoch": 0.75, "grad_norm": 4.4882982513011305, "learning_rate": 1.5215484334760988e-06, "loss": 0.9421, "step": 9207 }, { "epoch": 0.75, "grad_norm": 5.227574799732071, "learning_rate": 1.5205976294668745e-06, "loss": 0.8865, "step": 9208 }, { "epoch": 0.75, "grad_norm": 2.294329145390786, "learning_rate": 1.5196470693486127e-06, "loss": 0.3568, "step": 9209 }, { "epoch": 0.75, "grad_norm": 2.3143291348855994, "learning_rate": 1.5186967531879443e-06, "loss": 0.3798, "step": 9210 }, { "epoch": 0.75, "grad_norm": 6.1684971311613435, "learning_rate": 1.517746681051483e-06, "loss": 1.1759, "step": 9211 }, { "epoch": 0.75, "grad_norm": 4.807693670268817, "learning_rate": 1.5167968530058263e-06, "loss": 1.0803, "step": 9212 }, { "epoch": 0.75, "grad_norm": 4.06193247946862, "learning_rate": 1.5158472691175491e-06, "loss": 0.9869, "step": 9213 }, { "epoch": 0.75, "grad_norm": 4.603334917657383, "learning_rate": 1.5148979294532157e-06, "loss": 0.8715, "step": 9214 }, { "epoch": 0.75, "grad_norm": 4.506355503448254, "learning_rate": 1.5139488340793718e-06, "loss": 0.8412, "step": 9215 }, { "epoch": 0.75, "grad_norm": 3.3900375758386714, "learning_rate": 1.512999983062542e-06, "loss": 0.4941, "step": 9216 }, { "epoch": 0.75, "grad_norm": 3.615903306345567, "learning_rate": 1.5120513764692373e-06, "loss": 0.6802, "step": 9217 }, { "epoch": 0.75, "grad_norm": 4.350467604248198, "learning_rate": 1.5111030143659516e-06, "loss": 0.5037, "step": 9218 }, { "epoch": 0.75, "grad_norm": 4.471453585618678, "learning_rate": 1.5101548968191626e-06, "loss": 0.6427, "step": 9219 }, { "epoch": 0.75, "grad_norm": 3.454923873453871, "learning_rate": 1.509207023895326e-06, "loss": 1.0145, "step": 9220 }, { "epoch": 0.75, "grad_norm": 2.56444356398917, "learning_rate": 1.5082593956608848e-06, "loss": 0.3001, "step": 9221 }, { "epoch": 0.75, "grad_norm": 3.721004611228858, "learning_rate": 1.5073120121822642e-06, "loss": 0.9185, "step": 9222 }, { "epoch": 0.75, "grad_norm": 2.0366899485017664, "learning_rate": 1.5063648735258713e-06, "loss": 0.3418, "step": 9223 }, { "epoch": 0.75, "grad_norm": 3.1813444894396907, "learning_rate": 1.5054179797580959e-06, "loss": 0.3646, "step": 9224 }, { "epoch": 0.75, "grad_norm": 4.625454279907865, "learning_rate": 1.5044713309453135e-06, "loss": 0.9152, "step": 9225 }, { "epoch": 0.75, "grad_norm": 2.708317774378133, "learning_rate": 1.5035249271538766e-06, "loss": 0.5606, "step": 9226 }, { "epoch": 0.75, "grad_norm": 5.811622971525766, "learning_rate": 1.5025787684501259e-06, "loss": 1.402, "step": 9227 }, { "epoch": 0.75, "grad_norm": 3.1435508201419733, "learning_rate": 1.5016328549003822e-06, "loss": 0.5919, "step": 9228 }, { "epoch": 0.75, "grad_norm": 3.422794846371174, "learning_rate": 1.5006871865709527e-06, "loss": 0.9311, "step": 9229 }, { "epoch": 0.75, "grad_norm": 3.5595881510978926, "learning_rate": 1.4997417635281204e-06, "loss": 0.5799, "step": 9230 }, { "epoch": 0.75, "grad_norm": 3.0844228003472995, "learning_rate": 1.4987965858381587e-06, "loss": 0.4529, "step": 9231 }, { "epoch": 0.75, "grad_norm": 3.2559439791737455, "learning_rate": 1.4978516535673176e-06, "loss": 0.4913, "step": 9232 }, { "epoch": 0.75, "grad_norm": 3.7266898885025044, "learning_rate": 1.4969069667818342e-06, "loss": 1.016, "step": 9233 }, { "epoch": 0.75, "grad_norm": 1.6652470891476823, "learning_rate": 1.495962525547927e-06, "loss": 0.3189, "step": 9234 }, { "epoch": 0.75, "grad_norm": 3.8723780905297405, "learning_rate": 1.4950183299317972e-06, "loss": 0.5337, "step": 9235 }, { "epoch": 0.75, "grad_norm": 3.299827963645423, "learning_rate": 1.4940743799996282e-06, "loss": 0.8833, "step": 9236 }, { "epoch": 0.75, "grad_norm": 1.4783504472332694, "learning_rate": 1.4931306758175896e-06, "loss": 0.1693, "step": 9237 }, { "epoch": 0.76, "grad_norm": 1.9569216401301468, "learning_rate": 1.4921872174518264e-06, "loss": 0.3565, "step": 9238 }, { "epoch": 0.76, "grad_norm": 3.9978458167392135, "learning_rate": 1.491244004968474e-06, "loss": 0.9464, "step": 9239 }, { "epoch": 0.76, "grad_norm": 3.694128103080308, "learning_rate": 1.4903010384336465e-06, "loss": 0.4128, "step": 9240 }, { "epoch": 0.76, "grad_norm": 3.4706648723140625, "learning_rate": 1.4893583179134414e-06, "loss": 0.568, "step": 9241 }, { "epoch": 0.76, "grad_norm": 5.648688088377756, "learning_rate": 1.488415843473942e-06, "loss": 1.097, "step": 9242 }, { "epoch": 0.76, "grad_norm": 4.266772854792492, "learning_rate": 1.4874736151812075e-06, "loss": 1.2822, "step": 9243 }, { "epoch": 0.76, "grad_norm": 4.411746530543385, "learning_rate": 1.4865316331012862e-06, "loss": 0.8683, "step": 9244 }, { "epoch": 0.76, "grad_norm": 3.772661039761835, "learning_rate": 1.4855898973002087e-06, "loss": 0.5959, "step": 9245 }, { "epoch": 0.76, "grad_norm": 3.814719328692366, "learning_rate": 1.484648407843982e-06, "loss": 0.5103, "step": 9246 }, { "epoch": 0.76, "grad_norm": 3.507064544108054, "learning_rate": 1.483707164798604e-06, "loss": 0.6051, "step": 9247 }, { "epoch": 0.76, "grad_norm": 4.202915654401037, "learning_rate": 1.4827661682300521e-06, "loss": 1.0418, "step": 9248 }, { "epoch": 0.76, "grad_norm": 3.469431937363606, "learning_rate": 1.4818254182042834e-06, "loss": 0.6728, "step": 9249 }, { "epoch": 0.76, "grad_norm": 2.4053054748930736, "learning_rate": 1.4808849147872417e-06, "loss": 0.4348, "step": 9250 }, { "epoch": 0.76, "grad_norm": 2.7957141907992247, "learning_rate": 1.4799446580448517e-06, "loss": 0.597, "step": 9251 }, { "epoch": 0.76, "grad_norm": 4.380756083812477, "learning_rate": 1.4790046480430226e-06, "loss": 0.7895, "step": 9252 }, { "epoch": 0.76, "grad_norm": 3.542878811181817, "learning_rate": 1.4780648848476436e-06, "loss": 0.7149, "step": 9253 }, { "epoch": 0.76, "grad_norm": 2.6472417985590964, "learning_rate": 1.4771253685245907e-06, "loss": 0.3477, "step": 9254 }, { "epoch": 0.76, "grad_norm": 2.8230679422187395, "learning_rate": 1.476186099139716e-06, "loss": 0.4883, "step": 9255 }, { "epoch": 0.76, "grad_norm": 2.998509200929273, "learning_rate": 1.47524707675886e-06, "loss": 0.4907, "step": 9256 }, { "epoch": 0.76, "grad_norm": 5.727298671205657, "learning_rate": 1.4743083014478443e-06, "loss": 1.0115, "step": 9257 }, { "epoch": 0.76, "grad_norm": 4.258019229535348, "learning_rate": 1.4733697732724728e-06, "loss": 0.8451, "step": 9258 }, { "epoch": 0.76, "grad_norm": 3.0894962139530087, "learning_rate": 1.472431492298534e-06, "loss": 0.7153, "step": 9259 }, { "epoch": 0.76, "grad_norm": 3.140265764565909, "learning_rate": 1.4714934585917933e-06, "loss": 0.5697, "step": 9260 }, { "epoch": 0.76, "grad_norm": 4.008713959072589, "learning_rate": 1.4705556722180075e-06, "loss": 0.6259, "step": 9261 }, { "epoch": 0.76, "grad_norm": 5.148159226128686, "learning_rate": 1.4696181332429065e-06, "loss": 1.1498, "step": 9262 }, { "epoch": 0.76, "grad_norm": 4.23851846708833, "learning_rate": 1.4686808417322107e-06, "loss": 0.845, "step": 9263 }, { "epoch": 0.76, "grad_norm": 2.2197524105620805, "learning_rate": 1.4677437977516197e-06, "loss": 0.5212, "step": 9264 }, { "epoch": 0.76, "grad_norm": 5.469659448279783, "learning_rate": 1.4668070013668173e-06, "loss": 0.9465, "step": 9265 }, { "epoch": 0.76, "grad_norm": 3.8821891357359815, "learning_rate": 1.465870452643466e-06, "loss": 0.8122, "step": 9266 }, { "epoch": 0.76, "grad_norm": 4.459784063330109, "learning_rate": 1.464934151647215e-06, "loss": 0.9195, "step": 9267 }, { "epoch": 0.76, "grad_norm": 3.3430373100392607, "learning_rate": 1.4639980984436957e-06, "loss": 0.5985, "step": 9268 }, { "epoch": 0.76, "grad_norm": 3.594680733843876, "learning_rate": 1.463062293098521e-06, "loss": 0.5093, "step": 9269 }, { "epoch": 0.76, "grad_norm": 4.152942092158016, "learning_rate": 1.4621267356772867e-06, "loss": 0.7401, "step": 9270 }, { "epoch": 0.76, "grad_norm": 4.643754547322705, "learning_rate": 1.461191426245573e-06, "loss": 0.7576, "step": 9271 }, { "epoch": 0.76, "grad_norm": 1.4539657131529113, "learning_rate": 1.4602563648689378e-06, "loss": 0.1889, "step": 9272 }, { "epoch": 0.76, "grad_norm": 6.884785121400726, "learning_rate": 1.459321551612926e-06, "loss": 1.0614, "step": 9273 }, { "epoch": 0.76, "grad_norm": 2.28683900158588, "learning_rate": 1.458386986543065e-06, "loss": 0.3655, "step": 9274 }, { "epoch": 0.76, "grad_norm": 4.380924904423539, "learning_rate": 1.4574526697248643e-06, "loss": 0.8172, "step": 9275 }, { "epoch": 0.76, "grad_norm": 3.336139561522197, "learning_rate": 1.4565186012238126e-06, "loss": 0.4901, "step": 9276 }, { "epoch": 0.76, "grad_norm": 5.045062288953997, "learning_rate": 1.4555847811053875e-06, "loss": 0.9607, "step": 9277 }, { "epoch": 0.76, "grad_norm": 3.236958797578773, "learning_rate": 1.4546512094350424e-06, "loss": 0.7349, "step": 9278 }, { "epoch": 0.76, "grad_norm": 3.9752286428550136, "learning_rate": 1.4537178862782175e-06, "loss": 0.7118, "step": 9279 }, { "epoch": 0.76, "grad_norm": 3.75898198448468, "learning_rate": 1.4527848117003357e-06, "loss": 1.1713, "step": 9280 }, { "epoch": 0.76, "grad_norm": 4.520741778373616, "learning_rate": 1.4518519857668012e-06, "loss": 0.8197, "step": 9281 }, { "epoch": 0.76, "grad_norm": 3.377246753506351, "learning_rate": 1.4509194085430024e-06, "loss": 0.7396, "step": 9282 }, { "epoch": 0.76, "grad_norm": 4.599893595646473, "learning_rate": 1.4499870800943055e-06, "loss": 0.4715, "step": 9283 }, { "epoch": 0.76, "grad_norm": 5.1112529821371355, "learning_rate": 1.4490550004860655e-06, "loss": 1.0465, "step": 9284 }, { "epoch": 0.76, "grad_norm": 3.127834502096852, "learning_rate": 1.4481231697836152e-06, "loss": 0.6399, "step": 9285 }, { "epoch": 0.76, "grad_norm": 1.5725140266229545, "learning_rate": 1.447191588052273e-06, "loss": 0.1532, "step": 9286 }, { "epoch": 0.76, "grad_norm": 4.859033873850176, "learning_rate": 1.446260255357339e-06, "loss": 1.1814, "step": 9287 }, { "epoch": 0.76, "grad_norm": 1.951917050503969, "learning_rate": 1.4453291717640966e-06, "loss": 0.2644, "step": 9288 }, { "epoch": 0.76, "grad_norm": 3.300478741110975, "learning_rate": 1.4443983373378078e-06, "loss": 0.4078, "step": 9289 }, { "epoch": 0.76, "grad_norm": 3.5342038298764753, "learning_rate": 1.4434677521437213e-06, "loss": 0.4432, "step": 9290 }, { "epoch": 0.76, "grad_norm": 4.663422152939918, "learning_rate": 1.442537416247069e-06, "loss": 0.9114, "step": 9291 }, { "epoch": 0.76, "grad_norm": 3.431581004164061, "learning_rate": 1.44160732971306e-06, "loss": 0.7277, "step": 9292 }, { "epoch": 0.76, "grad_norm": 3.913245516633208, "learning_rate": 1.4406774926068912e-06, "loss": 1.0555, "step": 9293 }, { "epoch": 0.76, "grad_norm": 3.899105723703307, "learning_rate": 1.4397479049937413e-06, "loss": 0.6237, "step": 9294 }, { "epoch": 0.76, "grad_norm": 3.7982967597812856, "learning_rate": 1.4388185669387678e-06, "loss": 0.79, "step": 9295 }, { "epoch": 0.76, "grad_norm": 4.478167377412812, "learning_rate": 1.437889478507114e-06, "loss": 0.8015, "step": 9296 }, { "epoch": 0.76, "grad_norm": 5.683330777135807, "learning_rate": 1.4369606397639058e-06, "loss": 0.8996, "step": 9297 }, { "epoch": 0.76, "grad_norm": 2.0947825710626913, "learning_rate": 1.4360320507742503e-06, "loss": 0.3233, "step": 9298 }, { "epoch": 0.76, "grad_norm": 3.9859594539972947, "learning_rate": 1.4351037116032391e-06, "loss": 0.8938, "step": 9299 }, { "epoch": 0.76, "grad_norm": 2.531021143798802, "learning_rate": 1.4341756223159414e-06, "loss": 0.2781, "step": 9300 }, { "epoch": 0.76, "grad_norm": 2.355153049487024, "learning_rate": 1.4332477829774144e-06, "loss": 0.3262, "step": 9301 }, { "epoch": 0.76, "grad_norm": 5.000948490375274, "learning_rate": 1.432320193652695e-06, "loss": 0.7266, "step": 9302 }, { "epoch": 0.76, "grad_norm": 5.299111288992087, "learning_rate": 1.4313928544068033e-06, "loss": 0.7665, "step": 9303 }, { "epoch": 0.76, "grad_norm": 3.5906372535420803, "learning_rate": 1.430465765304742e-06, "loss": 0.7717, "step": 9304 }, { "epoch": 0.76, "grad_norm": 4.570204564671903, "learning_rate": 1.429538926411498e-06, "loss": 0.8141, "step": 9305 }, { "epoch": 0.76, "grad_norm": 4.044477247782365, "learning_rate": 1.4286123377920342e-06, "loss": 1.0105, "step": 9306 }, { "epoch": 0.76, "grad_norm": 4.814453038391482, "learning_rate": 1.4276859995113047e-06, "loss": 1.3584, "step": 9307 }, { "epoch": 0.76, "grad_norm": 3.5677901090269843, "learning_rate": 1.4267599116342384e-06, "loss": 0.7221, "step": 9308 }, { "epoch": 0.76, "grad_norm": 2.9836186775557545, "learning_rate": 1.4258340742257516e-06, "loss": 0.5854, "step": 9309 }, { "epoch": 0.76, "grad_norm": 4.789849731644247, "learning_rate": 1.4249084873507412e-06, "loss": 1.1888, "step": 9310 }, { "epoch": 0.76, "grad_norm": 3.761888111510434, "learning_rate": 1.423983151074088e-06, "loss": 0.5422, "step": 9311 }, { "epoch": 0.76, "grad_norm": 5.060486241860016, "learning_rate": 1.4230580654606523e-06, "loss": 1.1927, "step": 9312 }, { "epoch": 0.76, "grad_norm": 5.9496662520122205, "learning_rate": 1.422133230575279e-06, "loss": 1.3018, "step": 9313 }, { "epoch": 0.76, "grad_norm": 4.520326361871921, "learning_rate": 1.4212086464827957e-06, "loss": 0.9233, "step": 9314 }, { "epoch": 0.76, "grad_norm": 6.280643679190604, "learning_rate": 1.420284313248011e-06, "loss": 1.4922, "step": 9315 }, { "epoch": 0.76, "grad_norm": 4.251869664324042, "learning_rate": 1.419360230935717e-06, "loss": 0.6898, "step": 9316 }, { "epoch": 0.76, "grad_norm": 3.65519446900931, "learning_rate": 1.4184363996106888e-06, "loss": 1.1632, "step": 9317 }, { "epoch": 0.76, "grad_norm": 4.138380698480845, "learning_rate": 1.417512819337681e-06, "loss": 0.9813, "step": 9318 }, { "epoch": 0.76, "grad_norm": 3.404739869469795, "learning_rate": 1.4165894901814337e-06, "loss": 0.8594, "step": 9319 }, { "epoch": 0.76, "grad_norm": 2.8786235118438714, "learning_rate": 1.4156664122066678e-06, "loss": 0.4921, "step": 9320 }, { "epoch": 0.76, "grad_norm": 4.155279089720612, "learning_rate": 1.414743585478089e-06, "loss": 1.0607, "step": 9321 }, { "epoch": 0.76, "grad_norm": 2.7988268307826685, "learning_rate": 1.41382101006038e-06, "loss": 0.3708, "step": 9322 }, { "epoch": 0.76, "grad_norm": 1.0743694371725223, "learning_rate": 1.412898686018211e-06, "loss": 0.1034, "step": 9323 }, { "epoch": 0.76, "grad_norm": 2.3399958967063275, "learning_rate": 1.411976613416235e-06, "loss": 0.3909, "step": 9324 }, { "epoch": 0.76, "grad_norm": 4.74287426460022, "learning_rate": 1.4110547923190816e-06, "loss": 0.962, "step": 9325 }, { "epoch": 0.76, "grad_norm": 4.6131435057312045, "learning_rate": 1.4101332227913677e-06, "loss": 0.7313, "step": 9326 }, { "epoch": 0.76, "grad_norm": 4.610089113972206, "learning_rate": 1.409211904897692e-06, "loss": 0.8497, "step": 9327 }, { "epoch": 0.76, "grad_norm": 5.298292588477315, "learning_rate": 1.4082908387026362e-06, "loss": 0.9706, "step": 9328 }, { "epoch": 0.76, "grad_norm": 5.271103752048412, "learning_rate": 1.40737002427076e-06, "loss": 1.0153, "step": 9329 }, { "epoch": 0.76, "grad_norm": 3.1823134658759185, "learning_rate": 1.4064494616666096e-06, "loss": 0.4829, "step": 9330 }, { "epoch": 0.76, "grad_norm": 2.532047882558935, "learning_rate": 1.405529150954713e-06, "loss": 0.2629, "step": 9331 }, { "epoch": 0.76, "grad_norm": 4.042392721562254, "learning_rate": 1.4046090921995798e-06, "loss": 0.6231, "step": 9332 }, { "epoch": 0.76, "grad_norm": 4.070483187386631, "learning_rate": 1.4036892854657019e-06, "loss": 0.7259, "step": 9333 }, { "epoch": 0.76, "grad_norm": 3.1113878681312666, "learning_rate": 1.4027697308175554e-06, "loss": 0.4745, "step": 9334 }, { "epoch": 0.76, "grad_norm": 4.709747769472877, "learning_rate": 1.4018504283195938e-06, "loss": 1.1229, "step": 9335 }, { "epoch": 0.76, "grad_norm": 1.9946288181004355, "learning_rate": 1.4009313780362582e-06, "loss": 0.3971, "step": 9336 }, { "epoch": 0.76, "grad_norm": 4.925474563921446, "learning_rate": 1.4000125800319702e-06, "loss": 0.9125, "step": 9337 }, { "epoch": 0.76, "grad_norm": 2.7557709471588145, "learning_rate": 1.399094034371134e-06, "loss": 0.5966, "step": 9338 }, { "epoch": 0.76, "grad_norm": 4.1324359061760685, "learning_rate": 1.398175741118134e-06, "loss": 0.8441, "step": 9339 }, { "epoch": 0.76, "grad_norm": 3.233867463157931, "learning_rate": 1.3972577003373406e-06, "loss": 0.6437, "step": 9340 }, { "epoch": 0.76, "grad_norm": 3.818645870947484, "learning_rate": 1.3963399120931014e-06, "loss": 0.5644, "step": 9341 }, { "epoch": 0.76, "grad_norm": 5.272791124987817, "learning_rate": 1.395422376449751e-06, "loss": 0.9272, "step": 9342 }, { "epoch": 0.76, "grad_norm": 4.315108114203056, "learning_rate": 1.3945050934716054e-06, "loss": 0.9911, "step": 9343 }, { "epoch": 0.76, "grad_norm": 5.992811384045854, "learning_rate": 1.3935880632229614e-06, "loss": 1.4808, "step": 9344 }, { "epoch": 0.76, "grad_norm": 5.115425303865779, "learning_rate": 1.3926712857681002e-06, "loss": 1.2953, "step": 9345 }, { "epoch": 0.76, "grad_norm": 0.7229788244527938, "learning_rate": 1.3917547611712818e-06, "loss": 0.1135, "step": 9346 }, { "epoch": 0.76, "grad_norm": 3.296847285183059, "learning_rate": 1.3908384894967514e-06, "loss": 0.4233, "step": 9347 }, { "epoch": 0.76, "grad_norm": 4.8014559482617996, "learning_rate": 1.3899224708087356e-06, "loss": 0.7937, "step": 9348 }, { "epoch": 0.76, "grad_norm": 2.5560120219233435, "learning_rate": 1.3890067051714435e-06, "loss": 0.5396, "step": 9349 }, { "epoch": 0.76, "grad_norm": 2.245313196667449, "learning_rate": 1.3880911926490658e-06, "loss": 0.4887, "step": 9350 }, { "epoch": 0.76, "grad_norm": 3.983509551692768, "learning_rate": 1.3871759333057783e-06, "loss": 0.7939, "step": 9351 }, { "epoch": 0.76, "grad_norm": 3.5050686399806574, "learning_rate": 1.3862609272057337e-06, "loss": 0.7862, "step": 9352 }, { "epoch": 0.76, "grad_norm": 4.121032184419712, "learning_rate": 1.3853461744130703e-06, "loss": 0.4938, "step": 9353 }, { "epoch": 0.76, "grad_norm": 3.9763811056618734, "learning_rate": 1.3844316749919113e-06, "loss": 1.0544, "step": 9354 }, { "epoch": 0.76, "grad_norm": 4.587943342699081, "learning_rate": 1.3835174290063553e-06, "loss": 1.1476, "step": 9355 }, { "epoch": 0.76, "grad_norm": 3.1870902498506797, "learning_rate": 1.3826034365204876e-06, "loss": 0.6916, "step": 9356 }, { "epoch": 0.76, "grad_norm": 4.174244088434585, "learning_rate": 1.3816896975983784e-06, "loss": 0.7123, "step": 9357 }, { "epoch": 0.76, "grad_norm": 3.739528189373238, "learning_rate": 1.380776212304073e-06, "loss": 0.7124, "step": 9358 }, { "epoch": 0.76, "grad_norm": 3.591639188404987, "learning_rate": 1.379862980701604e-06, "loss": 0.5921, "step": 9359 }, { "epoch": 0.77, "grad_norm": 3.73035731361834, "learning_rate": 1.378950002854985e-06, "loss": 0.6594, "step": 9360 }, { "epoch": 0.77, "grad_norm": 4.607527610861995, "learning_rate": 1.378037278828212e-06, "loss": 0.8706, "step": 9361 }, { "epoch": 0.77, "grad_norm": 5.870706821427959, "learning_rate": 1.3771248086852646e-06, "loss": 1.1595, "step": 9362 }, { "epoch": 0.77, "grad_norm": 4.951566000798771, "learning_rate": 1.3762125924900998e-06, "loss": 1.2864, "step": 9363 }, { "epoch": 0.77, "grad_norm": 1.0097441103001916, "learning_rate": 1.3753006303066612e-06, "loss": 0.1232, "step": 9364 }, { "epoch": 0.77, "grad_norm": 5.441261479938119, "learning_rate": 1.3743889221988744e-06, "loss": 0.895, "step": 9365 }, { "epoch": 0.77, "grad_norm": 2.9012789096869036, "learning_rate": 1.3734774682306446e-06, "loss": 0.515, "step": 9366 }, { "epoch": 0.77, "grad_norm": 4.148929030640898, "learning_rate": 1.372566268465862e-06, "loss": 0.7201, "step": 9367 }, { "epoch": 0.77, "grad_norm": 4.385343386382513, "learning_rate": 1.3716553229683989e-06, "loss": 0.726, "step": 9368 }, { "epoch": 0.77, "grad_norm": 4.111237908195454, "learning_rate": 1.3707446318021051e-06, "loss": 0.8248, "step": 9369 }, { "epoch": 0.77, "grad_norm": 3.449998081835322, "learning_rate": 1.3698341950308198e-06, "loss": 0.7886, "step": 9370 }, { "epoch": 0.77, "grad_norm": 4.300561376325745, "learning_rate": 1.3689240127183572e-06, "loss": 0.8067, "step": 9371 }, { "epoch": 0.77, "grad_norm": 4.250771097655397, "learning_rate": 1.3680140849285196e-06, "loss": 0.779, "step": 9372 }, { "epoch": 0.77, "grad_norm": 1.5043779709019471, "learning_rate": 1.3671044117250875e-06, "loss": 0.2278, "step": 9373 }, { "epoch": 0.77, "grad_norm": 3.6883155849475115, "learning_rate": 1.366194993171827e-06, "loss": 0.4736, "step": 9374 }, { "epoch": 0.77, "grad_norm": 3.793560869027931, "learning_rate": 1.3652858293324823e-06, "loss": 0.9813, "step": 9375 }, { "epoch": 0.77, "grad_norm": 2.328732050350097, "learning_rate": 1.3643769202707824e-06, "loss": 0.2956, "step": 9376 }, { "epoch": 0.77, "grad_norm": 4.89171571364176, "learning_rate": 1.3634682660504379e-06, "loss": 1.0947, "step": 9377 }, { "epoch": 0.77, "grad_norm": 3.4013676591947335, "learning_rate": 1.362559866735142e-06, "loss": 0.7116, "step": 9378 }, { "epoch": 0.77, "grad_norm": 6.549481225715749, "learning_rate": 1.3616517223885707e-06, "loss": 0.8947, "step": 9379 }, { "epoch": 0.77, "grad_norm": 3.999966242113525, "learning_rate": 1.3607438330743778e-06, "loss": 0.8584, "step": 9380 }, { "epoch": 0.77, "grad_norm": 4.9561831618363765, "learning_rate": 1.3598361988562037e-06, "loss": 0.8152, "step": 9381 }, { "epoch": 0.77, "grad_norm": 4.874434061208297, "learning_rate": 1.3589288197976707e-06, "loss": 0.6809, "step": 9382 }, { "epoch": 0.77, "grad_norm": 5.074947674701273, "learning_rate": 1.358021695962381e-06, "loss": 0.7936, "step": 9383 }, { "epoch": 0.77, "grad_norm": 4.760480366466265, "learning_rate": 1.3571148274139223e-06, "loss": 0.7204, "step": 9384 }, { "epoch": 0.77, "grad_norm": 4.5202573924317875, "learning_rate": 1.356208214215859e-06, "loss": 0.8679, "step": 9385 }, { "epoch": 0.77, "grad_norm": 3.7399295576934057, "learning_rate": 1.3553018564317432e-06, "loss": 0.6583, "step": 9386 }, { "epoch": 0.77, "grad_norm": 2.5029310094502546, "learning_rate": 1.354395754125104e-06, "loss": 0.4584, "step": 9387 }, { "epoch": 0.77, "grad_norm": 4.787278806573176, "learning_rate": 1.3534899073594566e-06, "loss": 0.9349, "step": 9388 }, { "epoch": 0.77, "grad_norm": 3.8050894166241926, "learning_rate": 1.352584316198297e-06, "loss": 0.7285, "step": 9389 }, { "epoch": 0.77, "grad_norm": 4.330536045395092, "learning_rate": 1.351678980705104e-06, "loss": 1.2079, "step": 9390 }, { "epoch": 0.77, "grad_norm": 3.253642168070775, "learning_rate": 1.3507739009433374e-06, "loss": 0.6539, "step": 9391 }, { "epoch": 0.77, "grad_norm": 6.463133979464778, "learning_rate": 1.3498690769764378e-06, "loss": 1.4632, "step": 9392 }, { "epoch": 0.77, "grad_norm": 4.254093796324293, "learning_rate": 1.3489645088678305e-06, "loss": 1.1103, "step": 9393 }, { "epoch": 0.77, "grad_norm": 2.596333662761472, "learning_rate": 1.348060196680922e-06, "loss": 0.5275, "step": 9394 }, { "epoch": 0.77, "grad_norm": 3.251869759713248, "learning_rate": 1.3471561404791e-06, "loss": 0.6351, "step": 9395 }, { "epoch": 0.77, "grad_norm": 2.355014066464097, "learning_rate": 1.3462523403257355e-06, "loss": 0.3062, "step": 9396 }, { "epoch": 0.77, "grad_norm": 3.033922825683495, "learning_rate": 1.3453487962841821e-06, "loss": 0.5902, "step": 9397 }, { "epoch": 0.77, "grad_norm": 4.126571279478452, "learning_rate": 1.3444455084177716e-06, "loss": 0.6138, "step": 9398 }, { "epoch": 0.77, "grad_norm": 1.7703313801155975, "learning_rate": 1.343542476789822e-06, "loss": 0.3165, "step": 9399 }, { "epoch": 0.77, "grad_norm": 3.172617171443144, "learning_rate": 1.3426397014636334e-06, "loss": 0.4856, "step": 9400 }, { "epoch": 0.77, "grad_norm": 4.597938554244449, "learning_rate": 1.3417371825024832e-06, "loss": 0.9556, "step": 9401 }, { "epoch": 0.77, "grad_norm": 4.4172447302014675, "learning_rate": 1.3408349199696374e-06, "loss": 0.9496, "step": 9402 }, { "epoch": 0.77, "grad_norm": 4.725445692558316, "learning_rate": 1.3399329139283375e-06, "loss": 1.241, "step": 9403 }, { "epoch": 0.77, "grad_norm": 4.7472402363942745, "learning_rate": 1.3390311644418113e-06, "loss": 1.1577, "step": 9404 }, { "epoch": 0.77, "grad_norm": 2.658514626642248, "learning_rate": 1.3381296715732678e-06, "loss": 0.215, "step": 9405 }, { "epoch": 0.77, "grad_norm": 3.9845902825761854, "learning_rate": 1.3372284353858983e-06, "loss": 0.5422, "step": 9406 }, { "epoch": 0.77, "grad_norm": 3.8423579845115987, "learning_rate": 1.3363274559428747e-06, "loss": 0.7291, "step": 9407 }, { "epoch": 0.77, "grad_norm": 4.410067348783659, "learning_rate": 1.335426733307354e-06, "loss": 0.7367, "step": 9408 }, { "epoch": 0.77, "grad_norm": 4.537893849743915, "learning_rate": 1.3345262675424691e-06, "loss": 0.8426, "step": 9409 }, { "epoch": 0.77, "grad_norm": 2.876409687775638, "learning_rate": 1.333626058711341e-06, "loss": 0.679, "step": 9410 }, { "epoch": 0.77, "grad_norm": 5.163415705623005, "learning_rate": 1.3327261068770698e-06, "loss": 1.0723, "step": 9411 }, { "epoch": 0.77, "grad_norm": 5.045484247563682, "learning_rate": 1.331826412102738e-06, "loss": 0.8043, "step": 9412 }, { "epoch": 0.77, "grad_norm": 4.400894042145848, "learning_rate": 1.3309269744514114e-06, "loss": 0.8605, "step": 9413 }, { "epoch": 0.77, "grad_norm": 3.41461886481942, "learning_rate": 1.3300277939861372e-06, "loss": 0.4978, "step": 9414 }, { "epoch": 0.77, "grad_norm": 4.885506898324602, "learning_rate": 1.3291288707699417e-06, "loss": 0.615, "step": 9415 }, { "epoch": 0.77, "grad_norm": 5.452241834588947, "learning_rate": 1.328230204865838e-06, "loss": 1.2617, "step": 9416 }, { "epoch": 0.77, "grad_norm": 5.098897694714867, "learning_rate": 1.327331796336816e-06, "loss": 1.0752, "step": 9417 }, { "epoch": 0.77, "grad_norm": 4.66589764611867, "learning_rate": 1.3264336452458514e-06, "loss": 0.9541, "step": 9418 }, { "epoch": 0.77, "grad_norm": 3.275492019155368, "learning_rate": 1.3255357516559025e-06, "loss": 0.4241, "step": 9419 }, { "epoch": 0.77, "grad_norm": 3.9066721077298308, "learning_rate": 1.3246381156299048e-06, "loss": 0.886, "step": 9420 }, { "epoch": 0.77, "grad_norm": 2.1401612817685454, "learning_rate": 1.3237407372307792e-06, "loss": 0.3662, "step": 9421 }, { "epoch": 0.77, "grad_norm": 4.326368524937841, "learning_rate": 1.3228436165214298e-06, "loss": 0.7366, "step": 9422 }, { "epoch": 0.77, "grad_norm": 2.3468037609646273, "learning_rate": 1.321946753564739e-06, "loss": 0.489, "step": 9423 }, { "epoch": 0.77, "grad_norm": 3.2502527737540756, "learning_rate": 1.3210501484235744e-06, "loss": 0.5933, "step": 9424 }, { "epoch": 0.77, "grad_norm": 2.2470551800294674, "learning_rate": 1.3201538011607845e-06, "loss": 0.3139, "step": 9425 }, { "epoch": 0.77, "grad_norm": 4.489138232154664, "learning_rate": 1.3192577118391975e-06, "loss": 0.7102, "step": 9426 }, { "epoch": 0.77, "grad_norm": 1.8914064841933165, "learning_rate": 1.318361880521626e-06, "loss": 0.4059, "step": 9427 }, { "epoch": 0.77, "grad_norm": 4.063844683388602, "learning_rate": 1.3174663072708637e-06, "loss": 0.6773, "step": 9428 }, { "epoch": 0.77, "grad_norm": 3.50563997940386, "learning_rate": 1.3165709921496873e-06, "loss": 0.714, "step": 9429 }, { "epoch": 0.77, "grad_norm": 3.1772613011563724, "learning_rate": 1.3156759352208554e-06, "loss": 0.5923, "step": 9430 }, { "epoch": 0.77, "grad_norm": 2.2247310279851917, "learning_rate": 1.3147811365471048e-06, "loss": 0.3034, "step": 9431 }, { "epoch": 0.77, "grad_norm": 3.25694837834173, "learning_rate": 1.3138865961911585e-06, "loss": 0.8065, "step": 9432 }, { "epoch": 0.77, "grad_norm": 4.787558627022996, "learning_rate": 1.312992314215721e-06, "loss": 0.9318, "step": 9433 }, { "epoch": 0.77, "grad_norm": 2.661175799036978, "learning_rate": 1.3120982906834745e-06, "loss": 0.7151, "step": 9434 }, { "epoch": 0.77, "grad_norm": 5.371334083380921, "learning_rate": 1.3112045256570888e-06, "loss": 1.0989, "step": 9435 }, { "epoch": 0.77, "grad_norm": 5.424428779312583, "learning_rate": 1.3103110191992118e-06, "loss": 1.0371, "step": 9436 }, { "epoch": 0.77, "grad_norm": 3.7596546812463707, "learning_rate": 1.3094177713724765e-06, "loss": 0.7684, "step": 9437 }, { "epoch": 0.77, "grad_norm": 4.099396301324731, "learning_rate": 1.308524782239492e-06, "loss": 0.8312, "step": 9438 }, { "epoch": 0.77, "grad_norm": 3.6834372052861317, "learning_rate": 1.3076320518628554e-06, "loss": 0.5639, "step": 9439 }, { "epoch": 0.77, "grad_norm": 1.2043445807513642, "learning_rate": 1.3067395803051425e-06, "loss": 0.2032, "step": 9440 }, { "epoch": 0.77, "grad_norm": 4.290269026486594, "learning_rate": 1.3058473676289118e-06, "loss": 0.7477, "step": 9441 }, { "epoch": 0.77, "grad_norm": 4.87168361128787, "learning_rate": 1.3049554138967052e-06, "loss": 1.0051, "step": 9442 }, { "epoch": 0.77, "grad_norm": 1.492575685994819, "learning_rate": 1.304063719171042e-06, "loss": 0.1826, "step": 9443 }, { "epoch": 0.77, "grad_norm": 3.6077122922831446, "learning_rate": 1.3031722835144266e-06, "loss": 0.4407, "step": 9444 }, { "epoch": 0.77, "grad_norm": 3.58056453786561, "learning_rate": 1.302281106989346e-06, "loss": 0.5929, "step": 9445 }, { "epoch": 0.77, "grad_norm": 4.254577200822444, "learning_rate": 1.3013901896582677e-06, "loss": 0.8297, "step": 9446 }, { "epoch": 0.77, "grad_norm": 4.348001334484949, "learning_rate": 1.3004995315836417e-06, "loss": 1.0947, "step": 9447 }, { "epoch": 0.77, "grad_norm": 1.1117699308226239, "learning_rate": 1.2996091328278965e-06, "loss": 0.1159, "step": 9448 }, { "epoch": 0.77, "grad_norm": 2.1536886378875844, "learning_rate": 1.2987189934534488e-06, "loss": 0.4287, "step": 9449 }, { "epoch": 0.77, "grad_norm": 3.998286991655185, "learning_rate": 1.29782911352269e-06, "loss": 0.7968, "step": 9450 }, { "epoch": 0.77, "grad_norm": 5.112248998883585, "learning_rate": 1.2969394930979984e-06, "loss": 1.1064, "step": 9451 }, { "epoch": 0.77, "grad_norm": 2.889365596754016, "learning_rate": 1.2960501322417323e-06, "loss": 0.4504, "step": 9452 }, { "epoch": 0.77, "grad_norm": 4.3994968910567405, "learning_rate": 1.2951610310162326e-06, "loss": 0.8025, "step": 9453 }, { "epoch": 0.77, "grad_norm": 3.7173719745240255, "learning_rate": 1.2942721894838227e-06, "loss": 0.6403, "step": 9454 }, { "epoch": 0.77, "grad_norm": 3.8722081746417203, "learning_rate": 1.2933836077068036e-06, "loss": 0.645, "step": 9455 }, { "epoch": 0.77, "grad_norm": 4.660927074059716, "learning_rate": 1.2924952857474622e-06, "loss": 0.9383, "step": 9456 }, { "epoch": 0.77, "grad_norm": 3.21218245222277, "learning_rate": 1.291607223668066e-06, "loss": 0.2733, "step": 9457 }, { "epoch": 0.77, "grad_norm": 3.122593583023933, "learning_rate": 1.2907194215308644e-06, "loss": 0.6487, "step": 9458 }, { "epoch": 0.77, "grad_norm": 5.298242692595961, "learning_rate": 1.2898318793980903e-06, "loss": 0.9055, "step": 9459 }, { "epoch": 0.77, "grad_norm": 5.478740435234801, "learning_rate": 1.288944597331953e-06, "loss": 1.3584, "step": 9460 }, { "epoch": 0.77, "grad_norm": 4.834110280911659, "learning_rate": 1.288057575394649e-06, "loss": 0.8821, "step": 9461 }, { "epoch": 0.77, "grad_norm": 3.4921116431704475, "learning_rate": 1.2871708136483546e-06, "loss": 0.8571, "step": 9462 }, { "epoch": 0.77, "grad_norm": 4.40767678423515, "learning_rate": 1.2862843121552293e-06, "loss": 0.6731, "step": 9463 }, { "epoch": 0.77, "grad_norm": 3.4209369222793753, "learning_rate": 1.28539807097741e-06, "loss": 0.6032, "step": 9464 }, { "epoch": 0.77, "grad_norm": 2.98294933471214, "learning_rate": 1.2845120901770214e-06, "loss": 0.5268, "step": 9465 }, { "epoch": 0.77, "grad_norm": 2.8442712477361876, "learning_rate": 1.2836263698161638e-06, "loss": 0.527, "step": 9466 }, { "epoch": 0.77, "grad_norm": 2.020183840794972, "learning_rate": 1.2827409099569237e-06, "loss": 0.4005, "step": 9467 }, { "epoch": 0.77, "grad_norm": 4.753830099766738, "learning_rate": 1.2818557106613689e-06, "loss": 0.9774, "step": 9468 }, { "epoch": 0.77, "grad_norm": 4.753391100022944, "learning_rate": 1.280970771991547e-06, "loss": 0.6152, "step": 9469 }, { "epoch": 0.77, "grad_norm": 4.5802211426804424, "learning_rate": 1.2800860940094889e-06, "loss": 0.8815, "step": 9470 }, { "epoch": 0.77, "grad_norm": 2.932757418212026, "learning_rate": 1.2792016767772081e-06, "loss": 0.6008, "step": 9471 }, { "epoch": 0.77, "grad_norm": 4.060272391727669, "learning_rate": 1.278317520356695e-06, "loss": 0.7747, "step": 9472 }, { "epoch": 0.77, "grad_norm": 3.391174501253654, "learning_rate": 1.2774336248099272e-06, "loss": 0.5028, "step": 9473 }, { "epoch": 0.77, "grad_norm": 4.10834258261621, "learning_rate": 1.2765499901988616e-06, "loss": 0.4337, "step": 9474 }, { "epoch": 0.77, "grad_norm": 3.6488900764648484, "learning_rate": 1.275666616585437e-06, "loss": 0.8388, "step": 9475 }, { "epoch": 0.77, "grad_norm": 3.0377874146013193, "learning_rate": 1.2747835040315741e-06, "loss": 0.7262, "step": 9476 }, { "epoch": 0.77, "grad_norm": 3.020586427498907, "learning_rate": 1.2739006525991772e-06, "loss": 0.4525, "step": 9477 }, { "epoch": 0.77, "grad_norm": 5.296675081507472, "learning_rate": 1.273018062350127e-06, "loss": 1.4636, "step": 9478 }, { "epoch": 0.77, "grad_norm": 2.586643243293631, "learning_rate": 1.2721357333462918e-06, "loss": 0.5183, "step": 9479 }, { "epoch": 0.77, "grad_norm": 2.8351867377323905, "learning_rate": 1.2712536656495167e-06, "loss": 0.5367, "step": 9480 }, { "epoch": 0.77, "grad_norm": 4.886196342458603, "learning_rate": 1.2703718593216324e-06, "loss": 0.7353, "step": 9481 }, { "epoch": 0.78, "grad_norm": 3.554634043073279, "learning_rate": 1.2694903144244509e-06, "loss": 0.703, "step": 9482 }, { "epoch": 0.78, "grad_norm": 5.4791022781511325, "learning_rate": 1.2686090310197613e-06, "loss": 0.7757, "step": 9483 }, { "epoch": 0.78, "grad_norm": 3.7325192961946816, "learning_rate": 1.2677280091693395e-06, "loss": 0.7382, "step": 9484 }, { "epoch": 0.78, "grad_norm": 4.521604735262762, "learning_rate": 1.2668472489349416e-06, "loss": 1.0818, "step": 9485 }, { "epoch": 0.78, "grad_norm": 3.000064290386803, "learning_rate": 1.2659667503783047e-06, "loss": 0.2897, "step": 9486 }, { "epoch": 0.78, "grad_norm": 3.1188878553316743, "learning_rate": 1.265086513561148e-06, "loss": 0.4462, "step": 9487 }, { "epoch": 0.78, "grad_norm": 6.865122818758232, "learning_rate": 1.2642065385451736e-06, "loss": 1.4022, "step": 9488 }, { "epoch": 0.78, "grad_norm": 6.057940881244001, "learning_rate": 1.263326825392061e-06, "loss": 1.1208, "step": 9489 }, { "epoch": 0.78, "grad_norm": 4.035237632457478, "learning_rate": 1.2624473741634764e-06, "loss": 0.8167, "step": 9490 }, { "epoch": 0.78, "grad_norm": 6.389708678626936, "learning_rate": 1.2615681849210648e-06, "loss": 1.2094, "step": 9491 }, { "epoch": 0.78, "grad_norm": 3.9356074826170615, "learning_rate": 1.260689257726454e-06, "loss": 0.767, "step": 9492 }, { "epoch": 0.78, "grad_norm": 3.229117168492173, "learning_rate": 1.2598105926412536e-06, "loss": 0.5801, "step": 9493 }, { "epoch": 0.78, "grad_norm": 3.9430776553172686, "learning_rate": 1.2589321897270523e-06, "loss": 0.5619, "step": 9494 }, { "epoch": 0.78, "grad_norm": 3.8731757871049557, "learning_rate": 1.2580540490454246e-06, "loss": 0.689, "step": 9495 }, { "epoch": 0.78, "grad_norm": 3.9330079288498623, "learning_rate": 1.2571761706579216e-06, "loss": 0.5508, "step": 9496 }, { "epoch": 0.78, "grad_norm": 4.306828303358857, "learning_rate": 1.2562985546260804e-06, "loss": 0.5306, "step": 9497 }, { "epoch": 0.78, "grad_norm": 2.9891319839435906, "learning_rate": 1.2554212010114176e-06, "loss": 0.784, "step": 9498 }, { "epoch": 0.78, "grad_norm": 3.56118675712117, "learning_rate": 1.2545441098754336e-06, "loss": 0.7236, "step": 9499 }, { "epoch": 0.78, "grad_norm": 4.921038626828936, "learning_rate": 1.2536672812796057e-06, "loss": 0.7664, "step": 9500 }, { "epoch": 0.78, "grad_norm": 4.779705067114796, "learning_rate": 1.252790715285398e-06, "loss": 0.7688, "step": 9501 }, { "epoch": 0.78, "grad_norm": 4.0120798055306865, "learning_rate": 1.2519144119542526e-06, "loss": 0.7212, "step": 9502 }, { "epoch": 0.78, "grad_norm": 2.3584976659903703, "learning_rate": 1.251038371347595e-06, "loss": 0.3485, "step": 9503 }, { "epoch": 0.78, "grad_norm": 4.913187163884777, "learning_rate": 1.2501625935268325e-06, "loss": 1.0688, "step": 9504 }, { "epoch": 0.78, "grad_norm": 4.87134469525131, "learning_rate": 1.2492870785533539e-06, "loss": 0.8905, "step": 9505 }, { "epoch": 0.78, "grad_norm": 3.8669210139522288, "learning_rate": 1.248411826488527e-06, "loss": 0.771, "step": 9506 }, { "epoch": 0.78, "grad_norm": 3.366795829316916, "learning_rate": 1.2475368373937035e-06, "loss": 0.4478, "step": 9507 }, { "epoch": 0.78, "grad_norm": 3.815116094891325, "learning_rate": 1.2466621113302174e-06, "loss": 0.7628, "step": 9508 }, { "epoch": 0.78, "grad_norm": 3.944149278920771, "learning_rate": 1.2457876483593839e-06, "loss": 0.5918, "step": 9509 }, { "epoch": 0.78, "grad_norm": 4.0388836112634285, "learning_rate": 1.2449134485424969e-06, "loss": 0.5214, "step": 9510 }, { "epoch": 0.78, "grad_norm": 4.327183866109166, "learning_rate": 1.244039511940836e-06, "loss": 0.8289, "step": 9511 }, { "epoch": 0.78, "grad_norm": 3.7530568667721487, "learning_rate": 1.2431658386156576e-06, "loss": 0.4686, "step": 9512 }, { "epoch": 0.78, "grad_norm": 4.480329489320254, "learning_rate": 1.2422924286282045e-06, "loss": 0.6048, "step": 9513 }, { "epoch": 0.78, "grad_norm": 2.1567143431506794, "learning_rate": 1.2414192820396987e-06, "loss": 0.4192, "step": 9514 }, { "epoch": 0.78, "grad_norm": 5.82164518572063, "learning_rate": 1.2405463989113437e-06, "loss": 0.8892, "step": 9515 }, { "epoch": 0.78, "grad_norm": 5.676772750999895, "learning_rate": 1.2396737793043246e-06, "loss": 0.6857, "step": 9516 }, { "epoch": 0.78, "grad_norm": 4.92710925047657, "learning_rate": 1.2388014232798102e-06, "loss": 0.8158, "step": 9517 }, { "epoch": 0.78, "grad_norm": 3.815218867450044, "learning_rate": 1.237929330898946e-06, "loss": 0.628, "step": 9518 }, { "epoch": 0.78, "grad_norm": 1.5802954859651215, "learning_rate": 1.2370575022228632e-06, "loss": 0.2097, "step": 9519 }, { "epoch": 0.78, "grad_norm": 3.087610106969515, "learning_rate": 1.2361859373126727e-06, "loss": 0.5337, "step": 9520 }, { "epoch": 0.78, "grad_norm": 2.819997553881703, "learning_rate": 1.2353146362294682e-06, "loss": 0.5812, "step": 9521 }, { "epoch": 0.78, "grad_norm": 4.129135410511098, "learning_rate": 1.234443599034325e-06, "loss": 0.9036, "step": 9522 }, { "epoch": 0.78, "grad_norm": 3.448219456451351, "learning_rate": 1.2335728257882962e-06, "loss": 0.5669, "step": 9523 }, { "epoch": 0.78, "grad_norm": 1.3452401635169344, "learning_rate": 1.232702316552421e-06, "loss": 0.2998, "step": 9524 }, { "epoch": 0.78, "grad_norm": 5.8183723015943, "learning_rate": 1.2318320713877191e-06, "loss": 0.9775, "step": 9525 }, { "epoch": 0.78, "grad_norm": 5.135468860988995, "learning_rate": 1.230962090355189e-06, "loss": 1.212, "step": 9526 }, { "epoch": 0.78, "grad_norm": 4.61340378905854, "learning_rate": 1.2300923735158133e-06, "loss": 0.8543, "step": 9527 }, { "epoch": 0.78, "grad_norm": 3.5275409270739684, "learning_rate": 1.2292229209305567e-06, "loss": 0.5539, "step": 9528 }, { "epoch": 0.78, "grad_norm": 3.0836387895110375, "learning_rate": 1.2283537326603611e-06, "loss": 0.4498, "step": 9529 }, { "epoch": 0.78, "grad_norm": 2.9828518185080988, "learning_rate": 1.2274848087661555e-06, "loss": 0.4785, "step": 9530 }, { "epoch": 0.78, "grad_norm": 3.4538261017643257, "learning_rate": 1.2266161493088463e-06, "loss": 0.5159, "step": 9531 }, { "epoch": 0.78, "grad_norm": 3.2069474774767754, "learning_rate": 1.225747754349323e-06, "loss": 0.5612, "step": 9532 }, { "epoch": 0.78, "grad_norm": 5.064186344941871, "learning_rate": 1.2248796239484567e-06, "loss": 0.9742, "step": 9533 }, { "epoch": 0.78, "grad_norm": 2.8123722929754025, "learning_rate": 1.2240117581671013e-06, "loss": 0.5447, "step": 9534 }, { "epoch": 0.78, "grad_norm": 4.408214932514735, "learning_rate": 1.2231441570660863e-06, "loss": 0.8532, "step": 9535 }, { "epoch": 0.78, "grad_norm": 5.284974680679537, "learning_rate": 1.222276820706229e-06, "loss": 1.443, "step": 9536 }, { "epoch": 0.78, "grad_norm": 2.978184535275699, "learning_rate": 1.2214097491483262e-06, "loss": 0.438, "step": 9537 }, { "epoch": 0.78, "grad_norm": 4.719536409936797, "learning_rate": 1.2205429424531556e-06, "loss": 0.823, "step": 9538 }, { "epoch": 0.78, "grad_norm": 3.636600975898947, "learning_rate": 1.2196764006814764e-06, "loss": 0.7855, "step": 9539 }, { "epoch": 0.78, "grad_norm": 3.313568214669807, "learning_rate": 1.2188101238940309e-06, "loss": 0.5635, "step": 9540 }, { "epoch": 0.78, "grad_norm": 4.827929767912652, "learning_rate": 1.2179441121515384e-06, "loss": 1.1951, "step": 9541 }, { "epoch": 0.78, "grad_norm": 5.83635248764761, "learning_rate": 1.2170783655147056e-06, "loss": 0.9488, "step": 9542 }, { "epoch": 0.78, "grad_norm": 3.730236472623352, "learning_rate": 1.2162128840442145e-06, "loss": 0.7573, "step": 9543 }, { "epoch": 0.78, "grad_norm": 2.8731020795937416, "learning_rate": 1.2153476678007331e-06, "loss": 0.7127, "step": 9544 }, { "epoch": 0.78, "grad_norm": 4.5919527598477154, "learning_rate": 1.2144827168449109e-06, "loss": 0.7943, "step": 9545 }, { "epoch": 0.78, "grad_norm": 4.010204035251635, "learning_rate": 1.2136180312373742e-06, "loss": 0.898, "step": 9546 }, { "epoch": 0.78, "grad_norm": 2.8695847466899043, "learning_rate": 1.212753611038735e-06, "loss": 0.5351, "step": 9547 }, { "epoch": 0.78, "grad_norm": 4.32016422722352, "learning_rate": 1.2118894563095857e-06, "loss": 1.0733, "step": 9548 }, { "epoch": 0.78, "grad_norm": 4.568638969482633, "learning_rate": 1.2110255671104997e-06, "loss": 0.6358, "step": 9549 }, { "epoch": 0.78, "grad_norm": 4.875271015558787, "learning_rate": 1.2101619435020311e-06, "loss": 0.9305, "step": 9550 }, { "epoch": 0.78, "grad_norm": 3.022347320932427, "learning_rate": 1.2092985855447193e-06, "loss": 0.6068, "step": 9551 }, { "epoch": 0.78, "grad_norm": 2.3295190510012524, "learning_rate": 1.2084354932990772e-06, "loss": 0.3397, "step": 9552 }, { "epoch": 0.78, "grad_norm": 3.2833774245157272, "learning_rate": 1.207572666825606e-06, "loss": 0.6509, "step": 9553 }, { "epoch": 0.78, "grad_norm": 4.484701560564738, "learning_rate": 1.2067101061847869e-06, "loss": 1.1058, "step": 9554 }, { "epoch": 0.78, "grad_norm": 4.690085106932299, "learning_rate": 1.2058478114370803e-06, "loss": 1.0041, "step": 9555 }, { "epoch": 0.78, "grad_norm": 3.456193791753844, "learning_rate": 1.2049857826429317e-06, "loss": 0.712, "step": 9556 }, { "epoch": 0.78, "grad_norm": 4.223199212769577, "learning_rate": 1.2041240198627617e-06, "loss": 0.7078, "step": 9557 }, { "epoch": 0.78, "grad_norm": 2.8785252470799803, "learning_rate": 1.2032625231569805e-06, "loss": 0.5626, "step": 9558 }, { "epoch": 0.78, "grad_norm": 3.4123505230738402, "learning_rate": 1.2024012925859712e-06, "loss": 0.6199, "step": 9559 }, { "epoch": 0.78, "grad_norm": 3.939580602094508, "learning_rate": 1.201540328210104e-06, "loss": 0.5855, "step": 9560 }, { "epoch": 0.78, "grad_norm": 1.135372367422248, "learning_rate": 1.200679630089729e-06, "loss": 0.1252, "step": 9561 }, { "epoch": 0.78, "grad_norm": 4.966481781191274, "learning_rate": 1.1998191982851787e-06, "loss": 0.9955, "step": 9562 }, { "epoch": 0.78, "grad_norm": 2.694849907298931, "learning_rate": 1.1989590328567623e-06, "loss": 0.4365, "step": 9563 }, { "epoch": 0.78, "grad_norm": 4.8035534047839255, "learning_rate": 1.1980991338647757e-06, "loss": 1.099, "step": 9564 }, { "epoch": 0.78, "grad_norm": 4.229682790370691, "learning_rate": 1.1972395013694944e-06, "loss": 0.7544, "step": 9565 }, { "epoch": 0.78, "grad_norm": 4.3258366679237765, "learning_rate": 1.1963801354311738e-06, "loss": 0.7798, "step": 9566 }, { "epoch": 0.78, "grad_norm": 4.395452828167987, "learning_rate": 1.1955210361100521e-06, "loss": 0.8726, "step": 9567 }, { "epoch": 0.78, "grad_norm": 5.194726586130052, "learning_rate": 1.1946622034663507e-06, "loss": 0.873, "step": 9568 }, { "epoch": 0.78, "grad_norm": 3.329469032605557, "learning_rate": 1.1938036375602662e-06, "loss": 0.4734, "step": 9569 }, { "epoch": 0.78, "grad_norm": 4.477324477489711, "learning_rate": 1.1929453384519818e-06, "loss": 0.8422, "step": 9570 }, { "epoch": 0.78, "grad_norm": 4.86605519298845, "learning_rate": 1.1920873062016613e-06, "loss": 0.7282, "step": 9571 }, { "epoch": 0.78, "grad_norm": 1.8644296246619503, "learning_rate": 1.1912295408694496e-06, "loss": 0.3902, "step": 9572 }, { "epoch": 0.78, "grad_norm": 2.126756509218199, "learning_rate": 1.1903720425154702e-06, "loss": 0.3178, "step": 9573 }, { "epoch": 0.78, "grad_norm": 3.1785592662935267, "learning_rate": 1.1895148111998323e-06, "loss": 0.2606, "step": 9574 }, { "epoch": 0.78, "grad_norm": 2.8275021525166313, "learning_rate": 1.1886578469826215e-06, "loss": 0.5906, "step": 9575 }, { "epoch": 0.78, "grad_norm": 4.148790268356531, "learning_rate": 1.1878011499239083e-06, "loss": 0.8455, "step": 9576 }, { "epoch": 0.78, "grad_norm": 4.830681000824894, "learning_rate": 1.1869447200837443e-06, "loss": 0.701, "step": 9577 }, { "epoch": 0.78, "grad_norm": 4.3820428705803245, "learning_rate": 1.1860885575221603e-06, "loss": 0.8088, "step": 9578 }, { "epoch": 0.78, "grad_norm": 2.3999850220494645, "learning_rate": 1.1852326622991712e-06, "loss": 0.2845, "step": 9579 }, { "epoch": 0.78, "grad_norm": 3.5818051654165215, "learning_rate": 1.1843770344747712e-06, "loss": 0.8162, "step": 9580 }, { "epoch": 0.78, "grad_norm": 3.3386044856674983, "learning_rate": 1.183521674108934e-06, "loss": 0.4882, "step": 9581 }, { "epoch": 0.78, "grad_norm": 4.6539031385252825, "learning_rate": 1.1826665812616183e-06, "loss": 1.1817, "step": 9582 }, { "epoch": 0.78, "grad_norm": 4.833873751597924, "learning_rate": 1.1818117559927622e-06, "loss": 0.5528, "step": 9583 }, { "epoch": 0.78, "grad_norm": 4.237939933708527, "learning_rate": 1.1809571983622846e-06, "loss": 0.7856, "step": 9584 }, { "epoch": 0.78, "grad_norm": 4.505227537156948, "learning_rate": 1.1801029084300891e-06, "loss": 0.6273, "step": 9585 }, { "epoch": 0.78, "grad_norm": 1.783948077996453, "learning_rate": 1.1792488862560536e-06, "loss": 0.3235, "step": 9586 }, { "epoch": 0.78, "grad_norm": 6.02804589596132, "learning_rate": 1.1783951319000437e-06, "loss": 1.1752, "step": 9587 }, { "epoch": 0.78, "grad_norm": 3.4292192697448938, "learning_rate": 1.177541645421904e-06, "loss": 0.3607, "step": 9588 }, { "epoch": 0.78, "grad_norm": 2.202673607295536, "learning_rate": 1.1766884268814587e-06, "loss": 0.4486, "step": 9589 }, { "epoch": 0.78, "grad_norm": 3.989876397149936, "learning_rate": 1.1758354763385154e-06, "loss": 0.812, "step": 9590 }, { "epoch": 0.78, "grad_norm": 3.622833772162186, "learning_rate": 1.174982793852864e-06, "loss": 0.5832, "step": 9591 }, { "epoch": 0.78, "grad_norm": 2.178032237414417, "learning_rate": 1.1741303794842706e-06, "loss": 0.3657, "step": 9592 }, { "epoch": 0.78, "grad_norm": 3.852195921600761, "learning_rate": 1.1732782332924874e-06, "loss": 0.8374, "step": 9593 }, { "epoch": 0.78, "grad_norm": 4.771156324016596, "learning_rate": 1.1724263553372462e-06, "loss": 0.9483, "step": 9594 }, { "epoch": 0.78, "grad_norm": 3.292588240670132, "learning_rate": 1.1715747456782594e-06, "loss": 0.706, "step": 9595 }, { "epoch": 0.78, "grad_norm": 2.5559926633483454, "learning_rate": 1.1707234043752219e-06, "loss": 0.5238, "step": 9596 }, { "epoch": 0.78, "grad_norm": 4.615783434419688, "learning_rate": 1.1698723314878102e-06, "loss": 0.7433, "step": 9597 }, { "epoch": 0.78, "grad_norm": 3.940962409567911, "learning_rate": 1.1690215270756777e-06, "loss": 0.9237, "step": 9598 }, { "epoch": 0.78, "grad_norm": 2.6793765422907363, "learning_rate": 1.168170991198464e-06, "loss": 0.3631, "step": 9599 }, { "epoch": 0.78, "grad_norm": 2.8653290532875517, "learning_rate": 1.1673207239157874e-06, "loss": 0.6689, "step": 9600 }, { "epoch": 0.78, "grad_norm": 2.976737564961005, "learning_rate": 1.1664707252872481e-06, "loss": 0.6642, "step": 9601 }, { "epoch": 0.78, "grad_norm": 3.9914614639763992, "learning_rate": 1.165620995372429e-06, "loss": 0.9346, "step": 9602 }, { "epoch": 0.78, "grad_norm": 3.1113270695900557, "learning_rate": 1.164771534230889e-06, "loss": 0.7333, "step": 9603 }, { "epoch": 0.78, "grad_norm": 4.8905432502915245, "learning_rate": 1.1639223419221756e-06, "loss": 0.8442, "step": 9604 }, { "epoch": 0.79, "grad_norm": 3.450603824396783, "learning_rate": 1.1630734185058096e-06, "loss": 0.5446, "step": 9605 }, { "epoch": 0.79, "grad_norm": 5.014716241071041, "learning_rate": 1.162224764041298e-06, "loss": 0.7966, "step": 9606 }, { "epoch": 0.79, "grad_norm": 4.548753052212068, "learning_rate": 1.1613763785881294e-06, "loss": 0.8736, "step": 9607 }, { "epoch": 0.79, "grad_norm": 5.183787050875972, "learning_rate": 1.1605282622057718e-06, "loss": 0.836, "step": 9608 }, { "epoch": 0.79, "grad_norm": 3.605722267159306, "learning_rate": 1.1596804149536723e-06, "loss": 0.7694, "step": 9609 }, { "epoch": 0.79, "grad_norm": 3.536756127578316, "learning_rate": 1.1588328368912622e-06, "loss": 0.3536, "step": 9610 }, { "epoch": 0.79, "grad_norm": 4.338837264580741, "learning_rate": 1.157985528077954e-06, "loss": 0.7896, "step": 9611 }, { "epoch": 0.79, "grad_norm": 4.002047789875118, "learning_rate": 1.1571384885731395e-06, "loss": 0.8901, "step": 9612 }, { "epoch": 0.79, "grad_norm": 2.805848427243978, "learning_rate": 1.1562917184361926e-06, "loss": 0.3782, "step": 9613 }, { "epoch": 0.79, "grad_norm": 5.156441147164578, "learning_rate": 1.1554452177264703e-06, "loss": 0.7537, "step": 9614 }, { "epoch": 0.79, "grad_norm": 4.1989631116228505, "learning_rate": 1.1545989865033047e-06, "loss": 0.6675, "step": 9615 }, { "epoch": 0.79, "grad_norm": 3.536201511039549, "learning_rate": 1.1537530248260154e-06, "loss": 0.7386, "step": 9616 }, { "epoch": 0.79, "grad_norm": 5.430036824389245, "learning_rate": 1.1529073327538997e-06, "loss": 1.0954, "step": 9617 }, { "epoch": 0.79, "grad_norm": 4.497001618649554, "learning_rate": 1.1520619103462387e-06, "loss": 0.5644, "step": 9618 }, { "epoch": 0.79, "grad_norm": 5.06404922181396, "learning_rate": 1.1512167576622906e-06, "loss": 0.8917, "step": 9619 }, { "epoch": 0.79, "grad_norm": 2.4379380659815215, "learning_rate": 1.150371874761299e-06, "loss": 0.4712, "step": 9620 }, { "epoch": 0.79, "grad_norm": 3.695014470124851, "learning_rate": 1.1495272617024839e-06, "loss": 0.7112, "step": 9621 }, { "epoch": 0.79, "grad_norm": 4.497502645969183, "learning_rate": 1.1486829185450504e-06, "loss": 0.9781, "step": 9622 }, { "epoch": 0.79, "grad_norm": 4.00480616008217, "learning_rate": 1.147838845348183e-06, "loss": 0.8317, "step": 9623 }, { "epoch": 0.79, "grad_norm": 4.394024203815417, "learning_rate": 1.1469950421710486e-06, "loss": 0.744, "step": 9624 }, { "epoch": 0.79, "grad_norm": 4.134570910059159, "learning_rate": 1.1461515090727943e-06, "loss": 0.5166, "step": 9625 }, { "epoch": 0.79, "grad_norm": 3.2031665141097005, "learning_rate": 1.1453082461125465e-06, "loss": 0.7629, "step": 9626 }, { "epoch": 0.79, "grad_norm": 6.273922961464934, "learning_rate": 1.1444652533494143e-06, "loss": 1.3982, "step": 9627 }, { "epoch": 0.79, "grad_norm": 4.001798258529542, "learning_rate": 1.1436225308424885e-06, "loss": 0.504, "step": 9628 }, { "epoch": 0.79, "grad_norm": 4.180857843732888, "learning_rate": 1.1427800786508402e-06, "loss": 0.8142, "step": 9629 }, { "epoch": 0.79, "grad_norm": 2.3082931776411604, "learning_rate": 1.141937896833522e-06, "loss": 0.5347, "step": 9630 }, { "epoch": 0.79, "grad_norm": 4.208351716277365, "learning_rate": 1.1410959854495684e-06, "loss": 0.922, "step": 9631 }, { "epoch": 0.79, "grad_norm": 7.570266375131698, "learning_rate": 1.1402543445579905e-06, "loss": 0.8222, "step": 9632 }, { "epoch": 0.79, "grad_norm": 3.74728306378593, "learning_rate": 1.1394129742177856e-06, "loss": 0.7746, "step": 9633 }, { "epoch": 0.79, "grad_norm": 3.1241305119409484, "learning_rate": 1.1385718744879298e-06, "loss": 0.5867, "step": 9634 }, { "epoch": 0.79, "grad_norm": 3.49803535696854, "learning_rate": 1.1377310454273821e-06, "loss": 0.5357, "step": 9635 }, { "epoch": 0.79, "grad_norm": 4.578844931449383, "learning_rate": 1.1368904870950782e-06, "loss": 1.0146, "step": 9636 }, { "epoch": 0.79, "grad_norm": 2.642038477169935, "learning_rate": 1.1360501995499396e-06, "loss": 0.5953, "step": 9637 }, { "epoch": 0.79, "grad_norm": 3.7415699599739503, "learning_rate": 1.135210182850865e-06, "loss": 0.7982, "step": 9638 }, { "epoch": 0.79, "grad_norm": 5.355420188822306, "learning_rate": 1.1343704370567371e-06, "loss": 1.0446, "step": 9639 }, { "epoch": 0.79, "grad_norm": 5.726871691223727, "learning_rate": 1.1335309622264184e-06, "loss": 1.3793, "step": 9640 }, { "epoch": 0.79, "grad_norm": 5.050570243004476, "learning_rate": 1.1326917584187518e-06, "loss": 0.5335, "step": 9641 }, { "epoch": 0.79, "grad_norm": 3.4912518680649693, "learning_rate": 1.1318528256925642e-06, "loss": 0.5002, "step": 9642 }, { "epoch": 0.79, "grad_norm": 3.2478760852442887, "learning_rate": 1.131014164106658e-06, "loss": 0.4667, "step": 9643 }, { "epoch": 0.79, "grad_norm": 2.2488126936794814, "learning_rate": 1.1301757737198206e-06, "loss": 0.4027, "step": 9644 }, { "epoch": 0.79, "grad_norm": 4.655049431304579, "learning_rate": 1.1293376545908202e-06, "loss": 0.9878, "step": 9645 }, { "epoch": 0.79, "grad_norm": 3.700968314352387, "learning_rate": 1.1284998067784047e-06, "loss": 0.7646, "step": 9646 }, { "epoch": 0.79, "grad_norm": 4.00981019441697, "learning_rate": 1.1276622303413043e-06, "loss": 0.7577, "step": 9647 }, { "epoch": 0.79, "grad_norm": 5.716290315788904, "learning_rate": 1.1268249253382303e-06, "loss": 0.7271, "step": 9648 }, { "epoch": 0.79, "grad_norm": 4.505095945581661, "learning_rate": 1.1259878918278717e-06, "loss": 0.852, "step": 9649 }, { "epoch": 0.79, "grad_norm": 4.338128790190635, "learning_rate": 1.1251511298689015e-06, "loss": 0.4403, "step": 9650 }, { "epoch": 0.79, "grad_norm": 4.241555005222036, "learning_rate": 1.1243146395199762e-06, "loss": 0.9327, "step": 9651 }, { "epoch": 0.79, "grad_norm": 3.5833214319323226, "learning_rate": 1.1234784208397254e-06, "loss": 0.7184, "step": 9652 }, { "epoch": 0.79, "grad_norm": 2.954642674831938, "learning_rate": 1.122642473886767e-06, "loss": 0.5282, "step": 9653 }, { "epoch": 0.79, "grad_norm": 4.72774250321284, "learning_rate": 1.121806798719698e-06, "loss": 0.9484, "step": 9654 }, { "epoch": 0.79, "grad_norm": 4.9282920460344455, "learning_rate": 1.120971395397093e-06, "loss": 1.0189, "step": 9655 }, { "epoch": 0.79, "grad_norm": 4.462960738027389, "learning_rate": 1.120136263977512e-06, "loss": 0.8744, "step": 9656 }, { "epoch": 0.79, "grad_norm": 2.760146405525162, "learning_rate": 1.1193014045194934e-06, "loss": 0.4105, "step": 9657 }, { "epoch": 0.79, "grad_norm": 3.2457823955428218, "learning_rate": 1.1184668170815572e-06, "loss": 0.4571, "step": 9658 }, { "epoch": 0.79, "grad_norm": 5.166433479661119, "learning_rate": 1.117632501722205e-06, "loss": 0.7967, "step": 9659 }, { "epoch": 0.79, "grad_norm": 2.560238295755665, "learning_rate": 1.1167984584999197e-06, "loss": 0.3852, "step": 9660 }, { "epoch": 0.79, "grad_norm": 4.640499295917284, "learning_rate": 1.1159646874731612e-06, "loss": 0.7281, "step": 9661 }, { "epoch": 0.79, "grad_norm": 3.491885857740138, "learning_rate": 1.1151311887003747e-06, "loss": 0.6245, "step": 9662 }, { "epoch": 0.79, "grad_norm": 4.0728974812486936, "learning_rate": 1.1142979622399853e-06, "loss": 0.4791, "step": 9663 }, { "epoch": 0.79, "grad_norm": 5.048737541139456, "learning_rate": 1.1134650081503978e-06, "loss": 1.1579, "step": 9664 }, { "epoch": 0.79, "grad_norm": 5.080432675687764, "learning_rate": 1.1126323264900002e-06, "loss": 0.9797, "step": 9665 }, { "epoch": 0.79, "grad_norm": 2.7141021438171187, "learning_rate": 1.1117999173171574e-06, "loss": 0.2737, "step": 9666 }, { "epoch": 0.79, "grad_norm": 3.4687110977368856, "learning_rate": 1.1109677806902203e-06, "loss": 0.9324, "step": 9667 }, { "epoch": 0.79, "grad_norm": 3.114467256724678, "learning_rate": 1.1101359166675151e-06, "loss": 0.5055, "step": 9668 }, { "epoch": 0.79, "grad_norm": 4.174587067675621, "learning_rate": 1.1093043253073538e-06, "loss": 0.7105, "step": 9669 }, { "epoch": 0.79, "grad_norm": 4.8009407831622655, "learning_rate": 1.1084730066680267e-06, "loss": 1.0176, "step": 9670 }, { "epoch": 0.79, "grad_norm": 6.475202699175426, "learning_rate": 1.107641960807807e-06, "loss": 1.4798, "step": 9671 }, { "epoch": 0.79, "grad_norm": 3.953644598963639, "learning_rate": 1.1068111877849448e-06, "loss": 0.5494, "step": 9672 }, { "epoch": 0.79, "grad_norm": 3.8234011788648514, "learning_rate": 1.1059806876576756e-06, "loss": 0.7237, "step": 9673 }, { "epoch": 0.79, "grad_norm": 4.920376867534809, "learning_rate": 1.1051504604842128e-06, "loss": 0.6737, "step": 9674 }, { "epoch": 0.79, "grad_norm": 2.50548118931816, "learning_rate": 1.104320506322753e-06, "loss": 0.3674, "step": 9675 }, { "epoch": 0.79, "grad_norm": 4.377095839238889, "learning_rate": 1.1034908252314714e-06, "loss": 0.8886, "step": 9676 }, { "epoch": 0.79, "grad_norm": 3.931779261768232, "learning_rate": 1.1026614172685263e-06, "loss": 0.8574, "step": 9677 }, { "epoch": 0.79, "grad_norm": 3.4732582543823733, "learning_rate": 1.1018322824920535e-06, "loss": 0.4539, "step": 9678 }, { "epoch": 0.79, "grad_norm": 3.568627885041344, "learning_rate": 1.1010034209601727e-06, "loss": 0.7445, "step": 9679 }, { "epoch": 0.79, "grad_norm": 4.593252938454156, "learning_rate": 1.1001748327309835e-06, "loss": 0.8384, "step": 9680 }, { "epoch": 0.79, "grad_norm": 5.594480343185368, "learning_rate": 1.0993465178625678e-06, "loss": 1.0378, "step": 9681 }, { "epoch": 0.79, "grad_norm": 4.46661245479926, "learning_rate": 1.0985184764129847e-06, "loss": 0.8683, "step": 9682 }, { "epoch": 0.79, "grad_norm": 5.046667478036286, "learning_rate": 1.0976907084402776e-06, "loss": 0.9989, "step": 9683 }, { "epoch": 0.79, "grad_norm": 3.233168789512369, "learning_rate": 1.0968632140024683e-06, "loss": 0.7017, "step": 9684 }, { "epoch": 0.79, "grad_norm": 4.852547466308722, "learning_rate": 1.096035993157561e-06, "loss": 0.7889, "step": 9685 }, { "epoch": 0.79, "grad_norm": 5.7693518060992535, "learning_rate": 1.09520904596354e-06, "loss": 1.301, "step": 9686 }, { "epoch": 0.79, "grad_norm": 1.2474182959207654, "learning_rate": 1.0943823724783719e-06, "loss": 0.1488, "step": 9687 }, { "epoch": 0.79, "grad_norm": 1.3677448085476038, "learning_rate": 1.0935559727600032e-06, "loss": 0.2045, "step": 9688 }, { "epoch": 0.79, "grad_norm": 3.6865572644403133, "learning_rate": 1.0927298468663582e-06, "loss": 0.8139, "step": 9689 }, { "epoch": 0.79, "grad_norm": 5.141834242533534, "learning_rate": 1.0919039948553467e-06, "loss": 1.1825, "step": 9690 }, { "epoch": 0.79, "grad_norm": 2.9705959149538557, "learning_rate": 1.0910784167848576e-06, "loss": 0.5733, "step": 9691 }, { "epoch": 0.79, "grad_norm": 3.5182354410933647, "learning_rate": 1.090253112712759e-06, "loss": 0.4627, "step": 9692 }, { "epoch": 0.79, "grad_norm": 3.3906113326699754, "learning_rate": 1.0894280826969022e-06, "loss": 0.6566, "step": 9693 }, { "epoch": 0.79, "grad_norm": 3.356231320302676, "learning_rate": 1.0886033267951196e-06, "loss": 0.7698, "step": 9694 }, { "epoch": 0.79, "grad_norm": 4.742199082679943, "learning_rate": 1.0877788450652199e-06, "loss": 0.8193, "step": 9695 }, { "epoch": 0.79, "grad_norm": 3.7612987299533867, "learning_rate": 1.086954637564997e-06, "loss": 0.8393, "step": 9696 }, { "epoch": 0.79, "grad_norm": 3.8863342177328155, "learning_rate": 1.0861307043522256e-06, "loss": 0.8334, "step": 9697 }, { "epoch": 0.79, "grad_norm": 3.3466674184223657, "learning_rate": 1.085307045484657e-06, "loss": 0.4735, "step": 9698 }, { "epoch": 0.79, "grad_norm": 2.6521837509972768, "learning_rate": 1.0844836610200282e-06, "loss": 0.5738, "step": 9699 }, { "epoch": 0.79, "grad_norm": 4.035367081615931, "learning_rate": 1.0836605510160558e-06, "loss": 0.6813, "step": 9700 }, { "epoch": 0.79, "grad_norm": 4.302242602103993, "learning_rate": 1.0828377155304332e-06, "loss": 1.1911, "step": 9701 }, { "epoch": 0.79, "grad_norm": 4.31959290473899, "learning_rate": 1.082015154620839e-06, "loss": 1.1347, "step": 9702 }, { "epoch": 0.79, "grad_norm": 6.198033537390311, "learning_rate": 1.0811928683449318e-06, "loss": 0.9468, "step": 9703 }, { "epoch": 0.79, "grad_norm": 4.581324713236814, "learning_rate": 1.0803708567603493e-06, "loss": 0.756, "step": 9704 }, { "epoch": 0.79, "grad_norm": 2.227710907963709, "learning_rate": 1.0795491199247133e-06, "loss": 0.3648, "step": 9705 }, { "epoch": 0.79, "grad_norm": 3.7858849391276843, "learning_rate": 1.0787276578956207e-06, "loss": 0.6991, "step": 9706 }, { "epoch": 0.79, "grad_norm": 5.430426207389419, "learning_rate": 1.0779064707306536e-06, "loss": 1.1752, "step": 9707 }, { "epoch": 0.79, "grad_norm": 3.0675236089607156, "learning_rate": 1.077085558487374e-06, "loss": 0.5475, "step": 9708 }, { "epoch": 0.79, "grad_norm": 4.074170239303381, "learning_rate": 1.076264921223324e-06, "loss": 0.8862, "step": 9709 }, { "epoch": 0.79, "grad_norm": 4.14174743579678, "learning_rate": 1.0754445589960273e-06, "loss": 1.2746, "step": 9710 }, { "epoch": 0.79, "grad_norm": 3.7197812301940547, "learning_rate": 1.0746244718629883e-06, "loss": 0.6069, "step": 9711 }, { "epoch": 0.79, "grad_norm": 4.275241200789883, "learning_rate": 1.0738046598816891e-06, "loss": 0.6855, "step": 9712 }, { "epoch": 0.79, "grad_norm": 3.9473450395501235, "learning_rate": 1.0729851231095983e-06, "loss": 0.5234, "step": 9713 }, { "epoch": 0.79, "grad_norm": 4.624716296959476, "learning_rate": 1.0721658616041581e-06, "loss": 1.0341, "step": 9714 }, { "epoch": 0.79, "grad_norm": 3.18194587295983, "learning_rate": 1.0713468754227968e-06, "loss": 0.6545, "step": 9715 }, { "epoch": 0.79, "grad_norm": 3.67836566277044, "learning_rate": 1.0705281646229227e-06, "loss": 0.5493, "step": 9716 }, { "epoch": 0.79, "grad_norm": 3.899433654390494, "learning_rate": 1.0697097292619241e-06, "loss": 0.9894, "step": 9717 }, { "epoch": 0.79, "grad_norm": 4.512719535347329, "learning_rate": 1.0688915693971675e-06, "loss": 0.9985, "step": 9718 }, { "epoch": 0.79, "grad_norm": 1.598644989378368, "learning_rate": 1.0680736850860034e-06, "loss": 0.2486, "step": 9719 }, { "epoch": 0.79, "grad_norm": 5.536177573138868, "learning_rate": 1.0672560763857626e-06, "loss": 0.9369, "step": 9720 }, { "epoch": 0.79, "grad_norm": 5.5585393591174235, "learning_rate": 1.066438743353755e-06, "loss": 1.2031, "step": 9721 }, { "epoch": 0.79, "grad_norm": 5.132905482869254, "learning_rate": 1.065621686047274e-06, "loss": 1.0333, "step": 9722 }, { "epoch": 0.79, "grad_norm": 3.5796683304053936, "learning_rate": 1.0648049045235891e-06, "loss": 0.6483, "step": 9723 }, { "epoch": 0.79, "grad_norm": 3.0256589186041563, "learning_rate": 1.0639883988399547e-06, "loss": 0.7105, "step": 9724 }, { "epoch": 0.79, "grad_norm": 6.390922431470076, "learning_rate": 1.0631721690536034e-06, "loss": 0.6557, "step": 9725 }, { "epoch": 0.79, "grad_norm": 2.1932750602916995, "learning_rate": 1.0623562152217503e-06, "loss": 0.3335, "step": 9726 }, { "epoch": 0.8, "grad_norm": 5.586248012852279, "learning_rate": 1.0615405374015913e-06, "loss": 0.6929, "step": 9727 }, { "epoch": 0.8, "grad_norm": 2.199738806020164, "learning_rate": 1.060725135650299e-06, "loss": 0.1826, "step": 9728 }, { "epoch": 0.8, "grad_norm": 4.001062988625413, "learning_rate": 1.059910010025032e-06, "loss": 0.5847, "step": 9729 }, { "epoch": 0.8, "grad_norm": 4.126468899802749, "learning_rate": 1.0590951605829247e-06, "loss": 0.9041, "step": 9730 }, { "epoch": 0.8, "grad_norm": 4.114543696558655, "learning_rate": 1.0582805873810959e-06, "loss": 1.0175, "step": 9731 }, { "epoch": 0.8, "grad_norm": 5.226737038244887, "learning_rate": 1.0574662904766432e-06, "loss": 0.9064, "step": 9732 }, { "epoch": 0.8, "grad_norm": 2.348937215859812, "learning_rate": 1.0566522699266457e-06, "loss": 0.398, "step": 9733 }, { "epoch": 0.8, "grad_norm": 4.014006204755118, "learning_rate": 1.0558385257881637e-06, "loss": 0.8706, "step": 9734 }, { "epoch": 0.8, "grad_norm": 2.926980563123289, "learning_rate": 1.0550250581182353e-06, "loss": 0.4947, "step": 9735 }, { "epoch": 0.8, "grad_norm": 4.774309576349225, "learning_rate": 1.054211866973881e-06, "loss": 1.4669, "step": 9736 }, { "epoch": 0.8, "grad_norm": 6.424093652109563, "learning_rate": 1.053398952412103e-06, "loss": 1.2049, "step": 9737 }, { "epoch": 0.8, "grad_norm": 3.4743138653694388, "learning_rate": 1.052586314489883e-06, "loss": 0.6989, "step": 9738 }, { "epoch": 0.8, "grad_norm": 3.101626911918696, "learning_rate": 1.051773953264183e-06, "loss": 0.4607, "step": 9739 }, { "epoch": 0.8, "grad_norm": 3.6506214341008953, "learning_rate": 1.0509618687919476e-06, "loss": 1.0164, "step": 9740 }, { "epoch": 0.8, "grad_norm": 2.475059228286483, "learning_rate": 1.0501500611300974e-06, "loss": 0.4186, "step": 9741 }, { "epoch": 0.8, "grad_norm": 3.1277552416511982, "learning_rate": 1.049338530335538e-06, "loss": 0.4358, "step": 9742 }, { "epoch": 0.8, "grad_norm": 5.009423272641236, "learning_rate": 1.0485272764651543e-06, "loss": 0.7617, "step": 9743 }, { "epoch": 0.8, "grad_norm": 2.815574150796619, "learning_rate": 1.0477162995758133e-06, "loss": 0.5747, "step": 9744 }, { "epoch": 0.8, "grad_norm": 2.4430449005631587, "learning_rate": 1.0469055997243578e-06, "loss": 0.3788, "step": 9745 }, { "epoch": 0.8, "grad_norm": 2.962221865232792, "learning_rate": 1.0460951769676175e-06, "loss": 0.353, "step": 9746 }, { "epoch": 0.8, "grad_norm": 3.4239971821571578, "learning_rate": 1.0452850313623958e-06, "loss": 0.573, "step": 9747 }, { "epoch": 0.8, "grad_norm": 1.0293272781540777, "learning_rate": 1.0444751629654831e-06, "loss": 0.1605, "step": 9748 }, { "epoch": 0.8, "grad_norm": 5.101613154249267, "learning_rate": 1.0436655718336464e-06, "loss": 0.6731, "step": 9749 }, { "epoch": 0.8, "grad_norm": 1.113365293730688, "learning_rate": 1.0428562580236358e-06, "loss": 0.175, "step": 9750 }, { "epoch": 0.8, "grad_norm": 5.105455382948563, "learning_rate": 1.0420472215921807e-06, "loss": 1.1378, "step": 9751 }, { "epoch": 0.8, "grad_norm": 3.7861055323893917, "learning_rate": 1.0412384625959887e-06, "loss": 1.08, "step": 9752 }, { "epoch": 0.8, "grad_norm": 2.675653711131224, "learning_rate": 1.0404299810917523e-06, "loss": 0.4479, "step": 9753 }, { "epoch": 0.8, "grad_norm": 4.335133964402627, "learning_rate": 1.0396217771361422e-06, "loss": 0.5115, "step": 9754 }, { "epoch": 0.8, "grad_norm": 4.980571756794141, "learning_rate": 1.0388138507858098e-06, "loss": 1.1913, "step": 9755 }, { "epoch": 0.8, "grad_norm": 3.8508596544370906, "learning_rate": 1.0380062020973875e-06, "loss": 0.6614, "step": 9756 }, { "epoch": 0.8, "grad_norm": 4.914488203777597, "learning_rate": 1.037198831127489e-06, "loss": 0.7279, "step": 9757 }, { "epoch": 0.8, "grad_norm": 3.7708154404824783, "learning_rate": 1.036391737932705e-06, "loss": 0.7997, "step": 9758 }, { "epoch": 0.8, "grad_norm": 4.380952909168642, "learning_rate": 1.0355849225696102e-06, "loss": 0.7888, "step": 9759 }, { "epoch": 0.8, "grad_norm": 4.42437832779043, "learning_rate": 1.0347783850947606e-06, "loss": 0.5281, "step": 9760 }, { "epoch": 0.8, "grad_norm": 5.508575678821504, "learning_rate": 1.0339721255646885e-06, "loss": 1.3033, "step": 9761 }, { "epoch": 0.8, "grad_norm": 3.93822300897433, "learning_rate": 1.0331661440359114e-06, "loss": 0.7981, "step": 9762 }, { "epoch": 0.8, "grad_norm": 4.311374690150685, "learning_rate": 1.0323604405649224e-06, "loss": 0.8647, "step": 9763 }, { "epoch": 0.8, "grad_norm": 3.0385444435253683, "learning_rate": 1.0315550152081988e-06, "loss": 0.5561, "step": 9764 }, { "epoch": 0.8, "grad_norm": 3.508625854582256, "learning_rate": 1.0307498680221988e-06, "loss": 0.4562, "step": 9765 }, { "epoch": 0.8, "grad_norm": 2.6243096831556865, "learning_rate": 1.029944999063358e-06, "loss": 0.2955, "step": 9766 }, { "epoch": 0.8, "grad_norm": 2.7360148103370268, "learning_rate": 1.0291404083880957e-06, "loss": 0.3317, "step": 9767 }, { "epoch": 0.8, "grad_norm": 5.045643567688735, "learning_rate": 1.0283360960528104e-06, "loss": 0.9755, "step": 9768 }, { "epoch": 0.8, "grad_norm": 4.892143673884387, "learning_rate": 1.027532062113879e-06, "loss": 0.8521, "step": 9769 }, { "epoch": 0.8, "grad_norm": 4.793368296844829, "learning_rate": 1.0267283066276618e-06, "loss": 1.1566, "step": 9770 }, { "epoch": 0.8, "grad_norm": 1.9362162470033908, "learning_rate": 1.0259248296504986e-06, "loss": 0.1881, "step": 9771 }, { "epoch": 0.8, "grad_norm": 2.454044760591829, "learning_rate": 1.025121631238709e-06, "loss": 0.5303, "step": 9772 }, { "epoch": 0.8, "grad_norm": 2.8740149655278215, "learning_rate": 1.0243187114485953e-06, "loss": 0.4281, "step": 9773 }, { "epoch": 0.8, "grad_norm": 3.395325037236486, "learning_rate": 1.0235160703364384e-06, "loss": 0.5552, "step": 9774 }, { "epoch": 0.8, "grad_norm": 4.774354601146491, "learning_rate": 1.022713707958498e-06, "loss": 1.0098, "step": 9775 }, { "epoch": 0.8, "grad_norm": 4.109845239922136, "learning_rate": 1.0219116243710192e-06, "loss": 0.4951, "step": 9776 }, { "epoch": 0.8, "grad_norm": 3.589572101861237, "learning_rate": 1.021109819630221e-06, "loss": 0.9188, "step": 9777 }, { "epoch": 0.8, "grad_norm": 2.2871748538178953, "learning_rate": 1.0203082937923082e-06, "loss": 0.6727, "step": 9778 }, { "epoch": 0.8, "grad_norm": 5.300448811565389, "learning_rate": 1.019507046913465e-06, "loss": 0.7887, "step": 9779 }, { "epoch": 0.8, "grad_norm": 3.7733135121297705, "learning_rate": 1.0187060790498553e-06, "loss": 0.821, "step": 9780 }, { "epoch": 0.8, "grad_norm": 5.4690246160815486, "learning_rate": 1.0179053902576214e-06, "loss": 0.9153, "step": 9781 }, { "epoch": 0.8, "grad_norm": 2.9915957436347544, "learning_rate": 1.01710498059289e-06, "loss": 0.6375, "step": 9782 }, { "epoch": 0.8, "grad_norm": 4.057028414868053, "learning_rate": 1.0163048501117657e-06, "loss": 1.1214, "step": 9783 }, { "epoch": 0.8, "grad_norm": 3.5991348622918498, "learning_rate": 1.0155049988703342e-06, "loss": 0.4839, "step": 9784 }, { "epoch": 0.8, "grad_norm": 4.125793983286334, "learning_rate": 1.014705426924663e-06, "loss": 0.7087, "step": 9785 }, { "epoch": 0.8, "grad_norm": 4.105332917713791, "learning_rate": 1.013906134330796e-06, "loss": 0.5788, "step": 9786 }, { "epoch": 0.8, "grad_norm": 5.982479435308393, "learning_rate": 1.013107121144762e-06, "loss": 1.4878, "step": 9787 }, { "epoch": 0.8, "grad_norm": 5.090355400449551, "learning_rate": 1.012308387422567e-06, "loss": 0.8028, "step": 9788 }, { "epoch": 0.8, "grad_norm": 3.6454026800282806, "learning_rate": 1.0115099332201999e-06, "loss": 0.7259, "step": 9789 }, { "epoch": 0.8, "grad_norm": 3.3975299683156943, "learning_rate": 1.01071175859363e-06, "loss": 0.4905, "step": 9790 }, { "epoch": 0.8, "grad_norm": 4.916271590526557, "learning_rate": 1.0099138635988026e-06, "loss": 0.8442, "step": 9791 }, { "epoch": 0.8, "grad_norm": 3.5728726610791397, "learning_rate": 1.00911624829165e-06, "loss": 0.7227, "step": 9792 }, { "epoch": 0.8, "grad_norm": 5.115132988655575, "learning_rate": 1.008318912728079e-06, "loss": 0.6475, "step": 9793 }, { "epoch": 0.8, "grad_norm": 3.904296494945222, "learning_rate": 1.00752185696398e-06, "loss": 0.5609, "step": 9794 }, { "epoch": 0.8, "grad_norm": 4.899616251799633, "learning_rate": 1.0067250810552236e-06, "loss": 0.8103, "step": 9795 }, { "epoch": 0.8, "grad_norm": 2.8749327110273764, "learning_rate": 1.00592858505766e-06, "loss": 0.5936, "step": 9796 }, { "epoch": 0.8, "grad_norm": 3.029676630552301, "learning_rate": 1.005132369027122e-06, "loss": 0.6193, "step": 9797 }, { "epoch": 0.8, "grad_norm": 4.158077519218264, "learning_rate": 1.0043364330194178e-06, "loss": 0.7681, "step": 9798 }, { "epoch": 0.8, "grad_norm": 5.9746467392254266, "learning_rate": 1.0035407770903405e-06, "loss": 1.0636, "step": 9799 }, { "epoch": 0.8, "grad_norm": 2.37050920443815, "learning_rate": 1.0027454012956617e-06, "loss": 0.4571, "step": 9800 }, { "epoch": 0.8, "grad_norm": 6.804320790908561, "learning_rate": 1.0019503056911346e-06, "loss": 1.0382, "step": 9801 }, { "epoch": 0.8, "grad_norm": 5.35103012277236, "learning_rate": 1.0011554903324928e-06, "loss": 1.043, "step": 9802 }, { "epoch": 0.8, "grad_norm": 4.608746779516858, "learning_rate": 1.0003609552754468e-06, "loss": 0.7013, "step": 9803 }, { "epoch": 0.8, "grad_norm": 3.1014704714558783, "learning_rate": 9.995667005756909e-07, "loss": 0.3483, "step": 9804 }, { "epoch": 0.8, "grad_norm": 6.257377286497344, "learning_rate": 9.987727262888997e-07, "loss": 0.7886, "step": 9805 }, { "epoch": 0.8, "grad_norm": 4.308433224780674, "learning_rate": 9.979790324707284e-07, "loss": 0.7515, "step": 9806 }, { "epoch": 0.8, "grad_norm": 5.4792480740472165, "learning_rate": 9.971856191768086e-07, "loss": 0.9002, "step": 9807 }, { "epoch": 0.8, "grad_norm": 4.140277972857411, "learning_rate": 9.963924864627578e-07, "loss": 0.9105, "step": 9808 }, { "epoch": 0.8, "grad_norm": 3.914102260867357, "learning_rate": 9.95599634384169e-07, "loss": 0.9481, "step": 9809 }, { "epoch": 0.8, "grad_norm": 1.513118226771093, "learning_rate": 9.948070629966183e-07, "loss": 0.169, "step": 9810 }, { "epoch": 0.8, "grad_norm": 2.7295513628394326, "learning_rate": 9.940147723556614e-07, "loss": 0.6453, "step": 9811 }, { "epoch": 0.8, "grad_norm": 3.5774326972108734, "learning_rate": 9.932227625168356e-07, "loss": 0.5254, "step": 9812 }, { "epoch": 0.8, "grad_norm": 5.602790864794578, "learning_rate": 9.924310335356563e-07, "loss": 1.1936, "step": 9813 }, { "epoch": 0.8, "grad_norm": 4.360797507501289, "learning_rate": 9.91639585467622e-07, "loss": 0.8715, "step": 9814 }, { "epoch": 0.8, "grad_norm": 3.4461678530552238, "learning_rate": 9.908484183682065e-07, "loss": 0.666, "step": 9815 }, { "epoch": 0.8, "grad_norm": 1.760828772809022, "learning_rate": 9.900575322928696e-07, "loss": 0.3425, "step": 9816 }, { "epoch": 0.8, "grad_norm": 4.34303928784837, "learning_rate": 9.892669272970485e-07, "loss": 0.9513, "step": 9817 }, { "epoch": 0.8, "grad_norm": 5.345416542482766, "learning_rate": 9.884766034361604e-07, "loss": 1.0784, "step": 9818 }, { "epoch": 0.8, "grad_norm": 3.706912283533461, "learning_rate": 9.876865607656045e-07, "loss": 0.466, "step": 9819 }, { "epoch": 0.8, "grad_norm": 2.9258263158249984, "learning_rate": 9.868967993407603e-07, "loss": 0.5956, "step": 9820 }, { "epoch": 0.8, "grad_norm": 3.0342284251333296, "learning_rate": 9.86107319216984e-07, "loss": 0.5981, "step": 9821 }, { "epoch": 0.8, "grad_norm": 4.143853090321942, "learning_rate": 9.853181204496176e-07, "loss": 0.7826, "step": 9822 }, { "epoch": 0.8, "grad_norm": 4.106256324176616, "learning_rate": 9.845292030939775e-07, "loss": 0.7144, "step": 9823 }, { "epoch": 0.8, "grad_norm": 4.946979660030996, "learning_rate": 9.837405672053651e-07, "loss": 0.8796, "step": 9824 }, { "epoch": 0.8, "grad_norm": 4.094283608807544, "learning_rate": 9.829522128390611e-07, "loss": 0.678, "step": 9825 }, { "epoch": 0.8, "grad_norm": 1.2068649294230744, "learning_rate": 9.821641400503235e-07, "loss": 0.1685, "step": 9826 }, { "epoch": 0.8, "grad_norm": 2.9386882985967446, "learning_rate": 9.813763488943946e-07, "loss": 0.5521, "step": 9827 }, { "epoch": 0.8, "grad_norm": 3.1806468055796757, "learning_rate": 9.80588839426494e-07, "loss": 0.5293, "step": 9828 }, { "epoch": 0.8, "grad_norm": 5.376950259944358, "learning_rate": 9.798016117018233e-07, "loss": 1.2063, "step": 9829 }, { "epoch": 0.8, "grad_norm": 3.6004839039357366, "learning_rate": 9.790146657755633e-07, "loss": 0.7438, "step": 9830 }, { "epoch": 0.8, "grad_norm": 4.17331895871248, "learning_rate": 9.782280017028777e-07, "loss": 0.9275, "step": 9831 }, { "epoch": 0.8, "grad_norm": 4.179139915704291, "learning_rate": 9.774416195389046e-07, "loss": 0.4853, "step": 9832 }, { "epoch": 0.8, "grad_norm": 4.576781743034318, "learning_rate": 9.766555193387683e-07, "loss": 1.141, "step": 9833 }, { "epoch": 0.8, "grad_norm": 4.229859663170041, "learning_rate": 9.7586970115757e-07, "loss": 1.069, "step": 9834 }, { "epoch": 0.8, "grad_norm": 5.163122161832738, "learning_rate": 9.750841650503928e-07, "loss": 1.1175, "step": 9835 }, { "epoch": 0.8, "grad_norm": 1.2391700379530222, "learning_rate": 9.742989110723e-07, "loss": 0.1761, "step": 9836 }, { "epoch": 0.8, "grad_norm": 2.460523201070169, "learning_rate": 9.735139392783326e-07, "loss": 0.3726, "step": 9837 }, { "epoch": 0.8, "grad_norm": 3.9994436411899814, "learning_rate": 9.727292497235151e-07, "loss": 0.8943, "step": 9838 }, { "epoch": 0.8, "grad_norm": 3.0603174081227125, "learning_rate": 9.719448424628514e-07, "loss": 0.6165, "step": 9839 }, { "epoch": 0.8, "grad_norm": 3.8683916770844027, "learning_rate": 9.711607175513228e-07, "loss": 0.7775, "step": 9840 }, { "epoch": 0.8, "grad_norm": 3.3962013038998338, "learning_rate": 9.70376875043894e-07, "loss": 0.5398, "step": 9841 }, { "epoch": 0.8, "grad_norm": 4.466230612997388, "learning_rate": 9.695933149955111e-07, "loss": 0.8821, "step": 9842 }, { "epoch": 0.8, "grad_norm": 3.126243935036026, "learning_rate": 9.688100374610953e-07, "loss": 0.5089, "step": 9843 }, { "epoch": 0.8, "grad_norm": 4.075033137777421, "learning_rate": 9.68027042495552e-07, "loss": 0.8537, "step": 9844 }, { "epoch": 0.8, "grad_norm": 5.044256011589565, "learning_rate": 9.672443301537654e-07, "loss": 0.8834, "step": 9845 }, { "epoch": 0.8, "grad_norm": 3.9775589520839407, "learning_rate": 9.664619004906007e-07, "loss": 0.7768, "step": 9846 }, { "epoch": 0.8, "grad_norm": 3.532802096795839, "learning_rate": 9.65679753560903e-07, "loss": 0.7676, "step": 9847 }, { "epoch": 0.8, "grad_norm": 4.9665152868615134, "learning_rate": 9.648978894194983e-07, "loss": 0.7077, "step": 9848 }, { "epoch": 0.81, "grad_norm": 2.497109619755261, "learning_rate": 9.641163081211891e-07, "loss": 0.542, "step": 9849 }, { "epoch": 0.81, "grad_norm": 4.00698599759604, "learning_rate": 9.633350097207628e-07, "loss": 0.8032, "step": 9850 }, { "epoch": 0.81, "grad_norm": 3.3863573071418904, "learning_rate": 9.62553994272985e-07, "loss": 0.5541, "step": 9851 }, { "epoch": 0.81, "grad_norm": 3.247677039720678, "learning_rate": 9.61773261832601e-07, "loss": 0.5179, "step": 9852 }, { "epoch": 0.81, "grad_norm": 3.8418076360091997, "learning_rate": 9.609928124543376e-07, "loss": 0.6889, "step": 9853 }, { "epoch": 0.81, "grad_norm": 3.118237736916841, "learning_rate": 9.602126461929002e-07, "loss": 0.8431, "step": 9854 }, { "epoch": 0.81, "grad_norm": 2.686936764023641, "learning_rate": 9.594327631029753e-07, "loss": 0.2751, "step": 9855 }, { "epoch": 0.81, "grad_norm": 2.640907225065898, "learning_rate": 9.586531632392282e-07, "loss": 0.5965, "step": 9856 }, { "epoch": 0.81, "grad_norm": 2.207380321843211, "learning_rate": 9.578738466563065e-07, "loss": 0.2198, "step": 9857 }, { "epoch": 0.81, "grad_norm": 3.2635923528591935, "learning_rate": 9.570948134088364e-07, "loss": 0.7252, "step": 9858 }, { "epoch": 0.81, "grad_norm": 4.105489324566011, "learning_rate": 9.563160635514252e-07, "loss": 0.6897, "step": 9859 }, { "epoch": 0.81, "grad_norm": 5.64835001657043, "learning_rate": 9.55537597138661e-07, "loss": 0.6154, "step": 9860 }, { "epoch": 0.81, "grad_norm": 3.7978146569040385, "learning_rate": 9.547594142251089e-07, "loss": 0.7363, "step": 9861 }, { "epoch": 0.81, "grad_norm": 1.7402329161412262, "learning_rate": 9.539815148653163e-07, "loss": 0.1861, "step": 9862 }, { "epoch": 0.81, "grad_norm": 3.53530084807777, "learning_rate": 9.532038991138115e-07, "loss": 0.863, "step": 9863 }, { "epoch": 0.81, "grad_norm": 4.914928019255762, "learning_rate": 9.524265670251015e-07, "loss": 0.7861, "step": 9864 }, { "epoch": 0.81, "grad_norm": 3.5257229406622033, "learning_rate": 9.516495186536751e-07, "loss": 0.7089, "step": 9865 }, { "epoch": 0.81, "grad_norm": 4.417659463572757, "learning_rate": 9.508727540539981e-07, "loss": 1.1149, "step": 9866 }, { "epoch": 0.81, "grad_norm": 4.709118153703828, "learning_rate": 9.500962732805192e-07, "loss": 0.6207, "step": 9867 }, { "epoch": 0.81, "grad_norm": 5.524006700894052, "learning_rate": 9.493200763876658e-07, "loss": 0.7884, "step": 9868 }, { "epoch": 0.81, "grad_norm": 2.996789862109213, "learning_rate": 9.485441634298482e-07, "loss": 0.5168, "step": 9869 }, { "epoch": 0.81, "grad_norm": 3.0079425193436378, "learning_rate": 9.477685344614517e-07, "loss": 0.5929, "step": 9870 }, { "epoch": 0.81, "grad_norm": 3.3091738066070024, "learning_rate": 9.469931895368462e-07, "loss": 0.5454, "step": 9871 }, { "epoch": 0.81, "grad_norm": 4.8737147416684445, "learning_rate": 9.462181287103783e-07, "loss": 1.1663, "step": 9872 }, { "epoch": 0.81, "grad_norm": 2.910258465431136, "learning_rate": 9.454433520363776e-07, "loss": 0.5832, "step": 9873 }, { "epoch": 0.81, "grad_norm": 4.3697371231984325, "learning_rate": 9.446688595691522e-07, "loss": 0.9605, "step": 9874 }, { "epoch": 0.81, "grad_norm": 3.0701907349081323, "learning_rate": 9.438946513629915e-07, "loss": 0.7593, "step": 9875 }, { "epoch": 0.81, "grad_norm": 4.494410910517011, "learning_rate": 9.431207274721627e-07, "loss": 0.8716, "step": 9876 }, { "epoch": 0.81, "grad_norm": 5.872713129768486, "learning_rate": 9.423470879509172e-07, "loss": 1.1377, "step": 9877 }, { "epoch": 0.81, "grad_norm": 3.8073964996011616, "learning_rate": 9.415737328534802e-07, "loss": 0.5902, "step": 9878 }, { "epoch": 0.81, "grad_norm": 3.960128248000555, "learning_rate": 9.408006622340627e-07, "loss": 0.6662, "step": 9879 }, { "epoch": 0.81, "grad_norm": 3.8411134500064015, "learning_rate": 9.400278761468523e-07, "loss": 0.6525, "step": 9880 }, { "epoch": 0.81, "grad_norm": 5.271183552028433, "learning_rate": 9.392553746460193e-07, "loss": 0.8974, "step": 9881 }, { "epoch": 0.81, "grad_norm": 3.6937126757581447, "learning_rate": 9.384831577857135e-07, "loss": 0.8745, "step": 9882 }, { "epoch": 0.81, "grad_norm": 3.918194333309644, "learning_rate": 9.37711225620061e-07, "loss": 0.6488, "step": 9883 }, { "epoch": 0.81, "grad_norm": 2.768034398152464, "learning_rate": 9.36939578203172e-07, "loss": 0.5795, "step": 9884 }, { "epoch": 0.81, "grad_norm": 3.5521280178173114, "learning_rate": 9.361682155891382e-07, "loss": 0.492, "step": 9885 }, { "epoch": 0.81, "grad_norm": 5.010274233421411, "learning_rate": 9.353971378320248e-07, "loss": 0.9207, "step": 9886 }, { "epoch": 0.81, "grad_norm": 1.6367400905396001, "learning_rate": 9.346263449858828e-07, "loss": 0.3406, "step": 9887 }, { "epoch": 0.81, "grad_norm": 5.299231533318858, "learning_rate": 9.338558371047429e-07, "loss": 0.9699, "step": 9888 }, { "epoch": 0.81, "grad_norm": 4.455281392120958, "learning_rate": 9.33085614242612e-07, "loss": 0.479, "step": 9889 }, { "epoch": 0.81, "grad_norm": 4.576101144784835, "learning_rate": 9.323156764534797e-07, "loss": 0.8125, "step": 9890 }, { "epoch": 0.81, "grad_norm": 6.001248491978731, "learning_rate": 9.315460237913159e-07, "loss": 0.9924, "step": 9891 }, { "epoch": 0.81, "grad_norm": 4.043072769369696, "learning_rate": 9.3077665631007e-07, "loss": 1.0591, "step": 9892 }, { "epoch": 0.81, "grad_norm": 2.4866963629472107, "learning_rate": 9.300075740636716e-07, "loss": 0.3196, "step": 9893 }, { "epoch": 0.81, "grad_norm": 2.8031433196931106, "learning_rate": 9.292387771060302e-07, "loss": 0.7287, "step": 9894 }, { "epoch": 0.81, "grad_norm": 3.311911836138839, "learning_rate": 9.284702654910338e-07, "loss": 0.5071, "step": 9895 }, { "epoch": 0.81, "grad_norm": 2.4149683062187797, "learning_rate": 9.277020392725522e-07, "loss": 0.3489, "step": 9896 }, { "epoch": 0.81, "grad_norm": 4.1060268580376, "learning_rate": 9.269340985044345e-07, "loss": 0.9197, "step": 9897 }, { "epoch": 0.81, "grad_norm": 1.2150900519384455, "learning_rate": 9.261664432405109e-07, "loss": 0.1616, "step": 9898 }, { "epoch": 0.81, "grad_norm": 3.005804801362847, "learning_rate": 9.253990735345914e-07, "loss": 0.6852, "step": 9899 }, { "epoch": 0.81, "grad_norm": 4.974940928049727, "learning_rate": 9.246319894404632e-07, "loss": 0.7249, "step": 9900 }, { "epoch": 0.81, "grad_norm": 4.038730562058925, "learning_rate": 9.238651910118973e-07, "loss": 0.855, "step": 9901 }, { "epoch": 0.81, "grad_norm": 3.3181916794751687, "learning_rate": 9.230986783026413e-07, "loss": 0.4042, "step": 9902 }, { "epoch": 0.81, "grad_norm": 4.576947794174118, "learning_rate": 9.223324513664245e-07, "loss": 0.7459, "step": 9903 }, { "epoch": 0.81, "grad_norm": 3.720413585163589, "learning_rate": 9.215665102569577e-07, "loss": 0.7138, "step": 9904 }, { "epoch": 0.81, "grad_norm": 4.406165342302664, "learning_rate": 9.208008550279296e-07, "loss": 0.5856, "step": 9905 }, { "epoch": 0.81, "grad_norm": 3.613024596190667, "learning_rate": 9.20035485733008e-07, "loss": 0.5525, "step": 9906 }, { "epoch": 0.81, "grad_norm": 5.11050019329382, "learning_rate": 9.192704024258426e-07, "loss": 1.169, "step": 9907 }, { "epoch": 0.81, "grad_norm": 4.645508394267574, "learning_rate": 9.185056051600627e-07, "loss": 0.8694, "step": 9908 }, { "epoch": 0.81, "grad_norm": 3.5518757837879007, "learning_rate": 9.177410939892772e-07, "loss": 0.5554, "step": 9909 }, { "epoch": 0.81, "grad_norm": 3.638157839908345, "learning_rate": 9.169768689670749e-07, "loss": 0.9099, "step": 9910 }, { "epoch": 0.81, "grad_norm": 3.268801656265276, "learning_rate": 9.162129301470258e-07, "loss": 0.8827, "step": 9911 }, { "epoch": 0.81, "grad_norm": 3.9296386685375695, "learning_rate": 9.154492775826762e-07, "loss": 0.8059, "step": 9912 }, { "epoch": 0.81, "grad_norm": 1.9105302074156747, "learning_rate": 9.146859113275569e-07, "loss": 0.333, "step": 9913 }, { "epoch": 0.81, "grad_norm": 4.412695156513237, "learning_rate": 9.13922831435175e-07, "loss": 0.6103, "step": 9914 }, { "epoch": 0.81, "grad_norm": 3.5759420864804925, "learning_rate": 9.131600379590222e-07, "loss": 0.7136, "step": 9915 }, { "epoch": 0.81, "grad_norm": 3.7030317891351676, "learning_rate": 9.123975309525629e-07, "loss": 0.7758, "step": 9916 }, { "epoch": 0.81, "grad_norm": 5.236943959535839, "learning_rate": 9.116353104692488e-07, "loss": 0.8042, "step": 9917 }, { "epoch": 0.81, "grad_norm": 4.01181731080616, "learning_rate": 9.10873376562505e-07, "loss": 0.5553, "step": 9918 }, { "epoch": 0.81, "grad_norm": 2.9999144714800794, "learning_rate": 9.10111729285742e-07, "loss": 0.4767, "step": 9919 }, { "epoch": 0.81, "grad_norm": 4.079564436691244, "learning_rate": 9.093503686923477e-07, "loss": 0.7797, "step": 9920 }, { "epoch": 0.81, "grad_norm": 3.242097952262364, "learning_rate": 9.0858929483569e-07, "loss": 0.6149, "step": 9921 }, { "epoch": 0.81, "grad_norm": 5.012721973636521, "learning_rate": 9.078285077691179e-07, "loss": 0.6988, "step": 9922 }, { "epoch": 0.81, "grad_norm": 1.9548161095069063, "learning_rate": 9.07068007545957e-07, "loss": 0.2971, "step": 9923 }, { "epoch": 0.81, "grad_norm": 4.129316033445483, "learning_rate": 9.063077942195164e-07, "loss": 0.8209, "step": 9924 }, { "epoch": 0.81, "grad_norm": 4.084635351098784, "learning_rate": 9.055478678430835e-07, "loss": 1.014, "step": 9925 }, { "epoch": 0.81, "grad_norm": 2.8229568048217413, "learning_rate": 9.047882284699255e-07, "loss": 0.555, "step": 9926 }, { "epoch": 0.81, "grad_norm": 3.2574689239458445, "learning_rate": 9.040288761532911e-07, "loss": 0.4482, "step": 9927 }, { "epoch": 0.81, "grad_norm": 2.806892944470939, "learning_rate": 9.032698109464072e-07, "loss": 0.5379, "step": 9928 }, { "epoch": 0.81, "grad_norm": 4.991868694121024, "learning_rate": 9.0251103290248e-07, "loss": 1.1004, "step": 9929 }, { "epoch": 0.81, "grad_norm": 5.024159326541445, "learning_rate": 9.017525420746964e-07, "loss": 0.703, "step": 9930 }, { "epoch": 0.81, "grad_norm": 3.5474833957255796, "learning_rate": 9.009943385162256e-07, "loss": 0.7723, "step": 9931 }, { "epoch": 0.81, "grad_norm": 2.7371136350187237, "learning_rate": 9.002364222802118e-07, "loss": 0.5427, "step": 9932 }, { "epoch": 0.81, "grad_norm": 3.6272837149566324, "learning_rate": 8.994787934197819e-07, "loss": 0.8529, "step": 9933 }, { "epoch": 0.81, "grad_norm": 3.4494997421343547, "learning_rate": 8.987214519880449e-07, "loss": 0.5914, "step": 9934 }, { "epoch": 0.81, "grad_norm": 3.044674802676371, "learning_rate": 8.979643980380837e-07, "loss": 0.4443, "step": 9935 }, { "epoch": 0.81, "grad_norm": 4.450958132122734, "learning_rate": 8.972076316229661e-07, "loss": 0.8514, "step": 9936 }, { "epoch": 0.81, "grad_norm": 3.997254308784755, "learning_rate": 8.964511527957382e-07, "loss": 0.981, "step": 9937 }, { "epoch": 0.81, "grad_norm": 3.8162827170779, "learning_rate": 8.956949616094257e-07, "loss": 0.9472, "step": 9938 }, { "epoch": 0.81, "grad_norm": 1.6060584816750842, "learning_rate": 8.949390581170341e-07, "loss": 0.2576, "step": 9939 }, { "epoch": 0.81, "grad_norm": 2.704545234891308, "learning_rate": 8.941834423715512e-07, "loss": 0.5923, "step": 9940 }, { "epoch": 0.81, "grad_norm": 3.3713590366128905, "learning_rate": 8.934281144259388e-07, "loss": 0.5801, "step": 9941 }, { "epoch": 0.81, "grad_norm": 2.4640626424930945, "learning_rate": 8.926730743331436e-07, "loss": 0.4416, "step": 9942 }, { "epoch": 0.81, "grad_norm": 4.604280564084286, "learning_rate": 8.919183221460909e-07, "loss": 0.6132, "step": 9943 }, { "epoch": 0.81, "grad_norm": 3.4174402956829706, "learning_rate": 8.911638579176851e-07, "loss": 0.5593, "step": 9944 }, { "epoch": 0.81, "grad_norm": 3.2855548280997926, "learning_rate": 8.904096817008129e-07, "loss": 1.0127, "step": 9945 }, { "epoch": 0.81, "grad_norm": 4.4040226749869, "learning_rate": 8.896557935483352e-07, "loss": 0.7921, "step": 9946 }, { "epoch": 0.81, "grad_norm": 4.270894329429153, "learning_rate": 8.889021935130987e-07, "loss": 0.8391, "step": 9947 }, { "epoch": 0.81, "grad_norm": 5.358811433320635, "learning_rate": 8.881488816479278e-07, "loss": 1.1271, "step": 9948 }, { "epoch": 0.81, "grad_norm": 2.4049660304861966, "learning_rate": 8.873958580056241e-07, "loss": 0.3162, "step": 9949 }, { "epoch": 0.81, "grad_norm": 4.207338887500905, "learning_rate": 8.866431226389727e-07, "loss": 0.9544, "step": 9950 }, { "epoch": 0.81, "grad_norm": 4.194212944720493, "learning_rate": 8.858906756007385e-07, "loss": 0.7725, "step": 9951 }, { "epoch": 0.81, "grad_norm": 4.024844323097988, "learning_rate": 8.851385169436616e-07, "loss": 0.7606, "step": 9952 }, { "epoch": 0.81, "grad_norm": 4.96861080229688, "learning_rate": 8.843866467204671e-07, "loss": 1.0984, "step": 9953 }, { "epoch": 0.81, "grad_norm": 2.333868653973619, "learning_rate": 8.836350649838576e-07, "loss": 0.3738, "step": 9954 }, { "epoch": 0.81, "grad_norm": 4.347678846049028, "learning_rate": 8.828837717865151e-07, "loss": 0.5327, "step": 9955 }, { "epoch": 0.81, "grad_norm": 3.5626055060594073, "learning_rate": 8.821327671811025e-07, "loss": 0.6182, "step": 9956 }, { "epoch": 0.81, "grad_norm": 4.323903238945544, "learning_rate": 8.813820512202637e-07, "loss": 0.757, "step": 9957 }, { "epoch": 0.81, "grad_norm": 3.207498333234816, "learning_rate": 8.80631623956617e-07, "loss": 0.8589, "step": 9958 }, { "epoch": 0.81, "grad_norm": 2.567352678695993, "learning_rate": 8.798814854427661e-07, "loss": 0.2841, "step": 9959 }, { "epoch": 0.81, "grad_norm": 3.4653789463370224, "learning_rate": 8.791316357312923e-07, "loss": 0.8585, "step": 9960 }, { "epoch": 0.81, "grad_norm": 4.159159522826038, "learning_rate": 8.783820748747568e-07, "loss": 0.6917, "step": 9961 }, { "epoch": 0.81, "grad_norm": 5.236044226099467, "learning_rate": 8.776328029257014e-07, "loss": 0.9786, "step": 9962 }, { "epoch": 0.81, "grad_norm": 3.8408160000979645, "learning_rate": 8.768838199366448e-07, "loss": 0.8945, "step": 9963 }, { "epoch": 0.81, "grad_norm": 6.087986763963806, "learning_rate": 8.761351259600904e-07, "loss": 0.8972, "step": 9964 }, { "epoch": 0.81, "grad_norm": 2.2336086813978655, "learning_rate": 8.753867210485145e-07, "loss": 0.4059, "step": 9965 }, { "epoch": 0.81, "grad_norm": 2.946149759152611, "learning_rate": 8.746386052543793e-07, "loss": 0.5339, "step": 9966 }, { "epoch": 0.81, "grad_norm": 4.999607721075151, "learning_rate": 8.738907786301242e-07, "loss": 0.8304, "step": 9967 }, { "epoch": 0.81, "grad_norm": 5.461783597333245, "learning_rate": 8.731432412281705e-07, "loss": 1.3311, "step": 9968 }, { "epoch": 0.81, "grad_norm": 3.048219803158993, "learning_rate": 8.723959931009135e-07, "loss": 0.4584, "step": 9969 }, { "epoch": 0.81, "grad_norm": 1.2166370546771277, "learning_rate": 8.716490343007344e-07, "loss": 0.198, "step": 9970 }, { "epoch": 0.81, "grad_norm": 2.636804300884971, "learning_rate": 8.709023648799908e-07, "loss": 0.5062, "step": 9971 }, { "epoch": 0.82, "grad_norm": 2.841325653511893, "learning_rate": 8.701559848910224e-07, "loss": 0.3741, "step": 9972 }, { "epoch": 0.82, "grad_norm": 5.226905068076976, "learning_rate": 8.694098943861457e-07, "loss": 1.0037, "step": 9973 }, { "epoch": 0.82, "grad_norm": 4.09276388083209, "learning_rate": 8.686640934176604e-07, "loss": 0.5478, "step": 9974 }, { "epoch": 0.82, "grad_norm": 3.2628753408620814, "learning_rate": 8.67918582037841e-07, "loss": 0.6117, "step": 9975 }, { "epoch": 0.82, "grad_norm": 6.094075317614659, "learning_rate": 8.671733602989463e-07, "loss": 1.5157, "step": 9976 }, { "epoch": 0.82, "grad_norm": 2.9949040744102318, "learning_rate": 8.664284282532132e-07, "loss": 0.4868, "step": 9977 }, { "epoch": 0.82, "grad_norm": 3.580425700118394, "learning_rate": 8.656837859528589e-07, "loss": 0.5457, "step": 9978 }, { "epoch": 0.82, "grad_norm": 3.7619693330016513, "learning_rate": 8.649394334500777e-07, "loss": 0.5573, "step": 9979 }, { "epoch": 0.82, "grad_norm": 2.4998835827530375, "learning_rate": 8.641953707970468e-07, "loss": 0.3458, "step": 9980 }, { "epoch": 0.82, "grad_norm": 2.5884753213983136, "learning_rate": 8.634515980459207e-07, "loss": 0.4565, "step": 9981 }, { "epoch": 0.82, "grad_norm": 3.5308432908564105, "learning_rate": 8.627081152488353e-07, "loss": 0.6351, "step": 9982 }, { "epoch": 0.82, "grad_norm": 3.6583575584188517, "learning_rate": 8.619649224579051e-07, "loss": 0.7003, "step": 9983 }, { "epoch": 0.82, "grad_norm": 4.610901179277053, "learning_rate": 8.612220197252257e-07, "loss": 0.8103, "step": 9984 }, { "epoch": 0.82, "grad_norm": 3.144654102560811, "learning_rate": 8.604794071028716e-07, "loss": 0.6213, "step": 9985 }, { "epoch": 0.82, "grad_norm": 4.693232625300068, "learning_rate": 8.597370846428943e-07, "loss": 1.0916, "step": 9986 }, { "epoch": 0.82, "grad_norm": 4.512234857020179, "learning_rate": 8.58995052397329e-07, "loss": 0.8597, "step": 9987 }, { "epoch": 0.82, "grad_norm": 2.800831230839158, "learning_rate": 8.582533104181889e-07, "loss": 0.3972, "step": 9988 }, { "epoch": 0.82, "grad_norm": 2.9638615202286096, "learning_rate": 8.575118587574666e-07, "loss": 0.4356, "step": 9989 }, { "epoch": 0.82, "grad_norm": 4.119015109367693, "learning_rate": 8.567706974671353e-07, "loss": 1.125, "step": 9990 }, { "epoch": 0.82, "grad_norm": 4.347167414230204, "learning_rate": 8.560298265991473e-07, "loss": 0.9594, "step": 9991 }, { "epoch": 0.82, "grad_norm": 3.638443725545612, "learning_rate": 8.55289246205433e-07, "loss": 1.0653, "step": 9992 }, { "epoch": 0.82, "grad_norm": 3.2924314784327344, "learning_rate": 8.54548956337905e-07, "loss": 0.4807, "step": 9993 }, { "epoch": 0.82, "grad_norm": 1.5310620171207978, "learning_rate": 8.538089570484548e-07, "loss": 0.1781, "step": 9994 }, { "epoch": 0.82, "grad_norm": 1.353735075993639, "learning_rate": 8.530692483889514e-07, "loss": 0.1444, "step": 9995 }, { "epoch": 0.82, "grad_norm": 4.664733022307317, "learning_rate": 8.523298304112465e-07, "loss": 0.4313, "step": 9996 }, { "epoch": 0.82, "grad_norm": 4.8533237551889785, "learning_rate": 8.515907031671705e-07, "loss": 0.5627, "step": 9997 }, { "epoch": 0.82, "grad_norm": 3.1254378915318313, "learning_rate": 8.508518667085314e-07, "loss": 0.4626, "step": 9998 }, { "epoch": 0.82, "grad_norm": 4.272195748833848, "learning_rate": 8.501133210871188e-07, "loss": 0.5535, "step": 9999 }, { "epoch": 0.82, "grad_norm": 3.056707904463845, "learning_rate": 8.493750663547024e-07, "loss": 0.6391, "step": 10000 }, { "epoch": 0.82, "grad_norm": 3.571965715095055, "learning_rate": 8.486371025630302e-07, "loss": 0.8858, "step": 10001 }, { "epoch": 0.82, "grad_norm": 4.207486077789943, "learning_rate": 8.478994297638316e-07, "loss": 0.7767, "step": 10002 }, { "epoch": 0.82, "grad_norm": 1.9268246492293135, "learning_rate": 8.471620480088117e-07, "loss": 0.2496, "step": 10003 }, { "epoch": 0.82, "grad_norm": 4.2457661811966805, "learning_rate": 8.464249573496591e-07, "loss": 0.7234, "step": 10004 }, { "epoch": 0.82, "grad_norm": 4.185701483125576, "learning_rate": 8.456881578380405e-07, "loss": 0.683, "step": 10005 }, { "epoch": 0.82, "grad_norm": 7.215225747900721, "learning_rate": 8.449516495256022e-07, "loss": 1.4064, "step": 10006 }, { "epoch": 0.82, "grad_norm": 3.90089749768446, "learning_rate": 8.442154324639706e-07, "loss": 0.8309, "step": 10007 }, { "epoch": 0.82, "grad_norm": 2.198820210759882, "learning_rate": 8.434795067047524e-07, "loss": 0.3133, "step": 10008 }, { "epoch": 0.82, "grad_norm": 2.781826961822043, "learning_rate": 8.427438722995301e-07, "loss": 0.3979, "step": 10009 }, { "epoch": 0.82, "grad_norm": 4.160628587137114, "learning_rate": 8.420085292998714e-07, "loss": 0.7002, "step": 10010 }, { "epoch": 0.82, "grad_norm": 4.262311883425024, "learning_rate": 8.412734777573178e-07, "loss": 0.7013, "step": 10011 }, { "epoch": 0.82, "grad_norm": 4.31000974324674, "learning_rate": 8.405387177233948e-07, "loss": 0.5945, "step": 10012 }, { "epoch": 0.82, "grad_norm": 2.785005472523334, "learning_rate": 8.398042492496056e-07, "loss": 0.3029, "step": 10013 }, { "epoch": 0.82, "grad_norm": 4.1983697979929, "learning_rate": 8.390700723874346e-07, "loss": 0.7723, "step": 10014 }, { "epoch": 0.82, "grad_norm": 4.1287679927077345, "learning_rate": 8.383361871883417e-07, "loss": 0.4062, "step": 10015 }, { "epoch": 0.82, "grad_norm": 4.16968495234099, "learning_rate": 8.376025937037702e-07, "loss": 0.644, "step": 10016 }, { "epoch": 0.82, "grad_norm": 4.445627009539799, "learning_rate": 8.368692919851424e-07, "loss": 0.8638, "step": 10017 }, { "epoch": 0.82, "grad_norm": 2.5414590746261316, "learning_rate": 8.361362820838593e-07, "loss": 0.492, "step": 10018 }, { "epoch": 0.82, "grad_norm": 5.417243551042646, "learning_rate": 8.354035640513014e-07, "loss": 1.3996, "step": 10019 }, { "epoch": 0.82, "grad_norm": 3.7135312777360125, "learning_rate": 8.346711379388306e-07, "loss": 0.684, "step": 10020 }, { "epoch": 0.82, "grad_norm": 1.7078717728104544, "learning_rate": 8.33939003797784e-07, "loss": 0.268, "step": 10021 }, { "epoch": 0.82, "grad_norm": 4.50771291363033, "learning_rate": 8.332071616794829e-07, "loss": 0.6469, "step": 10022 }, { "epoch": 0.82, "grad_norm": 3.7552968502155464, "learning_rate": 8.324756116352256e-07, "loss": 0.8547, "step": 10023 }, { "epoch": 0.82, "grad_norm": 4.2278798354341065, "learning_rate": 8.317443537162922e-07, "loss": 0.9567, "step": 10024 }, { "epoch": 0.82, "grad_norm": 5.301315101902168, "learning_rate": 8.310133879739379e-07, "loss": 1.1694, "step": 10025 }, { "epoch": 0.82, "grad_norm": 3.3715012305367575, "learning_rate": 8.302827144594028e-07, "loss": 0.5722, "step": 10026 }, { "epoch": 0.82, "grad_norm": 4.066136984584118, "learning_rate": 8.295523332239014e-07, "loss": 0.5825, "step": 10027 }, { "epoch": 0.82, "grad_norm": 2.7098705094390225, "learning_rate": 8.288222443186317e-07, "loss": 0.472, "step": 10028 }, { "epoch": 0.82, "grad_norm": 4.432335998365689, "learning_rate": 8.280924477947699e-07, "loss": 0.8808, "step": 10029 }, { "epoch": 0.82, "grad_norm": 3.0500464971568686, "learning_rate": 8.273629437034708e-07, "loss": 0.5363, "step": 10030 }, { "epoch": 0.82, "grad_norm": 4.639329475382795, "learning_rate": 8.266337320958718e-07, "loss": 0.8161, "step": 10031 }, { "epoch": 0.82, "grad_norm": 5.796479380976112, "learning_rate": 8.25904813023084e-07, "loss": 1.5202, "step": 10032 }, { "epoch": 0.82, "grad_norm": 3.730708086836667, "learning_rate": 8.251761865362035e-07, "loss": 0.6162, "step": 10033 }, { "epoch": 0.82, "grad_norm": 2.6349957122901544, "learning_rate": 8.244478526863026e-07, "loss": 0.4139, "step": 10034 }, { "epoch": 0.82, "grad_norm": 3.723412902142466, "learning_rate": 8.23719811524436e-07, "loss": 0.4541, "step": 10035 }, { "epoch": 0.82, "grad_norm": 3.794269298914522, "learning_rate": 8.229920631016353e-07, "loss": 0.684, "step": 10036 }, { "epoch": 0.82, "grad_norm": 5.290127336830074, "learning_rate": 8.222646074689133e-07, "loss": 1.117, "step": 10037 }, { "epoch": 0.82, "grad_norm": 4.5149409100032685, "learning_rate": 8.215374446772595e-07, "loss": 0.9134, "step": 10038 }, { "epoch": 0.82, "grad_norm": 3.9944726149977607, "learning_rate": 8.208105747776468e-07, "loss": 0.6517, "step": 10039 }, { "epoch": 0.82, "grad_norm": 6.588783694265223, "learning_rate": 8.200839978210256e-07, "loss": 1.1527, "step": 10040 }, { "epoch": 0.82, "grad_norm": 4.934127704405573, "learning_rate": 8.193577138583242e-07, "loss": 0.7983, "step": 10041 }, { "epoch": 0.82, "grad_norm": 2.9057358963851736, "learning_rate": 8.186317229404523e-07, "loss": 0.4775, "step": 10042 }, { "epoch": 0.82, "grad_norm": 3.819537120002851, "learning_rate": 8.179060251183007e-07, "loss": 1.0176, "step": 10043 }, { "epoch": 0.82, "grad_norm": 2.3721447545738563, "learning_rate": 8.171806204427351e-07, "loss": 0.3996, "step": 10044 }, { "epoch": 0.82, "grad_norm": 3.3969713669358352, "learning_rate": 8.164555089646048e-07, "loss": 0.6868, "step": 10045 }, { "epoch": 0.82, "grad_norm": 4.814354675455581, "learning_rate": 8.157306907347357e-07, "loss": 0.8552, "step": 10046 }, { "epoch": 0.82, "grad_norm": 3.827139451801565, "learning_rate": 8.150061658039354e-07, "loss": 0.7606, "step": 10047 }, { "epoch": 0.82, "grad_norm": 3.661073688282505, "learning_rate": 8.142819342229913e-07, "loss": 0.8016, "step": 10048 }, { "epoch": 0.82, "grad_norm": 2.984591847641335, "learning_rate": 8.135579960426659e-07, "loss": 0.4342, "step": 10049 }, { "epoch": 0.82, "grad_norm": 2.5052051722141777, "learning_rate": 8.12834351313705e-07, "loss": 0.3033, "step": 10050 }, { "epoch": 0.82, "grad_norm": 4.131568483230662, "learning_rate": 8.121110000868343e-07, "loss": 0.9707, "step": 10051 }, { "epoch": 0.82, "grad_norm": 4.872821654104947, "learning_rate": 8.113879424127564e-07, "loss": 0.9356, "step": 10052 }, { "epoch": 0.82, "grad_norm": 4.499040191623966, "learning_rate": 8.106651783421543e-07, "loss": 0.9858, "step": 10053 }, { "epoch": 0.82, "grad_norm": 2.2972781125121213, "learning_rate": 8.099427079256928e-07, "loss": 0.3593, "step": 10054 }, { "epoch": 0.82, "grad_norm": 4.9651309608674, "learning_rate": 8.092205312140111e-07, "loss": 0.9283, "step": 10055 }, { "epoch": 0.82, "grad_norm": 5.2035955637071405, "learning_rate": 8.084986482577323e-07, "loss": 1.4109, "step": 10056 }, { "epoch": 0.82, "grad_norm": 1.8922151956035511, "learning_rate": 8.077770591074574e-07, "loss": 0.3467, "step": 10057 }, { "epoch": 0.82, "grad_norm": 4.039653787579526, "learning_rate": 8.070557638137649e-07, "loss": 1.1254, "step": 10058 }, { "epoch": 0.82, "grad_norm": 3.5731288296777097, "learning_rate": 8.063347624272156e-07, "loss": 0.6772, "step": 10059 }, { "epoch": 0.82, "grad_norm": 3.200731054783732, "learning_rate": 8.056140549983499e-07, "loss": 0.544, "step": 10060 }, { "epoch": 0.82, "grad_norm": 4.2915502818258195, "learning_rate": 8.048936415776837e-07, "loss": 0.6498, "step": 10061 }, { "epoch": 0.82, "grad_norm": 3.621791096115158, "learning_rate": 8.041735222157159e-07, "loss": 0.8866, "step": 10062 }, { "epoch": 0.82, "grad_norm": 3.5508307845696594, "learning_rate": 8.034536969629242e-07, "loss": 0.5232, "step": 10063 }, { "epoch": 0.82, "grad_norm": 1.1309327404270295, "learning_rate": 8.027341658697646e-07, "loss": 0.1377, "step": 10064 }, { "epoch": 0.82, "grad_norm": 3.6677811482716183, "learning_rate": 8.020149289866746e-07, "loss": 0.8675, "step": 10065 }, { "epoch": 0.82, "grad_norm": 3.676930005706927, "learning_rate": 8.012959863640674e-07, "loss": 1.0361, "step": 10066 }, { "epoch": 0.82, "grad_norm": 3.9075167493127307, "learning_rate": 8.005773380523386e-07, "loss": 0.974, "step": 10067 }, { "epoch": 0.82, "grad_norm": 2.8272851825025835, "learning_rate": 7.998589841018622e-07, "loss": 0.3865, "step": 10068 }, { "epoch": 0.82, "grad_norm": 3.773220858860217, "learning_rate": 7.991409245629922e-07, "loss": 0.6879, "step": 10069 }, { "epoch": 0.82, "grad_norm": 3.78635399173605, "learning_rate": 7.984231594860614e-07, "loss": 0.6683, "step": 10070 }, { "epoch": 0.82, "grad_norm": 3.5489463805363006, "learning_rate": 7.977056889213831e-07, "loss": 0.8355, "step": 10071 }, { "epoch": 0.82, "grad_norm": 4.73856111582991, "learning_rate": 7.969885129192456e-07, "loss": 0.7822, "step": 10072 }, { "epoch": 0.82, "grad_norm": 5.000026482958678, "learning_rate": 7.962716315299235e-07, "loss": 1.1317, "step": 10073 }, { "epoch": 0.82, "grad_norm": 3.8629512984605703, "learning_rate": 7.955550448036642e-07, "loss": 0.4413, "step": 10074 }, { "epoch": 0.82, "grad_norm": 2.980903230806037, "learning_rate": 7.948387527906987e-07, "loss": 0.4457, "step": 10075 }, { "epoch": 0.82, "grad_norm": 5.403371770741651, "learning_rate": 7.941227555412351e-07, "loss": 0.8789, "step": 10076 }, { "epoch": 0.82, "grad_norm": 3.615071009109892, "learning_rate": 7.934070531054638e-07, "loss": 0.5785, "step": 10077 }, { "epoch": 0.82, "grad_norm": 4.102659969057406, "learning_rate": 7.926916455335498e-07, "loss": 0.8829, "step": 10078 }, { "epoch": 0.82, "grad_norm": 4.573077847289734, "learning_rate": 7.919765328756407e-07, "loss": 0.93, "step": 10079 }, { "epoch": 0.82, "grad_norm": 4.136876063217532, "learning_rate": 7.912617151818636e-07, "loss": 0.6846, "step": 10080 }, { "epoch": 0.82, "grad_norm": 4.4749603064475245, "learning_rate": 7.90547192502324e-07, "loss": 0.8403, "step": 10081 }, { "epoch": 0.82, "grad_norm": 3.909745880030514, "learning_rate": 7.898329648871067e-07, "loss": 0.6864, "step": 10082 }, { "epoch": 0.82, "grad_norm": 1.5567840556596326, "learning_rate": 7.891190323862762e-07, "loss": 0.1979, "step": 10083 }, { "epoch": 0.82, "grad_norm": 2.4274023280178167, "learning_rate": 7.884053950498754e-07, "loss": 0.3834, "step": 10084 }, { "epoch": 0.82, "grad_norm": 2.737814252469486, "learning_rate": 7.87692052927927e-07, "loss": 0.6148, "step": 10085 }, { "epoch": 0.82, "grad_norm": 4.9986161359124175, "learning_rate": 7.869790060704341e-07, "loss": 1.178, "step": 10086 }, { "epoch": 0.82, "grad_norm": 4.548081185556967, "learning_rate": 7.862662545273786e-07, "loss": 0.7753, "step": 10087 }, { "epoch": 0.82, "grad_norm": 2.760979136167705, "learning_rate": 7.855537983487194e-07, "loss": 0.3284, "step": 10088 }, { "epoch": 0.82, "grad_norm": 4.172216511458501, "learning_rate": 7.848416375843987e-07, "loss": 0.5237, "step": 10089 }, { "epoch": 0.82, "grad_norm": 3.0377431401690775, "learning_rate": 7.841297722843333e-07, "loss": 0.6412, "step": 10090 }, { "epoch": 0.82, "grad_norm": 4.928285806833154, "learning_rate": 7.834182024984238e-07, "loss": 0.5305, "step": 10091 }, { "epoch": 0.82, "grad_norm": 3.4613951992850245, "learning_rate": 7.827069282765475e-07, "loss": 0.3983, "step": 10092 }, { "epoch": 0.82, "grad_norm": 4.262635263729186, "learning_rate": 7.81995949668562e-07, "loss": 0.9262, "step": 10093 }, { "epoch": 0.83, "grad_norm": 3.319007183839746, "learning_rate": 7.812852667243043e-07, "loss": 0.3667, "step": 10094 }, { "epoch": 0.83, "grad_norm": 4.015797994989106, "learning_rate": 7.805748794935886e-07, "loss": 0.9151, "step": 10095 }, { "epoch": 0.83, "grad_norm": 1.4061862206066524, "learning_rate": 7.798647880262111e-07, "loss": 0.1664, "step": 10096 }, { "epoch": 0.83, "grad_norm": 4.410668028791237, "learning_rate": 7.791549923719455e-07, "loss": 0.9573, "step": 10097 }, { "epoch": 0.83, "grad_norm": 2.868404391966383, "learning_rate": 7.784454925805457e-07, "loss": 0.7018, "step": 10098 }, { "epoch": 0.83, "grad_norm": 2.381961548937185, "learning_rate": 7.777362887017448e-07, "loss": 0.2947, "step": 10099 }, { "epoch": 0.83, "grad_norm": 4.892050919340202, "learning_rate": 7.770273807852557e-07, "loss": 1.2152, "step": 10100 }, { "epoch": 0.83, "grad_norm": 6.19196362056325, "learning_rate": 7.763187688807677e-07, "loss": 0.6899, "step": 10101 }, { "epoch": 0.83, "grad_norm": 3.0527982191823737, "learning_rate": 7.756104530379526e-07, "loss": 0.3678, "step": 10102 }, { "epoch": 0.83, "grad_norm": 4.1701771145805395, "learning_rate": 7.749024333064614e-07, "loss": 0.7823, "step": 10103 }, { "epoch": 0.83, "grad_norm": 5.286789747108113, "learning_rate": 7.74194709735921e-07, "loss": 0.8221, "step": 10104 }, { "epoch": 0.83, "grad_norm": 5.316492683343482, "learning_rate": 7.73487282375941e-07, "loss": 1.1504, "step": 10105 }, { "epoch": 0.83, "grad_norm": 3.624176181992469, "learning_rate": 7.72780151276108e-07, "loss": 0.6505, "step": 10106 }, { "epoch": 0.83, "grad_norm": 4.622089616941887, "learning_rate": 7.720733164859895e-07, "loss": 0.7511, "step": 10107 }, { "epoch": 0.83, "grad_norm": 5.117173719225514, "learning_rate": 7.713667780551315e-07, "loss": 0.5868, "step": 10108 }, { "epoch": 0.83, "grad_norm": 4.066366457292403, "learning_rate": 7.706605360330594e-07, "loss": 0.4654, "step": 10109 }, { "epoch": 0.83, "grad_norm": 2.733998100948551, "learning_rate": 7.699545904692774e-07, "loss": 0.3261, "step": 10110 }, { "epoch": 0.83, "grad_norm": 3.2497827288864802, "learning_rate": 7.692489414132703e-07, "loss": 0.742, "step": 10111 }, { "epoch": 0.83, "grad_norm": 3.47988675054254, "learning_rate": 7.685435889144993e-07, "loss": 0.6233, "step": 10112 }, { "epoch": 0.83, "grad_norm": 2.8554813264771406, "learning_rate": 7.678385330224075e-07, "loss": 0.4469, "step": 10113 }, { "epoch": 0.83, "grad_norm": 4.4381940487933935, "learning_rate": 7.671337737864159e-07, "loss": 0.5083, "step": 10114 }, { "epoch": 0.83, "grad_norm": 4.6863597272472095, "learning_rate": 7.664293112559251e-07, "loss": 1.0119, "step": 10115 }, { "epoch": 0.83, "grad_norm": 2.4769697594820794, "learning_rate": 7.65725145480315e-07, "loss": 0.4439, "step": 10116 }, { "epoch": 0.83, "grad_norm": 5.370028598981, "learning_rate": 7.65021276508946e-07, "loss": 0.8541, "step": 10117 }, { "epoch": 0.83, "grad_norm": 3.4499207348327525, "learning_rate": 7.643177043911538e-07, "loss": 0.5175, "step": 10118 }, { "epoch": 0.83, "grad_norm": 2.1584533314883565, "learning_rate": 7.636144291762576e-07, "loss": 0.2468, "step": 10119 }, { "epoch": 0.83, "grad_norm": 4.018358478001542, "learning_rate": 7.629114509135521e-07, "loss": 0.9447, "step": 10120 }, { "epoch": 0.83, "grad_norm": 3.5912207702822387, "learning_rate": 7.62208769652314e-07, "loss": 0.5151, "step": 10121 }, { "epoch": 0.83, "grad_norm": 4.328108848632602, "learning_rate": 7.615063854417981e-07, "loss": 0.6842, "step": 10122 }, { "epoch": 0.83, "grad_norm": 4.900438538556681, "learning_rate": 7.608042983312397e-07, "loss": 0.7065, "step": 10123 }, { "epoch": 0.83, "grad_norm": 4.201546029218351, "learning_rate": 7.601025083698499e-07, "loss": 0.4837, "step": 10124 }, { "epoch": 0.83, "grad_norm": 3.026673616477278, "learning_rate": 7.594010156068221e-07, "loss": 0.4566, "step": 10125 }, { "epoch": 0.83, "grad_norm": 4.436617140355298, "learning_rate": 7.586998200913282e-07, "loss": 0.7603, "step": 10126 }, { "epoch": 0.83, "grad_norm": 3.8850862955727252, "learning_rate": 7.579989218725187e-07, "loss": 0.8296, "step": 10127 }, { "epoch": 0.83, "grad_norm": 6.665295443382982, "learning_rate": 7.572983209995244e-07, "loss": 0.8821, "step": 10128 }, { "epoch": 0.83, "grad_norm": 4.59546360930877, "learning_rate": 7.565980175214526e-07, "loss": 0.885, "step": 10129 }, { "epoch": 0.83, "grad_norm": 3.8774411324788542, "learning_rate": 7.558980114873921e-07, "loss": 0.726, "step": 10130 }, { "epoch": 0.83, "grad_norm": 3.0323948272572405, "learning_rate": 7.551983029464111e-07, "loss": 0.5955, "step": 10131 }, { "epoch": 0.83, "grad_norm": 4.266205587184443, "learning_rate": 7.544988919475555e-07, "loss": 0.9898, "step": 10132 }, { "epoch": 0.83, "grad_norm": 4.56948708053087, "learning_rate": 7.537997785398515e-07, "loss": 0.7395, "step": 10133 }, { "epoch": 0.83, "grad_norm": 3.4088982946641324, "learning_rate": 7.531009627723035e-07, "loss": 0.6821, "step": 10134 }, { "epoch": 0.83, "grad_norm": 3.0972983217595726, "learning_rate": 7.52402444693896e-07, "loss": 0.6063, "step": 10135 }, { "epoch": 0.83, "grad_norm": 3.8439494774168987, "learning_rate": 7.517042243535899e-07, "loss": 0.719, "step": 10136 }, { "epoch": 0.83, "grad_norm": 2.9165481492503744, "learning_rate": 7.5100630180033e-07, "loss": 0.4903, "step": 10137 }, { "epoch": 0.83, "grad_norm": 1.8094712506379884, "learning_rate": 7.503086770830359e-07, "loss": 0.1993, "step": 10138 }, { "epoch": 0.83, "grad_norm": 4.732114676057692, "learning_rate": 7.496113502506092e-07, "loss": 1.0937, "step": 10139 }, { "epoch": 0.83, "grad_norm": 4.573506384797262, "learning_rate": 7.489143213519301e-07, "loss": 1.1496, "step": 10140 }, { "epoch": 0.83, "grad_norm": 3.1810527637860355, "learning_rate": 7.482175904358552e-07, "loss": 0.5457, "step": 10141 }, { "epoch": 0.83, "grad_norm": 4.829992228202246, "learning_rate": 7.475211575512231e-07, "loss": 0.7806, "step": 10142 }, { "epoch": 0.83, "grad_norm": 3.4087777526064107, "learning_rate": 7.468250227468515e-07, "loss": 0.5698, "step": 10143 }, { "epoch": 0.83, "grad_norm": 4.443150332879401, "learning_rate": 7.461291860715359e-07, "loss": 0.9275, "step": 10144 }, { "epoch": 0.83, "grad_norm": 3.8076918964860598, "learning_rate": 7.45433647574052e-07, "loss": 0.7731, "step": 10145 }, { "epoch": 0.83, "grad_norm": 4.00916585804603, "learning_rate": 7.447384073031527e-07, "loss": 0.8045, "step": 10146 }, { "epoch": 0.83, "grad_norm": 7.543639619527765, "learning_rate": 7.440434653075723e-07, "loss": 1.4757, "step": 10147 }, { "epoch": 0.83, "grad_norm": 3.744778257036902, "learning_rate": 7.43348821636023e-07, "loss": 1.0878, "step": 10148 }, { "epoch": 0.83, "grad_norm": 2.873126512495574, "learning_rate": 7.426544763371974e-07, "loss": 0.5527, "step": 10149 }, { "epoch": 0.83, "grad_norm": 3.5157252702605923, "learning_rate": 7.419604294597632e-07, "loss": 0.6573, "step": 10150 }, { "epoch": 0.83, "grad_norm": 2.6910821262864113, "learning_rate": 7.412666810523727e-07, "loss": 0.5749, "step": 10151 }, { "epoch": 0.83, "grad_norm": 2.0645375077749435, "learning_rate": 7.405732311636543e-07, "loss": 0.2744, "step": 10152 }, { "epoch": 0.83, "grad_norm": 6.118859734677217, "learning_rate": 7.398800798422145e-07, "loss": 0.8487, "step": 10153 }, { "epoch": 0.83, "grad_norm": 2.8014057053335506, "learning_rate": 7.391872271366407e-07, "loss": 0.4396, "step": 10154 }, { "epoch": 0.83, "grad_norm": 4.57035437763408, "learning_rate": 7.384946730954995e-07, "loss": 0.745, "step": 10155 }, { "epoch": 0.83, "grad_norm": 2.6010854374742336, "learning_rate": 7.378024177673354e-07, "loss": 0.6538, "step": 10156 }, { "epoch": 0.83, "grad_norm": 2.783812121647489, "learning_rate": 7.371104612006741e-07, "loss": 0.3029, "step": 10157 }, { "epoch": 0.83, "grad_norm": 3.70317558211454, "learning_rate": 7.364188034440161e-07, "loss": 1.002, "step": 10158 }, { "epoch": 0.83, "grad_norm": 3.9305650101655183, "learning_rate": 7.357274445458446e-07, "loss": 0.7544, "step": 10159 }, { "epoch": 0.83, "grad_norm": 6.608343086018153, "learning_rate": 7.35036384554621e-07, "loss": 1.4278, "step": 10160 }, { "epoch": 0.83, "grad_norm": 4.571341322709621, "learning_rate": 7.343456235187857e-07, "loss": 0.6947, "step": 10161 }, { "epoch": 0.83, "grad_norm": 3.4511973270380194, "learning_rate": 7.336551614867582e-07, "loss": 0.6418, "step": 10162 }, { "epoch": 0.83, "grad_norm": 5.478077366704615, "learning_rate": 7.329649985069376e-07, "loss": 1.3947, "step": 10163 }, { "epoch": 0.83, "grad_norm": 3.4354541645576178, "learning_rate": 7.322751346276997e-07, "loss": 0.8503, "step": 10164 }, { "epoch": 0.83, "grad_norm": 3.0040327011560812, "learning_rate": 7.315855698974012e-07, "loss": 0.5111, "step": 10165 }, { "epoch": 0.83, "grad_norm": 3.9865675662372406, "learning_rate": 7.308963043643791e-07, "loss": 0.5029, "step": 10166 }, { "epoch": 0.83, "grad_norm": 2.5068836581670593, "learning_rate": 7.302073380769459e-07, "loss": 0.32, "step": 10167 }, { "epoch": 0.83, "grad_norm": 5.108309155183119, "learning_rate": 7.295186710833973e-07, "loss": 0.6038, "step": 10168 }, { "epoch": 0.83, "grad_norm": 3.1470775770097736, "learning_rate": 7.288303034320038e-07, "loss": 0.5335, "step": 10169 }, { "epoch": 0.83, "grad_norm": 4.930333607704734, "learning_rate": 7.281422351710177e-07, "loss": 0.8595, "step": 10170 }, { "epoch": 0.83, "grad_norm": 1.1370544711017538, "learning_rate": 7.274544663486694e-07, "loss": 0.1652, "step": 10171 }, { "epoch": 0.83, "grad_norm": 4.136367665352145, "learning_rate": 7.267669970131691e-07, "loss": 0.8134, "step": 10172 }, { "epoch": 0.83, "grad_norm": 5.141838639356749, "learning_rate": 7.260798272127051e-07, "loss": 1.0518, "step": 10173 }, { "epoch": 0.83, "grad_norm": 5.168489051573621, "learning_rate": 7.25392956995446e-07, "loss": 0.8956, "step": 10174 }, { "epoch": 0.83, "grad_norm": 4.033656979157179, "learning_rate": 7.247063864095361e-07, "loss": 0.8774, "step": 10175 }, { "epoch": 0.83, "grad_norm": 4.684176619776644, "learning_rate": 7.240201155031029e-07, "loss": 0.9595, "step": 10176 }, { "epoch": 0.83, "grad_norm": 3.3587537054595167, "learning_rate": 7.233341443242504e-07, "loss": 0.4504, "step": 10177 }, { "epoch": 0.83, "grad_norm": 4.72953453724526, "learning_rate": 7.22648472921062e-07, "loss": 0.8717, "step": 10178 }, { "epoch": 0.83, "grad_norm": 4.2640117138401425, "learning_rate": 7.219631013416007e-07, "loss": 0.8372, "step": 10179 }, { "epoch": 0.83, "grad_norm": 5.031931141346207, "learning_rate": 7.212780296339095e-07, "loss": 0.7958, "step": 10180 }, { "epoch": 0.83, "grad_norm": 3.3418842891165044, "learning_rate": 7.205932578460056e-07, "loss": 0.5695, "step": 10181 }, { "epoch": 0.83, "grad_norm": 3.416966419131427, "learning_rate": 7.199087860258913e-07, "loss": 0.6697, "step": 10182 }, { "epoch": 0.83, "grad_norm": 3.022172971765802, "learning_rate": 7.192246142215437e-07, "loss": 0.4962, "step": 10183 }, { "epoch": 0.83, "grad_norm": 3.3941894220163036, "learning_rate": 7.185407424809199e-07, "loss": 1.0106, "step": 10184 }, { "epoch": 0.83, "grad_norm": 2.8137243834314782, "learning_rate": 7.178571708519589e-07, "loss": 0.5257, "step": 10185 }, { "epoch": 0.83, "grad_norm": 4.4841375497339975, "learning_rate": 7.171738993825728e-07, "loss": 0.9764, "step": 10186 }, { "epoch": 0.83, "grad_norm": 2.495239260489433, "learning_rate": 7.164909281206573e-07, "loss": 0.2904, "step": 10187 }, { "epoch": 0.83, "grad_norm": 2.932504966643108, "learning_rate": 7.158082571140857e-07, "loss": 0.5881, "step": 10188 }, { "epoch": 0.83, "grad_norm": 4.309706622233228, "learning_rate": 7.151258864107107e-07, "loss": 0.8552, "step": 10189 }, { "epoch": 0.83, "grad_norm": 5.151604904407833, "learning_rate": 7.144438160583633e-07, "loss": 1.3396, "step": 10190 }, { "epoch": 0.83, "grad_norm": 2.175446160551388, "learning_rate": 7.137620461048544e-07, "loss": 0.3172, "step": 10191 }, { "epoch": 0.83, "grad_norm": 3.0943987081345203, "learning_rate": 7.130805765979714e-07, "loss": 0.5698, "step": 10192 }, { "epoch": 0.83, "grad_norm": 3.836987678336197, "learning_rate": 7.123994075854834e-07, "loss": 0.5729, "step": 10193 }, { "epoch": 0.83, "grad_norm": 2.072622197125855, "learning_rate": 7.117185391151371e-07, "loss": 0.3823, "step": 10194 }, { "epoch": 0.83, "grad_norm": 4.870931924754225, "learning_rate": 7.110379712346582e-07, "loss": 0.9687, "step": 10195 }, { "epoch": 0.83, "grad_norm": 4.549694135245669, "learning_rate": 7.103577039917536e-07, "loss": 1.1397, "step": 10196 }, { "epoch": 0.83, "grad_norm": 4.443048944288806, "learning_rate": 7.09677737434104e-07, "loss": 0.9514, "step": 10197 }, { "epoch": 0.83, "grad_norm": 6.412042572557696, "learning_rate": 7.089980716093741e-07, "loss": 1.004, "step": 10198 }, { "epoch": 0.83, "grad_norm": 2.8298060473757447, "learning_rate": 7.083187065652042e-07, "loss": 0.492, "step": 10199 }, { "epoch": 0.83, "grad_norm": 2.462097342915266, "learning_rate": 7.076396423492154e-07, "loss": 0.5119, "step": 10200 }, { "epoch": 0.83, "grad_norm": 3.4503156311001444, "learning_rate": 7.069608790090077e-07, "loss": 0.4069, "step": 10201 }, { "epoch": 0.83, "grad_norm": 1.514757874641178, "learning_rate": 7.062824165921589e-07, "loss": 0.2277, "step": 10202 }, { "epoch": 0.83, "grad_norm": 4.0935899306461865, "learning_rate": 7.056042551462273e-07, "loss": 0.9142, "step": 10203 }, { "epoch": 0.83, "grad_norm": 4.408676529971462, "learning_rate": 7.049263947187468e-07, "loss": 0.7984, "step": 10204 }, { "epoch": 0.83, "grad_norm": 3.7888943397001684, "learning_rate": 7.042488353572341e-07, "loss": 0.8042, "step": 10205 }, { "epoch": 0.83, "grad_norm": 3.6447457960019145, "learning_rate": 7.035715771091828e-07, "loss": 0.7367, "step": 10206 }, { "epoch": 0.83, "grad_norm": 2.7901059754746016, "learning_rate": 7.028946200220655e-07, "loss": 0.4016, "step": 10207 }, { "epoch": 0.83, "grad_norm": 2.768076715298043, "learning_rate": 7.022179641433357e-07, "loss": 0.3792, "step": 10208 }, { "epoch": 0.83, "grad_norm": 2.9790775227827817, "learning_rate": 7.015416095204214e-07, "loss": 0.2407, "step": 10209 }, { "epoch": 0.83, "grad_norm": 4.779174705300358, "learning_rate": 7.008655562007333e-07, "loss": 0.9499, "step": 10210 }, { "epoch": 0.83, "grad_norm": 3.504210062203104, "learning_rate": 7.001898042316602e-07, "loss": 0.5965, "step": 10211 }, { "epoch": 0.83, "grad_norm": 4.098430800876282, "learning_rate": 6.995143536605698e-07, "loss": 0.8216, "step": 10212 }, { "epoch": 0.83, "grad_norm": 4.722458855400462, "learning_rate": 6.988392045348063e-07, "loss": 0.5898, "step": 10213 }, { "epoch": 0.83, "grad_norm": 5.603204189743334, "learning_rate": 6.981643569016966e-07, "loss": 0.9451, "step": 10214 }, { "epoch": 0.83, "grad_norm": 4.383153672975866, "learning_rate": 6.974898108085431e-07, "loss": 0.7547, "step": 10215 }, { "epoch": 0.84, "grad_norm": 5.777138167338287, "learning_rate": 6.968155663026294e-07, "loss": 1.0101, "step": 10216 }, { "epoch": 0.84, "grad_norm": 2.949733626148483, "learning_rate": 6.961416234312168e-07, "loss": 0.5266, "step": 10217 }, { "epoch": 0.84, "grad_norm": 3.286890743214652, "learning_rate": 6.954679822415461e-07, "loss": 0.624, "step": 10218 }, { "epoch": 0.84, "grad_norm": 5.196197776760363, "learning_rate": 6.947946427808366e-07, "loss": 1.1136, "step": 10219 }, { "epoch": 0.84, "grad_norm": 5.8562787817039075, "learning_rate": 6.941216050962868e-07, "loss": 0.8969, "step": 10220 }, { "epoch": 0.84, "grad_norm": 5.311974724124445, "learning_rate": 6.934488692350727e-07, "loss": 0.8868, "step": 10221 }, { "epoch": 0.84, "grad_norm": 4.961188506136366, "learning_rate": 6.927764352443506e-07, "loss": 0.8205, "step": 10222 }, { "epoch": 0.84, "grad_norm": 4.955038469711517, "learning_rate": 6.921043031712549e-07, "loss": 0.8039, "step": 10223 }, { "epoch": 0.84, "grad_norm": 5.323796643908006, "learning_rate": 6.914324730629002e-07, "loss": 1.027, "step": 10224 }, { "epoch": 0.84, "grad_norm": 3.0023604844410343, "learning_rate": 6.907609449663788e-07, "loss": 0.49, "step": 10225 }, { "epoch": 0.84, "grad_norm": 4.357653845060114, "learning_rate": 6.900897189287603e-07, "loss": 0.5876, "step": 10226 }, { "epoch": 0.84, "grad_norm": 2.68285164443623, "learning_rate": 6.894187949970954e-07, "loss": 0.3064, "step": 10227 }, { "epoch": 0.84, "grad_norm": 3.3820541379404068, "learning_rate": 6.887481732184148e-07, "loss": 0.6108, "step": 10228 }, { "epoch": 0.84, "grad_norm": 6.299569677418883, "learning_rate": 6.880778536397237e-07, "loss": 0.8979, "step": 10229 }, { "epoch": 0.84, "grad_norm": 2.970028067271835, "learning_rate": 6.874078363080089e-07, "loss": 0.6938, "step": 10230 }, { "epoch": 0.84, "grad_norm": 3.031576213495916, "learning_rate": 6.867381212702378e-07, "loss": 0.5359, "step": 10231 }, { "epoch": 0.84, "grad_norm": 1.325165945870046, "learning_rate": 6.860687085733519e-07, "loss": 0.1823, "step": 10232 }, { "epoch": 0.84, "grad_norm": 4.633013472211267, "learning_rate": 6.853995982642753e-07, "loss": 0.681, "step": 10233 }, { "epoch": 0.84, "grad_norm": 5.529771631025134, "learning_rate": 6.847307903899091e-07, "loss": 0.7647, "step": 10234 }, { "epoch": 0.84, "grad_norm": 2.125269134331355, "learning_rate": 6.840622849971352e-07, "loss": 0.451, "step": 10235 }, { "epoch": 0.84, "grad_norm": 1.1441559641432029, "learning_rate": 6.833940821328117e-07, "loss": 0.1391, "step": 10236 }, { "epoch": 0.84, "grad_norm": 4.361524339778844, "learning_rate": 6.827261818437781e-07, "loss": 0.7674, "step": 10237 }, { "epoch": 0.84, "grad_norm": 3.0626961124778864, "learning_rate": 6.820585841768496e-07, "loss": 0.5342, "step": 10238 }, { "epoch": 0.84, "grad_norm": 4.582327339796292, "learning_rate": 6.813912891788221e-07, "loss": 1.0739, "step": 10239 }, { "epoch": 0.84, "grad_norm": 4.562027936198054, "learning_rate": 6.807242968964711e-07, "loss": 1.1472, "step": 10240 }, { "epoch": 0.84, "grad_norm": 3.9999078913684025, "learning_rate": 6.800576073765486e-07, "loss": 0.7174, "step": 10241 }, { "epoch": 0.84, "grad_norm": 3.317538348890353, "learning_rate": 6.793912206657893e-07, "loss": 0.4795, "step": 10242 }, { "epoch": 0.84, "grad_norm": 3.5309894163865896, "learning_rate": 6.787251368109005e-07, "loss": 0.5938, "step": 10243 }, { "epoch": 0.84, "grad_norm": 5.284058046073281, "learning_rate": 6.780593558585746e-07, "loss": 0.9317, "step": 10244 }, { "epoch": 0.84, "grad_norm": 4.844064243994371, "learning_rate": 6.773938778554773e-07, "loss": 0.8454, "step": 10245 }, { "epoch": 0.84, "grad_norm": 3.5893596866876116, "learning_rate": 6.767287028482577e-07, "loss": 0.5012, "step": 10246 }, { "epoch": 0.84, "grad_norm": 3.3520825892480266, "learning_rate": 6.760638308835404e-07, "loss": 0.4746, "step": 10247 }, { "epoch": 0.84, "grad_norm": 2.393175137395471, "learning_rate": 6.753992620079325e-07, "loss": 0.4163, "step": 10248 }, { "epoch": 0.84, "grad_norm": 3.4533606783094313, "learning_rate": 6.747349962680144e-07, "loss": 0.5819, "step": 10249 }, { "epoch": 0.84, "grad_norm": 2.7237269976253433, "learning_rate": 6.74071033710349e-07, "loss": 0.4897, "step": 10250 }, { "epoch": 0.84, "grad_norm": 4.746376562234642, "learning_rate": 6.734073743814779e-07, "loss": 1.1306, "step": 10251 }, { "epoch": 0.84, "grad_norm": 5.358985476907801, "learning_rate": 6.727440183279205e-07, "loss": 1.1153, "step": 10252 }, { "epoch": 0.84, "grad_norm": 3.0330042057965145, "learning_rate": 6.720809655961752e-07, "loss": 0.2759, "step": 10253 }, { "epoch": 0.84, "grad_norm": 5.387208326336032, "learning_rate": 6.714182162327198e-07, "loss": 0.7345, "step": 10254 }, { "epoch": 0.84, "grad_norm": 3.9235955723959077, "learning_rate": 6.707557702840084e-07, "loss": 0.5651, "step": 10255 }, { "epoch": 0.84, "grad_norm": 4.368011627009563, "learning_rate": 6.700936277964771e-07, "loss": 1.0694, "step": 10256 }, { "epoch": 0.84, "grad_norm": 2.6604373487739097, "learning_rate": 6.694317888165381e-07, "loss": 0.3197, "step": 10257 }, { "epoch": 0.84, "grad_norm": 4.394383222912035, "learning_rate": 6.687702533905855e-07, "loss": 0.7531, "step": 10258 }, { "epoch": 0.84, "grad_norm": 2.02730281209635, "learning_rate": 6.681090215649872e-07, "loss": 0.3874, "step": 10259 }, { "epoch": 0.84, "grad_norm": 3.4602941295006677, "learning_rate": 6.674480933860938e-07, "loss": 0.7086, "step": 10260 }, { "epoch": 0.84, "grad_norm": 3.408342882524193, "learning_rate": 6.667874689002352e-07, "loss": 0.5866, "step": 10261 }, { "epoch": 0.84, "grad_norm": 2.4173230715632665, "learning_rate": 6.661271481537157e-07, "loss": 0.5306, "step": 10262 }, { "epoch": 0.84, "grad_norm": 4.969856473735861, "learning_rate": 6.654671311928218e-07, "loss": 0.9168, "step": 10263 }, { "epoch": 0.84, "grad_norm": 2.6517478940535586, "learning_rate": 6.648074180638181e-07, "loss": 0.501, "step": 10264 }, { "epoch": 0.84, "grad_norm": 4.653153651391398, "learning_rate": 6.641480088129488e-07, "loss": 0.8284, "step": 10265 }, { "epoch": 0.84, "grad_norm": 3.850258416331185, "learning_rate": 6.634889034864334e-07, "loss": 0.8756, "step": 10266 }, { "epoch": 0.84, "grad_norm": 2.985382741642617, "learning_rate": 6.628301021304734e-07, "loss": 0.3819, "step": 10267 }, { "epoch": 0.84, "grad_norm": 3.380448581255921, "learning_rate": 6.621716047912475e-07, "loss": 0.6911, "step": 10268 }, { "epoch": 0.84, "grad_norm": 5.063915509321915, "learning_rate": 6.615134115149135e-07, "loss": 0.839, "step": 10269 }, { "epoch": 0.84, "grad_norm": 3.769133946396099, "learning_rate": 6.608555223476088e-07, "loss": 0.7109, "step": 10270 }, { "epoch": 0.84, "grad_norm": 2.433360575241961, "learning_rate": 6.60197937335449e-07, "loss": 0.3123, "step": 10271 }, { "epoch": 0.84, "grad_norm": 4.813186992499145, "learning_rate": 6.595406565245255e-07, "loss": 0.9826, "step": 10272 }, { "epoch": 0.84, "grad_norm": 3.6466358439004374, "learning_rate": 6.588836799609128e-07, "loss": 0.6288, "step": 10273 }, { "epoch": 0.84, "grad_norm": 4.693817862440512, "learning_rate": 6.582270076906611e-07, "loss": 0.5749, "step": 10274 }, { "epoch": 0.84, "grad_norm": 6.070760684402229, "learning_rate": 6.57570639759802e-07, "loss": 1.4687, "step": 10275 }, { "epoch": 0.84, "grad_norm": 3.607951170928873, "learning_rate": 6.569145762143414e-07, "loss": 0.7955, "step": 10276 }, { "epoch": 0.84, "grad_norm": 5.837526405375264, "learning_rate": 6.562588171002688e-07, "loss": 0.8754, "step": 10277 }, { "epoch": 0.84, "grad_norm": 4.214752328843424, "learning_rate": 6.556033624635482e-07, "loss": 0.9623, "step": 10278 }, { "epoch": 0.84, "grad_norm": 3.033828828863102, "learning_rate": 6.549482123501249e-07, "loss": 0.3344, "step": 10279 }, { "epoch": 0.84, "grad_norm": 1.3399389391733743, "learning_rate": 6.542933668059226e-07, "loss": 0.1505, "step": 10280 }, { "epoch": 0.84, "grad_norm": 4.133479400402992, "learning_rate": 6.536388258768423e-07, "loss": 0.9193, "step": 10281 }, { "epoch": 0.84, "grad_norm": 1.5262125922080394, "learning_rate": 6.529845896087649e-07, "loss": 0.1784, "step": 10282 }, { "epoch": 0.84, "grad_norm": 3.4484274448162457, "learning_rate": 6.523306580475508e-07, "loss": 0.7567, "step": 10283 }, { "epoch": 0.84, "grad_norm": 5.2346819404260065, "learning_rate": 6.516770312390353e-07, "loss": 1.1449, "step": 10284 }, { "epoch": 0.84, "grad_norm": 4.732851697170656, "learning_rate": 6.510237092290361e-07, "loss": 0.6355, "step": 10285 }, { "epoch": 0.84, "grad_norm": 6.066102838902472, "learning_rate": 6.503706920633473e-07, "loss": 1.278, "step": 10286 }, { "epoch": 0.84, "grad_norm": 4.29385326911628, "learning_rate": 6.49717979787744e-07, "loss": 1.0592, "step": 10287 }, { "epoch": 0.84, "grad_norm": 2.741642006414545, "learning_rate": 6.490655724479789e-07, "loss": 0.4784, "step": 10288 }, { "epoch": 0.84, "grad_norm": 4.301411693753504, "learning_rate": 6.48413470089781e-07, "loss": 0.4115, "step": 10289 }, { "epoch": 0.84, "grad_norm": 3.9701793371450704, "learning_rate": 6.477616727588604e-07, "loss": 0.593, "step": 10290 }, { "epoch": 0.84, "grad_norm": 2.334020513818201, "learning_rate": 6.471101805009062e-07, "loss": 0.3358, "step": 10291 }, { "epoch": 0.84, "grad_norm": 4.042172667693351, "learning_rate": 6.464589933615839e-07, "loss": 0.5782, "step": 10292 }, { "epoch": 0.84, "grad_norm": 4.898454941842396, "learning_rate": 6.458081113865395e-07, "loss": 0.98, "step": 10293 }, { "epoch": 0.84, "grad_norm": 3.301511079307743, "learning_rate": 6.451575346213979e-07, "loss": 0.6028, "step": 10294 }, { "epoch": 0.84, "grad_norm": 2.127705479254126, "learning_rate": 6.4450726311176e-07, "loss": 0.2934, "step": 10295 }, { "epoch": 0.84, "grad_norm": 3.6716391467874003, "learning_rate": 6.438572969032075e-07, "loss": 0.495, "step": 10296 }, { "epoch": 0.84, "grad_norm": 2.8791457123016366, "learning_rate": 6.432076360413003e-07, "loss": 0.5122, "step": 10297 }, { "epoch": 0.84, "grad_norm": 3.1169069139262344, "learning_rate": 6.425582805715775e-07, "loss": 0.5587, "step": 10298 }, { "epoch": 0.84, "grad_norm": 2.140954051481482, "learning_rate": 6.419092305395552e-07, "loss": 0.3388, "step": 10299 }, { "epoch": 0.84, "grad_norm": 2.4818241671767765, "learning_rate": 6.412604859907306e-07, "loss": 0.46, "step": 10300 }, { "epoch": 0.84, "grad_norm": 4.585994386964461, "learning_rate": 6.406120469705757e-07, "loss": 0.6881, "step": 10301 }, { "epoch": 0.84, "grad_norm": 3.3936003516441566, "learning_rate": 6.399639135245439e-07, "loss": 0.6548, "step": 10302 }, { "epoch": 0.84, "grad_norm": 3.076143681485743, "learning_rate": 6.393160856980668e-07, "loss": 0.6639, "step": 10303 }, { "epoch": 0.84, "grad_norm": 5.277110114326273, "learning_rate": 6.386685635365542e-07, "loss": 0.8738, "step": 10304 }, { "epoch": 0.84, "grad_norm": 4.097791072129815, "learning_rate": 6.380213470853963e-07, "loss": 0.7573, "step": 10305 }, { "epoch": 0.84, "grad_norm": 2.6666051053495963, "learning_rate": 6.37374436389957e-07, "loss": 0.6457, "step": 10306 }, { "epoch": 0.84, "grad_norm": 6.189374771386946, "learning_rate": 6.367278314955849e-07, "loss": 1.1032, "step": 10307 }, { "epoch": 0.84, "grad_norm": 3.2975087719293557, "learning_rate": 6.360815324476016e-07, "loss": 0.5535, "step": 10308 }, { "epoch": 0.84, "grad_norm": 4.001540898596129, "learning_rate": 6.354355392913114e-07, "loss": 0.7347, "step": 10309 }, { "epoch": 0.84, "grad_norm": 3.903098744714241, "learning_rate": 6.347898520719947e-07, "loss": 0.7331, "step": 10310 }, { "epoch": 0.84, "grad_norm": 3.7852382356129795, "learning_rate": 6.341444708349131e-07, "loss": 0.9693, "step": 10311 }, { "epoch": 0.84, "grad_norm": 3.788993743466064, "learning_rate": 6.334993956253033e-07, "loss": 0.5701, "step": 10312 }, { "epoch": 0.84, "grad_norm": 3.4718749584589923, "learning_rate": 6.328546264883822e-07, "loss": 0.5135, "step": 10313 }, { "epoch": 0.84, "grad_norm": 2.1507870284409103, "learning_rate": 6.322101634693461e-07, "loss": 0.5019, "step": 10314 }, { "epoch": 0.84, "grad_norm": 3.6874629571572686, "learning_rate": 6.315660066133689e-07, "loss": 0.6376, "step": 10315 }, { "epoch": 0.84, "grad_norm": 3.5487397987975715, "learning_rate": 6.309221559656026e-07, "loss": 0.599, "step": 10316 }, { "epoch": 0.84, "grad_norm": 4.856529420525376, "learning_rate": 6.302786115711806e-07, "loss": 0.6894, "step": 10317 }, { "epoch": 0.84, "grad_norm": 5.266462849628899, "learning_rate": 6.296353734752098e-07, "loss": 0.9058, "step": 10318 }, { "epoch": 0.84, "grad_norm": 6.010416616131682, "learning_rate": 6.289924417227789e-07, "loss": 1.1745, "step": 10319 }, { "epoch": 0.84, "grad_norm": 1.1058518824775168, "learning_rate": 6.283498163589558e-07, "loss": 0.1765, "step": 10320 }, { "epoch": 0.84, "grad_norm": 5.802275931450062, "learning_rate": 6.277074974287856e-07, "loss": 1.0474, "step": 10321 }, { "epoch": 0.84, "grad_norm": 4.797194600107948, "learning_rate": 6.270654849772906e-07, "loss": 0.8021, "step": 10322 }, { "epoch": 0.84, "grad_norm": 3.4246262562427128, "learning_rate": 6.264237790494754e-07, "loss": 0.6082, "step": 10323 }, { "epoch": 0.84, "grad_norm": 3.4649718930823123, "learning_rate": 6.257823796903178e-07, "loss": 0.6519, "step": 10324 }, { "epoch": 0.84, "grad_norm": 3.05193623676796, "learning_rate": 6.251412869447793e-07, "loss": 0.3848, "step": 10325 }, { "epoch": 0.84, "grad_norm": 5.309915119979786, "learning_rate": 6.24500500857797e-07, "loss": 0.8005, "step": 10326 }, { "epoch": 0.84, "grad_norm": 4.254255006430809, "learning_rate": 6.238600214742868e-07, "loss": 0.58, "step": 10327 }, { "epoch": 0.84, "grad_norm": 3.961040158682524, "learning_rate": 6.232198488391461e-07, "loss": 0.6447, "step": 10328 }, { "epoch": 0.84, "grad_norm": 6.040144358348651, "learning_rate": 6.225799829972445e-07, "loss": 1.0723, "step": 10329 }, { "epoch": 0.84, "grad_norm": 4.395942298388709, "learning_rate": 6.219404239934357e-07, "loss": 0.8869, "step": 10330 }, { "epoch": 0.84, "grad_norm": 3.4425011509638686, "learning_rate": 6.213011718725493e-07, "loss": 0.3754, "step": 10331 }, { "epoch": 0.84, "grad_norm": 2.0282056723859623, "learning_rate": 6.206622266793949e-07, "loss": 0.2786, "step": 10332 }, { "epoch": 0.84, "grad_norm": 2.798718887193467, "learning_rate": 6.200235884587596e-07, "loss": 0.5698, "step": 10333 }, { "epoch": 0.84, "grad_norm": 4.575555834030352, "learning_rate": 6.193852572554104e-07, "loss": 0.8362, "step": 10334 }, { "epoch": 0.84, "grad_norm": 3.5107693830811364, "learning_rate": 6.187472331140887e-07, "loss": 0.6514, "step": 10335 }, { "epoch": 0.84, "grad_norm": 3.401100975202584, "learning_rate": 6.181095160795187e-07, "loss": 0.8509, "step": 10336 }, { "epoch": 0.84, "grad_norm": 2.982092836864028, "learning_rate": 6.174721061964029e-07, "loss": 0.6061, "step": 10337 }, { "epoch": 0.84, "grad_norm": 4.069443870464548, "learning_rate": 6.168350035094178e-07, "loss": 0.6044, "step": 10338 }, { "epoch": 0.85, "grad_norm": 3.6613953155838637, "learning_rate": 6.161982080632239e-07, "loss": 0.7821, "step": 10339 }, { "epoch": 0.85, "grad_norm": 2.951373078869165, "learning_rate": 6.155617199024588e-07, "loss": 0.5071, "step": 10340 }, { "epoch": 0.85, "grad_norm": 5.23635692772508, "learning_rate": 6.14925539071734e-07, "loss": 0.7125, "step": 10341 }, { "epoch": 0.85, "grad_norm": 1.2432538500796653, "learning_rate": 6.142896656156455e-07, "loss": 0.1723, "step": 10342 }, { "epoch": 0.85, "grad_norm": 2.1860302085802137, "learning_rate": 6.136540995787649e-07, "loss": 0.3348, "step": 10343 }, { "epoch": 0.85, "grad_norm": 4.883965192436654, "learning_rate": 6.13018841005642e-07, "loss": 0.8817, "step": 10344 }, { "epoch": 0.85, "grad_norm": 3.2281268341711797, "learning_rate": 6.12383889940808e-07, "loss": 0.566, "step": 10345 }, { "epoch": 0.85, "grad_norm": 4.594241528342523, "learning_rate": 6.117492464287666e-07, "loss": 0.9008, "step": 10346 }, { "epoch": 0.85, "grad_norm": 2.73225891709093, "learning_rate": 6.111149105140052e-07, "loss": 0.7543, "step": 10347 }, { "epoch": 0.85, "grad_norm": 2.8994344357396984, "learning_rate": 6.104808822409885e-07, "loss": 0.6075, "step": 10348 }, { "epoch": 0.85, "grad_norm": 3.8249853665661506, "learning_rate": 6.098471616541585e-07, "loss": 1.0379, "step": 10349 }, { "epoch": 0.85, "grad_norm": 4.6325995942495455, "learning_rate": 6.092137487979366e-07, "loss": 0.7691, "step": 10350 }, { "epoch": 0.85, "grad_norm": 5.325602957824057, "learning_rate": 6.08580643716723e-07, "loss": 1.0923, "step": 10351 }, { "epoch": 0.85, "grad_norm": 3.8762181860203797, "learning_rate": 6.079478464548938e-07, "loss": 0.8949, "step": 10352 }, { "epoch": 0.85, "grad_norm": 5.082466940308302, "learning_rate": 6.073153570568074e-07, "loss": 1.0848, "step": 10353 }, { "epoch": 0.85, "grad_norm": 5.0532638854790966, "learning_rate": 6.066831755667962e-07, "loss": 1.1865, "step": 10354 }, { "epoch": 0.85, "grad_norm": 2.5533008575699334, "learning_rate": 6.060513020291753e-07, "loss": 0.3113, "step": 10355 }, { "epoch": 0.85, "grad_norm": 2.5437168783894606, "learning_rate": 6.054197364882347e-07, "loss": 0.6903, "step": 10356 }, { "epoch": 0.85, "grad_norm": 3.869798916119071, "learning_rate": 6.047884789882469e-07, "loss": 0.653, "step": 10357 }, { "epoch": 0.85, "grad_norm": 5.146009182397108, "learning_rate": 6.041575295734576e-07, "loss": 1.0095, "step": 10358 }, { "epoch": 0.85, "grad_norm": 3.8993766307489413, "learning_rate": 6.035268882880941e-07, "loss": 0.4933, "step": 10359 }, { "epoch": 0.85, "grad_norm": 5.448991672717264, "learning_rate": 6.028965551763627e-07, "loss": 1.195, "step": 10360 }, { "epoch": 0.85, "grad_norm": 4.678067743838973, "learning_rate": 6.022665302824465e-07, "loss": 0.8999, "step": 10361 }, { "epoch": 0.85, "grad_norm": 4.6260758312453385, "learning_rate": 6.016368136505074e-07, "loss": 0.7788, "step": 10362 }, { "epoch": 0.85, "grad_norm": 6.046366637884028, "learning_rate": 6.010074053246872e-07, "loss": 0.8591, "step": 10363 }, { "epoch": 0.85, "grad_norm": 3.3503412128880736, "learning_rate": 6.003783053491025e-07, "loss": 0.5532, "step": 10364 }, { "epoch": 0.85, "grad_norm": 4.203630680515273, "learning_rate": 5.99749513767851e-07, "loss": 0.581, "step": 10365 }, { "epoch": 0.85, "grad_norm": 2.9230485157594908, "learning_rate": 5.99121030625009e-07, "loss": 0.4968, "step": 10366 }, { "epoch": 0.85, "grad_norm": 4.0128915740778766, "learning_rate": 5.984928559646297e-07, "loss": 0.8049, "step": 10367 }, { "epoch": 0.85, "grad_norm": 2.5001324671821212, "learning_rate": 5.97864989830747e-07, "loss": 0.4365, "step": 10368 }, { "epoch": 0.85, "grad_norm": 4.697993203346618, "learning_rate": 5.97237432267369e-07, "loss": 0.9607, "step": 10369 }, { "epoch": 0.85, "grad_norm": 2.456607685228635, "learning_rate": 5.966101833184873e-07, "loss": 0.4056, "step": 10370 }, { "epoch": 0.85, "grad_norm": 3.821843099442181, "learning_rate": 5.959832430280677e-07, "loss": 0.6264, "step": 10371 }, { "epoch": 0.85, "grad_norm": 3.903722547084163, "learning_rate": 5.953566114400555e-07, "loss": 0.65, "step": 10372 }, { "epoch": 0.85, "grad_norm": 4.238758982285471, "learning_rate": 5.947302885983763e-07, "loss": 0.8129, "step": 10373 }, { "epoch": 0.85, "grad_norm": 5.1879731369082, "learning_rate": 5.941042745469333e-07, "loss": 0.9396, "step": 10374 }, { "epoch": 0.85, "grad_norm": 3.6125945881433394, "learning_rate": 5.934785693296046e-07, "loss": 0.6156, "step": 10375 }, { "epoch": 0.85, "grad_norm": 3.0299076473916093, "learning_rate": 5.928531729902509e-07, "loss": 0.638, "step": 10376 }, { "epoch": 0.85, "grad_norm": 3.2482608712189336, "learning_rate": 5.922280855727103e-07, "loss": 0.4839, "step": 10377 }, { "epoch": 0.85, "grad_norm": 5.30757637027783, "learning_rate": 5.916033071207977e-07, "loss": 1.0658, "step": 10378 }, { "epoch": 0.85, "grad_norm": 4.601260972257547, "learning_rate": 5.909788376783083e-07, "loss": 0.7538, "step": 10379 }, { "epoch": 0.85, "grad_norm": 4.367104308567999, "learning_rate": 5.903546772890151e-07, "loss": 0.7123, "step": 10380 }, { "epoch": 0.85, "grad_norm": 4.307815347324862, "learning_rate": 5.897308259966672e-07, "loss": 0.7737, "step": 10381 }, { "epoch": 0.85, "grad_norm": 3.688619950415655, "learning_rate": 5.891072838449946e-07, "loss": 0.9054, "step": 10382 }, { "epoch": 0.85, "grad_norm": 4.309335159982341, "learning_rate": 5.884840508777056e-07, "loss": 0.6679, "step": 10383 }, { "epoch": 0.85, "grad_norm": 6.2618730791039985, "learning_rate": 5.878611271384865e-07, "loss": 1.2592, "step": 10384 }, { "epoch": 0.85, "grad_norm": 1.294292166667183, "learning_rate": 5.872385126709995e-07, "loss": 0.1747, "step": 10385 }, { "epoch": 0.85, "grad_norm": 3.068858159170679, "learning_rate": 5.866162075188892e-07, "loss": 0.5161, "step": 10386 }, { "epoch": 0.85, "grad_norm": 4.841731758237636, "learning_rate": 5.859942117257749e-07, "loss": 0.621, "step": 10387 }, { "epoch": 0.85, "grad_norm": 3.14131196717673, "learning_rate": 5.853725253352566e-07, "loss": 0.6049, "step": 10388 }, { "epoch": 0.85, "grad_norm": 3.604489147954973, "learning_rate": 5.847511483909119e-07, "loss": 0.5231, "step": 10389 }, { "epoch": 0.85, "grad_norm": 1.019636781178539, "learning_rate": 5.841300809362959e-07, "loss": 0.1193, "step": 10390 }, { "epoch": 0.85, "grad_norm": 3.7573726079017606, "learning_rate": 5.835093230149447e-07, "loss": 0.6389, "step": 10391 }, { "epoch": 0.85, "grad_norm": 3.6155034308134195, "learning_rate": 5.828888746703687e-07, "loss": 0.9993, "step": 10392 }, { "epoch": 0.85, "grad_norm": 2.5814248275023153, "learning_rate": 5.822687359460588e-07, "loss": 0.352, "step": 10393 }, { "epoch": 0.85, "grad_norm": 3.959016551167589, "learning_rate": 5.816489068854841e-07, "loss": 0.5663, "step": 10394 }, { "epoch": 0.85, "grad_norm": 3.4804949765515043, "learning_rate": 5.810293875320927e-07, "loss": 0.4629, "step": 10395 }, { "epoch": 0.85, "grad_norm": 4.340824583344594, "learning_rate": 5.804101779293098e-07, "loss": 0.9673, "step": 10396 }, { "epoch": 0.85, "grad_norm": 3.491235360132722, "learning_rate": 5.797912781205406e-07, "loss": 0.5077, "step": 10397 }, { "epoch": 0.85, "grad_norm": 3.178766204446659, "learning_rate": 5.791726881491644e-07, "loss": 0.6482, "step": 10398 }, { "epoch": 0.85, "grad_norm": 3.3425427217509744, "learning_rate": 5.785544080585437e-07, "loss": 0.5488, "step": 10399 }, { "epoch": 0.85, "grad_norm": 4.871187595458767, "learning_rate": 5.779364378920177e-07, "loss": 0.8559, "step": 10400 }, { "epoch": 0.85, "grad_norm": 4.938369484063271, "learning_rate": 5.773187776929017e-07, "loss": 0.9178, "step": 10401 }, { "epoch": 0.85, "grad_norm": 3.239734217262983, "learning_rate": 5.767014275044914e-07, "loss": 0.6713, "step": 10402 }, { "epoch": 0.85, "grad_norm": 4.226986391688428, "learning_rate": 5.760843873700622e-07, "loss": 0.7308, "step": 10403 }, { "epoch": 0.85, "grad_norm": 3.060829800690869, "learning_rate": 5.754676573328632e-07, "loss": 0.5538, "step": 10404 }, { "epoch": 0.85, "grad_norm": 2.299010938722108, "learning_rate": 5.74851237436126e-07, "loss": 0.3488, "step": 10405 }, { "epoch": 0.85, "grad_norm": 3.634979349676279, "learning_rate": 5.742351277230584e-07, "loss": 0.6518, "step": 10406 }, { "epoch": 0.85, "grad_norm": 4.758525735469257, "learning_rate": 5.736193282368474e-07, "loss": 1.0361, "step": 10407 }, { "epoch": 0.85, "grad_norm": 5.071298839841249, "learning_rate": 5.730038390206594e-07, "loss": 1.0557, "step": 10408 }, { "epoch": 0.85, "grad_norm": 4.511955381102928, "learning_rate": 5.723886601176343e-07, "loss": 0.8135, "step": 10409 }, { "epoch": 0.85, "grad_norm": 3.4612770054887276, "learning_rate": 5.717737915708954e-07, "loss": 0.6504, "step": 10410 }, { "epoch": 0.85, "grad_norm": 3.622655119430177, "learning_rate": 5.711592334235416e-07, "loss": 0.6042, "step": 10411 }, { "epoch": 0.85, "grad_norm": 3.370232020220817, "learning_rate": 5.705449857186518e-07, "loss": 0.4626, "step": 10412 }, { "epoch": 0.85, "grad_norm": 3.4557763354897095, "learning_rate": 5.699310484992809e-07, "loss": 0.679, "step": 10413 }, { "epoch": 0.85, "grad_norm": 5.834903902822914, "learning_rate": 5.693174218084652e-07, "loss": 1.3057, "step": 10414 }, { "epoch": 0.85, "grad_norm": 3.8354419775478115, "learning_rate": 5.687041056892145e-07, "loss": 0.6512, "step": 10415 }, { "epoch": 0.85, "grad_norm": 4.903765763800176, "learning_rate": 5.680911001845218e-07, "loss": 0.6977, "step": 10416 }, { "epoch": 0.85, "grad_norm": 3.8246310995433146, "learning_rate": 5.674784053373545e-07, "loss": 0.7733, "step": 10417 }, { "epoch": 0.85, "grad_norm": 4.159198786553831, "learning_rate": 5.668660211906607e-07, "loss": 0.939, "step": 10418 }, { "epoch": 0.85, "grad_norm": 4.1686359363891405, "learning_rate": 5.662539477873657e-07, "loss": 0.5318, "step": 10419 }, { "epoch": 0.85, "grad_norm": 4.145125587558952, "learning_rate": 5.656421851703742e-07, "loss": 0.8105, "step": 10420 }, { "epoch": 0.85, "grad_norm": 2.849234211932994, "learning_rate": 5.650307333825661e-07, "loss": 0.3998, "step": 10421 }, { "epoch": 0.85, "grad_norm": 2.3382026475057884, "learning_rate": 5.644195924668028e-07, "loss": 0.4322, "step": 10422 }, { "epoch": 0.85, "grad_norm": 2.753214314856311, "learning_rate": 5.638087624659216e-07, "loss": 0.3664, "step": 10423 }, { "epoch": 0.85, "grad_norm": 2.688637428225327, "learning_rate": 5.631982434227406e-07, "loss": 0.5746, "step": 10424 }, { "epoch": 0.85, "grad_norm": 3.493311422581332, "learning_rate": 5.625880353800545e-07, "loss": 0.6409, "step": 10425 }, { "epoch": 0.85, "grad_norm": 4.5404767230707925, "learning_rate": 5.619781383806345e-07, "loss": 0.7954, "step": 10426 }, { "epoch": 0.85, "grad_norm": 3.6385739608008625, "learning_rate": 5.613685524672318e-07, "loss": 0.7876, "step": 10427 }, { "epoch": 0.85, "grad_norm": 4.242105143127419, "learning_rate": 5.607592776825777e-07, "loss": 0.7932, "step": 10428 }, { "epoch": 0.85, "grad_norm": 5.131498899837967, "learning_rate": 5.601503140693782e-07, "loss": 0.6301, "step": 10429 }, { "epoch": 0.85, "grad_norm": 4.150034675458454, "learning_rate": 5.595416616703203e-07, "loss": 0.688, "step": 10430 }, { "epoch": 0.85, "grad_norm": 4.3659764124021025, "learning_rate": 5.589333205280662e-07, "loss": 0.8211, "step": 10431 }, { "epoch": 0.85, "grad_norm": 3.9082421698650633, "learning_rate": 5.583252906852594e-07, "loss": 0.9047, "step": 10432 }, { "epoch": 0.85, "grad_norm": 4.194612789126027, "learning_rate": 5.577175721845185e-07, "loss": 0.5861, "step": 10433 }, { "epoch": 0.85, "grad_norm": 5.315130226747433, "learning_rate": 5.571101650684435e-07, "loss": 1.2291, "step": 10434 }, { "epoch": 0.85, "grad_norm": 2.723126091057395, "learning_rate": 5.565030693796098e-07, "loss": 0.5973, "step": 10435 }, { "epoch": 0.85, "grad_norm": 4.598692276891783, "learning_rate": 5.558962851605731e-07, "loss": 0.7576, "step": 10436 }, { "epoch": 0.85, "grad_norm": 3.2228214330867964, "learning_rate": 5.552898124538669e-07, "loss": 0.5391, "step": 10437 }, { "epoch": 0.85, "grad_norm": 4.220146649411876, "learning_rate": 5.546836513020004e-07, "loss": 0.6394, "step": 10438 }, { "epoch": 0.85, "grad_norm": 2.742810055429589, "learning_rate": 5.540778017474635e-07, "loss": 0.2506, "step": 10439 }, { "epoch": 0.85, "grad_norm": 4.035624938028897, "learning_rate": 5.534722638327245e-07, "loss": 0.652, "step": 10440 }, { "epoch": 0.85, "grad_norm": 3.690919519405272, "learning_rate": 5.528670376002282e-07, "loss": 0.5183, "step": 10441 }, { "epoch": 0.85, "grad_norm": 2.442735923919764, "learning_rate": 5.522621230923986e-07, "loss": 0.6551, "step": 10442 }, { "epoch": 0.85, "grad_norm": 4.765068024938463, "learning_rate": 5.516575203516389e-07, "loss": 0.839, "step": 10443 }, { "epoch": 0.85, "grad_norm": 3.2785990526068276, "learning_rate": 5.510532294203264e-07, "loss": 0.5858, "step": 10444 }, { "epoch": 0.85, "grad_norm": 4.778841998641732, "learning_rate": 5.504492503408204e-07, "loss": 0.8196, "step": 10445 }, { "epoch": 0.85, "grad_norm": 5.085316672922174, "learning_rate": 5.498455831554589e-07, "loss": 1.2685, "step": 10446 }, { "epoch": 0.85, "grad_norm": 4.967054918121122, "learning_rate": 5.492422279065535e-07, "loss": 0.8716, "step": 10447 }, { "epoch": 0.85, "grad_norm": 4.017089926733228, "learning_rate": 5.486391846363998e-07, "loss": 0.6948, "step": 10448 }, { "epoch": 0.85, "grad_norm": 2.960935176016891, "learning_rate": 5.48036453387265e-07, "loss": 0.5706, "step": 10449 }, { "epoch": 0.85, "grad_norm": 4.29836270974255, "learning_rate": 5.474340342014007e-07, "loss": 0.6272, "step": 10450 }, { "epoch": 0.85, "grad_norm": 3.618055713945523, "learning_rate": 5.468319271210326e-07, "loss": 0.743, "step": 10451 }, { "epoch": 0.85, "grad_norm": 2.777227654941848, "learning_rate": 5.462301321883661e-07, "loss": 0.4506, "step": 10452 }, { "epoch": 0.85, "grad_norm": 3.755869482729697, "learning_rate": 5.456286494455843e-07, "loss": 0.6088, "step": 10453 }, { "epoch": 0.85, "grad_norm": 3.6009071215185906, "learning_rate": 5.450274789348497e-07, "loss": 0.9747, "step": 10454 }, { "epoch": 0.85, "grad_norm": 4.5863105045010215, "learning_rate": 5.444266206983001e-07, "loss": 1.3258, "step": 10455 }, { "epoch": 0.85, "grad_norm": 4.419252475761903, "learning_rate": 5.438260747780532e-07, "loss": 0.7476, "step": 10456 }, { "epoch": 0.85, "grad_norm": 4.419854446947665, "learning_rate": 5.432258412162056e-07, "loss": 1.1689, "step": 10457 }, { "epoch": 0.85, "grad_norm": 2.0074298968463062, "learning_rate": 5.4262592005483e-07, "loss": 0.3632, "step": 10458 }, { "epoch": 0.85, "grad_norm": 3.225202148603655, "learning_rate": 5.420263113359791e-07, "loss": 0.6382, "step": 10459 }, { "epoch": 0.85, "grad_norm": 4.111944379787736, "learning_rate": 5.414270151016843e-07, "loss": 0.5012, "step": 10460 }, { "epoch": 0.86, "grad_norm": 3.6107706317948147, "learning_rate": 5.408280313939502e-07, "loss": 0.4584, "step": 10461 }, { "epoch": 0.86, "grad_norm": 1.9131447266796116, "learning_rate": 5.402293602547659e-07, "loss": 0.3841, "step": 10462 }, { "epoch": 0.86, "grad_norm": 3.9771864336352314, "learning_rate": 5.396310017260931e-07, "loss": 0.8553, "step": 10463 }, { "epoch": 0.86, "grad_norm": 4.495631673597813, "learning_rate": 5.390329558498759e-07, "loss": 0.9242, "step": 10464 }, { "epoch": 0.86, "grad_norm": 5.0870467055378406, "learning_rate": 5.384352226680356e-07, "loss": 1.0474, "step": 10465 }, { "epoch": 0.86, "grad_norm": 5.110527224780676, "learning_rate": 5.378378022224679e-07, "loss": 0.8318, "step": 10466 }, { "epoch": 0.86, "grad_norm": 1.7325885690721872, "learning_rate": 5.372406945550507e-07, "loss": 0.1975, "step": 10467 }, { "epoch": 0.86, "grad_norm": 2.80887620719976, "learning_rate": 5.366438997076396e-07, "loss": 0.2936, "step": 10468 }, { "epoch": 0.86, "grad_norm": 4.850940394131275, "learning_rate": 5.360474177220659e-07, "loss": 1.162, "step": 10469 }, { "epoch": 0.86, "grad_norm": 4.180895013424875, "learning_rate": 5.354512486401409e-07, "loss": 0.4575, "step": 10470 }, { "epoch": 0.86, "grad_norm": 2.8108880858287897, "learning_rate": 5.348553925036553e-07, "loss": 0.5706, "step": 10471 }, { "epoch": 0.86, "grad_norm": 1.8481081046692012, "learning_rate": 5.342598493543727e-07, "loss": 0.3095, "step": 10472 }, { "epoch": 0.86, "grad_norm": 4.895634104931042, "learning_rate": 5.3366461923404e-07, "loss": 0.7126, "step": 10473 }, { "epoch": 0.86, "grad_norm": 3.960115078884639, "learning_rate": 5.330697021843795e-07, "loss": 0.6181, "step": 10474 }, { "epoch": 0.86, "grad_norm": 3.34098382409636, "learning_rate": 5.324750982470933e-07, "loss": 0.5289, "step": 10475 }, { "epoch": 0.86, "grad_norm": 3.844540506448431, "learning_rate": 5.318808074638598e-07, "loss": 0.7852, "step": 10476 }, { "epoch": 0.86, "grad_norm": 2.316453704559759, "learning_rate": 5.312868298763374e-07, "loss": 0.4248, "step": 10477 }, { "epoch": 0.86, "grad_norm": 5.065511357492012, "learning_rate": 5.306931655261588e-07, "loss": 1.1797, "step": 10478 }, { "epoch": 0.86, "grad_norm": 5.088625813835447, "learning_rate": 5.300998144549402e-07, "loss": 0.7932, "step": 10479 }, { "epoch": 0.86, "grad_norm": 4.17514705001973, "learning_rate": 5.29506776704271e-07, "loss": 0.5734, "step": 10480 }, { "epoch": 0.86, "grad_norm": 3.5910957602546696, "learning_rate": 5.289140523157205e-07, "loss": 0.8801, "step": 10481 }, { "epoch": 0.86, "grad_norm": 3.6013748244996906, "learning_rate": 5.283216413308367e-07, "loss": 0.8214, "step": 10482 }, { "epoch": 0.86, "grad_norm": 3.6717264426904643, "learning_rate": 5.277295437911462e-07, "loss": 0.6352, "step": 10483 }, { "epoch": 0.86, "grad_norm": 4.371896629040389, "learning_rate": 5.271377597381505e-07, "loss": 0.5906, "step": 10484 }, { "epoch": 0.86, "grad_norm": 3.368054379749993, "learning_rate": 5.265462892133317e-07, "loss": 0.4887, "step": 10485 }, { "epoch": 0.86, "grad_norm": 2.8464376430535165, "learning_rate": 5.259551322581496e-07, "loss": 0.3931, "step": 10486 }, { "epoch": 0.86, "grad_norm": 3.399816522319251, "learning_rate": 5.253642889140414e-07, "loss": 0.4729, "step": 10487 }, { "epoch": 0.86, "grad_norm": 3.2664370652522123, "learning_rate": 5.247737592224239e-07, "loss": 0.304, "step": 10488 }, { "epoch": 0.86, "grad_norm": 3.5684092464370605, "learning_rate": 5.241835432246888e-07, "loss": 0.4109, "step": 10489 }, { "epoch": 0.86, "grad_norm": 3.106307996943481, "learning_rate": 5.235936409622083e-07, "loss": 0.6459, "step": 10490 }, { "epoch": 0.86, "grad_norm": 5.120738502869185, "learning_rate": 5.230040524763325e-07, "loss": 1.2144, "step": 10491 }, { "epoch": 0.86, "grad_norm": 5.702304509190002, "learning_rate": 5.224147778083882e-07, "loss": 1.2657, "step": 10492 }, { "epoch": 0.86, "grad_norm": 4.913241630622584, "learning_rate": 5.218258169996825e-07, "loss": 1.1311, "step": 10493 }, { "epoch": 0.86, "grad_norm": 3.9066351228096754, "learning_rate": 5.212371700914976e-07, "loss": 0.7973, "step": 10494 }, { "epoch": 0.86, "grad_norm": 5.202969004089788, "learning_rate": 5.206488371250956e-07, "loss": 1.1047, "step": 10495 }, { "epoch": 0.86, "grad_norm": 3.6915185468451486, "learning_rate": 5.200608181417155e-07, "loss": 0.9084, "step": 10496 }, { "epoch": 0.86, "grad_norm": 3.2002742632248755, "learning_rate": 5.194731131825754e-07, "loss": 0.6965, "step": 10497 }, { "epoch": 0.86, "grad_norm": 3.1255230862771204, "learning_rate": 5.188857222888699e-07, "loss": 0.386, "step": 10498 }, { "epoch": 0.86, "grad_norm": 4.113045842082105, "learning_rate": 5.182986455017741e-07, "loss": 0.7443, "step": 10499 }, { "epoch": 0.86, "grad_norm": 3.8650408809820687, "learning_rate": 5.177118828624395e-07, "loss": 0.8432, "step": 10500 }, { "epoch": 0.86, "grad_norm": 4.533967590398182, "learning_rate": 5.171254344119941e-07, "loss": 1.0344, "step": 10501 }, { "epoch": 0.86, "grad_norm": 4.553799879888968, "learning_rate": 5.165393001915464e-07, "loss": 0.7334, "step": 10502 }, { "epoch": 0.86, "grad_norm": 2.568057645612484, "learning_rate": 5.159534802421817e-07, "loss": 0.272, "step": 10503 }, { "epoch": 0.86, "grad_norm": 2.892616461853972, "learning_rate": 5.153679746049628e-07, "loss": 0.2415, "step": 10504 }, { "epoch": 0.86, "grad_norm": 3.2287915215924756, "learning_rate": 5.147827833209334e-07, "loss": 0.2845, "step": 10505 }, { "epoch": 0.86, "grad_norm": 2.98277451964769, "learning_rate": 5.141979064311098e-07, "loss": 0.5627, "step": 10506 }, { "epoch": 0.86, "grad_norm": 4.759412696679251, "learning_rate": 5.136133439764907e-07, "loss": 0.8121, "step": 10507 }, { "epoch": 0.86, "grad_norm": 3.6435535949517086, "learning_rate": 5.130290959980511e-07, "loss": 0.5884, "step": 10508 }, { "epoch": 0.86, "grad_norm": 3.5847272345069072, "learning_rate": 5.12445162536746e-07, "loss": 0.6392, "step": 10509 }, { "epoch": 0.86, "grad_norm": 2.7730194330075157, "learning_rate": 5.11861543633504e-07, "loss": 0.5449, "step": 10510 }, { "epoch": 0.86, "grad_norm": 4.836953255912099, "learning_rate": 5.112782393292359e-07, "loss": 0.8827, "step": 10511 }, { "epoch": 0.86, "grad_norm": 2.7660198150343227, "learning_rate": 5.106952496648276e-07, "loss": 0.5007, "step": 10512 }, { "epoch": 0.86, "grad_norm": 3.33503023950577, "learning_rate": 5.101125746811447e-07, "loss": 0.6088, "step": 10513 }, { "epoch": 0.86, "grad_norm": 3.8980156264743555, "learning_rate": 5.095302144190307e-07, "loss": 0.6212, "step": 10514 }, { "epoch": 0.86, "grad_norm": 4.84448207594496, "learning_rate": 5.089481689193054e-07, "loss": 0.8081, "step": 10515 }, { "epoch": 0.86, "grad_norm": 4.64459833793355, "learning_rate": 5.083664382227688e-07, "loss": 0.8825, "step": 10516 }, { "epoch": 0.86, "grad_norm": 2.043838681594187, "learning_rate": 5.07785022370198e-07, "loss": 0.3449, "step": 10517 }, { "epoch": 0.86, "grad_norm": 4.154060146279045, "learning_rate": 5.072039214023461e-07, "loss": 0.7281, "step": 10518 }, { "epoch": 0.86, "grad_norm": 3.96466531977924, "learning_rate": 5.06623135359946e-07, "loss": 0.8337, "step": 10519 }, { "epoch": 0.86, "grad_norm": 4.676759933945837, "learning_rate": 5.060426642837096e-07, "loss": 0.6837, "step": 10520 }, { "epoch": 0.86, "grad_norm": 4.565229230187216, "learning_rate": 5.054625082143244e-07, "loss": 0.9036, "step": 10521 }, { "epoch": 0.86, "grad_norm": 6.105733286335197, "learning_rate": 5.048826671924573e-07, "loss": 0.8276, "step": 10522 }, { "epoch": 0.86, "grad_norm": 4.453506602357791, "learning_rate": 5.043031412587529e-07, "loss": 0.7909, "step": 10523 }, { "epoch": 0.86, "grad_norm": 2.4796082685018095, "learning_rate": 5.037239304538328e-07, "loss": 0.3556, "step": 10524 }, { "epoch": 0.86, "grad_norm": 4.4196196199598, "learning_rate": 5.031450348182976e-07, "loss": 0.9265, "step": 10525 }, { "epoch": 0.86, "grad_norm": 1.712548641642958, "learning_rate": 5.025664543927239e-07, "loss": 0.3009, "step": 10526 }, { "epoch": 0.86, "grad_norm": 5.949476720099252, "learning_rate": 5.019881892176692e-07, "loss": 1.3988, "step": 10527 }, { "epoch": 0.86, "grad_norm": 3.287437585260928, "learning_rate": 5.014102393336684e-07, "loss": 0.7728, "step": 10528 }, { "epoch": 0.86, "grad_norm": 3.608351739883492, "learning_rate": 5.008326047812306e-07, "loss": 0.7378, "step": 10529 }, { "epoch": 0.86, "grad_norm": 4.24070022130551, "learning_rate": 5.002552856008463e-07, "loss": 1.0254, "step": 10530 }, { "epoch": 0.86, "grad_norm": 3.3390730038185388, "learning_rate": 4.996782818329843e-07, "loss": 0.6737, "step": 10531 }, { "epoch": 0.86, "grad_norm": 4.083408045700113, "learning_rate": 4.991015935180887e-07, "loss": 0.742, "step": 10532 }, { "epoch": 0.86, "grad_norm": 4.174766349101573, "learning_rate": 4.985252206965841e-07, "loss": 0.6072, "step": 10533 }, { "epoch": 0.86, "grad_norm": 4.172485055619851, "learning_rate": 4.979491634088712e-07, "loss": 0.6658, "step": 10534 }, { "epoch": 0.86, "grad_norm": 5.402005108799347, "learning_rate": 4.973734216953285e-07, "loss": 0.8842, "step": 10535 }, { "epoch": 0.86, "grad_norm": 5.90056761758074, "learning_rate": 4.967979955963132e-07, "loss": 1.0879, "step": 10536 }, { "epoch": 0.86, "grad_norm": 2.39097764998649, "learning_rate": 4.962228851521606e-07, "loss": 0.27, "step": 10537 }, { "epoch": 0.86, "grad_norm": 3.060768350538714, "learning_rate": 4.956480904031829e-07, "loss": 0.7793, "step": 10538 }, { "epoch": 0.86, "grad_norm": 3.0608895948984265, "learning_rate": 4.950736113896726e-07, "loss": 0.4767, "step": 10539 }, { "epoch": 0.86, "grad_norm": 3.2562936229286406, "learning_rate": 4.94499448151895e-07, "loss": 0.5844, "step": 10540 }, { "epoch": 0.86, "grad_norm": 3.4369122871795086, "learning_rate": 4.939256007300997e-07, "loss": 0.6791, "step": 10541 }, { "epoch": 0.86, "grad_norm": 4.343019423477376, "learning_rate": 4.933520691645078e-07, "loss": 0.7955, "step": 10542 }, { "epoch": 0.86, "grad_norm": 2.4430208185765965, "learning_rate": 4.927788534953232e-07, "loss": 0.2051, "step": 10543 }, { "epoch": 0.86, "grad_norm": 4.375757953035753, "learning_rate": 4.922059537627249e-07, "loss": 0.8462, "step": 10544 }, { "epoch": 0.86, "grad_norm": 4.787432113919724, "learning_rate": 4.916333700068732e-07, "loss": 0.7809, "step": 10545 }, { "epoch": 0.86, "grad_norm": 3.277283723496338, "learning_rate": 4.910611022679002e-07, "loss": 0.8131, "step": 10546 }, { "epoch": 0.86, "grad_norm": 4.915573451282051, "learning_rate": 4.904891505859211e-07, "loss": 0.7347, "step": 10547 }, { "epoch": 0.86, "grad_norm": 4.869333183646448, "learning_rate": 4.899175150010266e-07, "loss": 1.0938, "step": 10548 }, { "epoch": 0.86, "grad_norm": 4.985754897390461, "learning_rate": 4.893461955532869e-07, "loss": 0.7476, "step": 10549 }, { "epoch": 0.86, "grad_norm": 5.600746051641911, "learning_rate": 4.887751922827483e-07, "loss": 1.5825, "step": 10550 }, { "epoch": 0.86, "grad_norm": 4.604774612678929, "learning_rate": 4.882045052294371e-07, "loss": 0.7399, "step": 10551 }, { "epoch": 0.86, "grad_norm": 5.329949394899275, "learning_rate": 4.876341344333535e-07, "loss": 0.9543, "step": 10552 }, { "epoch": 0.86, "grad_norm": 3.183678652065017, "learning_rate": 4.870640799344789e-07, "loss": 0.6445, "step": 10553 }, { "epoch": 0.86, "grad_norm": 2.619817988896082, "learning_rate": 4.864943417727719e-07, "loss": 0.2499, "step": 10554 }, { "epoch": 0.86, "grad_norm": 2.7996139038893224, "learning_rate": 4.8592491998817e-07, "loss": 0.6501, "step": 10555 }, { "epoch": 0.86, "grad_norm": 3.8476390589206884, "learning_rate": 4.85355814620585e-07, "loss": 0.5333, "step": 10556 }, { "epoch": 0.86, "grad_norm": 1.6991815993314117, "learning_rate": 4.847870257099102e-07, "loss": 0.2765, "step": 10557 }, { "epoch": 0.86, "grad_norm": 4.626329793778804, "learning_rate": 4.842185532960142e-07, "loss": 0.7608, "step": 10558 }, { "epoch": 0.86, "grad_norm": 4.239040453232224, "learning_rate": 4.836503974187446e-07, "loss": 0.6292, "step": 10559 }, { "epoch": 0.86, "grad_norm": 4.090350654471295, "learning_rate": 4.830825581179266e-07, "loss": 0.8787, "step": 10560 }, { "epoch": 0.86, "grad_norm": 3.8632163414524663, "learning_rate": 4.825150354333641e-07, "loss": 0.8996, "step": 10561 }, { "epoch": 0.86, "grad_norm": 4.597256036373677, "learning_rate": 4.819478294048368e-07, "loss": 0.8609, "step": 10562 }, { "epoch": 0.86, "grad_norm": 3.8255565498335624, "learning_rate": 4.813809400721053e-07, "loss": 0.7586, "step": 10563 }, { "epoch": 0.86, "grad_norm": 5.747533371702565, "learning_rate": 4.808143674749044e-07, "loss": 1.0456, "step": 10564 }, { "epoch": 0.86, "grad_norm": 3.7978246930803086, "learning_rate": 4.802481116529484e-07, "loss": 0.8228, "step": 10565 }, { "epoch": 0.86, "grad_norm": 4.369936623859059, "learning_rate": 4.796821726459294e-07, "loss": 0.9767, "step": 10566 }, { "epoch": 0.86, "grad_norm": 4.80137353729828, "learning_rate": 4.79116550493518e-07, "loss": 0.759, "step": 10567 }, { "epoch": 0.86, "grad_norm": 3.835421858453713, "learning_rate": 4.785512452353619e-07, "loss": 0.7454, "step": 10568 }, { "epoch": 0.86, "grad_norm": 3.034874998674549, "learning_rate": 4.77986256911086e-07, "loss": 0.3137, "step": 10569 }, { "epoch": 0.86, "grad_norm": 5.184732909644232, "learning_rate": 4.774215855602932e-07, "loss": 0.847, "step": 10570 }, { "epoch": 0.86, "grad_norm": 3.991251299295491, "learning_rate": 4.768572312225645e-07, "loss": 0.5923, "step": 10571 }, { "epoch": 0.86, "grad_norm": 3.1722346498282974, "learning_rate": 4.762931939374604e-07, "loss": 0.5217, "step": 10572 }, { "epoch": 0.86, "grad_norm": 4.769833138335216, "learning_rate": 4.7572947374451563e-07, "loss": 0.6477, "step": 10573 }, { "epoch": 0.86, "grad_norm": 3.4207100410905196, "learning_rate": 4.751660706832456e-07, "loss": 0.5049, "step": 10574 }, { "epoch": 0.86, "grad_norm": 4.179637135441105, "learning_rate": 4.746029847931405e-07, "loss": 0.6807, "step": 10575 }, { "epoch": 0.86, "grad_norm": 2.9932773081312645, "learning_rate": 4.74040216113672e-07, "loss": 0.4292, "step": 10576 }, { "epoch": 0.86, "grad_norm": 2.1567134578312914, "learning_rate": 4.7347776468428755e-07, "loss": 0.2362, "step": 10577 }, { "epoch": 0.86, "grad_norm": 2.808502722109823, "learning_rate": 4.729156305444121e-07, "loss": 0.2996, "step": 10578 }, { "epoch": 0.86, "grad_norm": 3.193178091505855, "learning_rate": 4.7235381373344877e-07, "loss": 0.34, "step": 10579 }, { "epoch": 0.86, "grad_norm": 4.854881487942244, "learning_rate": 4.717923142907799e-07, "loss": 1.1866, "step": 10580 }, { "epoch": 0.86, "grad_norm": 1.1034841366520098, "learning_rate": 4.7123113225576223e-07, "loss": 0.1515, "step": 10581 }, { "epoch": 0.86, "grad_norm": 3.9591419888299337, "learning_rate": 4.7067026766773273e-07, "loss": 1.0687, "step": 10582 }, { "epoch": 0.87, "grad_norm": 5.852620985285636, "learning_rate": 4.701097205660055e-07, "loss": 1.2246, "step": 10583 }, { "epoch": 0.87, "grad_norm": 2.440507337223321, "learning_rate": 4.695494909898729e-07, "loss": 0.4791, "step": 10584 }, { "epoch": 0.87, "grad_norm": 6.249098578944652, "learning_rate": 4.689895789786059e-07, "loss": 1.2812, "step": 10585 }, { "epoch": 0.87, "grad_norm": 4.578958964412463, "learning_rate": 4.684299845714485e-07, "loss": 0.5956, "step": 10586 }, { "epoch": 0.87, "grad_norm": 2.58440156649555, "learning_rate": 4.6787070780762833e-07, "loss": 0.3894, "step": 10587 }, { "epoch": 0.87, "grad_norm": 4.827490135978755, "learning_rate": 4.6731174872634844e-07, "loss": 1.1186, "step": 10588 }, { "epoch": 0.87, "grad_norm": 4.8791858692725185, "learning_rate": 4.6675310736678746e-07, "loss": 1.1343, "step": 10589 }, { "epoch": 0.87, "grad_norm": 2.45158548480915, "learning_rate": 4.661947837681052e-07, "loss": 0.3851, "step": 10590 }, { "epoch": 0.87, "grad_norm": 4.8440433803230345, "learning_rate": 4.656367779694382e-07, "loss": 1.0207, "step": 10591 }, { "epoch": 0.87, "grad_norm": 4.627636686901322, "learning_rate": 4.650790900098989e-07, "loss": 0.5841, "step": 10592 }, { "epoch": 0.87, "grad_norm": 6.053180697309185, "learning_rate": 4.6452171992857895e-07, "loss": 0.9651, "step": 10593 }, { "epoch": 0.87, "grad_norm": 3.9797866509653845, "learning_rate": 4.6396466776454816e-07, "loss": 0.6952, "step": 10594 }, { "epoch": 0.87, "grad_norm": 5.283156809275132, "learning_rate": 4.634079335568531e-07, "loss": 1.3423, "step": 10595 }, { "epoch": 0.87, "grad_norm": 4.098370401479687, "learning_rate": 4.628515173445186e-07, "loss": 0.9491, "step": 10596 }, { "epoch": 0.87, "grad_norm": 5.4882523325858426, "learning_rate": 4.6229541916654797e-07, "loss": 0.6638, "step": 10597 }, { "epoch": 0.87, "grad_norm": 4.21272766290156, "learning_rate": 4.6173963906191945e-07, "loss": 0.6257, "step": 10598 }, { "epoch": 0.87, "grad_norm": 4.578440361226375, "learning_rate": 4.611841770695913e-07, "loss": 0.6912, "step": 10599 }, { "epoch": 0.87, "grad_norm": 3.7406030006374236, "learning_rate": 4.6062903322849963e-07, "loss": 0.5074, "step": 10600 }, { "epoch": 0.87, "grad_norm": 2.971787159460415, "learning_rate": 4.600742075775572e-07, "loss": 0.4895, "step": 10601 }, { "epoch": 0.87, "grad_norm": 3.549420946392138, "learning_rate": 4.5951970015565617e-07, "loss": 0.527, "step": 10602 }, { "epoch": 0.87, "grad_norm": 4.49758116442198, "learning_rate": 4.5896551100166273e-07, "loss": 0.7076, "step": 10603 }, { "epoch": 0.87, "grad_norm": 4.221557375377842, "learning_rate": 4.584116401544253e-07, "loss": 0.8774, "step": 10604 }, { "epoch": 0.87, "grad_norm": 4.704012548444121, "learning_rate": 4.578580876527661e-07, "loss": 0.905, "step": 10605 }, { "epoch": 0.87, "grad_norm": 5.8933981792163435, "learning_rate": 4.573048535354874e-07, "loss": 1.2283, "step": 10606 }, { "epoch": 0.87, "grad_norm": 3.6890289491684216, "learning_rate": 4.5675193784136873e-07, "loss": 0.7169, "step": 10607 }, { "epoch": 0.87, "grad_norm": 3.078110281832308, "learning_rate": 4.5619934060916747e-07, "loss": 0.4362, "step": 10608 }, { "epoch": 0.87, "grad_norm": 5.943365923867175, "learning_rate": 4.556470618776171e-07, "loss": 1.4185, "step": 10609 }, { "epoch": 0.87, "grad_norm": 3.7983566498192602, "learning_rate": 4.5509510168543045e-07, "loss": 0.7491, "step": 10610 }, { "epoch": 0.87, "grad_norm": 3.1864928678226625, "learning_rate": 4.545434600712978e-07, "loss": 0.6091, "step": 10611 }, { "epoch": 0.87, "grad_norm": 4.680425124795627, "learning_rate": 4.5399213707388645e-07, "loss": 1.067, "step": 10612 }, { "epoch": 0.87, "grad_norm": 3.2976887527520815, "learning_rate": 4.5344113273184223e-07, "loss": 0.6837, "step": 10613 }, { "epoch": 0.87, "grad_norm": 2.247737177096619, "learning_rate": 4.5289044708378914e-07, "loss": 0.2153, "step": 10614 }, { "epoch": 0.87, "grad_norm": 3.1577186393115704, "learning_rate": 4.523400801683253e-07, "loss": 0.4013, "step": 10615 }, { "epoch": 0.87, "grad_norm": 5.368416540525366, "learning_rate": 4.517900320240304e-07, "loss": 1.1825, "step": 10616 }, { "epoch": 0.87, "grad_norm": 4.433922677016759, "learning_rate": 4.512403026894607e-07, "loss": 0.6473, "step": 10617 }, { "epoch": 0.87, "grad_norm": 5.099567381738013, "learning_rate": 4.5069089220315e-07, "loss": 1.078, "step": 10618 }, { "epoch": 0.87, "grad_norm": 4.298397952514416, "learning_rate": 4.5014180060360843e-07, "loss": 0.7946, "step": 10619 }, { "epoch": 0.87, "grad_norm": 5.172604711180217, "learning_rate": 4.4959302792932645e-07, "loss": 1.2498, "step": 10620 }, { "epoch": 0.87, "grad_norm": 4.602856346642938, "learning_rate": 4.490445742187688e-07, "loss": 0.6376, "step": 10621 }, { "epoch": 0.87, "grad_norm": 4.904344180262937, "learning_rate": 4.484964395103808e-07, "loss": 0.8726, "step": 10622 }, { "epoch": 0.87, "grad_norm": 3.3263918944209077, "learning_rate": 4.47948623842584e-07, "loss": 0.4186, "step": 10623 }, { "epoch": 0.87, "grad_norm": 3.617491450908338, "learning_rate": 4.4740112725377817e-07, "loss": 0.7986, "step": 10624 }, { "epoch": 0.87, "grad_norm": 4.1027863871710375, "learning_rate": 4.468539497823399e-07, "loss": 0.7861, "step": 10625 }, { "epoch": 0.87, "grad_norm": 2.262894681306143, "learning_rate": 4.4630709146662623e-07, "loss": 0.4552, "step": 10626 }, { "epoch": 0.87, "grad_norm": 4.224403031382618, "learning_rate": 4.4576055234496595e-07, "loss": 0.8116, "step": 10627 }, { "epoch": 0.87, "grad_norm": 5.141197113716972, "learning_rate": 4.4521433245567127e-07, "loss": 0.7435, "step": 10628 }, { "epoch": 0.87, "grad_norm": 3.5200730619412, "learning_rate": 4.446684318370292e-07, "loss": 0.5186, "step": 10629 }, { "epoch": 0.87, "grad_norm": 5.16613430209177, "learning_rate": 4.4412285052730543e-07, "loss": 0.8888, "step": 10630 }, { "epoch": 0.87, "grad_norm": 6.131999128338534, "learning_rate": 4.435775885647431e-07, "loss": 0.9811, "step": 10631 }, { "epoch": 0.87, "grad_norm": 3.7427335505554438, "learning_rate": 4.4303264598756167e-07, "loss": 0.6292, "step": 10632 }, { "epoch": 0.87, "grad_norm": 4.312167453246354, "learning_rate": 4.4248802283395953e-07, "loss": 0.7849, "step": 10633 }, { "epoch": 0.87, "grad_norm": 2.724819689411017, "learning_rate": 4.4194371914211385e-07, "loss": 0.2586, "step": 10634 }, { "epoch": 0.87, "grad_norm": 4.132480578860827, "learning_rate": 4.4139973495017584e-07, "loss": 0.7393, "step": 10635 }, { "epoch": 0.87, "grad_norm": 4.937206205763876, "learning_rate": 4.4085607029627717e-07, "loss": 0.9206, "step": 10636 }, { "epoch": 0.87, "grad_norm": 3.1249392973486874, "learning_rate": 4.403127252185274e-07, "loss": 0.618, "step": 10637 }, { "epoch": 0.87, "grad_norm": 4.403196424429233, "learning_rate": 4.397696997550105e-07, "loss": 0.7381, "step": 10638 }, { "epoch": 0.87, "grad_norm": 3.567039543028251, "learning_rate": 4.392269939437921e-07, "loss": 0.5634, "step": 10639 }, { "epoch": 0.87, "grad_norm": 3.854015625729932, "learning_rate": 4.3868460782291235e-07, "loss": 0.743, "step": 10640 }, { "epoch": 0.87, "grad_norm": 3.9822235908521133, "learning_rate": 4.381425414303908e-07, "loss": 0.5683, "step": 10641 }, { "epoch": 0.87, "grad_norm": 5.996851986473055, "learning_rate": 4.376007948042238e-07, "loss": 1.2439, "step": 10642 }, { "epoch": 0.87, "grad_norm": 2.352824545867161, "learning_rate": 4.370593679823865e-07, "loss": 0.3105, "step": 10643 }, { "epoch": 0.87, "grad_norm": 4.6522769651597775, "learning_rate": 4.3651826100282844e-07, "loss": 0.6531, "step": 10644 }, { "epoch": 0.87, "grad_norm": 4.355682252300801, "learning_rate": 4.3597747390348056e-07, "loss": 0.6397, "step": 10645 }, { "epoch": 0.87, "grad_norm": 3.389657952791804, "learning_rate": 4.354370067222485e-07, "loss": 0.6547, "step": 10646 }, { "epoch": 0.87, "grad_norm": 2.5289816701473318, "learning_rate": 4.348968594970171e-07, "loss": 0.3732, "step": 10647 }, { "epoch": 0.87, "grad_norm": 4.168427711599652, "learning_rate": 4.343570322656498e-07, "loss": 0.8713, "step": 10648 }, { "epoch": 0.87, "grad_norm": 4.680600022944302, "learning_rate": 4.3381752506598373e-07, "loss": 1.2249, "step": 10649 }, { "epoch": 0.87, "grad_norm": 2.8633096515418357, "learning_rate": 4.33278337935838e-07, "loss": 0.6042, "step": 10650 }, { "epoch": 0.87, "grad_norm": 2.708536022959924, "learning_rate": 4.3273947091300504e-07, "loss": 0.3868, "step": 10651 }, { "epoch": 0.87, "grad_norm": 4.033168550954183, "learning_rate": 4.322009240352587e-07, "loss": 0.8516, "step": 10652 }, { "epoch": 0.87, "grad_norm": 3.8082117616109707, "learning_rate": 4.316626973403487e-07, "loss": 0.4788, "step": 10653 }, { "epoch": 0.87, "grad_norm": 3.888987878913506, "learning_rate": 4.311247908660027e-07, "loss": 0.6127, "step": 10654 }, { "epoch": 0.87, "grad_norm": 4.776096194500669, "learning_rate": 4.305872046499243e-07, "loss": 0.5333, "step": 10655 }, { "epoch": 0.87, "grad_norm": 3.281179360056835, "learning_rate": 4.300499387297963e-07, "loss": 0.3566, "step": 10656 }, { "epoch": 0.87, "grad_norm": 4.881596429018882, "learning_rate": 4.2951299314327953e-07, "loss": 1.1082, "step": 10657 }, { "epoch": 0.87, "grad_norm": 2.5948203241454566, "learning_rate": 4.2897636792801123e-07, "loss": 0.3076, "step": 10658 }, { "epoch": 0.87, "grad_norm": 4.400210588906877, "learning_rate": 4.2844006312160625e-07, "loss": 0.7595, "step": 10659 }, { "epoch": 0.87, "grad_norm": 3.7261392672939495, "learning_rate": 4.2790407876165783e-07, "loss": 0.9267, "step": 10660 }, { "epoch": 0.87, "grad_norm": 6.234313141007303, "learning_rate": 4.2736841488573543e-07, "loss": 1.3824, "step": 10661 }, { "epoch": 0.87, "grad_norm": 5.613363121137143, "learning_rate": 4.268330715313862e-07, "loss": 0.7748, "step": 10662 }, { "epoch": 0.87, "grad_norm": 4.808480011585258, "learning_rate": 4.2629804873613676e-07, "loss": 1.1513, "step": 10663 }, { "epoch": 0.87, "grad_norm": 4.47762144904202, "learning_rate": 4.2576334653749e-07, "loss": 0.9783, "step": 10664 }, { "epoch": 0.87, "grad_norm": 3.3851991281660267, "learning_rate": 4.2522896497292465e-07, "loss": 0.4157, "step": 10665 }, { "epoch": 0.87, "grad_norm": 4.29677868769382, "learning_rate": 4.2469490407990033e-07, "loss": 0.7274, "step": 10666 }, { "epoch": 0.87, "grad_norm": 5.316192431611226, "learning_rate": 4.2416116389585094e-07, "loss": 1.1019, "step": 10667 }, { "epoch": 0.87, "grad_norm": 3.803931542736004, "learning_rate": 4.236277444581893e-07, "loss": 0.6968, "step": 10668 }, { "epoch": 0.87, "grad_norm": 4.8032771517576185, "learning_rate": 4.2309464580430614e-07, "loss": 0.9831, "step": 10669 }, { "epoch": 0.87, "grad_norm": 3.969830532450726, "learning_rate": 4.2256186797156986e-07, "loss": 0.6159, "step": 10670 }, { "epoch": 0.87, "grad_norm": 3.424609582686892, "learning_rate": 4.220294109973266e-07, "loss": 0.5029, "step": 10671 }, { "epoch": 0.87, "grad_norm": 5.8175042161609865, "learning_rate": 4.2149727491889725e-07, "loss": 0.9314, "step": 10672 }, { "epoch": 0.87, "grad_norm": 3.148672746299841, "learning_rate": 4.2096545977358294e-07, "loss": 0.4523, "step": 10673 }, { "epoch": 0.87, "grad_norm": 3.4410525768851987, "learning_rate": 4.2043396559866224e-07, "loss": 0.4707, "step": 10674 }, { "epoch": 0.87, "grad_norm": 3.6024058720906362, "learning_rate": 4.199027924313903e-07, "loss": 0.8373, "step": 10675 }, { "epoch": 0.87, "grad_norm": 3.6521156174179907, "learning_rate": 4.1937194030899966e-07, "loss": 0.6826, "step": 10676 }, { "epoch": 0.87, "grad_norm": 4.334807522534701, "learning_rate": 4.188414092687021e-07, "loss": 1.1498, "step": 10677 }, { "epoch": 0.87, "grad_norm": 1.9210667026365889, "learning_rate": 4.183111993476835e-07, "loss": 0.3626, "step": 10678 }, { "epoch": 0.87, "grad_norm": 4.4504094068519215, "learning_rate": 4.177813105831102e-07, "loss": 0.9136, "step": 10679 }, { "epoch": 0.87, "grad_norm": 2.646032517955008, "learning_rate": 4.172517430121248e-07, "loss": 0.3493, "step": 10680 }, { "epoch": 0.87, "grad_norm": 2.264478556092544, "learning_rate": 4.1672249667184974e-07, "loss": 0.4744, "step": 10681 }, { "epoch": 0.87, "grad_norm": 3.7508307297946977, "learning_rate": 4.161935715993798e-07, "loss": 0.7425, "step": 10682 }, { "epoch": 0.87, "grad_norm": 6.403066664815627, "learning_rate": 4.1566496783179257e-07, "loss": 1.1371, "step": 10683 }, { "epoch": 0.87, "grad_norm": 4.686267943481072, "learning_rate": 4.1513668540613895e-07, "loss": 1.2371, "step": 10684 }, { "epoch": 0.87, "grad_norm": 3.765613706065596, "learning_rate": 4.1460872435945046e-07, "loss": 0.5829, "step": 10685 }, { "epoch": 0.87, "grad_norm": 4.861893386867951, "learning_rate": 4.1408108472873466e-07, "loss": 1.0981, "step": 10686 }, { "epoch": 0.87, "grad_norm": 4.095293931200413, "learning_rate": 4.1355376655097704e-07, "loss": 0.732, "step": 10687 }, { "epoch": 0.87, "grad_norm": 4.023464390673694, "learning_rate": 4.1302676986314126e-07, "loss": 0.843, "step": 10688 }, { "epoch": 0.87, "grad_norm": 2.7289135220673826, "learning_rate": 4.125000947021651e-07, "loss": 0.5838, "step": 10689 }, { "epoch": 0.87, "grad_norm": 3.054357836962516, "learning_rate": 4.1197374110496736e-07, "loss": 0.2689, "step": 10690 }, { "epoch": 0.87, "grad_norm": 4.368630862806371, "learning_rate": 4.1144770910844287e-07, "loss": 0.8435, "step": 10691 }, { "epoch": 0.87, "grad_norm": 2.67632656360338, "learning_rate": 4.1092199874946505e-07, "loss": 0.3495, "step": 10692 }, { "epoch": 0.87, "grad_norm": 3.8132894463745277, "learning_rate": 4.103966100648832e-07, "loss": 0.7751, "step": 10693 }, { "epoch": 0.87, "grad_norm": 3.240815316954804, "learning_rate": 4.0987154309152624e-07, "loss": 0.5966, "step": 10694 }, { "epoch": 0.87, "grad_norm": 3.4030123337657274, "learning_rate": 4.0934679786619635e-07, "loss": 0.7006, "step": 10695 }, { "epoch": 0.87, "grad_norm": 3.607028351181928, "learning_rate": 4.0882237442567753e-07, "loss": 0.3138, "step": 10696 }, { "epoch": 0.87, "grad_norm": 5.019722337630489, "learning_rate": 4.082982728067303e-07, "loss": 1.0796, "step": 10697 }, { "epoch": 0.87, "grad_norm": 4.26180541436453, "learning_rate": 4.077744930460903e-07, "loss": 0.6425, "step": 10698 }, { "epoch": 0.87, "grad_norm": 3.5925048951248546, "learning_rate": 4.072510351804726e-07, "loss": 0.7575, "step": 10699 }, { "epoch": 0.87, "grad_norm": 4.354897154606622, "learning_rate": 4.0672789924657065e-07, "loss": 1.1631, "step": 10700 }, { "epoch": 0.87, "grad_norm": 2.8872884073162206, "learning_rate": 4.062050852810523e-07, "loss": 0.5028, "step": 10701 }, { "epoch": 0.87, "grad_norm": 3.504835082816432, "learning_rate": 4.056825933205649e-07, "loss": 0.9385, "step": 10702 }, { "epoch": 0.87, "grad_norm": 4.3366722530498745, "learning_rate": 4.051604234017331e-07, "loss": 0.7271, "step": 10703 }, { "epoch": 0.87, "grad_norm": 5.004438000810595, "learning_rate": 4.0463857556115924e-07, "loss": 1.0754, "step": 10704 }, { "epoch": 0.87, "grad_norm": 4.030954108170035, "learning_rate": 4.0411704983542186e-07, "loss": 0.6232, "step": 10705 }, { "epoch": 0.88, "grad_norm": 4.128803075417094, "learning_rate": 4.0359584626107896e-07, "loss": 0.8658, "step": 10706 }, { "epoch": 0.88, "grad_norm": 4.140969447636292, "learning_rate": 4.0307496487466234e-07, "loss": 0.5852, "step": 10707 }, { "epoch": 0.88, "grad_norm": 3.5609521974211, "learning_rate": 4.025544057126851e-07, "loss": 0.6727, "step": 10708 }, { "epoch": 0.88, "grad_norm": 4.963580000879209, "learning_rate": 4.020341688116358e-07, "loss": 0.7234, "step": 10709 }, { "epoch": 0.88, "grad_norm": 3.490147357337444, "learning_rate": 4.0151425420798087e-07, "loss": 0.6812, "step": 10710 }, { "epoch": 0.88, "grad_norm": 4.6926098684383915, "learning_rate": 4.009946619381649e-07, "loss": 1.3689, "step": 10711 }, { "epoch": 0.88, "grad_norm": 4.35012293405951, "learning_rate": 4.004753920386073e-07, "loss": 0.7819, "step": 10712 }, { "epoch": 0.88, "grad_norm": 4.130592052806018, "learning_rate": 3.999564445457088e-07, "loss": 0.7592, "step": 10713 }, { "epoch": 0.88, "grad_norm": 4.838509212592349, "learning_rate": 3.994378194958426e-07, "loss": 0.6659, "step": 10714 }, { "epoch": 0.88, "grad_norm": 2.5971797437510538, "learning_rate": 3.9891951692536403e-07, "loss": 0.4575, "step": 10715 }, { "epoch": 0.88, "grad_norm": 4.79462436955565, "learning_rate": 3.984015368706029e-07, "loss": 0.9588, "step": 10716 }, { "epoch": 0.88, "grad_norm": 4.457717012861435, "learning_rate": 3.978838793678691e-07, "loss": 0.7413, "step": 10717 }, { "epoch": 0.88, "grad_norm": 3.8643353666091755, "learning_rate": 3.9736654445344583e-07, "loss": 0.4063, "step": 10718 }, { "epoch": 0.88, "grad_norm": 4.625635562975463, "learning_rate": 3.968495321635973e-07, "loss": 0.955, "step": 10719 }, { "epoch": 0.88, "grad_norm": 4.033288993894299, "learning_rate": 3.9633284253456306e-07, "loss": 0.7561, "step": 10720 }, { "epoch": 0.88, "grad_norm": 4.926926563325641, "learning_rate": 3.9581647560256175e-07, "loss": 0.9063, "step": 10721 }, { "epoch": 0.88, "grad_norm": 5.649846015446501, "learning_rate": 3.9530043140378783e-07, "loss": 0.6667, "step": 10722 }, { "epoch": 0.88, "grad_norm": 5.924621114151401, "learning_rate": 3.947847099744151e-07, "loss": 0.8411, "step": 10723 }, { "epoch": 0.88, "grad_norm": 3.908430828876094, "learning_rate": 3.942693113505908e-07, "loss": 0.9769, "step": 10724 }, { "epoch": 0.88, "grad_norm": 1.9654199707560478, "learning_rate": 3.937542355684443e-07, "loss": 0.5655, "step": 10725 }, { "epoch": 0.88, "grad_norm": 2.885417670420595, "learning_rate": 3.93239482664079e-07, "loss": 0.6008, "step": 10726 }, { "epoch": 0.88, "grad_norm": 4.18806641565675, "learning_rate": 3.9272505267357817e-07, "loss": 0.9083, "step": 10727 }, { "epoch": 0.88, "grad_norm": 4.632116258430988, "learning_rate": 3.9221094563299924e-07, "loss": 0.6983, "step": 10728 }, { "epoch": 0.88, "grad_norm": 5.415239353925382, "learning_rate": 3.91697161578381e-07, "loss": 1.0464, "step": 10729 }, { "epoch": 0.88, "grad_norm": 2.5919235662955313, "learning_rate": 3.911837005457353e-07, "loss": 0.4949, "step": 10730 }, { "epoch": 0.88, "grad_norm": 2.5218364501714263, "learning_rate": 3.906705625710544e-07, "loss": 0.5958, "step": 10731 }, { "epoch": 0.88, "grad_norm": 4.638661032322979, "learning_rate": 3.9015774769030737e-07, "loss": 0.7758, "step": 10732 }, { "epoch": 0.88, "grad_norm": 3.9578276632227913, "learning_rate": 3.8964525593944037e-07, "loss": 0.3478, "step": 10733 }, { "epoch": 0.88, "grad_norm": 3.5455863004408235, "learning_rate": 3.8913308735437695e-07, "loss": 0.6597, "step": 10734 }, { "epoch": 0.88, "grad_norm": 1.9097376668172084, "learning_rate": 3.8862124197101723e-07, "loss": 0.3072, "step": 10735 }, { "epoch": 0.88, "grad_norm": 3.6449222677733357, "learning_rate": 3.8810971982523925e-07, "loss": 0.8079, "step": 10736 }, { "epoch": 0.88, "grad_norm": 4.213293657965279, "learning_rate": 3.875985209528993e-07, "loss": 0.6895, "step": 10737 }, { "epoch": 0.88, "grad_norm": 1.9313900646944315, "learning_rate": 3.870876453898292e-07, "loss": 0.2604, "step": 10738 }, { "epoch": 0.88, "grad_norm": 2.680761500486827, "learning_rate": 3.8657709317184043e-07, "loss": 0.411, "step": 10739 }, { "epoch": 0.88, "grad_norm": 2.6893590662009825, "learning_rate": 3.8606686433471986e-07, "loss": 0.3308, "step": 10740 }, { "epoch": 0.88, "grad_norm": 3.943425828588057, "learning_rate": 3.855569589142316e-07, "loss": 0.5602, "step": 10741 }, { "epoch": 0.88, "grad_norm": 1.620359041990299, "learning_rate": 3.8504737694611884e-07, "loss": 0.3706, "step": 10742 }, { "epoch": 0.88, "grad_norm": 4.248144268331347, "learning_rate": 3.8453811846610124e-07, "loss": 0.8726, "step": 10743 }, { "epoch": 0.88, "grad_norm": 5.475629628031191, "learning_rate": 3.8402918350987363e-07, "loss": 1.1841, "step": 10744 }, { "epoch": 0.88, "grad_norm": 3.4896175540043695, "learning_rate": 3.8352057211311187e-07, "loss": 0.7999, "step": 10745 }, { "epoch": 0.88, "grad_norm": 2.9714899109623616, "learning_rate": 3.830122843114681e-07, "loss": 0.59, "step": 10746 }, { "epoch": 0.88, "grad_norm": 4.796663888117933, "learning_rate": 3.825043201405687e-07, "loss": 1.0062, "step": 10747 }, { "epoch": 0.88, "grad_norm": 4.150614319743733, "learning_rate": 3.819966796360214e-07, "loss": 0.6221, "step": 10748 }, { "epoch": 0.88, "grad_norm": 5.915798778933123, "learning_rate": 3.8148936283340876e-07, "loss": 0.9069, "step": 10749 }, { "epoch": 0.88, "grad_norm": 3.0373935126550538, "learning_rate": 3.8098236976829237e-07, "loss": 0.5306, "step": 10750 }, { "epoch": 0.88, "grad_norm": 3.5893956529612456, "learning_rate": 3.804757004762105e-07, "loss": 0.5189, "step": 10751 }, { "epoch": 0.88, "grad_norm": 2.03046284830745, "learning_rate": 3.7996935499267753e-07, "loss": 0.198, "step": 10752 }, { "epoch": 0.88, "grad_norm": 1.5964226176458247, "learning_rate": 3.7946333335318553e-07, "loss": 0.2255, "step": 10753 }, { "epoch": 0.88, "grad_norm": 4.306032320380534, "learning_rate": 3.7895763559320565e-07, "loss": 0.597, "step": 10754 }, { "epoch": 0.88, "grad_norm": 0.996963558315773, "learning_rate": 3.784522617481845e-07, "loss": 0.1405, "step": 10755 }, { "epoch": 0.88, "grad_norm": 3.958372766379946, "learning_rate": 3.779472118535471e-07, "loss": 0.576, "step": 10756 }, { "epoch": 0.88, "grad_norm": 3.1559251715806473, "learning_rate": 3.7744248594469514e-07, "loss": 0.6124, "step": 10757 }, { "epoch": 0.88, "grad_norm": 4.607760712270118, "learning_rate": 3.7693808405700693e-07, "loss": 0.9607, "step": 10758 }, { "epoch": 0.88, "grad_norm": 4.64135442254822, "learning_rate": 3.764340062258404e-07, "loss": 0.9674, "step": 10759 }, { "epoch": 0.88, "grad_norm": 3.295047258336011, "learning_rate": 3.7593025248652717e-07, "loss": 0.5003, "step": 10760 }, { "epoch": 0.88, "grad_norm": 2.1594550418128007, "learning_rate": 3.754268228743796e-07, "loss": 0.5412, "step": 10761 }, { "epoch": 0.88, "grad_norm": 3.19015187923477, "learning_rate": 3.74923717424685e-07, "loss": 0.5206, "step": 10762 }, { "epoch": 0.88, "grad_norm": 4.49541054523754, "learning_rate": 3.744209361727102e-07, "loss": 0.9129, "step": 10763 }, { "epoch": 0.88, "grad_norm": 3.813429106006615, "learning_rate": 3.7391847915369703e-07, "loss": 0.8258, "step": 10764 }, { "epoch": 0.88, "grad_norm": 4.620999661923272, "learning_rate": 3.7341634640286507e-07, "loss": 0.7093, "step": 10765 }, { "epoch": 0.88, "grad_norm": 4.338545381906777, "learning_rate": 3.729145379554128e-07, "loss": 0.649, "step": 10766 }, { "epoch": 0.88, "grad_norm": 5.463822813724985, "learning_rate": 3.724130538465137e-07, "loss": 0.6253, "step": 10767 }, { "epoch": 0.88, "grad_norm": 1.582230910851982, "learning_rate": 3.7191189411132145e-07, "loss": 0.2331, "step": 10768 }, { "epoch": 0.88, "grad_norm": 4.611934416139467, "learning_rate": 3.7141105878496284e-07, "loss": 1.1343, "step": 10769 }, { "epoch": 0.88, "grad_norm": 2.332748873450337, "learning_rate": 3.709105479025454e-07, "loss": 0.382, "step": 10770 }, { "epoch": 0.88, "grad_norm": 4.8338039845774174, "learning_rate": 3.704103614991528e-07, "loss": 0.8078, "step": 10771 }, { "epoch": 0.88, "grad_norm": 4.871897611029115, "learning_rate": 3.699104996098457e-07, "loss": 1.0775, "step": 10772 }, { "epoch": 0.88, "grad_norm": 5.132853935258872, "learning_rate": 3.694109622696629e-07, "loss": 1.1363, "step": 10773 }, { "epoch": 0.88, "grad_norm": 4.776054690216075, "learning_rate": 3.6891174951361905e-07, "loss": 1.0977, "step": 10774 }, { "epoch": 0.88, "grad_norm": 4.445085974684133, "learning_rate": 3.684128613767063e-07, "loss": 1.2064, "step": 10775 }, { "epoch": 0.88, "grad_norm": 4.76484603760837, "learning_rate": 3.6791429789389657e-07, "loss": 0.5601, "step": 10776 }, { "epoch": 0.88, "grad_norm": 2.996172031233409, "learning_rate": 3.674160591001347e-07, "loss": 0.5389, "step": 10777 }, { "epoch": 0.88, "grad_norm": 4.919193116332945, "learning_rate": 3.6691814503034607e-07, "loss": 0.8366, "step": 10778 }, { "epoch": 0.88, "grad_norm": 2.3764859737831223, "learning_rate": 3.664205557194322e-07, "loss": 0.5064, "step": 10779 }, { "epoch": 0.88, "grad_norm": 3.426508144399529, "learning_rate": 3.6592329120227254e-07, "loss": 0.7038, "step": 10780 }, { "epoch": 0.88, "grad_norm": 4.179334859223737, "learning_rate": 3.654263515137224e-07, "loss": 0.7756, "step": 10781 }, { "epoch": 0.88, "grad_norm": 2.932650197611428, "learning_rate": 3.649297366886145e-07, "loss": 0.5344, "step": 10782 }, { "epoch": 0.88, "grad_norm": 3.2992859885900576, "learning_rate": 3.644334467617605e-07, "loss": 0.4674, "step": 10783 }, { "epoch": 0.88, "grad_norm": 3.9900866233744483, "learning_rate": 3.6393748176794806e-07, "loss": 0.8017, "step": 10784 }, { "epoch": 0.88, "grad_norm": 3.4378118306854746, "learning_rate": 3.6344184174194166e-07, "loss": 0.5481, "step": 10785 }, { "epoch": 0.88, "grad_norm": 4.747101774626812, "learning_rate": 3.6294652671848506e-07, "loss": 0.7064, "step": 10786 }, { "epoch": 0.88, "grad_norm": 3.2071706917138627, "learning_rate": 3.6245153673229506e-07, "loss": 0.473, "step": 10787 }, { "epoch": 0.88, "grad_norm": 4.71469652859469, "learning_rate": 3.6195687181806995e-07, "loss": 0.8903, "step": 10788 }, { "epoch": 0.88, "grad_norm": 3.7849053291905626, "learning_rate": 3.614625320104831e-07, "loss": 0.6381, "step": 10789 }, { "epoch": 0.88, "grad_norm": 4.502042212694051, "learning_rate": 3.609685173441868e-07, "loss": 0.8736, "step": 10790 }, { "epoch": 0.88, "grad_norm": 4.8469244147375665, "learning_rate": 3.604748278538073e-07, "loss": 0.7559, "step": 10791 }, { "epoch": 0.88, "grad_norm": 3.93578626328786, "learning_rate": 3.599814635739518e-07, "loss": 0.6431, "step": 10792 }, { "epoch": 0.88, "grad_norm": 4.71900741012042, "learning_rate": 3.5948842453920164e-07, "loss": 0.9382, "step": 10793 }, { "epoch": 0.88, "grad_norm": 2.776179272353354, "learning_rate": 3.5899571078411743e-07, "loss": 0.3649, "step": 10794 }, { "epoch": 0.88, "grad_norm": 3.3194053237425853, "learning_rate": 3.5850332234323604e-07, "loss": 0.7113, "step": 10795 }, { "epoch": 0.88, "grad_norm": 2.0038277251839287, "learning_rate": 3.580112592510715e-07, "loss": 0.3154, "step": 10796 }, { "epoch": 0.88, "grad_norm": 3.146379388211302, "learning_rate": 3.5751952154211734e-07, "loss": 0.5958, "step": 10797 }, { "epoch": 0.88, "grad_norm": 3.392956378574398, "learning_rate": 3.570281092508393e-07, "loss": 0.4243, "step": 10798 }, { "epoch": 0.88, "grad_norm": 4.6560757309435115, "learning_rate": 3.565370224116843e-07, "loss": 0.83, "step": 10799 }, { "epoch": 0.88, "grad_norm": 5.617587980117154, "learning_rate": 3.560462610590759e-07, "loss": 1.4945, "step": 10800 }, { "epoch": 0.88, "grad_norm": 5.719625405410017, "learning_rate": 3.555558252274144e-07, "loss": 1.2418, "step": 10801 }, { "epoch": 0.88, "grad_norm": 4.920088732077162, "learning_rate": 3.550657149510761e-07, "loss": 0.9742, "step": 10802 }, { "epoch": 0.88, "grad_norm": 2.4276505680763347, "learning_rate": 3.545759302644175e-07, "loss": 0.3711, "step": 10803 }, { "epoch": 0.88, "grad_norm": 4.042615404497343, "learning_rate": 3.540864712017689e-07, "loss": 0.6435, "step": 10804 }, { "epoch": 0.88, "grad_norm": 5.142617960583135, "learning_rate": 3.5359733779743887e-07, "loss": 1.032, "step": 10805 }, { "epoch": 0.88, "grad_norm": 5.206168299136099, "learning_rate": 3.531085300857151e-07, "loss": 1.0329, "step": 10806 }, { "epoch": 0.88, "grad_norm": 4.689641946745283, "learning_rate": 3.526200481008596e-07, "loss": 1.1883, "step": 10807 }, { "epoch": 0.88, "grad_norm": 4.51102607546849, "learning_rate": 3.5213189187711383e-07, "loss": 0.5884, "step": 10808 }, { "epoch": 0.88, "grad_norm": 5.326322472854147, "learning_rate": 3.516440614486943e-07, "loss": 1.3112, "step": 10809 }, { "epoch": 0.88, "grad_norm": 4.142139814049927, "learning_rate": 3.5115655684979653e-07, "loss": 0.6651, "step": 10810 }, { "epoch": 0.88, "grad_norm": 3.5554431087528937, "learning_rate": 3.50669378114592e-07, "loss": 0.8697, "step": 10811 }, { "epoch": 0.88, "grad_norm": 4.002869827342025, "learning_rate": 3.5018252527723005e-07, "loss": 0.7896, "step": 10812 }, { "epoch": 0.88, "grad_norm": 5.445687922168761, "learning_rate": 3.4969599837183677e-07, "loss": 1.2917, "step": 10813 }, { "epoch": 0.88, "grad_norm": 5.044010478873331, "learning_rate": 3.4920979743251704e-07, "loss": 0.9237, "step": 10814 }, { "epoch": 0.88, "grad_norm": 3.9503255148776963, "learning_rate": 3.487239224933492e-07, "loss": 0.5253, "step": 10815 }, { "epoch": 0.88, "grad_norm": 3.5457223215662785, "learning_rate": 3.482383735883921e-07, "loss": 0.8796, "step": 10816 }, { "epoch": 0.88, "grad_norm": 5.289237360154442, "learning_rate": 3.4775315075168014e-07, "loss": 0.9533, "step": 10817 }, { "epoch": 0.88, "grad_norm": 3.7949480360010073, "learning_rate": 3.472682540172262e-07, "loss": 0.8025, "step": 10818 }, { "epoch": 0.88, "grad_norm": 2.30689665215385, "learning_rate": 3.467836834190186e-07, "loss": 0.484, "step": 10819 }, { "epoch": 0.88, "grad_norm": 3.0670269504963104, "learning_rate": 3.462994389910246e-07, "loss": 0.7331, "step": 10820 }, { "epoch": 0.88, "grad_norm": 4.098845678264885, "learning_rate": 3.4581552076718597e-07, "loss": 1.295, "step": 10821 }, { "epoch": 0.88, "grad_norm": 4.553248193694052, "learning_rate": 3.453319287814255e-07, "loss": 0.7411, "step": 10822 }, { "epoch": 0.88, "grad_norm": 4.688140717897219, "learning_rate": 3.4484866306763896e-07, "loss": 1.076, "step": 10823 }, { "epoch": 0.88, "grad_norm": 3.3193966087256697, "learning_rate": 3.4436572365970145e-07, "loss": 0.4711, "step": 10824 }, { "epoch": 0.88, "grad_norm": 3.2865368687166274, "learning_rate": 3.438831105914653e-07, "loss": 0.516, "step": 10825 }, { "epoch": 0.88, "grad_norm": 2.2698605560686738, "learning_rate": 3.4340082389676065e-07, "loss": 0.3486, "step": 10826 }, { "epoch": 0.88, "grad_norm": 4.251032302544349, "learning_rate": 3.429188636093922e-07, "loss": 0.4758, "step": 10827 }, { "epoch": 0.89, "grad_norm": 5.176776415080748, "learning_rate": 3.4243722976314285e-07, "loss": 0.9426, "step": 10828 }, { "epoch": 0.89, "grad_norm": 3.8686624142305717, "learning_rate": 3.4195592239177455e-07, "loss": 0.5923, "step": 10829 }, { "epoch": 0.89, "grad_norm": 3.897109402196155, "learning_rate": 3.4147494152902414e-07, "loss": 0.8303, "step": 10830 }, { "epoch": 0.89, "grad_norm": 2.5272091603729154, "learning_rate": 3.4099428720860693e-07, "loss": 0.4118, "step": 10831 }, { "epoch": 0.89, "grad_norm": 4.1253640658859645, "learning_rate": 3.4051395946421374e-07, "loss": 0.4998, "step": 10832 }, { "epoch": 0.89, "grad_norm": 6.410903311671439, "learning_rate": 3.4003395832951315e-07, "loss": 1.0841, "step": 10833 }, { "epoch": 0.89, "grad_norm": 4.405568517002049, "learning_rate": 3.3955428383815267e-07, "loss": 0.825, "step": 10834 }, { "epoch": 0.89, "grad_norm": 5.344647787528466, "learning_rate": 3.3907493602375386e-07, "loss": 0.6518, "step": 10835 }, { "epoch": 0.89, "grad_norm": 4.571118190302611, "learning_rate": 3.385959149199186e-07, "loss": 0.6786, "step": 10836 }, { "epoch": 0.89, "grad_norm": 4.6834737837192835, "learning_rate": 3.3811722056022287e-07, "loss": 0.8869, "step": 10837 }, { "epoch": 0.89, "grad_norm": 6.3061940341381275, "learning_rate": 3.3763885297822153e-07, "loss": 1.5261, "step": 10838 }, { "epoch": 0.89, "grad_norm": 3.5161441151787303, "learning_rate": 3.371608122074455e-07, "loss": 0.5645, "step": 10839 }, { "epoch": 0.89, "grad_norm": 6.081472187373712, "learning_rate": 3.36683098281404e-07, "loss": 1.2119, "step": 10840 }, { "epoch": 0.89, "grad_norm": 5.171022016722354, "learning_rate": 3.36205711233582e-07, "loss": 1.1501, "step": 10841 }, { "epoch": 0.89, "grad_norm": 3.322989050379168, "learning_rate": 3.3572865109744334e-07, "loss": 0.767, "step": 10842 }, { "epoch": 0.89, "grad_norm": 3.025603277700914, "learning_rate": 3.3525191790642733e-07, "loss": 0.5912, "step": 10843 }, { "epoch": 0.89, "grad_norm": 2.5155571855514176, "learning_rate": 3.347755116939505e-07, "loss": 0.573, "step": 10844 }, { "epoch": 0.89, "grad_norm": 3.9386049896185673, "learning_rate": 3.342994324934068e-07, "loss": 0.5738, "step": 10845 }, { "epoch": 0.89, "grad_norm": 5.938087478700539, "learning_rate": 3.338236803381684e-07, "loss": 1.1193, "step": 10846 }, { "epoch": 0.89, "grad_norm": 4.832406399777737, "learning_rate": 3.3334825526158185e-07, "loss": 1.208, "step": 10847 }, { "epoch": 0.89, "grad_norm": 3.795716617602275, "learning_rate": 3.328731572969746e-07, "loss": 0.4324, "step": 10848 }, { "epoch": 0.89, "grad_norm": 2.7713239105931833, "learning_rate": 3.32398386477647e-07, "loss": 0.4664, "step": 10849 }, { "epoch": 0.89, "grad_norm": 4.978762454868344, "learning_rate": 3.319239428368787e-07, "loss": 1.4731, "step": 10850 }, { "epoch": 0.89, "grad_norm": 4.066659139746784, "learning_rate": 3.3144982640792633e-07, "loss": 0.7836, "step": 10851 }, { "epoch": 0.89, "grad_norm": 3.594254731150115, "learning_rate": 3.309760372240245e-07, "loss": 0.6424, "step": 10852 }, { "epoch": 0.89, "grad_norm": 4.458815645513699, "learning_rate": 3.3050257531838213e-07, "loss": 0.659, "step": 10853 }, { "epoch": 0.89, "grad_norm": 2.606011251269728, "learning_rate": 3.300294407241883e-07, "loss": 0.3979, "step": 10854 }, { "epoch": 0.89, "grad_norm": 2.3601353790940625, "learning_rate": 3.2955663347460586e-07, "loss": 0.2911, "step": 10855 }, { "epoch": 0.89, "grad_norm": 3.2947077288587123, "learning_rate": 3.2908415360277777e-07, "loss": 0.7556, "step": 10856 }, { "epoch": 0.89, "grad_norm": 2.6814920758815863, "learning_rate": 3.2861200114182257e-07, "loss": 0.5605, "step": 10857 }, { "epoch": 0.89, "grad_norm": 4.588671370200977, "learning_rate": 3.2814017612483596e-07, "loss": 0.9264, "step": 10858 }, { "epoch": 0.89, "grad_norm": 5.818531071367023, "learning_rate": 3.276686785848915e-07, "loss": 1.2616, "step": 10859 }, { "epoch": 0.89, "grad_norm": 3.513390776495794, "learning_rate": 3.2719750855503886e-07, "loss": 0.7851, "step": 10860 }, { "epoch": 0.89, "grad_norm": 4.164370528259902, "learning_rate": 3.267266660683044e-07, "loss": 0.4353, "step": 10861 }, { "epoch": 0.89, "grad_norm": 2.7186871412994567, "learning_rate": 3.2625615115769225e-07, "loss": 0.2845, "step": 10862 }, { "epoch": 0.89, "grad_norm": 3.2796635993385106, "learning_rate": 3.257859638561839e-07, "loss": 0.4609, "step": 10863 }, { "epoch": 0.89, "grad_norm": 4.077730283617591, "learning_rate": 3.2531610419673675e-07, "loss": 0.4293, "step": 10864 }, { "epoch": 0.89, "grad_norm": 4.234160766755855, "learning_rate": 3.248465722122868e-07, "loss": 0.3752, "step": 10865 }, { "epoch": 0.89, "grad_norm": 4.300505913411803, "learning_rate": 3.24377367935747e-07, "loss": 0.7588, "step": 10866 }, { "epoch": 0.89, "grad_norm": 5.388942913410447, "learning_rate": 3.2390849140000403e-07, "loss": 0.8793, "step": 10867 }, { "epoch": 0.89, "grad_norm": 3.0979214837702216, "learning_rate": 3.2343994263792586e-07, "loss": 0.5667, "step": 10868 }, { "epoch": 0.89, "grad_norm": 4.97473027058986, "learning_rate": 3.229717216823552e-07, "loss": 1.0228, "step": 10869 }, { "epoch": 0.89, "grad_norm": 4.864386867807811, "learning_rate": 3.2250382856611193e-07, "loss": 0.6682, "step": 10870 }, { "epoch": 0.89, "grad_norm": 3.2722788925574453, "learning_rate": 3.220362633219948e-07, "loss": 0.5731, "step": 10871 }, { "epoch": 0.89, "grad_norm": 5.379496879342514, "learning_rate": 3.2156902598277585e-07, "loss": 0.8015, "step": 10872 }, { "epoch": 0.89, "grad_norm": 3.994419351663586, "learning_rate": 3.2110211658120784e-07, "loss": 0.6576, "step": 10873 }, { "epoch": 0.89, "grad_norm": 3.780488642815016, "learning_rate": 3.206355351500184e-07, "loss": 0.6979, "step": 10874 }, { "epoch": 0.89, "grad_norm": 4.419326211069094, "learning_rate": 3.2016928172191377e-07, "loss": 0.9318, "step": 10875 }, { "epoch": 0.89, "grad_norm": 2.9204127882648647, "learning_rate": 3.1970335632957595e-07, "loss": 0.4459, "step": 10876 }, { "epoch": 0.89, "grad_norm": 4.156120085557064, "learning_rate": 3.1923775900566444e-07, "loss": 0.6487, "step": 10877 }, { "epoch": 0.89, "grad_norm": 2.2345160732754623, "learning_rate": 3.1877248978281484e-07, "loss": 0.5941, "step": 10878 }, { "epoch": 0.89, "grad_norm": 2.4191464683206716, "learning_rate": 3.18307548693641e-07, "loss": 0.3887, "step": 10879 }, { "epoch": 0.89, "grad_norm": 4.448224384755552, "learning_rate": 3.17842935770733e-07, "loss": 0.6698, "step": 10880 }, { "epoch": 0.89, "grad_norm": 3.715905562647058, "learning_rate": 3.173786510466581e-07, "loss": 0.5083, "step": 10881 }, { "epoch": 0.89, "grad_norm": 2.933740117090663, "learning_rate": 3.1691469455396196e-07, "loss": 0.5269, "step": 10882 }, { "epoch": 0.89, "grad_norm": 3.060932225469689, "learning_rate": 3.164510663251641e-07, "loss": 0.5333, "step": 10883 }, { "epoch": 0.89, "grad_norm": 5.980209462101881, "learning_rate": 3.159877663927635e-07, "loss": 1.1751, "step": 10884 }, { "epoch": 0.89, "grad_norm": 2.362802845681076, "learning_rate": 3.15524794789237e-07, "loss": 0.3487, "step": 10885 }, { "epoch": 0.89, "grad_norm": 3.142789023875797, "learning_rate": 3.1506215154703424e-07, "loss": 0.5471, "step": 10886 }, { "epoch": 0.89, "grad_norm": 3.8573988934118852, "learning_rate": 3.145998366985853e-07, "loss": 0.7298, "step": 10887 }, { "epoch": 0.89, "grad_norm": 5.515108176966938, "learning_rate": 3.141378502762982e-07, "loss": 1.0164, "step": 10888 }, { "epoch": 0.89, "grad_norm": 1.8633670065312347, "learning_rate": 3.136761923125542e-07, "loss": 0.3277, "step": 10889 }, { "epoch": 0.89, "grad_norm": 3.194830584266059, "learning_rate": 3.1321486283971357e-07, "loss": 0.4989, "step": 10890 }, { "epoch": 0.89, "grad_norm": 3.6276186717088548, "learning_rate": 3.127538618901144e-07, "loss": 0.5732, "step": 10891 }, { "epoch": 0.89, "grad_norm": 5.570089495193147, "learning_rate": 3.122931894960707e-07, "loss": 0.8861, "step": 10892 }, { "epoch": 0.89, "grad_norm": 5.0135959179798055, "learning_rate": 3.118328456898734e-07, "loss": 0.9223, "step": 10893 }, { "epoch": 0.89, "grad_norm": 5.141074146576756, "learning_rate": 3.1137283050379165e-07, "loss": 0.8711, "step": 10894 }, { "epoch": 0.89, "grad_norm": 4.582126875808922, "learning_rate": 3.10913143970068e-07, "loss": 0.4177, "step": 10895 }, { "epoch": 0.89, "grad_norm": 5.718422207878949, "learning_rate": 3.104537861209267e-07, "loss": 1.2523, "step": 10896 }, { "epoch": 0.89, "grad_norm": 6.4516793569985955, "learning_rate": 3.0999475698856583e-07, "loss": 1.3102, "step": 10897 }, { "epoch": 0.89, "grad_norm": 3.7206114005265314, "learning_rate": 3.095360566051614e-07, "loss": 0.5341, "step": 10898 }, { "epoch": 0.89, "grad_norm": 4.436185251661134, "learning_rate": 3.090776850028671e-07, "loss": 0.7484, "step": 10899 }, { "epoch": 0.89, "grad_norm": 6.066747356792933, "learning_rate": 3.086196422138116e-07, "loss": 1.1466, "step": 10900 }, { "epoch": 0.89, "grad_norm": 5.889368480857732, "learning_rate": 3.0816192827010317e-07, "loss": 0.9629, "step": 10901 }, { "epoch": 0.89, "grad_norm": 3.6207016500856315, "learning_rate": 3.077045432038234e-07, "loss": 0.4401, "step": 10902 }, { "epoch": 0.89, "grad_norm": 5.332940686131082, "learning_rate": 3.0724748704703435e-07, "loss": 1.1868, "step": 10903 }, { "epoch": 0.89, "grad_norm": 5.058409923124676, "learning_rate": 3.0679075983177376e-07, "loss": 0.9681, "step": 10904 }, { "epoch": 0.89, "grad_norm": 3.279635932894341, "learning_rate": 3.063343615900555e-07, "loss": 0.5421, "step": 10905 }, { "epoch": 0.89, "grad_norm": 4.102073653497036, "learning_rate": 3.0587829235387277e-07, "loss": 0.3761, "step": 10906 }, { "epoch": 0.89, "grad_norm": 3.8061269220006597, "learning_rate": 3.0542255215519177e-07, "loss": 0.7479, "step": 10907 }, { "epoch": 0.89, "grad_norm": 4.31739768655144, "learning_rate": 3.0496714102595914e-07, "loss": 1.0301, "step": 10908 }, { "epoch": 0.89, "grad_norm": 3.5912994301111514, "learning_rate": 3.0451205899809764e-07, "loss": 0.5204, "step": 10909 }, { "epoch": 0.89, "grad_norm": 4.0923652953300005, "learning_rate": 3.0405730610350516e-07, "loss": 0.88, "step": 10910 }, { "epoch": 0.89, "grad_norm": 3.2713263066201983, "learning_rate": 3.0360288237406e-07, "loss": 0.6832, "step": 10911 }, { "epoch": 0.89, "grad_norm": 4.892186852611783, "learning_rate": 3.0314878784161284e-07, "loss": 0.8156, "step": 10912 }, { "epoch": 0.89, "grad_norm": 4.36463854352164, "learning_rate": 3.0269502253799485e-07, "loss": 0.7741, "step": 10913 }, { "epoch": 0.89, "grad_norm": 2.5927460345059146, "learning_rate": 3.0224158649501343e-07, "loss": 0.5198, "step": 10914 }, { "epoch": 0.89, "grad_norm": 2.516528093229124, "learning_rate": 3.017884797444526e-07, "loss": 0.5627, "step": 10915 }, { "epoch": 0.89, "grad_norm": 4.563189913634584, "learning_rate": 3.013357023180724e-07, "loss": 0.779, "step": 10916 }, { "epoch": 0.89, "grad_norm": 4.590422596413634, "learning_rate": 3.008832542476109e-07, "loss": 0.6201, "step": 10917 }, { "epoch": 0.89, "grad_norm": 4.860653508579803, "learning_rate": 3.0043113556478207e-07, "loss": 1.1325, "step": 10918 }, { "epoch": 0.89, "grad_norm": 5.0539682300732185, "learning_rate": 2.999793463012779e-07, "loss": 1.1529, "step": 10919 }, { "epoch": 0.89, "grad_norm": 5.861370069172823, "learning_rate": 2.995278864887674e-07, "loss": 1.1975, "step": 10920 }, { "epoch": 0.89, "grad_norm": 4.283954230503348, "learning_rate": 2.990767561588953e-07, "loss": 1.0167, "step": 10921 }, { "epoch": 0.89, "grad_norm": 5.271556079490615, "learning_rate": 2.986259553432841e-07, "loss": 1.0017, "step": 10922 }, { "epoch": 0.89, "grad_norm": 1.8648335588731393, "learning_rate": 2.98175484073534e-07, "loss": 0.2049, "step": 10923 }, { "epoch": 0.89, "grad_norm": 4.5805437442033075, "learning_rate": 2.977253423812193e-07, "loss": 0.901, "step": 10924 }, { "epoch": 0.89, "grad_norm": 2.8726092892928534, "learning_rate": 2.9727553029789303e-07, "loss": 0.5756, "step": 10925 }, { "epoch": 0.89, "grad_norm": 4.73696508058446, "learning_rate": 2.9682604785508664e-07, "loss": 0.6246, "step": 10926 }, { "epoch": 0.89, "grad_norm": 2.6695261519349907, "learning_rate": 2.963768950843054e-07, "loss": 0.5638, "step": 10927 }, { "epoch": 0.89, "grad_norm": 5.692648401985274, "learning_rate": 2.9592807201703486e-07, "loss": 1.2495, "step": 10928 }, { "epoch": 0.89, "grad_norm": 4.626581084693792, "learning_rate": 2.9547957868473307e-07, "loss": 0.7701, "step": 10929 }, { "epoch": 0.89, "grad_norm": 2.8172435855293267, "learning_rate": 2.9503141511883884e-07, "loss": 0.5745, "step": 10930 }, { "epoch": 0.89, "grad_norm": 2.630192348382033, "learning_rate": 2.9458358135076693e-07, "loss": 0.4044, "step": 10931 }, { "epoch": 0.89, "grad_norm": 3.808703663511119, "learning_rate": 2.9413607741190733e-07, "loss": 0.9562, "step": 10932 }, { "epoch": 0.89, "grad_norm": 3.929016503818397, "learning_rate": 2.936889033336288e-07, "loss": 0.5762, "step": 10933 }, { "epoch": 0.89, "grad_norm": 4.436744038954424, "learning_rate": 2.9324205914727674e-07, "loss": 0.8342, "step": 10934 }, { "epoch": 0.89, "grad_norm": 4.253131568729143, "learning_rate": 2.9279554488417186e-07, "loss": 0.6823, "step": 10935 }, { "epoch": 0.89, "grad_norm": 2.066108099578079, "learning_rate": 2.9234936057561336e-07, "loss": 0.2603, "step": 10936 }, { "epoch": 0.89, "grad_norm": 5.652093687095228, "learning_rate": 2.919035062528769e-07, "loss": 0.9055, "step": 10937 }, { "epoch": 0.89, "grad_norm": 2.7600104969135906, "learning_rate": 2.914579819472152e-07, "loss": 0.4802, "step": 10938 }, { "epoch": 0.89, "grad_norm": 2.9176102039855842, "learning_rate": 2.910127876898572e-07, "loss": 0.578, "step": 10939 }, { "epoch": 0.89, "grad_norm": 3.72597740012647, "learning_rate": 2.905679235120096e-07, "loss": 0.8006, "step": 10940 }, { "epoch": 0.89, "grad_norm": 4.30238468052772, "learning_rate": 2.9012338944485463e-07, "loss": 0.7967, "step": 10941 }, { "epoch": 0.89, "grad_norm": 3.69012051220324, "learning_rate": 2.89679185519553e-07, "loss": 0.6067, "step": 10942 }, { "epoch": 0.89, "grad_norm": 4.539522560333616, "learning_rate": 2.8923531176724027e-07, "loss": 0.8178, "step": 10943 }, { "epoch": 0.89, "grad_norm": 4.561569584044321, "learning_rate": 2.887917682190311e-07, "loss": 1.2437, "step": 10944 }, { "epoch": 0.89, "grad_norm": 4.096023672049721, "learning_rate": 2.883485549060167e-07, "loss": 0.7392, "step": 10945 }, { "epoch": 0.89, "grad_norm": 3.5030239018798355, "learning_rate": 2.879056718592627e-07, "loss": 0.4574, "step": 10946 }, { "epoch": 0.89, "grad_norm": 4.384378182815733, "learning_rate": 2.8746311910981485e-07, "loss": 0.6167, "step": 10947 }, { "epoch": 0.89, "grad_norm": 6.413790629940976, "learning_rate": 2.8702089668869227e-07, "loss": 1.2176, "step": 10948 }, { "epoch": 0.89, "grad_norm": 2.5476452228294972, "learning_rate": 2.86579004626894e-07, "loss": 0.4998, "step": 10949 }, { "epoch": 0.9, "grad_norm": 5.578922777726011, "learning_rate": 2.861374429553948e-07, "loss": 0.8611, "step": 10950 }, { "epoch": 0.9, "grad_norm": 4.9133060061104805, "learning_rate": 2.856962117051465e-07, "loss": 0.9554, "step": 10951 }, { "epoch": 0.9, "grad_norm": 4.709850585262299, "learning_rate": 2.852553109070766e-07, "loss": 0.5331, "step": 10952 }, { "epoch": 0.9, "grad_norm": 2.8510311396142125, "learning_rate": 2.8481474059209033e-07, "loss": 0.5582, "step": 10953 }, { "epoch": 0.9, "grad_norm": 4.785827042551584, "learning_rate": 2.8437450079107034e-07, "loss": 0.6103, "step": 10954 }, { "epoch": 0.9, "grad_norm": 4.681038383589419, "learning_rate": 2.839345915348757e-07, "loss": 1.1143, "step": 10955 }, { "epoch": 0.9, "grad_norm": 4.7286450723732525, "learning_rate": 2.8349501285434123e-07, "loss": 0.9406, "step": 10956 }, { "epoch": 0.9, "grad_norm": 2.4851305243777806, "learning_rate": 2.830557647802812e-07, "loss": 0.3931, "step": 10957 }, { "epoch": 0.9, "grad_norm": 3.76467683896015, "learning_rate": 2.8261684734348316e-07, "loss": 0.9277, "step": 10958 }, { "epoch": 0.9, "grad_norm": 3.7793588360667427, "learning_rate": 2.8217826057471423e-07, "loss": 0.46, "step": 10959 }, { "epoch": 0.9, "grad_norm": 2.756079446742792, "learning_rate": 2.817400045047164e-07, "loss": 0.5393, "step": 10960 }, { "epoch": 0.9, "grad_norm": 2.3470853112431773, "learning_rate": 2.813020791642118e-07, "loss": 0.3717, "step": 10961 }, { "epoch": 0.9, "grad_norm": 3.240363172485346, "learning_rate": 2.808644845838943e-07, "loss": 0.5289, "step": 10962 }, { "epoch": 0.9, "grad_norm": 4.777083156136472, "learning_rate": 2.804272207944397e-07, "loss": 0.877, "step": 10963 }, { "epoch": 0.9, "grad_norm": 6.051156731001556, "learning_rate": 2.799902878264965e-07, "loss": 1.5133, "step": 10964 }, { "epoch": 0.9, "grad_norm": 4.240849695462603, "learning_rate": 2.7955368571069284e-07, "loss": 0.2849, "step": 10965 }, { "epoch": 0.9, "grad_norm": 5.070898820430699, "learning_rate": 2.791174144776321e-07, "loss": 0.8403, "step": 10966 }, { "epoch": 0.9, "grad_norm": 2.889092870804847, "learning_rate": 2.7868147415789526e-07, "loss": 0.5334, "step": 10967 }, { "epoch": 0.9, "grad_norm": 2.791923339154662, "learning_rate": 2.782458647820407e-07, "loss": 0.4653, "step": 10968 }, { "epoch": 0.9, "grad_norm": 4.88705712106383, "learning_rate": 2.778105863806013e-07, "loss": 1.1669, "step": 10969 }, { "epoch": 0.9, "grad_norm": 2.831371608629582, "learning_rate": 2.7737563898408814e-07, "loss": 0.3367, "step": 10970 }, { "epoch": 0.9, "grad_norm": 6.111330519551319, "learning_rate": 2.769410226229902e-07, "loss": 0.879, "step": 10971 }, { "epoch": 0.9, "grad_norm": 2.635508115964347, "learning_rate": 2.765067373277719e-07, "loss": 0.2942, "step": 10972 }, { "epoch": 0.9, "grad_norm": 2.9739200019422767, "learning_rate": 2.760727831288745e-07, "loss": 0.3165, "step": 10973 }, { "epoch": 0.9, "grad_norm": 4.514726040277123, "learning_rate": 2.756391600567171e-07, "loss": 0.9741, "step": 10974 }, { "epoch": 0.9, "grad_norm": 4.2116695143322636, "learning_rate": 2.7520586814169303e-07, "loss": 0.5593, "step": 10975 }, { "epoch": 0.9, "grad_norm": 5.753408779265016, "learning_rate": 2.7477290741417526e-07, "loss": 1.1868, "step": 10976 }, { "epoch": 0.9, "grad_norm": 3.6749590100957485, "learning_rate": 2.7434027790451346e-07, "loss": 0.6321, "step": 10977 }, { "epoch": 0.9, "grad_norm": 3.9872602389265652, "learning_rate": 2.739079796430316e-07, "loss": 0.6642, "step": 10978 }, { "epoch": 0.9, "grad_norm": 4.466734413380715, "learning_rate": 2.7347601266003165e-07, "loss": 0.8289, "step": 10979 }, { "epoch": 0.9, "grad_norm": 4.257607083269077, "learning_rate": 2.730443769857943e-07, "loss": 0.6408, "step": 10980 }, { "epoch": 0.9, "grad_norm": 2.639939952215677, "learning_rate": 2.726130726505738e-07, "loss": 0.3134, "step": 10981 }, { "epoch": 0.9, "grad_norm": 4.701429366481553, "learning_rate": 2.721820996846031e-07, "loss": 0.5872, "step": 10982 }, { "epoch": 0.9, "grad_norm": 3.505066897522115, "learning_rate": 2.717514581180919e-07, "loss": 0.8732, "step": 10983 }, { "epoch": 0.9, "grad_norm": 3.8617198443788148, "learning_rate": 2.7132114798122557e-07, "loss": 0.8892, "step": 10984 }, { "epoch": 0.9, "grad_norm": 4.358700844128491, "learning_rate": 2.708911693041683e-07, "loss": 1.0066, "step": 10985 }, { "epoch": 0.9, "grad_norm": 3.923681167525303, "learning_rate": 2.7046152211705865e-07, "loss": 0.5607, "step": 10986 }, { "epoch": 0.9, "grad_norm": 3.404828609512687, "learning_rate": 2.7003220645001325e-07, "loss": 0.482, "step": 10987 }, { "epoch": 0.9, "grad_norm": 4.090455246821734, "learning_rate": 2.696032223331252e-07, "loss": 0.5434, "step": 10988 }, { "epoch": 0.9, "grad_norm": 2.77128135135535, "learning_rate": 2.6917456979646426e-07, "loss": 0.2923, "step": 10989 }, { "epoch": 0.9, "grad_norm": 5.6209549505587475, "learning_rate": 2.687462488700776e-07, "loss": 1.2092, "step": 10990 }, { "epoch": 0.9, "grad_norm": 3.6326542842188356, "learning_rate": 2.683182595839889e-07, "loss": 0.7548, "step": 10991 }, { "epoch": 0.9, "grad_norm": 3.3388253743696557, "learning_rate": 2.6789060196819705e-07, "loss": 0.7041, "step": 10992 }, { "epoch": 0.9, "grad_norm": 4.280074955140449, "learning_rate": 2.6746327605268017e-07, "loss": 0.769, "step": 10993 }, { "epoch": 0.9, "grad_norm": 4.300141434768321, "learning_rate": 2.670362818673922e-07, "loss": 0.8973, "step": 10994 }, { "epoch": 0.9, "grad_norm": 2.038387138665519, "learning_rate": 2.666096194422624e-07, "loss": 0.2636, "step": 10995 }, { "epoch": 0.9, "grad_norm": 2.988886270242992, "learning_rate": 2.6618328880719803e-07, "loss": 0.6738, "step": 10996 }, { "epoch": 0.9, "grad_norm": 4.710406907947948, "learning_rate": 2.6575728999208404e-07, "loss": 0.8795, "step": 10997 }, { "epoch": 0.9, "grad_norm": 5.047388030931536, "learning_rate": 2.653316230267805e-07, "loss": 0.933, "step": 10998 }, { "epoch": 0.9, "grad_norm": 4.702221769066459, "learning_rate": 2.649062879411246e-07, "loss": 0.8378, "step": 10999 }, { "epoch": 0.9, "grad_norm": 4.787486009716643, "learning_rate": 2.644812847649303e-07, "loss": 0.5252, "step": 11000 }, { "epoch": 0.9, "grad_norm": 1.4915548159213152, "learning_rate": 2.640566135279893e-07, "loss": 0.126, "step": 11001 }, { "epoch": 0.9, "grad_norm": 4.430806272111197, "learning_rate": 2.636322742600689e-07, "loss": 0.9382, "step": 11002 }, { "epoch": 0.9, "grad_norm": 3.450059826246382, "learning_rate": 2.632082669909136e-07, "loss": 0.541, "step": 11003 }, { "epoch": 0.9, "grad_norm": 1.9120320385365477, "learning_rate": 2.627845917502442e-07, "loss": 0.3607, "step": 11004 }, { "epoch": 0.9, "grad_norm": 4.437954820256922, "learning_rate": 2.6236124856775793e-07, "loss": 0.7392, "step": 11005 }, { "epoch": 0.9, "grad_norm": 4.016078629052368, "learning_rate": 2.6193823747313e-07, "loss": 0.9399, "step": 11006 }, { "epoch": 0.9, "grad_norm": 2.8336758820553998, "learning_rate": 2.6151555849601107e-07, "loss": 0.5607, "step": 11007 }, { "epoch": 0.9, "grad_norm": 4.533114744036621, "learning_rate": 2.6109321166603087e-07, "loss": 0.9896, "step": 11008 }, { "epoch": 0.9, "grad_norm": 4.655681551168729, "learning_rate": 2.6067119701279175e-07, "loss": 0.8572, "step": 11009 }, { "epoch": 0.9, "grad_norm": 4.83599426448416, "learning_rate": 2.6024951456587677e-07, "loss": 0.9201, "step": 11010 }, { "epoch": 0.9, "grad_norm": 2.348684784011196, "learning_rate": 2.5982816435484283e-07, "loss": 0.3516, "step": 11011 }, { "epoch": 0.9, "grad_norm": 1.9607888032500733, "learning_rate": 2.5940714640922516e-07, "loss": 0.2168, "step": 11012 }, { "epoch": 0.9, "grad_norm": 3.1593505187596693, "learning_rate": 2.5898646075853573e-07, "loss": 0.6291, "step": 11013 }, { "epoch": 0.9, "grad_norm": 3.7827509657674843, "learning_rate": 2.5856610743226265e-07, "loss": 0.4637, "step": 11014 }, { "epoch": 0.9, "grad_norm": 5.249129440152548, "learning_rate": 2.5814608645987e-07, "loss": 1.2756, "step": 11015 }, { "epoch": 0.9, "grad_norm": 4.826583744501296, "learning_rate": 2.5772639787080056e-07, "loss": 1.0892, "step": 11016 }, { "epoch": 0.9, "grad_norm": 3.078319979925435, "learning_rate": 2.5730704169447176e-07, "loss": 0.9656, "step": 11017 }, { "epoch": 0.9, "grad_norm": 3.0615261040398014, "learning_rate": 2.5688801796027895e-07, "loss": 0.4221, "step": 11018 }, { "epoch": 0.9, "grad_norm": 3.3212155015481817, "learning_rate": 2.5646932669759427e-07, "loss": 0.3062, "step": 11019 }, { "epoch": 0.9, "grad_norm": 3.790010764557456, "learning_rate": 2.5605096793576646e-07, "loss": 0.4332, "step": 11020 }, { "epoch": 0.9, "grad_norm": 3.15283502705698, "learning_rate": 2.556329417041192e-07, "loss": 0.5554, "step": 11021 }, { "epoch": 0.9, "grad_norm": 5.284318512087921, "learning_rate": 2.552152480319553e-07, "loss": 0.6837, "step": 11022 }, { "epoch": 0.9, "grad_norm": 3.454615123520454, "learning_rate": 2.5479788694855343e-07, "loss": 0.7399, "step": 11023 }, { "epoch": 0.9, "grad_norm": 5.495421924009523, "learning_rate": 2.5438085848316916e-07, "loss": 0.8502, "step": 11024 }, { "epoch": 0.9, "grad_norm": 4.745216338272797, "learning_rate": 2.5396416266503245e-07, "loss": 1.3025, "step": 11025 }, { "epoch": 0.9, "grad_norm": 1.4879481153781136, "learning_rate": 2.535477995233543e-07, "loss": 0.1693, "step": 11026 }, { "epoch": 0.9, "grad_norm": 3.3729847640325024, "learning_rate": 2.531317690873181e-07, "loss": 0.889, "step": 11027 }, { "epoch": 0.9, "grad_norm": 4.671859726369521, "learning_rate": 2.52716071386086e-07, "loss": 1.1565, "step": 11028 }, { "epoch": 0.9, "grad_norm": 2.979680538540261, "learning_rate": 2.5230070644879757e-07, "loss": 0.5938, "step": 11029 }, { "epoch": 0.9, "grad_norm": 5.360130147685159, "learning_rate": 2.518856743045672e-07, "loss": 0.8711, "step": 11030 }, { "epoch": 0.9, "grad_norm": 5.407689265554586, "learning_rate": 2.514709749824884e-07, "loss": 1.0209, "step": 11031 }, { "epoch": 0.9, "grad_norm": 4.421968303096306, "learning_rate": 2.510566085116273e-07, "loss": 0.5024, "step": 11032 }, { "epoch": 0.9, "grad_norm": 1.1873366866216424, "learning_rate": 2.5064257492103064e-07, "loss": 0.146, "step": 11033 }, { "epoch": 0.9, "grad_norm": 4.834142366561029, "learning_rate": 2.502288742397202e-07, "loss": 1.1664, "step": 11034 }, { "epoch": 0.9, "grad_norm": 1.3021514892264503, "learning_rate": 2.4981550649669504e-07, "loss": 0.1722, "step": 11035 }, { "epoch": 0.9, "grad_norm": 2.065157901732676, "learning_rate": 2.4940247172092924e-07, "loss": 0.3421, "step": 11036 }, { "epoch": 0.9, "grad_norm": 3.163428169715771, "learning_rate": 2.489897699413768e-07, "loss": 0.4166, "step": 11037 }, { "epoch": 0.9, "grad_norm": 3.437325471175225, "learning_rate": 2.4857740118696406e-07, "loss": 0.3751, "step": 11038 }, { "epoch": 0.9, "grad_norm": 4.199057988048138, "learning_rate": 2.481653654865973e-07, "loss": 0.713, "step": 11039 }, { "epoch": 0.9, "grad_norm": 3.619639946601736, "learning_rate": 2.47753662869159e-07, "loss": 0.5802, "step": 11040 }, { "epoch": 0.9, "grad_norm": 3.1151249195378137, "learning_rate": 2.473422933635067e-07, "loss": 0.3998, "step": 11041 }, { "epoch": 0.9, "grad_norm": 2.940103140856555, "learning_rate": 2.469312569984755e-07, "loss": 0.8201, "step": 11042 }, { "epoch": 0.9, "grad_norm": 4.039052681644886, "learning_rate": 2.4652055380287866e-07, "loss": 0.6402, "step": 11043 }, { "epoch": 0.9, "grad_norm": 4.097990478563924, "learning_rate": 2.46110183805503e-07, "loss": 0.6293, "step": 11044 }, { "epoch": 0.9, "grad_norm": 4.976807621091998, "learning_rate": 2.45700147035115e-07, "loss": 0.791, "step": 11045 }, { "epoch": 0.9, "grad_norm": 2.9381474601172486, "learning_rate": 2.4529044352045507e-07, "loss": 0.5721, "step": 11046 }, { "epoch": 0.9, "grad_norm": 2.392734855345606, "learning_rate": 2.448810732902429e-07, "loss": 0.2277, "step": 11047 }, { "epoch": 0.9, "grad_norm": 5.073692288450157, "learning_rate": 2.4447203637317396e-07, "loss": 0.5008, "step": 11048 }, { "epoch": 0.9, "grad_norm": 1.0443640424281004, "learning_rate": 2.440633327979186e-07, "loss": 0.1341, "step": 11049 }, { "epoch": 0.9, "grad_norm": 2.94043086673372, "learning_rate": 2.436549625931256e-07, "loss": 0.487, "step": 11050 }, { "epoch": 0.9, "grad_norm": 3.6347325369876495, "learning_rate": 2.432469257874198e-07, "loss": 0.7156, "step": 11051 }, { "epoch": 0.9, "grad_norm": 3.1073434097379553, "learning_rate": 2.4283922240940285e-07, "loss": 0.3269, "step": 11052 }, { "epoch": 0.9, "grad_norm": 2.4455566731482126, "learning_rate": 2.4243185248765347e-07, "loss": 0.4302, "step": 11053 }, { "epoch": 0.9, "grad_norm": 4.0889595023649, "learning_rate": 2.4202481605072715e-07, "loss": 0.5646, "step": 11054 }, { "epoch": 0.9, "grad_norm": 2.6371914942050356, "learning_rate": 2.4161811312715336e-07, "loss": 0.3783, "step": 11055 }, { "epoch": 0.9, "grad_norm": 4.161157934117554, "learning_rate": 2.41211743745442e-07, "loss": 0.4261, "step": 11056 }, { "epoch": 0.9, "grad_norm": 4.3216016149663545, "learning_rate": 2.40805707934077e-07, "loss": 0.6887, "step": 11057 }, { "epoch": 0.9, "grad_norm": 4.645190440247131, "learning_rate": 2.404000057215189e-07, "loss": 0.9987, "step": 11058 }, { "epoch": 0.9, "grad_norm": 4.5485437389266705, "learning_rate": 2.399946371362072e-07, "loss": 0.9675, "step": 11059 }, { "epoch": 0.9, "grad_norm": 4.003720375727015, "learning_rate": 2.3958960220655637e-07, "loss": 0.814, "step": 11060 }, { "epoch": 0.9, "grad_norm": 4.089568719659635, "learning_rate": 2.391849009609559e-07, "loss": 0.7647, "step": 11061 }, { "epoch": 0.9, "grad_norm": 5.2458157362121085, "learning_rate": 2.387805334277754e-07, "loss": 1.0476, "step": 11062 }, { "epoch": 0.9, "grad_norm": 3.2327623500986915, "learning_rate": 2.3837649963535825e-07, "loss": 0.5227, "step": 11063 }, { "epoch": 0.9, "grad_norm": 3.340149049297397, "learning_rate": 2.379727996120257e-07, "loss": 0.7746, "step": 11064 }, { "epoch": 0.9, "grad_norm": 5.503326569977153, "learning_rate": 2.3756943338607564e-07, "loss": 1.101, "step": 11065 }, { "epoch": 0.9, "grad_norm": 5.111674690062485, "learning_rate": 2.3716640098578326e-07, "loss": 1.2175, "step": 11066 }, { "epoch": 0.9, "grad_norm": 4.474486226758203, "learning_rate": 2.3676370243939706e-07, "loss": 0.7457, "step": 11067 }, { "epoch": 0.9, "grad_norm": 4.7496111669335175, "learning_rate": 2.363613377751456e-07, "loss": 0.9001, "step": 11068 }, { "epoch": 0.9, "grad_norm": 3.2549787221233446, "learning_rate": 2.3595930702123292e-07, "loss": 0.7977, "step": 11069 }, { "epoch": 0.9, "grad_norm": 4.210072232193499, "learning_rate": 2.35557610205841e-07, "loss": 0.8362, "step": 11070 }, { "epoch": 0.9, "grad_norm": 2.482328728267237, "learning_rate": 2.351562473571245e-07, "loss": 0.3899, "step": 11071 }, { "epoch": 0.9, "grad_norm": 5.867913330107275, "learning_rate": 2.3475521850321868e-07, "loss": 0.971, "step": 11072 }, { "epoch": 0.91, "grad_norm": 3.6817365818980528, "learning_rate": 2.3435452367223333e-07, "loss": 0.5856, "step": 11073 }, { "epoch": 0.91, "grad_norm": 4.492208935289324, "learning_rate": 2.3395416289225591e-07, "loss": 1.3804, "step": 11074 }, { "epoch": 0.91, "grad_norm": 3.522649897360985, "learning_rate": 2.3355413619134958e-07, "loss": 0.9513, "step": 11075 }, { "epoch": 0.91, "grad_norm": 3.2002375390908013, "learning_rate": 2.3315444359755468e-07, "loss": 0.5962, "step": 11076 }, { "epoch": 0.91, "grad_norm": 3.699768339248374, "learning_rate": 2.3275508513888822e-07, "loss": 0.6855, "step": 11077 }, { "epoch": 0.91, "grad_norm": 5.2946389437483745, "learning_rate": 2.3235606084334285e-07, "loss": 0.5991, "step": 11078 }, { "epoch": 0.91, "grad_norm": 4.008303054571759, "learning_rate": 2.319573707388889e-07, "loss": 0.6266, "step": 11079 }, { "epoch": 0.91, "grad_norm": 3.0139603695193897, "learning_rate": 2.3155901485347242e-07, "loss": 0.5756, "step": 11080 }, { "epoch": 0.91, "grad_norm": 4.228017886089983, "learning_rate": 2.3116099321501716e-07, "loss": 0.441, "step": 11081 }, { "epoch": 0.91, "grad_norm": 5.21420696840283, "learning_rate": 2.3076330585142138e-07, "loss": 1.3786, "step": 11082 }, { "epoch": 0.91, "grad_norm": 3.7161093803831586, "learning_rate": 2.303659527905633e-07, "loss": 0.5781, "step": 11083 }, { "epoch": 0.91, "grad_norm": 3.732562707840122, "learning_rate": 2.2996893406029396e-07, "loss": 0.6167, "step": 11084 }, { "epoch": 0.91, "grad_norm": 3.9199633925383037, "learning_rate": 2.2957224968844227e-07, "loss": 0.7024, "step": 11085 }, { "epoch": 0.91, "grad_norm": 4.175139268446115, "learning_rate": 2.291758997028165e-07, "loss": 0.7179, "step": 11086 }, { "epoch": 0.91, "grad_norm": 4.684719713666393, "learning_rate": 2.2877988413119613e-07, "loss": 0.814, "step": 11087 }, { "epoch": 0.91, "grad_norm": 5.707293402429215, "learning_rate": 2.2838420300134168e-07, "loss": 1.1273, "step": 11088 }, { "epoch": 0.91, "grad_norm": 5.506806733915437, "learning_rate": 2.2798885634098934e-07, "loss": 1.2383, "step": 11089 }, { "epoch": 0.91, "grad_norm": 4.601802804472051, "learning_rate": 2.2759384417784914e-07, "loss": 0.8593, "step": 11090 }, { "epoch": 0.91, "grad_norm": 3.6701148366908787, "learning_rate": 2.2719916653961117e-07, "loss": 0.8575, "step": 11091 }, { "epoch": 0.91, "grad_norm": 3.647881541444441, "learning_rate": 2.268048234539405e-07, "loss": 0.5893, "step": 11092 }, { "epoch": 0.91, "grad_norm": 4.767204519975867, "learning_rate": 2.264108149484784e-07, "loss": 0.9492, "step": 11093 }, { "epoch": 0.91, "grad_norm": 4.01034330234706, "learning_rate": 2.2601714105084438e-07, "loss": 0.874, "step": 11094 }, { "epoch": 0.91, "grad_norm": 3.393774300890451, "learning_rate": 2.256238017886314e-07, "loss": 0.5693, "step": 11095 }, { "epoch": 0.91, "grad_norm": 3.0602238658907375, "learning_rate": 2.2523079718941188e-07, "loss": 0.3612, "step": 11096 }, { "epoch": 0.91, "grad_norm": 5.202968231086185, "learning_rate": 2.2483812728073372e-07, "loss": 0.9142, "step": 11097 }, { "epoch": 0.91, "grad_norm": 3.485032124470473, "learning_rate": 2.2444579209012106e-07, "loss": 0.4293, "step": 11098 }, { "epoch": 0.91, "grad_norm": 3.022784020013259, "learning_rate": 2.2405379164507524e-07, "loss": 0.6828, "step": 11099 }, { "epoch": 0.91, "grad_norm": 4.363452111618262, "learning_rate": 2.2366212597307424e-07, "loss": 0.618, "step": 11100 }, { "epoch": 0.91, "grad_norm": 4.0461915866057945, "learning_rate": 2.2327079510157112e-07, "loss": 0.7344, "step": 11101 }, { "epoch": 0.91, "grad_norm": 3.5256665808456873, "learning_rate": 2.2287979905799672e-07, "loss": 0.4726, "step": 11102 }, { "epoch": 0.91, "grad_norm": 4.923703005863906, "learning_rate": 2.2248913786975857e-07, "loss": 1.1763, "step": 11103 }, { "epoch": 0.91, "grad_norm": 2.6965162168198713, "learning_rate": 2.2209881156423973e-07, "loss": 0.3289, "step": 11104 }, { "epoch": 0.91, "grad_norm": 3.421436369491871, "learning_rate": 2.2170882016880112e-07, "loss": 0.7714, "step": 11105 }, { "epoch": 0.91, "grad_norm": 5.1651362991109435, "learning_rate": 2.213191637107792e-07, "loss": 0.8261, "step": 11106 }, { "epoch": 0.91, "grad_norm": 3.392052582605556, "learning_rate": 2.2092984221748602e-07, "loss": 0.7156, "step": 11107 }, { "epoch": 0.91, "grad_norm": 3.515651379382963, "learning_rate": 2.205408557162131e-07, "loss": 0.7332, "step": 11108 }, { "epoch": 0.91, "grad_norm": 3.996762719331602, "learning_rate": 2.2015220423422523e-07, "loss": 0.6411, "step": 11109 }, { "epoch": 0.91, "grad_norm": 4.481526049280716, "learning_rate": 2.1976388779876623e-07, "loss": 1.0722, "step": 11110 }, { "epoch": 0.91, "grad_norm": 6.452038705988483, "learning_rate": 2.1937590643705542e-07, "loss": 1.3209, "step": 11111 }, { "epoch": 0.91, "grad_norm": 4.40830111144125, "learning_rate": 2.1898826017628772e-07, "loss": 0.7558, "step": 11112 }, { "epoch": 0.91, "grad_norm": 6.21407611714033, "learning_rate": 2.1860094904363637e-07, "loss": 1.1922, "step": 11113 }, { "epoch": 0.91, "grad_norm": 2.211696547540112, "learning_rate": 2.1821397306624915e-07, "loss": 0.5482, "step": 11114 }, { "epoch": 0.91, "grad_norm": 1.10794839879072, "learning_rate": 2.1782733227125264e-07, "loss": 0.147, "step": 11115 }, { "epoch": 0.91, "grad_norm": 5.539108132473663, "learning_rate": 2.174410266857474e-07, "loss": 1.0284, "step": 11116 }, { "epoch": 0.91, "grad_norm": 3.4530422166033254, "learning_rate": 2.1705505633681346e-07, "loss": 0.4119, "step": 11117 }, { "epoch": 0.91, "grad_norm": 2.8801438058179403, "learning_rate": 2.1666942125150358e-07, "loss": 0.5742, "step": 11118 }, { "epoch": 0.91, "grad_norm": 4.218928014732488, "learning_rate": 2.1628412145685119e-07, "loss": 0.994, "step": 11119 }, { "epoch": 0.91, "grad_norm": 5.6833087536929625, "learning_rate": 2.158991569798624e-07, "loss": 1.198, "step": 11120 }, { "epoch": 0.91, "grad_norm": 5.325366267395837, "learning_rate": 2.1551452784752236e-07, "loss": 1.2089, "step": 11121 }, { "epoch": 0.91, "grad_norm": 4.651134494656022, "learning_rate": 2.1513023408679168e-07, "loss": 1.0882, "step": 11122 }, { "epoch": 0.91, "grad_norm": 5.374675834672166, "learning_rate": 2.1474627572460826e-07, "loss": 0.9013, "step": 11123 }, { "epoch": 0.91, "grad_norm": 4.39461346185137, "learning_rate": 2.1436265278788448e-07, "loss": 0.5845, "step": 11124 }, { "epoch": 0.91, "grad_norm": 3.167430852177149, "learning_rate": 2.1397936530351214e-07, "loss": 0.5196, "step": 11125 }, { "epoch": 0.91, "grad_norm": 3.3743656027738775, "learning_rate": 2.13596413298357e-07, "loss": 0.5903, "step": 11126 }, { "epoch": 0.91, "grad_norm": 3.548528392729398, "learning_rate": 2.1321379679926314e-07, "loss": 0.4787, "step": 11127 }, { "epoch": 0.91, "grad_norm": 2.7005002557902085, "learning_rate": 2.1283151583305073e-07, "loss": 0.4916, "step": 11128 }, { "epoch": 0.91, "grad_norm": 3.631365562756313, "learning_rate": 2.1244957042651394e-07, "loss": 0.5243, "step": 11129 }, { "epoch": 0.91, "grad_norm": 4.457370939567119, "learning_rate": 2.1206796060642742e-07, "loss": 0.8838, "step": 11130 }, { "epoch": 0.91, "grad_norm": 3.947617050297842, "learning_rate": 2.1168668639953925e-07, "loss": 0.6216, "step": 11131 }, { "epoch": 0.91, "grad_norm": 5.449892882699174, "learning_rate": 2.1130574783257585e-07, "loss": 1.1595, "step": 11132 }, { "epoch": 0.91, "grad_norm": 2.1387838929076355, "learning_rate": 2.109251449322397e-07, "loss": 0.5245, "step": 11133 }, { "epoch": 0.91, "grad_norm": 2.3414999678308424, "learning_rate": 2.105448777252078e-07, "loss": 0.3202, "step": 11134 }, { "epoch": 0.91, "grad_norm": 4.925262482104804, "learning_rate": 2.1016494623813722e-07, "loss": 0.9867, "step": 11135 }, { "epoch": 0.91, "grad_norm": 3.278064810880702, "learning_rate": 2.0978535049765769e-07, "loss": 0.5975, "step": 11136 }, { "epoch": 0.91, "grad_norm": 2.7913682482536117, "learning_rate": 2.0940609053037796e-07, "loss": 0.2869, "step": 11137 }, { "epoch": 0.91, "grad_norm": 3.942497206347499, "learning_rate": 2.090271663628829e-07, "loss": 0.8199, "step": 11138 }, { "epoch": 0.91, "grad_norm": 5.14434134047888, "learning_rate": 2.086485780217329e-07, "loss": 1.3063, "step": 11139 }, { "epoch": 0.91, "grad_norm": 5.727023327112773, "learning_rate": 2.0827032553346615e-07, "loss": 1.2092, "step": 11140 }, { "epoch": 0.91, "grad_norm": 4.28386093935721, "learning_rate": 2.0789240892459485e-07, "loss": 0.6101, "step": 11141 }, { "epoch": 0.91, "grad_norm": 4.052194647387861, "learning_rate": 2.0751482822161106e-07, "loss": 0.6826, "step": 11142 }, { "epoch": 0.91, "grad_norm": 2.5490516592790584, "learning_rate": 2.0713758345098033e-07, "loss": 0.3835, "step": 11143 }, { "epoch": 0.91, "grad_norm": 2.908189906576185, "learning_rate": 2.0676067463914651e-07, "loss": 0.5757, "step": 11144 }, { "epoch": 0.91, "grad_norm": 2.867167669543695, "learning_rate": 2.0638410181252898e-07, "loss": 0.5637, "step": 11145 }, { "epoch": 0.91, "grad_norm": 4.697488403153114, "learning_rate": 2.06007864997525e-07, "loss": 1.2564, "step": 11146 }, { "epoch": 0.91, "grad_norm": 2.991525616886259, "learning_rate": 2.0563196422050568e-07, "loss": 0.4381, "step": 11147 }, { "epoch": 0.91, "grad_norm": 2.7813495047843086, "learning_rate": 2.0525639950781996e-07, "loss": 0.3386, "step": 11148 }, { "epoch": 0.91, "grad_norm": 3.695648298957778, "learning_rate": 2.0488117088579506e-07, "loss": 0.5629, "step": 11149 }, { "epoch": 0.91, "grad_norm": 4.03623000154458, "learning_rate": 2.0450627838073056e-07, "loss": 0.6678, "step": 11150 }, { "epoch": 0.91, "grad_norm": 2.7482338355973073, "learning_rate": 2.0413172201890653e-07, "loss": 0.6516, "step": 11151 }, { "epoch": 0.91, "grad_norm": 2.864430039136069, "learning_rate": 2.0375750182657695e-07, "loss": 0.5004, "step": 11152 }, { "epoch": 0.91, "grad_norm": 4.4182944549418455, "learning_rate": 2.0338361782997252e-07, "loss": 0.7622, "step": 11153 }, { "epoch": 0.91, "grad_norm": 2.5796549918494818, "learning_rate": 2.0301007005530226e-07, "loss": 0.4822, "step": 11154 }, { "epoch": 0.91, "grad_norm": 5.101927255290817, "learning_rate": 2.0263685852874915e-07, "loss": 0.7614, "step": 11155 }, { "epoch": 0.91, "grad_norm": 4.936883196766509, "learning_rate": 2.022639832764739e-07, "loss": 1.0994, "step": 11156 }, { "epoch": 0.91, "grad_norm": 3.1251344484501056, "learning_rate": 2.0189144432461449e-07, "loss": 0.5361, "step": 11157 }, { "epoch": 0.91, "grad_norm": 4.220043319689753, "learning_rate": 2.0151924169928228e-07, "loss": 0.8652, "step": 11158 }, { "epoch": 0.91, "grad_norm": 5.371349797847683, "learning_rate": 2.011473754265686e-07, "loss": 0.8478, "step": 11159 }, { "epoch": 0.91, "grad_norm": 3.1755451624852777, "learning_rate": 2.0077584553253927e-07, "loss": 0.5387, "step": 11160 }, { "epoch": 0.91, "grad_norm": 5.700040156838395, "learning_rate": 2.0040465204323678e-07, "loss": 0.9394, "step": 11161 }, { "epoch": 0.91, "grad_norm": 4.7351776612595184, "learning_rate": 2.0003379498468035e-07, "loss": 0.836, "step": 11162 }, { "epoch": 0.91, "grad_norm": 4.607577896180465, "learning_rate": 1.9966327438286582e-07, "loss": 1.0263, "step": 11163 }, { "epoch": 0.91, "grad_norm": 3.4638615943766453, "learning_rate": 1.992930902637641e-07, "loss": 0.5572, "step": 11164 }, { "epoch": 0.91, "grad_norm": 3.442831959285367, "learning_rate": 1.9892324265332442e-07, "loss": 0.5543, "step": 11165 }, { "epoch": 0.91, "grad_norm": 5.233236165568079, "learning_rate": 1.9855373157747048e-07, "loss": 1.2052, "step": 11166 }, { "epoch": 0.91, "grad_norm": 2.152889625127049, "learning_rate": 1.9818455706210438e-07, "loss": 0.446, "step": 11167 }, { "epoch": 0.91, "grad_norm": 2.1104276927708336, "learning_rate": 1.9781571913310317e-07, "loss": 0.1884, "step": 11168 }, { "epoch": 0.91, "grad_norm": 5.0639604217808065, "learning_rate": 1.9744721781632115e-07, "loss": 1.0051, "step": 11169 }, { "epoch": 0.91, "grad_norm": 4.061353204589289, "learning_rate": 1.9707905313758769e-07, "loss": 0.8107, "step": 11170 }, { "epoch": 0.91, "grad_norm": 3.726630206028116, "learning_rate": 1.96711225122711e-07, "loss": 0.8195, "step": 11171 }, { "epoch": 0.91, "grad_norm": 6.104973615834729, "learning_rate": 1.9634373379747274e-07, "loss": 1.4508, "step": 11172 }, { "epoch": 0.91, "grad_norm": 3.120619482005494, "learning_rate": 1.9597657918763335e-07, "loss": 0.6486, "step": 11173 }, { "epoch": 0.91, "grad_norm": 5.191333913205158, "learning_rate": 1.9560976131892894e-07, "loss": 1.0356, "step": 11174 }, { "epoch": 0.91, "grad_norm": 3.905687977075724, "learning_rate": 1.9524328021707118e-07, "loss": 0.8676, "step": 11175 }, { "epoch": 0.91, "grad_norm": 3.586290688649648, "learning_rate": 1.9487713590774948e-07, "loss": 0.6796, "step": 11176 }, { "epoch": 0.91, "grad_norm": 2.000787043097494, "learning_rate": 1.9451132841662778e-07, "loss": 0.3549, "step": 11177 }, { "epoch": 0.91, "grad_norm": 5.2012772722664256, "learning_rate": 1.9414585776934892e-07, "loss": 1.1755, "step": 11178 }, { "epoch": 0.91, "grad_norm": 3.905691271358258, "learning_rate": 1.937807239915307e-07, "loss": 0.5006, "step": 11179 }, { "epoch": 0.91, "grad_norm": 4.424924940593239, "learning_rate": 1.9341592710876656e-07, "loss": 0.5779, "step": 11180 }, { "epoch": 0.91, "grad_norm": 4.047146689063044, "learning_rate": 1.9305146714662826e-07, "loss": 0.5132, "step": 11181 }, { "epoch": 0.91, "grad_norm": 2.7016882955252894, "learning_rate": 1.926873441306615e-07, "loss": 0.4587, "step": 11182 }, { "epoch": 0.91, "grad_norm": 3.9093433744808506, "learning_rate": 1.9232355808639025e-07, "loss": 0.6004, "step": 11183 }, { "epoch": 0.91, "grad_norm": 6.0015902471677585, "learning_rate": 1.9196010903931417e-07, "loss": 1.1875, "step": 11184 }, { "epoch": 0.91, "grad_norm": 3.9358852462762313, "learning_rate": 1.9159699701491065e-07, "loss": 0.5939, "step": 11185 }, { "epoch": 0.91, "grad_norm": 3.697605370531831, "learning_rate": 1.9123422203863152e-07, "loss": 0.5391, "step": 11186 }, { "epoch": 0.91, "grad_norm": 2.469171962086072, "learning_rate": 1.908717841359048e-07, "loss": 0.428, "step": 11187 }, { "epoch": 0.91, "grad_norm": 2.3907613046127594, "learning_rate": 1.9050968333213683e-07, "loss": 0.4528, "step": 11188 }, { "epoch": 0.91, "grad_norm": 2.5817391032192507, "learning_rate": 1.9014791965270952e-07, "loss": 0.4375, "step": 11189 }, { "epoch": 0.91, "grad_norm": 4.627772604588454, "learning_rate": 1.897864931229798e-07, "loss": 0.8307, "step": 11190 }, { "epoch": 0.91, "grad_norm": 2.789077798942602, "learning_rate": 1.8942540376828355e-07, "loss": 0.4359, "step": 11191 }, { "epoch": 0.91, "grad_norm": 2.7768245176577677, "learning_rate": 1.890646516139305e-07, "loss": 0.4011, "step": 11192 }, { "epoch": 0.91, "grad_norm": 5.210996217553082, "learning_rate": 1.887042366852082e-07, "loss": 0.9132, "step": 11193 }, { "epoch": 0.91, "grad_norm": 2.906255086628593, "learning_rate": 1.8834415900737978e-07, "loss": 0.5761, "step": 11194 }, { "epoch": 0.92, "grad_norm": 2.6180163634599065, "learning_rate": 1.8798441860568616e-07, "loss": 0.6036, "step": 11195 }, { "epoch": 0.92, "grad_norm": 1.0980136267430478, "learning_rate": 1.8762501550534217e-07, "loss": 0.0995, "step": 11196 }, { "epoch": 0.92, "grad_norm": 4.290132282228848, "learning_rate": 1.87265949731541e-07, "loss": 0.6914, "step": 11197 }, { "epoch": 0.92, "grad_norm": 4.136875746607067, "learning_rate": 1.869072213094525e-07, "loss": 0.7159, "step": 11198 }, { "epoch": 0.92, "grad_norm": 2.467381386165635, "learning_rate": 1.865488302642199e-07, "loss": 0.5064, "step": 11199 }, { "epoch": 0.92, "grad_norm": 2.4473827868656355, "learning_rate": 1.8619077662096696e-07, "loss": 0.4848, "step": 11200 }, { "epoch": 0.92, "grad_norm": 4.864204754691415, "learning_rate": 1.8583306040479032e-07, "loss": 0.8725, "step": 11201 }, { "epoch": 0.92, "grad_norm": 4.180014132625099, "learning_rate": 1.854756816407649e-07, "loss": 0.8338, "step": 11202 }, { "epoch": 0.92, "grad_norm": 3.8155883572348497, "learning_rate": 1.851186403539418e-07, "loss": 0.8488, "step": 11203 }, { "epoch": 0.92, "grad_norm": 4.525992468347426, "learning_rate": 1.8476193656934704e-07, "loss": 1.1812, "step": 11204 }, { "epoch": 0.92, "grad_norm": 4.7447516147126025, "learning_rate": 1.844055703119846e-07, "loss": 1.0543, "step": 11205 }, { "epoch": 0.92, "grad_norm": 4.17017342661183, "learning_rate": 1.8404954160683443e-07, "loss": 0.4521, "step": 11206 }, { "epoch": 0.92, "grad_norm": 4.946381787997414, "learning_rate": 1.8369385047885156e-07, "loss": 0.8788, "step": 11207 }, { "epoch": 0.92, "grad_norm": 4.172479909491882, "learning_rate": 1.8333849695297e-07, "loss": 0.7996, "step": 11208 }, { "epoch": 0.92, "grad_norm": 3.2283229855748674, "learning_rate": 1.8298348105409757e-07, "loss": 0.5261, "step": 11209 }, { "epoch": 0.92, "grad_norm": 4.733079662348577, "learning_rate": 1.826288028071188e-07, "loss": 0.8896, "step": 11210 }, { "epoch": 0.92, "grad_norm": 4.139927670971216, "learning_rate": 1.8227446223689605e-07, "loss": 0.9959, "step": 11211 }, { "epoch": 0.92, "grad_norm": 5.325872302072155, "learning_rate": 1.819204593682672e-07, "loss": 0.775, "step": 11212 }, { "epoch": 0.92, "grad_norm": 3.820136054276924, "learning_rate": 1.8156679422604516e-07, "loss": 0.816, "step": 11213 }, { "epoch": 0.92, "grad_norm": 4.790950829563678, "learning_rate": 1.8121346683502183e-07, "loss": 0.6849, "step": 11214 }, { "epoch": 0.92, "grad_norm": 3.5023638899812206, "learning_rate": 1.808604772199618e-07, "loss": 0.6212, "step": 11215 }, { "epoch": 0.92, "grad_norm": 3.478480719963517, "learning_rate": 1.8050782540560974e-07, "loss": 0.6308, "step": 11216 }, { "epoch": 0.92, "grad_norm": 4.570003184022221, "learning_rate": 1.8015551141668474e-07, "loss": 0.5625, "step": 11217 }, { "epoch": 0.92, "grad_norm": 3.8625066042343503, "learning_rate": 1.7980353527788207e-07, "loss": 0.8208, "step": 11218 }, { "epoch": 0.92, "grad_norm": 4.897763261574733, "learning_rate": 1.7945189701387422e-07, "loss": 0.8867, "step": 11219 }, { "epoch": 0.92, "grad_norm": 3.56730900511684, "learning_rate": 1.7910059664930978e-07, "loss": 0.6531, "step": 11220 }, { "epoch": 0.92, "grad_norm": 3.5322507412594497, "learning_rate": 1.7874963420881242e-07, "loss": 0.4657, "step": 11221 }, { "epoch": 0.92, "grad_norm": 3.7200820452997982, "learning_rate": 1.7839900971698355e-07, "loss": 0.7998, "step": 11222 }, { "epoch": 0.92, "grad_norm": 4.13435755876668, "learning_rate": 1.7804872319840017e-07, "loss": 0.7359, "step": 11223 }, { "epoch": 0.92, "grad_norm": 2.2755152186076573, "learning_rate": 1.7769877467761655e-07, "loss": 0.5649, "step": 11224 }, { "epoch": 0.92, "grad_norm": 2.6590296290393876, "learning_rate": 1.7734916417916136e-07, "loss": 0.2297, "step": 11225 }, { "epoch": 0.92, "grad_norm": 3.842949647874636, "learning_rate": 1.7699989172754284e-07, "loss": 0.7091, "step": 11226 }, { "epoch": 0.92, "grad_norm": 4.819659656122972, "learning_rate": 1.7665095734724136e-07, "loss": 0.8342, "step": 11227 }, { "epoch": 0.92, "grad_norm": 3.748007448179637, "learning_rate": 1.763023610627168e-07, "loss": 0.9059, "step": 11228 }, { "epoch": 0.92, "grad_norm": 3.9416185834126987, "learning_rate": 1.7595410289840352e-07, "loss": 0.5522, "step": 11229 }, { "epoch": 0.92, "grad_norm": 3.515053494653996, "learning_rate": 1.756061828787131e-07, "loss": 0.7281, "step": 11230 }, { "epoch": 0.92, "grad_norm": 2.4827523750750853, "learning_rate": 1.7525860102803438e-07, "loss": 0.321, "step": 11231 }, { "epoch": 0.92, "grad_norm": 4.901220700742775, "learning_rate": 1.749113573707295e-07, "loss": 0.874, "step": 11232 }, { "epoch": 0.92, "grad_norm": 4.183832641562579, "learning_rate": 1.7456445193114014e-07, "loss": 0.733, "step": 11233 }, { "epoch": 0.92, "grad_norm": 5.000577475774218, "learning_rate": 1.742178847335818e-07, "loss": 1.3244, "step": 11234 }, { "epoch": 0.92, "grad_norm": 4.83867868502028, "learning_rate": 1.7387165580234789e-07, "loss": 0.7298, "step": 11235 }, { "epoch": 0.92, "grad_norm": 1.3992288505410486, "learning_rate": 1.7352576516170784e-07, "loss": 0.1665, "step": 11236 }, { "epoch": 0.92, "grad_norm": 4.221374384301655, "learning_rate": 1.7318021283590724e-07, "loss": 0.9748, "step": 11237 }, { "epoch": 0.92, "grad_norm": 1.5943361673092038, "learning_rate": 1.7283499884916677e-07, "loss": 0.3766, "step": 11238 }, { "epoch": 0.92, "grad_norm": 4.624902684414091, "learning_rate": 1.724901232256848e-07, "loss": 1.0204, "step": 11239 }, { "epoch": 0.92, "grad_norm": 4.970727945735099, "learning_rate": 1.7214558598963594e-07, "loss": 0.7984, "step": 11240 }, { "epoch": 0.92, "grad_norm": 4.866006374575356, "learning_rate": 1.7180138716517025e-07, "loss": 0.6781, "step": 11241 }, { "epoch": 0.92, "grad_norm": 3.6831592115954566, "learning_rate": 1.714575267764157e-07, "loss": 0.6031, "step": 11242 }, { "epoch": 0.92, "grad_norm": 5.117946502570576, "learning_rate": 1.7111400484747409e-07, "loss": 0.756, "step": 11243 }, { "epoch": 0.92, "grad_norm": 4.028915158091609, "learning_rate": 1.7077082140242617e-07, "loss": 1.0491, "step": 11244 }, { "epoch": 0.92, "grad_norm": 3.528198610470189, "learning_rate": 1.7042797646532606e-07, "loss": 0.5895, "step": 11245 }, { "epoch": 0.92, "grad_norm": 3.3915221070718706, "learning_rate": 1.700854700602067e-07, "loss": 0.6763, "step": 11246 }, { "epoch": 0.92, "grad_norm": 3.9585794046649387, "learning_rate": 1.697433022110756e-07, "loss": 0.6661, "step": 11247 }, { "epoch": 0.92, "grad_norm": 4.0598631303407915, "learning_rate": 1.69401472941918e-07, "loss": 0.6134, "step": 11248 }, { "epoch": 0.92, "grad_norm": 4.833142545038157, "learning_rate": 1.6905998227669474e-07, "loss": 1.0471, "step": 11249 }, { "epoch": 0.92, "grad_norm": 4.217539587480963, "learning_rate": 1.6871883023934166e-07, "loss": 0.6563, "step": 11250 }, { "epoch": 0.92, "grad_norm": 2.960147177640882, "learning_rate": 1.6837801685377298e-07, "loss": 0.2802, "step": 11251 }, { "epoch": 0.92, "grad_norm": 4.584210166954635, "learning_rate": 1.680375421438779e-07, "loss": 0.9426, "step": 11252 }, { "epoch": 0.92, "grad_norm": 2.839656649825677, "learning_rate": 1.6769740613352237e-07, "loss": 0.4337, "step": 11253 }, { "epoch": 0.92, "grad_norm": 3.337294598224811, "learning_rate": 1.6735760884654894e-07, "loss": 0.5698, "step": 11254 }, { "epoch": 0.92, "grad_norm": 6.090740134791479, "learning_rate": 1.6701815030677525e-07, "loss": 0.9533, "step": 11255 }, { "epoch": 0.92, "grad_norm": 4.086760664500222, "learning_rate": 1.666790305379956e-07, "loss": 0.6691, "step": 11256 }, { "epoch": 0.92, "grad_norm": 4.063399156552799, "learning_rate": 1.6634024956398098e-07, "loss": 0.6545, "step": 11257 }, { "epoch": 0.92, "grad_norm": 4.305498616104555, "learning_rate": 1.660018074084796e-07, "loss": 0.628, "step": 11258 }, { "epoch": 0.92, "grad_norm": 3.9923655462342778, "learning_rate": 1.656637040952136e-07, "loss": 0.555, "step": 11259 }, { "epoch": 0.92, "grad_norm": 2.48463791219742, "learning_rate": 1.653259396478829e-07, "loss": 0.4043, "step": 11260 }, { "epoch": 0.92, "grad_norm": 2.1888494863078725, "learning_rate": 1.6498851409016304e-07, "loss": 0.3287, "step": 11261 }, { "epoch": 0.92, "grad_norm": 4.391289356990704, "learning_rate": 1.6465142744570617e-07, "loss": 0.9179, "step": 11262 }, { "epoch": 0.92, "grad_norm": 2.4228954424793883, "learning_rate": 1.6431467973814064e-07, "loss": 0.2831, "step": 11263 }, { "epoch": 0.92, "grad_norm": 3.3720171548713602, "learning_rate": 1.6397827099107144e-07, "loss": 0.5803, "step": 11264 }, { "epoch": 0.92, "grad_norm": 3.5179120097759555, "learning_rate": 1.6364220122807862e-07, "loss": 0.6053, "step": 11265 }, { "epoch": 0.92, "grad_norm": 4.620685463962939, "learning_rate": 1.6330647047272052e-07, "loss": 0.8843, "step": 11266 }, { "epoch": 0.92, "grad_norm": 4.078360315208572, "learning_rate": 1.6297107874852836e-07, "loss": 0.5724, "step": 11267 }, { "epoch": 0.92, "grad_norm": 6.353974912838414, "learning_rate": 1.626360260790133e-07, "loss": 1.1554, "step": 11268 }, { "epoch": 0.92, "grad_norm": 2.4036611618739023, "learning_rate": 1.6230131248766046e-07, "loss": 0.6715, "step": 11269 }, { "epoch": 0.92, "grad_norm": 3.58811237856854, "learning_rate": 1.6196693799793162e-07, "loss": 0.7753, "step": 11270 }, { "epoch": 0.92, "grad_norm": 5.064100711903729, "learning_rate": 1.616329026332658e-07, "loss": 0.7528, "step": 11271 }, { "epoch": 0.92, "grad_norm": 5.656043693642232, "learning_rate": 1.6129920641707654e-07, "loss": 1.1097, "step": 11272 }, { "epoch": 0.92, "grad_norm": 4.467223107675906, "learning_rate": 1.6096584937275505e-07, "loss": 0.9837, "step": 11273 }, { "epoch": 0.92, "grad_norm": 3.701685744944751, "learning_rate": 1.6063283152366772e-07, "loss": 0.7138, "step": 11274 }, { "epoch": 0.92, "grad_norm": 5.187258886409783, "learning_rate": 1.6030015289315804e-07, "loss": 1.4469, "step": 11275 }, { "epoch": 0.92, "grad_norm": 2.559429823627061, "learning_rate": 1.5996781350454516e-07, "loss": 0.3951, "step": 11276 }, { "epoch": 0.92, "grad_norm": 2.550707903735016, "learning_rate": 1.5963581338112488e-07, "loss": 0.3247, "step": 11277 }, { "epoch": 0.92, "grad_norm": 4.41070075623855, "learning_rate": 1.5930415254616804e-07, "loss": 0.9562, "step": 11278 }, { "epoch": 0.92, "grad_norm": 4.845711815923627, "learning_rate": 1.5897283102292383e-07, "loss": 0.6267, "step": 11279 }, { "epoch": 0.92, "grad_norm": 3.828948205716296, "learning_rate": 1.5864184883461587e-07, "loss": 0.6446, "step": 11280 }, { "epoch": 0.92, "grad_norm": 3.9044405188980735, "learning_rate": 1.583112060044445e-07, "loss": 0.6316, "step": 11281 }, { "epoch": 0.92, "grad_norm": 4.860168447675666, "learning_rate": 1.5798090255558617e-07, "loss": 1.086, "step": 11282 }, { "epoch": 0.92, "grad_norm": 3.8752415390305104, "learning_rate": 1.5765093851119518e-07, "loss": 0.7086, "step": 11283 }, { "epoch": 0.92, "grad_norm": 5.086774830872845, "learning_rate": 1.5732131389439853e-07, "loss": 0.9672, "step": 11284 }, { "epoch": 0.92, "grad_norm": 4.262079064086391, "learning_rate": 1.5699202872830278e-07, "loss": 0.6275, "step": 11285 }, { "epoch": 0.92, "grad_norm": 4.3456980321069585, "learning_rate": 1.5666308303598833e-07, "loss": 1.023, "step": 11286 }, { "epoch": 0.92, "grad_norm": 4.004805392257209, "learning_rate": 1.5633447684051395e-07, "loss": 0.6211, "step": 11287 }, { "epoch": 0.92, "grad_norm": 4.4750971894269, "learning_rate": 1.5600621016491347e-07, "loss": 0.6443, "step": 11288 }, { "epoch": 0.92, "grad_norm": 4.383512278224929, "learning_rate": 1.5567828303219566e-07, "loss": 0.8311, "step": 11289 }, { "epoch": 0.92, "grad_norm": 4.659952133960186, "learning_rate": 1.5535069546534887e-07, "loss": 0.8986, "step": 11290 }, { "epoch": 0.92, "grad_norm": 1.6494431478194589, "learning_rate": 1.5502344748733356e-07, "loss": 0.3387, "step": 11291 }, { "epoch": 0.92, "grad_norm": 5.315047979396131, "learning_rate": 1.5469653912108862e-07, "loss": 1.0557, "step": 11292 }, { "epoch": 0.92, "grad_norm": 5.456468791457157, "learning_rate": 1.5436997038953017e-07, "loss": 1.0145, "step": 11293 }, { "epoch": 0.92, "grad_norm": 3.3868376907378184, "learning_rate": 1.5404374131554877e-07, "loss": 0.7768, "step": 11294 }, { "epoch": 0.92, "grad_norm": 2.3987429541251983, "learning_rate": 1.537178519220106e-07, "loss": 0.361, "step": 11295 }, { "epoch": 0.92, "grad_norm": 3.3747056365014325, "learning_rate": 1.5339230223176016e-07, "loss": 0.7798, "step": 11296 }, { "epoch": 0.92, "grad_norm": 4.5850680765242355, "learning_rate": 1.5306709226761696e-07, "loss": 0.9988, "step": 11297 }, { "epoch": 0.92, "grad_norm": 1.9340330748299945, "learning_rate": 1.5274222205237664e-07, "loss": 0.2935, "step": 11298 }, { "epoch": 0.92, "grad_norm": 4.923023096241975, "learning_rate": 1.5241769160881104e-07, "loss": 0.8663, "step": 11299 }, { "epoch": 0.92, "grad_norm": 5.431124122208152, "learning_rate": 1.520935009596697e-07, "loss": 0.6967, "step": 11300 }, { "epoch": 0.92, "grad_norm": 3.5747143000254358, "learning_rate": 1.5176965012767443e-07, "loss": 0.7445, "step": 11301 }, { "epoch": 0.92, "grad_norm": 4.122107757528846, "learning_rate": 1.5144613913552765e-07, "loss": 0.7541, "step": 11302 }, { "epoch": 0.92, "grad_norm": 4.611120114866463, "learning_rate": 1.511229680059051e-07, "loss": 0.7325, "step": 11303 }, { "epoch": 0.92, "grad_norm": 3.315470373503458, "learning_rate": 1.5080013676146032e-07, "loss": 0.5616, "step": 11304 }, { "epoch": 0.92, "grad_norm": 5.124800589520938, "learning_rate": 1.5047764542482245e-07, "loss": 1.1372, "step": 11305 }, { "epoch": 0.92, "grad_norm": 4.052256498311689, "learning_rate": 1.501554940185962e-07, "loss": 0.7783, "step": 11306 }, { "epoch": 0.92, "grad_norm": 4.691173523402315, "learning_rate": 1.49833682565364e-07, "loss": 1.0757, "step": 11307 }, { "epoch": 0.92, "grad_norm": 4.582743442181179, "learning_rate": 1.4951221108768177e-07, "loss": 0.8932, "step": 11308 }, { "epoch": 0.92, "grad_norm": 4.08830725166257, "learning_rate": 1.491910796080842e-07, "loss": 1.0209, "step": 11309 }, { "epoch": 0.92, "grad_norm": 4.038731637184449, "learning_rate": 1.488702881490811e-07, "loss": 1.1201, "step": 11310 }, { "epoch": 0.92, "grad_norm": 5.890836273218682, "learning_rate": 1.4854983673315948e-07, "loss": 0.7534, "step": 11311 }, { "epoch": 0.92, "grad_norm": 3.190074553783258, "learning_rate": 1.482297253827797e-07, "loss": 0.4521, "step": 11312 }, { "epoch": 0.92, "grad_norm": 4.42871335374221, "learning_rate": 1.4790995412038156e-07, "loss": 0.5175, "step": 11313 }, { "epoch": 0.92, "grad_norm": 4.36379925264401, "learning_rate": 1.4759052296837884e-07, "loss": 0.6415, "step": 11314 }, { "epoch": 0.92, "grad_norm": 1.0429800495424937, "learning_rate": 1.4727143194916304e-07, "loss": 0.1184, "step": 11315 }, { "epoch": 0.92, "grad_norm": 3.913291195983895, "learning_rate": 1.4695268108510075e-07, "loss": 1.0162, "step": 11316 }, { "epoch": 0.93, "grad_norm": 1.2645169967609522, "learning_rate": 1.4663427039853574e-07, "loss": 0.1846, "step": 11317 }, { "epoch": 0.93, "grad_norm": 3.820504603394702, "learning_rate": 1.4631619991178568e-07, "loss": 0.8034, "step": 11318 }, { "epoch": 0.93, "grad_norm": 6.132497291593508, "learning_rate": 1.459984696471467e-07, "loss": 1.0389, "step": 11319 }, { "epoch": 0.93, "grad_norm": 3.483890146892965, "learning_rate": 1.4568107962688981e-07, "loss": 0.6468, "step": 11320 }, { "epoch": 0.93, "grad_norm": 1.8060879496132378, "learning_rate": 1.4536402987326448e-07, "loss": 0.3804, "step": 11321 }, { "epoch": 0.93, "grad_norm": 3.6983043868534735, "learning_rate": 1.4504732040849234e-07, "loss": 0.6215, "step": 11322 }, { "epoch": 0.93, "grad_norm": 3.8015248517561604, "learning_rate": 1.4473095125477455e-07, "loss": 0.5368, "step": 11323 }, { "epoch": 0.93, "grad_norm": 2.678951740513247, "learning_rate": 1.4441492243428668e-07, "loss": 0.4824, "step": 11324 }, { "epoch": 0.93, "grad_norm": 3.2553897273348658, "learning_rate": 1.4409923396918102e-07, "loss": 0.5046, "step": 11325 }, { "epoch": 0.93, "grad_norm": 4.679285071330336, "learning_rate": 1.4378388588158598e-07, "loss": 1.0004, "step": 11326 }, { "epoch": 0.93, "grad_norm": 5.874660054217869, "learning_rate": 1.4346887819360667e-07, "loss": 0.9498, "step": 11327 }, { "epoch": 0.93, "grad_norm": 1.7774450413499203, "learning_rate": 1.4315421092732262e-07, "loss": 0.3228, "step": 11328 }, { "epoch": 0.93, "grad_norm": 4.245778087293293, "learning_rate": 1.4283988410479233e-07, "loss": 0.5838, "step": 11329 }, { "epoch": 0.93, "grad_norm": 2.677879261074963, "learning_rate": 1.4252589774804705e-07, "loss": 0.3299, "step": 11330 }, { "epoch": 0.93, "grad_norm": 3.396066471615801, "learning_rate": 1.4221225187909692e-07, "loss": 0.5794, "step": 11331 }, { "epoch": 0.93, "grad_norm": 2.993010315065761, "learning_rate": 1.4189894651992665e-07, "loss": 0.4717, "step": 11332 }, { "epoch": 0.93, "grad_norm": 4.70289365334675, "learning_rate": 1.415859816924975e-07, "loss": 0.8767, "step": 11333 }, { "epoch": 0.93, "grad_norm": 4.220641939449342, "learning_rate": 1.412733574187475e-07, "loss": 0.6153, "step": 11334 }, { "epoch": 0.93, "grad_norm": 4.365275761926734, "learning_rate": 1.4096107372058966e-07, "loss": 0.5104, "step": 11335 }, { "epoch": 0.93, "grad_norm": 3.9165392789832314, "learning_rate": 1.406491306199137e-07, "loss": 0.5385, "step": 11336 }, { "epoch": 0.93, "grad_norm": 4.824793724718174, "learning_rate": 1.4033752813858603e-07, "loss": 1.0546, "step": 11337 }, { "epoch": 0.93, "grad_norm": 3.780770875995635, "learning_rate": 1.4002626629844862e-07, "loss": 0.5101, "step": 11338 }, { "epoch": 0.93, "grad_norm": 2.5442777056394754, "learning_rate": 1.3971534512131845e-07, "loss": 0.253, "step": 11339 }, { "epoch": 0.93, "grad_norm": 5.6304779254960815, "learning_rate": 1.3940476462899143e-07, "loss": 0.815, "step": 11340 }, { "epoch": 0.93, "grad_norm": 3.6909485546180965, "learning_rate": 1.3909452484323682e-07, "loss": 0.5588, "step": 11341 }, { "epoch": 0.93, "grad_norm": 4.095782462527793, "learning_rate": 1.3878462578580054e-07, "loss": 0.6584, "step": 11342 }, { "epoch": 0.93, "grad_norm": 4.056814596706321, "learning_rate": 1.3847506747840633e-07, "loss": 0.7376, "step": 11343 }, { "epoch": 0.93, "grad_norm": 3.2531875872661526, "learning_rate": 1.3816584994275238e-07, "loss": 0.5637, "step": 11344 }, { "epoch": 0.93, "grad_norm": 3.926482984992203, "learning_rate": 1.378569732005136e-07, "loss": 0.9198, "step": 11345 }, { "epoch": 0.93, "grad_norm": 3.9457916584839743, "learning_rate": 1.3754843727334156e-07, "loss": 0.5255, "step": 11346 }, { "epoch": 0.93, "grad_norm": 4.727290180209972, "learning_rate": 1.3724024218286224e-07, "loss": 0.9668, "step": 11347 }, { "epoch": 0.93, "grad_norm": 5.224952767802356, "learning_rate": 1.3693238795067897e-07, "loss": 0.9269, "step": 11348 }, { "epoch": 0.93, "grad_norm": 4.965815540685722, "learning_rate": 1.3662487459837114e-07, "loss": 1.1514, "step": 11349 }, { "epoch": 0.93, "grad_norm": 5.049303234978047, "learning_rate": 1.3631770214749374e-07, "loss": 0.6255, "step": 11350 }, { "epoch": 0.93, "grad_norm": 3.981080124905114, "learning_rate": 1.3601087061957953e-07, "loss": 0.515, "step": 11351 }, { "epoch": 0.93, "grad_norm": 4.615161473763682, "learning_rate": 1.3570438003613462e-07, "loss": 0.8371, "step": 11352 }, { "epoch": 0.93, "grad_norm": 4.9398318733080595, "learning_rate": 1.3539823041864354e-07, "loss": 1.1855, "step": 11353 }, { "epoch": 0.93, "grad_norm": 2.9646347094591925, "learning_rate": 1.350924217885652e-07, "loss": 0.5909, "step": 11354 }, { "epoch": 0.93, "grad_norm": 3.4578430140887053, "learning_rate": 1.3478695416733577e-07, "loss": 0.5344, "step": 11355 }, { "epoch": 0.93, "grad_norm": 3.1623744287144895, "learning_rate": 1.3448182757636763e-07, "loss": 0.457, "step": 11356 }, { "epoch": 0.93, "grad_norm": 4.4389502492090305, "learning_rate": 1.3417704203704919e-07, "loss": 0.5576, "step": 11357 }, { "epoch": 0.93, "grad_norm": 3.7181312116356975, "learning_rate": 1.3387259757074334e-07, "loss": 0.8019, "step": 11358 }, { "epoch": 0.93, "grad_norm": 4.336195288213495, "learning_rate": 1.335684941987908e-07, "loss": 1.0168, "step": 11359 }, { "epoch": 0.93, "grad_norm": 4.387383483589837, "learning_rate": 1.3326473194250844e-07, "loss": 0.8317, "step": 11360 }, { "epoch": 0.93, "grad_norm": 4.455915677307732, "learning_rate": 1.3296131082318808e-07, "loss": 0.7971, "step": 11361 }, { "epoch": 0.93, "grad_norm": 1.0769604095285712, "learning_rate": 1.3265823086209828e-07, "loss": 0.1388, "step": 11362 }, { "epoch": 0.93, "grad_norm": 4.403256666098837, "learning_rate": 1.3235549208048426e-07, "loss": 0.5191, "step": 11363 }, { "epoch": 0.93, "grad_norm": 2.7045935963524754, "learning_rate": 1.320530944995657e-07, "loss": 0.5484, "step": 11364 }, { "epoch": 0.93, "grad_norm": 4.352491888105958, "learning_rate": 1.3175103814053958e-07, "loss": 0.7698, "step": 11365 }, { "epoch": 0.93, "grad_norm": 2.037086127689683, "learning_rate": 1.3144932302457948e-07, "loss": 0.3168, "step": 11366 }, { "epoch": 0.93, "grad_norm": 4.628660771907785, "learning_rate": 1.3114794917283403e-07, "loss": 0.8921, "step": 11367 }, { "epoch": 0.93, "grad_norm": 5.473819307131876, "learning_rate": 1.3084691660642746e-07, "loss": 0.7624, "step": 11368 }, { "epoch": 0.93, "grad_norm": 4.545272780439525, "learning_rate": 1.3054622534646234e-07, "loss": 0.7565, "step": 11369 }, { "epoch": 0.93, "grad_norm": 3.37639945632583, "learning_rate": 1.3024587541401402e-07, "loss": 0.8412, "step": 11370 }, { "epoch": 0.93, "grad_norm": 3.231299791289676, "learning_rate": 1.2994586683013677e-07, "loss": 0.5228, "step": 11371 }, { "epoch": 0.93, "grad_norm": 3.6247239231922217, "learning_rate": 1.2964619961585934e-07, "loss": 0.6691, "step": 11372 }, { "epoch": 0.93, "grad_norm": 4.335661847549364, "learning_rate": 1.2934687379218769e-07, "loss": 0.7996, "step": 11373 }, { "epoch": 0.93, "grad_norm": 4.194778115338029, "learning_rate": 1.2904788938010392e-07, "loss": 0.8221, "step": 11374 }, { "epoch": 0.93, "grad_norm": 4.091606578487979, "learning_rate": 1.2874924640056352e-07, "loss": 1.1225, "step": 11375 }, { "epoch": 0.93, "grad_norm": 4.524644004317661, "learning_rate": 1.2845094487450193e-07, "loss": 0.9753, "step": 11376 }, { "epoch": 0.93, "grad_norm": 5.182308703363743, "learning_rate": 1.2815298482282746e-07, "loss": 0.9673, "step": 11377 }, { "epoch": 0.93, "grad_norm": 5.513074224594544, "learning_rate": 1.278553662664267e-07, "loss": 1.0608, "step": 11378 }, { "epoch": 0.93, "grad_norm": 3.050545949195318, "learning_rate": 1.2755808922616075e-07, "loss": 0.2652, "step": 11379 }, { "epoch": 0.93, "grad_norm": 4.282441709700939, "learning_rate": 1.2726115372286852e-07, "loss": 0.6766, "step": 11380 }, { "epoch": 0.93, "grad_norm": 5.193279806048703, "learning_rate": 1.2696455977736278e-07, "loss": 1.0562, "step": 11381 }, { "epoch": 0.93, "grad_norm": 4.149582341907641, "learning_rate": 1.266683074104341e-07, "loss": 0.8034, "step": 11382 }, { "epoch": 0.93, "grad_norm": 3.8986898507073433, "learning_rate": 1.2637239664284816e-07, "loss": 0.6925, "step": 11383 }, { "epoch": 0.93, "grad_norm": 6.340439187009353, "learning_rate": 1.2607682749534723e-07, "loss": 0.9223, "step": 11384 }, { "epoch": 0.93, "grad_norm": 5.563330446190727, "learning_rate": 1.2578159998864858e-07, "loss": 1.0805, "step": 11385 }, { "epoch": 0.93, "grad_norm": 4.127752602449478, "learning_rate": 1.2548671414344848e-07, "loss": 0.997, "step": 11386 }, { "epoch": 0.93, "grad_norm": 3.686063809926292, "learning_rate": 1.2519216998041483e-07, "loss": 0.5383, "step": 11387 }, { "epoch": 0.93, "grad_norm": 2.025269407670612, "learning_rate": 1.2489796752019446e-07, "loss": 0.286, "step": 11388 }, { "epoch": 0.93, "grad_norm": 3.5751929466185035, "learning_rate": 1.2460410678341027e-07, "loss": 0.5423, "step": 11389 }, { "epoch": 0.93, "grad_norm": 3.870787922141563, "learning_rate": 1.2431058779066086e-07, "loss": 0.7163, "step": 11390 }, { "epoch": 0.93, "grad_norm": 3.8724761807165287, "learning_rate": 1.2401741056252027e-07, "loss": 0.4614, "step": 11391 }, { "epoch": 0.93, "grad_norm": 2.818761266802908, "learning_rate": 1.2372457511953816e-07, "loss": 0.5997, "step": 11392 }, { "epoch": 0.93, "grad_norm": 3.9234466514415307, "learning_rate": 1.2343208148224205e-07, "loss": 0.3844, "step": 11393 }, { "epoch": 0.93, "grad_norm": 2.6717868926200237, "learning_rate": 1.2313992967113442e-07, "loss": 0.4092, "step": 11394 }, { "epoch": 0.93, "grad_norm": 4.543066468599289, "learning_rate": 1.228481197066933e-07, "loss": 0.8673, "step": 11395 }, { "epoch": 0.93, "grad_norm": 4.16694214591345, "learning_rate": 1.2255665160937346e-07, "loss": 0.9808, "step": 11396 }, { "epoch": 0.93, "grad_norm": 4.468145315616178, "learning_rate": 1.2226552539960578e-07, "loss": 0.8954, "step": 11397 }, { "epoch": 0.93, "grad_norm": 5.62108876858957, "learning_rate": 1.2197474109779672e-07, "loss": 0.9778, "step": 11398 }, { "epoch": 0.93, "grad_norm": 3.3769233539605152, "learning_rate": 1.2168429872432941e-07, "loss": 0.5391, "step": 11399 }, { "epoch": 0.93, "grad_norm": 4.030085613305808, "learning_rate": 1.21394198299562e-07, "loss": 0.8454, "step": 11400 }, { "epoch": 0.93, "grad_norm": 5.005327340476278, "learning_rate": 1.2110443984382936e-07, "loss": 0.9019, "step": 11401 }, { "epoch": 0.93, "grad_norm": 4.066588979693674, "learning_rate": 1.2081502337744245e-07, "loss": 0.5903, "step": 11402 }, { "epoch": 0.93, "grad_norm": 4.59288686119471, "learning_rate": 1.2052594892068892e-07, "loss": 1.2391, "step": 11403 }, { "epoch": 0.93, "grad_norm": 3.5518887254683364, "learning_rate": 1.202372164938298e-07, "loss": 0.5666, "step": 11404 }, { "epoch": 0.93, "grad_norm": 4.788599245153345, "learning_rate": 1.1994882611710502e-07, "loss": 0.6974, "step": 11405 }, { "epoch": 0.93, "grad_norm": 3.8459791543792003, "learning_rate": 1.1966077781073006e-07, "loss": 0.8061, "step": 11406 }, { "epoch": 0.93, "grad_norm": 5.2033452368178885, "learning_rate": 1.1937307159489486e-07, "loss": 1.2126, "step": 11407 }, { "epoch": 0.93, "grad_norm": 5.139334628288193, "learning_rate": 1.1908570748976666e-07, "loss": 0.5134, "step": 11408 }, { "epoch": 0.93, "grad_norm": 5.922911461641456, "learning_rate": 1.1879868551548935e-07, "loss": 0.9709, "step": 11409 }, { "epoch": 0.93, "grad_norm": 4.413980700520785, "learning_rate": 1.1851200569218069e-07, "loss": 0.3545, "step": 11410 }, { "epoch": 0.93, "grad_norm": 4.643047829997474, "learning_rate": 1.1822566803993574e-07, "loss": 0.6406, "step": 11411 }, { "epoch": 0.93, "grad_norm": 2.2187121643032532, "learning_rate": 1.1793967257882621e-07, "loss": 0.3273, "step": 11412 }, { "epoch": 0.93, "grad_norm": 2.026110346408181, "learning_rate": 1.1765401932889886e-07, "loss": 0.279, "step": 11413 }, { "epoch": 0.93, "grad_norm": 3.0060657564552162, "learning_rate": 1.1736870831017711e-07, "loss": 0.6003, "step": 11414 }, { "epoch": 0.93, "grad_norm": 5.191971013035739, "learning_rate": 1.1708373954265884e-07, "loss": 0.9175, "step": 11415 }, { "epoch": 0.93, "grad_norm": 4.064330439717445, "learning_rate": 1.1679911304632086e-07, "loss": 0.9524, "step": 11416 }, { "epoch": 0.93, "grad_norm": 4.169885247510807, "learning_rate": 1.1651482884111276e-07, "loss": 0.8036, "step": 11417 }, { "epoch": 0.93, "grad_norm": 4.785332141244321, "learning_rate": 1.1623088694696194e-07, "loss": 1.19, "step": 11418 }, { "epoch": 0.93, "grad_norm": 2.594104038984991, "learning_rate": 1.1594728738377192e-07, "loss": 0.3819, "step": 11419 }, { "epoch": 0.93, "grad_norm": 2.872396175952463, "learning_rate": 1.156640301714218e-07, "loss": 0.5467, "step": 11420 }, { "epoch": 0.93, "grad_norm": 4.905342440994222, "learning_rate": 1.1538111532976626e-07, "loss": 0.7751, "step": 11421 }, { "epoch": 0.93, "grad_norm": 2.729105469841603, "learning_rate": 1.1509854287863609e-07, "loss": 0.5878, "step": 11422 }, { "epoch": 0.93, "grad_norm": 3.2242795366711547, "learning_rate": 1.1481631283783934e-07, "loss": 0.6588, "step": 11423 }, { "epoch": 0.93, "grad_norm": 0.9178608870536981, "learning_rate": 1.1453442522715852e-07, "loss": 0.0959, "step": 11424 }, { "epoch": 0.93, "grad_norm": 3.8820096712829804, "learning_rate": 1.1425288006635283e-07, "loss": 0.4315, "step": 11425 }, { "epoch": 0.93, "grad_norm": 4.015369236993359, "learning_rate": 1.1397167737515813e-07, "loss": 0.6537, "step": 11426 }, { "epoch": 0.93, "grad_norm": 3.291026797185897, "learning_rate": 1.1369081717328423e-07, "loss": 0.458, "step": 11427 }, { "epoch": 0.93, "grad_norm": 3.7474722112109133, "learning_rate": 1.1341029948041871e-07, "loss": 0.6803, "step": 11428 }, { "epoch": 0.93, "grad_norm": 5.460239601393617, "learning_rate": 1.1313012431622472e-07, "loss": 0.7746, "step": 11429 }, { "epoch": 0.93, "grad_norm": 3.5032495408235245, "learning_rate": 1.1285029170034156e-07, "loss": 0.6912, "step": 11430 }, { "epoch": 0.93, "grad_norm": 2.1065549064346887, "learning_rate": 1.125708016523841e-07, "loss": 0.3045, "step": 11431 }, { "epoch": 0.93, "grad_norm": 4.201927306332262, "learning_rate": 1.1229165419194332e-07, "loss": 0.663, "step": 11432 }, { "epoch": 0.93, "grad_norm": 4.893359351425234, "learning_rate": 1.1201284933858581e-07, "loss": 0.6112, "step": 11433 }, { "epoch": 0.93, "grad_norm": 2.5457924134993117, "learning_rate": 1.117343871118548e-07, "loss": 0.3572, "step": 11434 }, { "epoch": 0.93, "grad_norm": 3.7618255828224187, "learning_rate": 1.114562675312697e-07, "loss": 0.7144, "step": 11435 }, { "epoch": 0.93, "grad_norm": 3.830967488476228, "learning_rate": 1.1117849061632491e-07, "loss": 0.668, "step": 11436 }, { "epoch": 0.93, "grad_norm": 3.0456683266792077, "learning_rate": 1.1090105638649262e-07, "loss": 0.6332, "step": 11437 }, { "epoch": 0.93, "grad_norm": 4.645145777245306, "learning_rate": 1.1062396486121785e-07, "loss": 0.7712, "step": 11438 }, { "epoch": 0.93, "grad_norm": 5.69933829079648, "learning_rate": 1.1034721605992504e-07, "loss": 1.1358, "step": 11439 }, { "epoch": 0.94, "grad_norm": 3.3023589956484822, "learning_rate": 1.1007081000201203e-07, "loss": 0.8938, "step": 11440 }, { "epoch": 0.94, "grad_norm": 4.714037306552824, "learning_rate": 1.0979474670685441e-07, "loss": 0.9522, "step": 11441 }, { "epoch": 0.94, "grad_norm": 3.8215571782472995, "learning_rate": 1.0951902619380284e-07, "loss": 0.8513, "step": 11442 }, { "epoch": 0.94, "grad_norm": 5.151668741371961, "learning_rate": 1.0924364848218461e-07, "loss": 1.1703, "step": 11443 }, { "epoch": 0.94, "grad_norm": 4.258142399241855, "learning_rate": 1.0896861359130151e-07, "loss": 0.992, "step": 11444 }, { "epoch": 0.94, "grad_norm": 3.842097233666027, "learning_rate": 1.0869392154043256e-07, "loss": 0.6473, "step": 11445 }, { "epoch": 0.94, "grad_norm": 5.282634427069878, "learning_rate": 1.084195723488335e-07, "loss": 1.0108, "step": 11446 }, { "epoch": 0.94, "grad_norm": 4.82352021553961, "learning_rate": 1.0814556603573334e-07, "loss": 0.611, "step": 11447 }, { "epoch": 0.94, "grad_norm": 3.9540453231454236, "learning_rate": 1.0787190262034008e-07, "loss": 0.6282, "step": 11448 }, { "epoch": 0.94, "grad_norm": 3.724155380279264, "learning_rate": 1.075985821218356e-07, "loss": 0.4692, "step": 11449 }, { "epoch": 0.94, "grad_norm": 4.823909371886717, "learning_rate": 1.0732560455937902e-07, "loss": 0.6942, "step": 11450 }, { "epoch": 0.94, "grad_norm": 4.864402366534795, "learning_rate": 1.070529699521039e-07, "loss": 1.1268, "step": 11451 }, { "epoch": 0.94, "grad_norm": 4.665735809875102, "learning_rate": 1.0678067831912164e-07, "loss": 0.933, "step": 11452 }, { "epoch": 0.94, "grad_norm": 4.071968052088595, "learning_rate": 1.0650872967951864e-07, "loss": 0.5692, "step": 11453 }, { "epoch": 0.94, "grad_norm": 4.402671875049084, "learning_rate": 1.0623712405235742e-07, "loss": 0.799, "step": 11454 }, { "epoch": 0.94, "grad_norm": 2.8339504762304664, "learning_rate": 1.0596586145667553e-07, "loss": 0.6069, "step": 11455 }, { "epoch": 0.94, "grad_norm": 4.554616223835177, "learning_rate": 1.0569494191148832e-07, "loss": 1.0409, "step": 11456 }, { "epoch": 0.94, "grad_norm": 4.355035502155545, "learning_rate": 1.0542436543578505e-07, "loss": 0.9743, "step": 11457 }, { "epoch": 0.94, "grad_norm": 2.8526725805057938, "learning_rate": 1.0515413204853276e-07, "loss": 0.2967, "step": 11458 }, { "epoch": 0.94, "grad_norm": 4.924321023359295, "learning_rate": 1.0488424176867351e-07, "loss": 0.5891, "step": 11459 }, { "epoch": 0.94, "grad_norm": 3.8122948981271754, "learning_rate": 1.0461469461512552e-07, "loss": 0.7662, "step": 11460 }, { "epoch": 0.94, "grad_norm": 3.9773162802992377, "learning_rate": 1.0434549060678201e-07, "loss": 0.8828, "step": 11461 }, { "epoch": 0.94, "grad_norm": 4.494031706823871, "learning_rate": 1.0407662976251453e-07, "loss": 1.0307, "step": 11462 }, { "epoch": 0.94, "grad_norm": 3.871334368520525, "learning_rate": 1.0380811210116748e-07, "loss": 0.2681, "step": 11463 }, { "epoch": 0.94, "grad_norm": 3.420993607912421, "learning_rate": 1.0353993764156356e-07, "loss": 0.5039, "step": 11464 }, { "epoch": 0.94, "grad_norm": 3.5214802423323555, "learning_rate": 1.0327210640250051e-07, "loss": 0.5811, "step": 11465 }, { "epoch": 0.94, "grad_norm": 4.1476833037383765, "learning_rate": 1.0300461840275278e-07, "loss": 0.5421, "step": 11466 }, { "epoch": 0.94, "grad_norm": 3.064162042829437, "learning_rate": 1.0273747366106867e-07, "loss": 0.5128, "step": 11467 }, { "epoch": 0.94, "grad_norm": 4.408249378397272, "learning_rate": 1.024706721961749e-07, "loss": 0.5601, "step": 11468 }, { "epoch": 0.94, "grad_norm": 2.885012696896012, "learning_rate": 1.0220421402677261e-07, "loss": 0.5723, "step": 11469 }, { "epoch": 0.94, "grad_norm": 3.891852518029071, "learning_rate": 1.0193809917154018e-07, "loss": 0.7822, "step": 11470 }, { "epoch": 0.94, "grad_norm": 5.022331987363963, "learning_rate": 1.0167232764913104e-07, "loss": 0.9398, "step": 11471 }, { "epoch": 0.94, "grad_norm": 3.880819542465347, "learning_rate": 1.0140689947817305e-07, "loss": 0.6669, "step": 11472 }, { "epoch": 0.94, "grad_norm": 5.514773015344065, "learning_rate": 1.0114181467727302e-07, "loss": 1.0464, "step": 11473 }, { "epoch": 0.94, "grad_norm": 3.204128200139511, "learning_rate": 1.0087707326501218e-07, "loss": 0.5684, "step": 11474 }, { "epoch": 0.94, "grad_norm": 3.7287083542166064, "learning_rate": 1.0061267525994678e-07, "loss": 0.5869, "step": 11475 }, { "epoch": 0.94, "grad_norm": 4.8170199043790864, "learning_rate": 1.0034862068061147e-07, "loss": 0.9989, "step": 11476 }, { "epoch": 0.94, "grad_norm": 5.513320291452303, "learning_rate": 1.0008490954551419e-07, "loss": 0.7419, "step": 11477 }, { "epoch": 0.94, "grad_norm": 3.078026930248082, "learning_rate": 9.982154187314075e-08, "loss": 0.1895, "step": 11478 }, { "epoch": 0.94, "grad_norm": 4.0083071016056, "learning_rate": 9.95585176819508e-08, "loss": 0.5666, "step": 11479 }, { "epoch": 0.94, "grad_norm": 4.944779680595329, "learning_rate": 9.929583699038182e-08, "loss": 0.9947, "step": 11480 }, { "epoch": 0.94, "grad_norm": 5.471368599698787, "learning_rate": 9.90334998168474e-08, "loss": 1.0571, "step": 11481 }, { "epoch": 0.94, "grad_norm": 4.1216598426591595, "learning_rate": 9.877150617973507e-08, "loss": 0.6191, "step": 11482 }, { "epoch": 0.94, "grad_norm": 4.424815174836256, "learning_rate": 9.850985609741015e-08, "loss": 1.1982, "step": 11483 }, { "epoch": 0.94, "grad_norm": 3.558857226478264, "learning_rate": 9.824854958821295e-08, "loss": 0.857, "step": 11484 }, { "epoch": 0.94, "grad_norm": 3.6881372715841327, "learning_rate": 9.798758667045993e-08, "loss": 0.5128, "step": 11485 }, { "epoch": 0.94, "grad_norm": 2.6042232762236632, "learning_rate": 9.772696736244369e-08, "loss": 0.3684, "step": 11486 }, { "epoch": 0.94, "grad_norm": 3.3849127166399193, "learning_rate": 9.746669168243184e-08, "loss": 0.4047, "step": 11487 }, { "epoch": 0.94, "grad_norm": 3.3419144586146996, "learning_rate": 9.720675964866866e-08, "loss": 0.491, "step": 11488 }, { "epoch": 0.94, "grad_norm": 3.7121433656782155, "learning_rate": 9.69471712793757e-08, "loss": 0.6647, "step": 11489 }, { "epoch": 0.94, "grad_norm": 1.3240341254558812, "learning_rate": 9.668792659274729e-08, "loss": 0.1673, "step": 11490 }, { "epoch": 0.94, "grad_norm": 4.5540746486450585, "learning_rate": 9.64290256069561e-08, "loss": 0.9346, "step": 11491 }, { "epoch": 0.94, "grad_norm": 4.080260945098309, "learning_rate": 9.617046834014987e-08, "loss": 1.0139, "step": 11492 }, { "epoch": 0.94, "grad_norm": 3.7921434537664847, "learning_rate": 9.591225481045186e-08, "loss": 0.8898, "step": 11493 }, { "epoch": 0.94, "grad_norm": 1.4892994629718517, "learning_rate": 9.565438503596258e-08, "loss": 0.3442, "step": 11494 }, { "epoch": 0.94, "grad_norm": 3.137716244372587, "learning_rate": 9.539685903475704e-08, "loss": 0.4682, "step": 11495 }, { "epoch": 0.94, "grad_norm": 3.5793078099482996, "learning_rate": 9.513967682488634e-08, "loss": 0.7187, "step": 11496 }, { "epoch": 0.94, "grad_norm": 2.9457444408370574, "learning_rate": 9.488283842437829e-08, "loss": 0.5016, "step": 11497 }, { "epoch": 0.94, "grad_norm": 1.1224986446700729, "learning_rate": 9.462634385123681e-08, "loss": 0.1085, "step": 11498 }, { "epoch": 0.94, "grad_norm": 5.600689552970591, "learning_rate": 9.437019312343976e-08, "loss": 0.7454, "step": 11499 }, { "epoch": 0.94, "grad_norm": 3.659396098124453, "learning_rate": 9.411438625894331e-08, "loss": 0.5225, "step": 11500 }, { "epoch": 0.94, "grad_norm": 2.199140005369275, "learning_rate": 9.385892327567759e-08, "loss": 0.3812, "step": 11501 }, { "epoch": 0.94, "grad_norm": 5.064817368758036, "learning_rate": 9.360380419154935e-08, "loss": 0.8633, "step": 11502 }, { "epoch": 0.94, "grad_norm": 2.621385853262332, "learning_rate": 9.33490290244421e-08, "loss": 0.5887, "step": 11503 }, { "epoch": 0.94, "grad_norm": 4.058381904526087, "learning_rate": 9.309459779221375e-08, "loss": 0.6112, "step": 11504 }, { "epoch": 0.94, "grad_norm": 4.484172345562006, "learning_rate": 9.284051051269949e-08, "loss": 0.7339, "step": 11505 }, { "epoch": 0.94, "grad_norm": 4.215384163118494, "learning_rate": 9.258676720371007e-08, "loss": 1.1403, "step": 11506 }, { "epoch": 0.94, "grad_norm": 3.3297724747788098, "learning_rate": 9.233336788303016e-08, "loss": 0.5412, "step": 11507 }, { "epoch": 0.94, "grad_norm": 1.2661471977938814, "learning_rate": 9.208031256842332e-08, "loss": 0.1712, "step": 11508 }, { "epoch": 0.94, "grad_norm": 4.154101220777355, "learning_rate": 9.182760127762757e-08, "loss": 0.7015, "step": 11509 }, { "epoch": 0.94, "grad_norm": 2.023772803963613, "learning_rate": 9.1575234028356e-08, "loss": 0.3518, "step": 11510 }, { "epoch": 0.94, "grad_norm": 2.673827508559293, "learning_rate": 9.132321083829943e-08, "loss": 0.2518, "step": 11511 }, { "epoch": 0.94, "grad_norm": 2.8791526657741016, "learning_rate": 9.107153172512318e-08, "loss": 0.5467, "step": 11512 }, { "epoch": 0.94, "grad_norm": 3.850501346120811, "learning_rate": 9.082019670646813e-08, "loss": 0.6633, "step": 11513 }, { "epoch": 0.94, "grad_norm": 2.531166946857835, "learning_rate": 9.056920579995299e-08, "loss": 0.5177, "step": 11514 }, { "epoch": 0.94, "grad_norm": 3.5638372089779664, "learning_rate": 9.031855902317033e-08, "loss": 0.4288, "step": 11515 }, { "epoch": 0.94, "grad_norm": 4.875937634908744, "learning_rate": 9.006825639368944e-08, "loss": 0.6471, "step": 11516 }, { "epoch": 0.94, "grad_norm": 4.9193632747619445, "learning_rate": 8.981829792905628e-08, "loss": 0.8261, "step": 11517 }, { "epoch": 0.94, "grad_norm": 2.9089533805539123, "learning_rate": 8.956868364679128e-08, "loss": 0.4271, "step": 11518 }, { "epoch": 0.94, "grad_norm": 2.383332574181771, "learning_rate": 8.9319413564391e-08, "loss": 0.324, "step": 11519 }, { "epoch": 0.94, "grad_norm": 2.6635614921681072, "learning_rate": 8.907048769932813e-08, "loss": 0.4885, "step": 11520 }, { "epoch": 0.94, "grad_norm": 4.417415217501595, "learning_rate": 8.882190606905206e-08, "loss": 0.5147, "step": 11521 }, { "epoch": 0.94, "grad_norm": 2.5786572783250215, "learning_rate": 8.85736686909866e-08, "loss": 0.4211, "step": 11522 }, { "epoch": 0.94, "grad_norm": 4.139069378559404, "learning_rate": 8.832577558253285e-08, "loss": 1.0832, "step": 11523 }, { "epoch": 0.94, "grad_norm": 3.3481033051612243, "learning_rate": 8.807822676106637e-08, "loss": 0.4845, "step": 11524 }, { "epoch": 0.94, "grad_norm": 3.4623507690569926, "learning_rate": 8.783102224393992e-08, "loss": 0.8474, "step": 11525 }, { "epoch": 0.94, "grad_norm": 4.897890325096617, "learning_rate": 8.758416204848019e-08, "loss": 0.5634, "step": 11526 }, { "epoch": 0.94, "grad_norm": 6.2330917403536095, "learning_rate": 8.73376461919917e-08, "loss": 0.9752, "step": 11527 }, { "epoch": 0.94, "grad_norm": 4.622719270232019, "learning_rate": 8.709147469175449e-08, "loss": 0.8331, "step": 11528 }, { "epoch": 0.94, "grad_norm": 3.632434371411331, "learning_rate": 8.684564756502423e-08, "loss": 0.8461, "step": 11529 }, { "epoch": 0.94, "grad_norm": 4.221314964205183, "learning_rate": 8.660016482903156e-08, "loss": 0.8041, "step": 11530 }, { "epoch": 0.94, "grad_norm": 3.096728943256637, "learning_rate": 8.635502650098437e-08, "loss": 0.458, "step": 11531 }, { "epoch": 0.94, "grad_norm": 2.889713743014272, "learning_rate": 8.611023259806561e-08, "loss": 0.3912, "step": 11532 }, { "epoch": 0.94, "grad_norm": 5.032860247914431, "learning_rate": 8.586578313743377e-08, "loss": 0.8403, "step": 11533 }, { "epoch": 0.94, "grad_norm": 2.898698151282602, "learning_rate": 8.562167813622457e-08, "loss": 0.4575, "step": 11534 }, { "epoch": 0.94, "grad_norm": 3.2422326007950137, "learning_rate": 8.537791761154823e-08, "loss": 0.4273, "step": 11535 }, { "epoch": 0.94, "grad_norm": 4.608676149082793, "learning_rate": 8.513450158049109e-08, "loss": 1.062, "step": 11536 }, { "epoch": 0.94, "grad_norm": 4.788955881010936, "learning_rate": 8.489143006011613e-08, "loss": 0.7896, "step": 11537 }, { "epoch": 0.94, "grad_norm": 4.747015684652336, "learning_rate": 8.464870306746087e-08, "loss": 0.9884, "step": 11538 }, { "epoch": 0.94, "grad_norm": 6.603052178222852, "learning_rate": 8.440632061954057e-08, "loss": 0.8629, "step": 11539 }, { "epoch": 0.94, "grad_norm": 4.844263423828449, "learning_rate": 8.416428273334387e-08, "loss": 0.8126, "step": 11540 }, { "epoch": 0.94, "grad_norm": 3.903695648370362, "learning_rate": 8.392258942583775e-08, "loss": 0.6307, "step": 11541 }, { "epoch": 0.94, "grad_norm": 4.425499726387105, "learning_rate": 8.368124071396255e-08, "loss": 0.6124, "step": 11542 }, { "epoch": 0.94, "grad_norm": 2.4550308257582176, "learning_rate": 8.344023661463696e-08, "loss": 0.3733, "step": 11543 }, { "epoch": 0.94, "grad_norm": 4.116310760936996, "learning_rate": 8.319957714475357e-08, "loss": 0.6365, "step": 11544 }, { "epoch": 0.94, "grad_norm": 4.5552693050682, "learning_rate": 8.295926232118168e-08, "loss": 0.6594, "step": 11545 }, { "epoch": 0.94, "grad_norm": 3.678799517409882, "learning_rate": 8.271929216076724e-08, "loss": 0.9425, "step": 11546 }, { "epoch": 0.94, "grad_norm": 4.454174229725478, "learning_rate": 8.247966668032958e-08, "loss": 0.7239, "step": 11547 }, { "epoch": 0.94, "grad_norm": 3.417164602813985, "learning_rate": 8.224038589666639e-08, "loss": 0.7123, "step": 11548 }, { "epoch": 0.94, "grad_norm": 4.485645869758046, "learning_rate": 8.200144982654978e-08, "loss": 1.0404, "step": 11549 }, { "epoch": 0.94, "grad_norm": 4.749260992056846, "learning_rate": 8.17628584867286e-08, "loss": 0.7257, "step": 11550 }, { "epoch": 0.94, "grad_norm": 4.735974713852315, "learning_rate": 8.152461189392725e-08, "loss": 0.947, "step": 11551 }, { "epoch": 0.94, "grad_norm": 2.691532152322852, "learning_rate": 8.128671006484457e-08, "loss": 0.4618, "step": 11552 }, { "epoch": 0.94, "grad_norm": 3.606720790843942, "learning_rate": 8.104915301615723e-08, "loss": 0.5511, "step": 11553 }, { "epoch": 0.94, "grad_norm": 4.203064780496871, "learning_rate": 8.081194076451749e-08, "loss": 0.9137, "step": 11554 }, { "epoch": 0.94, "grad_norm": 2.423709468178528, "learning_rate": 8.057507332655201e-08, "loss": 0.5172, "step": 11555 }, { "epoch": 0.94, "grad_norm": 3.544884911549885, "learning_rate": 8.033855071886476e-08, "loss": 0.7933, "step": 11556 }, { "epoch": 0.94, "grad_norm": 4.05296144706269, "learning_rate": 8.01023729580347e-08, "loss": 0.7496, "step": 11557 }, { "epoch": 0.94, "grad_norm": 5.168752164032706, "learning_rate": 7.986654006061633e-08, "loss": 0.6369, "step": 11558 }, { "epoch": 0.94, "grad_norm": 5.6099727114647076, "learning_rate": 7.963105204314092e-08, "loss": 0.9393, "step": 11559 }, { "epoch": 0.94, "grad_norm": 4.972524823079395, "learning_rate": 7.939590892211523e-08, "loss": 0.9579, "step": 11560 }, { "epoch": 0.94, "grad_norm": 2.4525684664562473, "learning_rate": 7.916111071402222e-08, "loss": 0.3673, "step": 11561 }, { "epoch": 0.95, "grad_norm": 5.365456772535376, "learning_rate": 7.89266574353198e-08, "loss": 0.5321, "step": 11562 }, { "epoch": 0.95, "grad_norm": 3.5464746563623795, "learning_rate": 7.869254910244206e-08, "loss": 0.5361, "step": 11563 }, { "epoch": 0.95, "grad_norm": 3.8067799548706502, "learning_rate": 7.845878573179866e-08, "loss": 0.7528, "step": 11564 }, { "epoch": 0.95, "grad_norm": 2.0718976468322223, "learning_rate": 7.822536733977592e-08, "loss": 0.4032, "step": 11565 }, { "epoch": 0.95, "grad_norm": 5.789649920090571, "learning_rate": 7.799229394273522e-08, "loss": 0.8454, "step": 11566 }, { "epoch": 0.95, "grad_norm": 4.544293359733326, "learning_rate": 7.775956555701458e-08, "loss": 0.8429, "step": 11567 }, { "epoch": 0.95, "grad_norm": 4.0705618784929, "learning_rate": 7.752718219892597e-08, "loss": 0.5423, "step": 11568 }, { "epoch": 0.95, "grad_norm": 4.604745100289837, "learning_rate": 7.729514388476023e-08, "loss": 0.7956, "step": 11569 }, { "epoch": 0.95, "grad_norm": 3.7360391468404823, "learning_rate": 7.706345063078047e-08, "loss": 0.8195, "step": 11570 }, { "epoch": 0.95, "grad_norm": 5.623807845515393, "learning_rate": 7.683210245322869e-08, "loss": 1.0799, "step": 11571 }, { "epoch": 0.95, "grad_norm": 3.2628999944535972, "learning_rate": 7.660109936832027e-08, "loss": 0.6359, "step": 11572 }, { "epoch": 0.95, "grad_norm": 4.573771335494906, "learning_rate": 7.63704413922478e-08, "loss": 0.7494, "step": 11573 }, { "epoch": 0.95, "grad_norm": 4.991034643761258, "learning_rate": 7.614012854118058e-08, "loss": 1.2426, "step": 11574 }, { "epoch": 0.95, "grad_norm": 5.16808363897129, "learning_rate": 7.591016083126124e-08, "loss": 1.2851, "step": 11575 }, { "epoch": 0.95, "grad_norm": 3.2343520940224546, "learning_rate": 7.568053827860966e-08, "loss": 0.5812, "step": 11576 }, { "epoch": 0.95, "grad_norm": 3.2577967730041224, "learning_rate": 7.545126089932186e-08, "loss": 0.7634, "step": 11577 }, { "epoch": 0.95, "grad_norm": 3.050302492327812, "learning_rate": 7.522232870946889e-08, "loss": 0.7246, "step": 11578 }, { "epoch": 0.95, "grad_norm": 5.43669760424669, "learning_rate": 7.499374172509789e-08, "loss": 0.8774, "step": 11579 }, { "epoch": 0.95, "grad_norm": 4.640794761815384, "learning_rate": 7.476549996223215e-08, "loss": 1.0219, "step": 11580 }, { "epoch": 0.95, "grad_norm": 4.831791310140802, "learning_rate": 7.453760343686999e-08, "loss": 0.774, "step": 11581 }, { "epoch": 0.95, "grad_norm": 4.7100391569805735, "learning_rate": 7.431005216498588e-08, "loss": 0.8476, "step": 11582 }, { "epoch": 0.95, "grad_norm": 3.514649034623249, "learning_rate": 7.408284616253092e-08, "loss": 0.6735, "step": 11583 }, { "epoch": 0.95, "grad_norm": 3.860514730999645, "learning_rate": 7.385598544543015e-08, "loss": 0.7812, "step": 11584 }, { "epoch": 0.95, "grad_norm": 3.5803739223711872, "learning_rate": 7.362947002958698e-08, "loss": 0.5444, "step": 11585 }, { "epoch": 0.95, "grad_norm": 3.2568546454751908, "learning_rate": 7.340329993087813e-08, "loss": 0.5813, "step": 11586 }, { "epoch": 0.95, "grad_norm": 4.295020809146325, "learning_rate": 7.31774751651576e-08, "loss": 0.9003, "step": 11587 }, { "epoch": 0.95, "grad_norm": 3.081893972177772, "learning_rate": 7.295199574825384e-08, "loss": 0.6387, "step": 11588 }, { "epoch": 0.95, "grad_norm": 5.705099452527816, "learning_rate": 7.272686169597253e-08, "loss": 0.8707, "step": 11589 }, { "epoch": 0.95, "grad_norm": 4.952450169969662, "learning_rate": 7.250207302409496e-08, "loss": 0.6748, "step": 11590 }, { "epoch": 0.95, "grad_norm": 5.098531567953999, "learning_rate": 7.227762974837793e-08, "loss": 0.6623, "step": 11591 }, { "epoch": 0.95, "grad_norm": 2.735206583392966, "learning_rate": 7.205353188455277e-08, "loss": 0.4195, "step": 11592 }, { "epoch": 0.95, "grad_norm": 2.2890640064475805, "learning_rate": 7.182977944832859e-08, "loss": 0.3627, "step": 11593 }, { "epoch": 0.95, "grad_norm": 5.931514297049116, "learning_rate": 7.160637245538949e-08, "loss": 0.9897, "step": 11594 }, { "epoch": 0.95, "grad_norm": 2.9622194419255887, "learning_rate": 7.138331092139573e-08, "loss": 0.506, "step": 11595 }, { "epoch": 0.95, "grad_norm": 6.133161143859423, "learning_rate": 7.116059486198201e-08, "loss": 1.2324, "step": 11596 }, { "epoch": 0.95, "grad_norm": 3.989076014377061, "learning_rate": 7.093822429276032e-08, "loss": 0.6505, "step": 11597 }, { "epoch": 0.95, "grad_norm": 5.539630457357993, "learning_rate": 7.07161992293176e-08, "loss": 0.8703, "step": 11598 }, { "epoch": 0.95, "grad_norm": 4.498339770297644, "learning_rate": 7.049451968721699e-08, "loss": 0.7966, "step": 11599 }, { "epoch": 0.95, "grad_norm": 4.006096866758366, "learning_rate": 7.02731856819977e-08, "loss": 0.6534, "step": 11600 }, { "epoch": 0.95, "grad_norm": 4.770977893654591, "learning_rate": 7.0052197229174e-08, "loss": 0.9353, "step": 11601 }, { "epoch": 0.95, "grad_norm": 3.9628131174003864, "learning_rate": 6.983155434423517e-08, "loss": 1.1756, "step": 11602 }, { "epoch": 0.95, "grad_norm": 3.7831215570400714, "learning_rate": 6.961125704264937e-08, "loss": 0.6618, "step": 11603 }, { "epoch": 0.95, "grad_norm": 5.3595549402144105, "learning_rate": 6.93913053398565e-08, "loss": 1.1731, "step": 11604 }, { "epoch": 0.95, "grad_norm": 2.6089979218198756, "learning_rate": 6.917169925127476e-08, "loss": 0.416, "step": 11605 }, { "epoch": 0.95, "grad_norm": 3.289736387729071, "learning_rate": 6.895243879229852e-08, "loss": 0.6777, "step": 11606 }, { "epoch": 0.95, "grad_norm": 3.1473913374249305, "learning_rate": 6.873352397829603e-08, "loss": 0.5736, "step": 11607 }, { "epoch": 0.95, "grad_norm": 4.712640401611258, "learning_rate": 6.851495482461279e-08, "loss": 0.8186, "step": 11608 }, { "epoch": 0.95, "grad_norm": 3.7269873024694307, "learning_rate": 6.829673134656934e-08, "loss": 0.8095, "step": 11609 }, { "epoch": 0.95, "grad_norm": 4.398859974456807, "learning_rate": 6.807885355946176e-08, "loss": 0.7604, "step": 11610 }, { "epoch": 0.95, "grad_norm": 5.468407590378123, "learning_rate": 6.786132147856283e-08, "loss": 0.7929, "step": 11611 }, { "epoch": 0.95, "grad_norm": 1.0647493326111022, "learning_rate": 6.764413511912094e-08, "loss": 0.1609, "step": 11612 }, { "epoch": 0.95, "grad_norm": 3.014610172645361, "learning_rate": 6.742729449635888e-08, "loss": 0.6214, "step": 11613 }, { "epoch": 0.95, "grad_norm": 2.6337823901937814, "learning_rate": 6.721079962547783e-08, "loss": 0.5395, "step": 11614 }, { "epoch": 0.95, "grad_norm": 3.595467025102142, "learning_rate": 6.699465052165122e-08, "loss": 0.6058, "step": 11615 }, { "epoch": 0.95, "grad_norm": 2.3282336878955827, "learning_rate": 6.677884720003136e-08, "loss": 0.2651, "step": 11616 }, { "epoch": 0.95, "grad_norm": 5.320522459061478, "learning_rate": 6.656338967574505e-08, "loss": 1.1039, "step": 11617 }, { "epoch": 0.95, "grad_norm": 5.061575871391227, "learning_rate": 6.634827796389465e-08, "loss": 1.2823, "step": 11618 }, { "epoch": 0.95, "grad_norm": 4.021015682318374, "learning_rate": 6.613351207955865e-08, "loss": 0.6708, "step": 11619 }, { "epoch": 0.95, "grad_norm": 4.8257923699654635, "learning_rate": 6.591909203779167e-08, "loss": 0.7875, "step": 11620 }, { "epoch": 0.95, "grad_norm": 4.35300776701248, "learning_rate": 6.570501785362227e-08, "loss": 0.7842, "step": 11621 }, { "epoch": 0.95, "grad_norm": 4.960849040274898, "learning_rate": 6.54912895420573e-08, "loss": 0.9143, "step": 11622 }, { "epoch": 0.95, "grad_norm": 4.417844368499578, "learning_rate": 6.527790711807813e-08, "loss": 0.9415, "step": 11623 }, { "epoch": 0.95, "grad_norm": 4.634517255764881, "learning_rate": 6.506487059664113e-08, "loss": 0.6651, "step": 11624 }, { "epoch": 0.95, "grad_norm": 3.3349906425666074, "learning_rate": 6.485217999268045e-08, "loss": 0.4669, "step": 11625 }, { "epoch": 0.95, "grad_norm": 4.088055091691627, "learning_rate": 6.463983532110418e-08, "loss": 0.7205, "step": 11626 }, { "epoch": 0.95, "grad_norm": 6.189227315555454, "learning_rate": 6.442783659679596e-08, "loss": 0.8922, "step": 11627 }, { "epoch": 0.95, "grad_norm": 4.062688209602089, "learning_rate": 6.421618383461726e-08, "loss": 0.792, "step": 11628 }, { "epoch": 0.95, "grad_norm": 5.0459100063850295, "learning_rate": 6.400487704940284e-08, "loss": 1.041, "step": 11629 }, { "epoch": 0.95, "grad_norm": 3.179069687860232, "learning_rate": 6.379391625596587e-08, "loss": 0.698, "step": 11630 }, { "epoch": 0.95, "grad_norm": 5.734301783196438, "learning_rate": 6.358330146909231e-08, "loss": 0.8751, "step": 11631 }, { "epoch": 0.95, "grad_norm": 3.6330821281949284, "learning_rate": 6.337303270354644e-08, "loss": 0.7615, "step": 11632 }, { "epoch": 0.95, "grad_norm": 3.1857711430187416, "learning_rate": 6.31631099740665e-08, "loss": 0.5557, "step": 11633 }, { "epoch": 0.95, "grad_norm": 3.2242637373736787, "learning_rate": 6.295353329536736e-08, "loss": 0.5299, "step": 11634 }, { "epoch": 0.95, "grad_norm": 2.627338941591819, "learning_rate": 6.274430268213949e-08, "loss": 0.5305, "step": 11635 }, { "epoch": 0.95, "grad_norm": 4.179602518667075, "learning_rate": 6.253541814904895e-08, "loss": 0.5425, "step": 11636 }, { "epoch": 0.95, "grad_norm": 5.042256472962217, "learning_rate": 6.232687971073792e-08, "loss": 0.5536, "step": 11637 }, { "epoch": 0.95, "grad_norm": 2.5314106009173982, "learning_rate": 6.211868738182303e-08, "loss": 0.3046, "step": 11638 }, { "epoch": 0.95, "grad_norm": 3.710831625897087, "learning_rate": 6.191084117689871e-08, "loss": 0.5028, "step": 11639 }, { "epoch": 0.95, "grad_norm": 1.887473282809646, "learning_rate": 6.170334111053444e-08, "loss": 0.3401, "step": 11640 }, { "epoch": 0.95, "grad_norm": 1.086127827375325, "learning_rate": 6.149618719727358e-08, "loss": 0.1172, "step": 11641 }, { "epoch": 0.95, "grad_norm": 2.805108073029417, "learning_rate": 6.128937945163782e-08, "loss": 0.2569, "step": 11642 }, { "epoch": 0.95, "grad_norm": 3.3531768646930997, "learning_rate": 6.108291788812393e-08, "loss": 0.3978, "step": 11643 }, { "epoch": 0.95, "grad_norm": 4.222728997768416, "learning_rate": 6.087680252120254e-08, "loss": 0.7847, "step": 11644 }, { "epoch": 0.95, "grad_norm": 4.961397698122342, "learning_rate": 6.06710333653221e-08, "loss": 1.2055, "step": 11645 }, { "epoch": 0.95, "grad_norm": 3.9685647046638475, "learning_rate": 6.046561043490606e-08, "loss": 0.9056, "step": 11646 }, { "epoch": 0.95, "grad_norm": 5.588238146585753, "learning_rate": 6.026053374435404e-08, "loss": 1.2207, "step": 11647 }, { "epoch": 0.95, "grad_norm": 3.733864306330916, "learning_rate": 6.005580330804117e-08, "loss": 0.4693, "step": 11648 }, { "epoch": 0.95, "grad_norm": 2.8971312445805486, "learning_rate": 5.985141914031767e-08, "loss": 0.2831, "step": 11649 }, { "epoch": 0.95, "grad_norm": 4.5183158215651575, "learning_rate": 5.964738125550984e-08, "loss": 0.6881, "step": 11650 }, { "epoch": 0.95, "grad_norm": 4.10165312450103, "learning_rate": 5.944368966792014e-08, "loss": 0.5499, "step": 11651 }, { "epoch": 0.95, "grad_norm": 5.395994324312094, "learning_rate": 5.924034439182658e-08, "loss": 1.0545, "step": 11652 }, { "epoch": 0.95, "grad_norm": 2.180941467475703, "learning_rate": 5.903734544148221e-08, "loss": 0.3215, "step": 11653 }, { "epoch": 0.95, "grad_norm": 3.1291064968405005, "learning_rate": 5.8834692831117315e-08, "loss": 0.5487, "step": 11654 }, { "epoch": 0.95, "grad_norm": 3.620358573775542, "learning_rate": 5.863238657493608e-08, "loss": 0.8608, "step": 11655 }, { "epoch": 0.95, "grad_norm": 3.6106870421584873, "learning_rate": 5.8430426687119954e-08, "loss": 0.9586, "step": 11656 }, { "epoch": 0.95, "grad_norm": 3.2684985178727715, "learning_rate": 5.822881318182483e-08, "loss": 0.6098, "step": 11657 }, { "epoch": 0.95, "grad_norm": 2.9087540299140833, "learning_rate": 5.802754607318273e-08, "loss": 0.5448, "step": 11658 }, { "epoch": 0.95, "grad_norm": 4.108416999885527, "learning_rate": 5.7826625375302923e-08, "loss": 0.7536, "step": 11659 }, { "epoch": 0.95, "grad_norm": 3.4963580682447812, "learning_rate": 5.762605110226804e-08, "loss": 0.5322, "step": 11660 }, { "epoch": 0.95, "grad_norm": 4.019122073312331, "learning_rate": 5.742582326813795e-08, "loss": 0.6686, "step": 11661 }, { "epoch": 0.95, "grad_norm": 3.5195503585473022, "learning_rate": 5.722594188694697e-08, "loss": 0.7019, "step": 11662 }, { "epoch": 0.95, "grad_norm": 3.1784774680724825, "learning_rate": 5.702640697270667e-08, "loss": 0.6628, "step": 11663 }, { "epoch": 0.95, "grad_norm": 3.7942647579416007, "learning_rate": 5.682721853940365e-08, "loss": 0.6642, "step": 11664 }, { "epoch": 0.95, "grad_norm": 5.492623859467021, "learning_rate": 5.662837660099951e-08, "loss": 0.5757, "step": 11665 }, { "epoch": 0.95, "grad_norm": 4.765473552833394, "learning_rate": 5.642988117143311e-08, "loss": 1.0545, "step": 11666 }, { "epoch": 0.95, "grad_norm": 2.114077177890035, "learning_rate": 5.6231732264616644e-08, "loss": 0.3435, "step": 11667 }, { "epoch": 0.95, "grad_norm": 3.7433908228118784, "learning_rate": 5.603392989444068e-08, "loss": 0.6281, "step": 11668 }, { "epoch": 0.95, "grad_norm": 3.0458702215608127, "learning_rate": 5.583647407477022e-08, "loss": 0.62, "step": 11669 }, { "epoch": 0.95, "grad_norm": 4.270318292611534, "learning_rate": 5.5639364819445875e-08, "loss": 0.8412, "step": 11670 }, { "epoch": 0.95, "grad_norm": 2.779845237480693, "learning_rate": 5.5442602142284364e-08, "loss": 0.4793, "step": 11671 }, { "epoch": 0.95, "grad_norm": 3.0113458173780154, "learning_rate": 5.5246186057076875e-08, "loss": 0.488, "step": 11672 }, { "epoch": 0.95, "grad_norm": 3.3714534494585875, "learning_rate": 5.505011657759296e-08, "loss": 0.5611, "step": 11673 }, { "epoch": 0.95, "grad_norm": 4.923472859924644, "learning_rate": 5.4854393717574396e-08, "loss": 1.0725, "step": 11674 }, { "epoch": 0.95, "grad_norm": 3.6161754209717114, "learning_rate": 5.465901749074243e-08, "loss": 0.5456, "step": 11675 }, { "epoch": 0.95, "grad_norm": 4.247407489721118, "learning_rate": 5.446398791079055e-08, "loss": 0.7884, "step": 11676 }, { "epoch": 0.95, "grad_norm": 3.7660720679954554, "learning_rate": 5.42693049913906e-08, "loss": 0.676, "step": 11677 }, { "epoch": 0.95, "grad_norm": 5.2160897355578095, "learning_rate": 5.407496874618778e-08, "loss": 1.0326, "step": 11678 }, { "epoch": 0.95, "grad_norm": 4.227074451795597, "learning_rate": 5.388097918880564e-08, "loss": 0.986, "step": 11679 }, { "epoch": 0.95, "grad_norm": 2.7225632670772963, "learning_rate": 5.3687336332841065e-08, "loss": 0.458, "step": 11680 }, { "epoch": 0.95, "grad_norm": 5.588303275939599, "learning_rate": 5.3494040191867655e-08, "loss": 1.1258, "step": 11681 }, { "epoch": 0.95, "grad_norm": 3.487444079482097, "learning_rate": 5.3301090779434574e-08, "loss": 0.3763, "step": 11682 }, { "epoch": 0.95, "grad_norm": 3.7603394226626325, "learning_rate": 5.310848810906766e-08, "loss": 0.626, "step": 11683 }, { "epoch": 0.96, "grad_norm": 5.578678130957751, "learning_rate": 5.2916232194266116e-08, "loss": 1.232, "step": 11684 }, { "epoch": 0.96, "grad_norm": 4.811411493552024, "learning_rate": 5.272432304850694e-08, "loss": 0.7073, "step": 11685 }, { "epoch": 0.96, "grad_norm": 2.6471063109711923, "learning_rate": 5.253276068524216e-08, "loss": 0.5287, "step": 11686 }, { "epoch": 0.96, "grad_norm": 3.6723646418446254, "learning_rate": 5.2341545117899353e-08, "loss": 0.9197, "step": 11687 }, { "epoch": 0.96, "grad_norm": 4.1312490454129955, "learning_rate": 5.215067635988169e-08, "loss": 0.8088, "step": 11688 }, { "epoch": 0.96, "grad_norm": 4.303647713081, "learning_rate": 5.1960154424569587e-08, "loss": 0.6594, "step": 11689 }, { "epoch": 0.96, "grad_norm": 2.3245269674291773, "learning_rate": 5.176997932531569e-08, "loss": 0.2501, "step": 11690 }, { "epoch": 0.96, "grad_norm": 4.024797051793267, "learning_rate": 5.158015107545156e-08, "loss": 0.8117, "step": 11691 }, { "epoch": 0.96, "grad_norm": 3.6393877702713913, "learning_rate": 5.139066968828377e-08, "loss": 0.5532, "step": 11692 }, { "epoch": 0.96, "grad_norm": 3.0030678265837416, "learning_rate": 5.120153517709281e-08, "loss": 0.5272, "step": 11693 }, { "epoch": 0.96, "grad_norm": 1.3423220512615752, "learning_rate": 5.101274755513808e-08, "loss": 0.1617, "step": 11694 }, { "epoch": 0.96, "grad_norm": 4.293777898690641, "learning_rate": 5.082430683565065e-08, "loss": 0.9318, "step": 11695 }, { "epoch": 0.96, "grad_norm": 3.721454187574026, "learning_rate": 5.063621303184163e-08, "loss": 0.5775, "step": 11696 }, { "epoch": 0.96, "grad_norm": 3.8328649913876216, "learning_rate": 5.044846615689325e-08, "loss": 0.5656, "step": 11697 }, { "epoch": 0.96, "grad_norm": 4.067704682786649, "learning_rate": 5.02610662239672e-08, "loss": 0.6409, "step": 11698 }, { "epoch": 0.96, "grad_norm": 4.045261495652824, "learning_rate": 5.0074013246199096e-08, "loss": 0.6285, "step": 11699 }, { "epoch": 0.96, "grad_norm": 3.760271628485974, "learning_rate": 4.9887307236700654e-08, "loss": 0.3907, "step": 11700 }, { "epoch": 0.96, "grad_norm": 3.3602929536408532, "learning_rate": 4.97009482085592e-08, "loss": 0.6208, "step": 11701 }, { "epoch": 0.96, "grad_norm": 2.8847486528892277, "learning_rate": 4.9514936174837047e-08, "loss": 0.4227, "step": 11702 }, { "epoch": 0.96, "grad_norm": 2.5872727368371575, "learning_rate": 4.932927114857322e-08, "loss": 0.5445, "step": 11703 }, { "epoch": 0.96, "grad_norm": 5.348455025343582, "learning_rate": 4.914395314278231e-08, "loss": 0.5013, "step": 11704 }, { "epoch": 0.96, "grad_norm": 4.005260929206768, "learning_rate": 4.8958982170453915e-08, "loss": 0.6913, "step": 11705 }, { "epoch": 0.96, "grad_norm": 3.484792319652042, "learning_rate": 4.8774358244554346e-08, "loss": 0.4886, "step": 11706 }, { "epoch": 0.96, "grad_norm": 4.283131853262212, "learning_rate": 4.859008137802379e-08, "loss": 0.6121, "step": 11707 }, { "epoch": 0.96, "grad_norm": 6.101735286795017, "learning_rate": 4.840615158378026e-08, "loss": 1.1604, "step": 11708 }, { "epoch": 0.96, "grad_norm": 4.927126922279178, "learning_rate": 4.8222568874716216e-08, "loss": 0.6162, "step": 11709 }, { "epoch": 0.96, "grad_norm": 5.701134441874893, "learning_rate": 4.803933326370025e-08, "loss": 1.0281, "step": 11710 }, { "epoch": 0.96, "grad_norm": 4.122986484143584, "learning_rate": 4.7856444763575424e-08, "loss": 1.07, "step": 11711 }, { "epoch": 0.96, "grad_norm": 4.973057498389193, "learning_rate": 4.7673903387162044e-08, "loss": 1.0825, "step": 11712 }, { "epoch": 0.96, "grad_norm": 6.210457935937212, "learning_rate": 4.749170914725543e-08, "loss": 1.2721, "step": 11713 }, { "epoch": 0.96, "grad_norm": 3.6817758788117056, "learning_rate": 4.730986205662702e-08, "loss": 0.6935, "step": 11714 }, { "epoch": 0.96, "grad_norm": 3.7642625493959705, "learning_rate": 4.712836212802274e-08, "loss": 0.601, "step": 11715 }, { "epoch": 0.96, "grad_norm": 3.7641600812630798, "learning_rate": 4.694720937416519e-08, "loss": 0.6075, "step": 11716 }, { "epoch": 0.96, "grad_norm": 5.638167474691066, "learning_rate": 4.67664038077531e-08, "loss": 1.1841, "step": 11717 }, { "epoch": 0.96, "grad_norm": 3.255147060846687, "learning_rate": 4.658594544145911e-08, "loss": 0.641, "step": 11718 }, { "epoch": 0.96, "grad_norm": 3.736717762131721, "learning_rate": 4.640583428793255e-08, "loss": 0.5668, "step": 11719 }, { "epoch": 0.96, "grad_norm": 3.955164869337488, "learning_rate": 4.622607035979942e-08, "loss": 0.869, "step": 11720 }, { "epoch": 0.96, "grad_norm": 3.213184542465023, "learning_rate": 4.6046653669659656e-08, "loss": 0.5982, "step": 11721 }, { "epoch": 0.96, "grad_norm": 4.653497997895835, "learning_rate": 4.5867584230089853e-08, "loss": 0.6563, "step": 11722 }, { "epoch": 0.96, "grad_norm": 3.528597158347067, "learning_rate": 4.568886205364276e-08, "loss": 0.7737, "step": 11723 }, { "epoch": 0.96, "grad_norm": 3.3007991599918984, "learning_rate": 4.551048715284445e-08, "loss": 0.6716, "step": 11724 }, { "epoch": 0.96, "grad_norm": 3.869732963197292, "learning_rate": 4.5332459540198825e-08, "loss": 0.5945, "step": 11725 }, { "epoch": 0.96, "grad_norm": 3.985582699442896, "learning_rate": 4.515477922818479e-08, "loss": 0.6312, "step": 11726 }, { "epoch": 0.96, "grad_norm": 4.484632735394568, "learning_rate": 4.497744622925793e-08, "loss": 0.3245, "step": 11727 }, { "epoch": 0.96, "grad_norm": 4.567783803478978, "learning_rate": 4.480046055584775e-08, "loss": 0.9989, "step": 11728 }, { "epoch": 0.96, "grad_norm": 4.243537942446385, "learning_rate": 4.462382222035988e-08, "loss": 0.6423, "step": 11729 }, { "epoch": 0.96, "grad_norm": 1.0891058539920115, "learning_rate": 4.444753123517609e-08, "loss": 0.1768, "step": 11730 }, { "epoch": 0.96, "grad_norm": 3.8314988858753942, "learning_rate": 4.427158761265371e-08, "loss": 0.5654, "step": 11731 }, { "epoch": 0.96, "grad_norm": 3.365254800376362, "learning_rate": 4.4095991365125656e-08, "loss": 0.6513, "step": 11732 }, { "epoch": 0.96, "grad_norm": 5.554928906459958, "learning_rate": 4.3920742504900415e-08, "loss": 1.4647, "step": 11733 }, { "epoch": 0.96, "grad_norm": 3.6148808353058794, "learning_rate": 4.3745841044262606e-08, "loss": 0.7798, "step": 11734 }, { "epoch": 0.96, "grad_norm": 5.058028006949942, "learning_rate": 4.357128699547131e-08, "loss": 0.825, "step": 11735 }, { "epoch": 0.96, "grad_norm": 5.582851708848683, "learning_rate": 4.339708037076229e-08, "loss": 1.3073, "step": 11736 }, { "epoch": 0.96, "grad_norm": 4.427789519056039, "learning_rate": 4.3223221182346894e-08, "loss": 0.7024, "step": 11737 }, { "epoch": 0.96, "grad_norm": 3.9148871543389543, "learning_rate": 4.304970944241149e-08, "loss": 0.6003, "step": 11738 }, { "epoch": 0.96, "grad_norm": 4.306396967498024, "learning_rate": 4.2876545163118566e-08, "loss": 0.8819, "step": 11739 }, { "epoch": 0.96, "grad_norm": 3.8677418803361903, "learning_rate": 4.27037283566073e-08, "loss": 0.9604, "step": 11740 }, { "epoch": 0.96, "grad_norm": 2.408385076006431, "learning_rate": 4.253125903498967e-08, "loss": 0.2947, "step": 11741 }, { "epoch": 0.96, "grad_norm": 4.746703569901941, "learning_rate": 4.2359137210356e-08, "loss": 0.769, "step": 11742 }, { "epoch": 0.96, "grad_norm": 2.6779079957653233, "learning_rate": 4.21873628947711e-08, "loss": 0.5306, "step": 11743 }, { "epoch": 0.96, "grad_norm": 3.4562236694218886, "learning_rate": 4.2015936100275324e-08, "loss": 0.9029, "step": 11744 }, { "epoch": 0.96, "grad_norm": 4.228704550219655, "learning_rate": 4.184485683888573e-08, "loss": 0.4066, "step": 11745 }, { "epoch": 0.96, "grad_norm": 5.424287972794271, "learning_rate": 4.167412512259328e-08, "loss": 1.0257, "step": 11746 }, { "epoch": 0.96, "grad_norm": 3.4697683143800604, "learning_rate": 4.150374096336618e-08, "loss": 0.435, "step": 11747 }, { "epoch": 0.96, "grad_norm": 4.440624052189856, "learning_rate": 4.13337043731471e-08, "loss": 0.6654, "step": 11748 }, { "epoch": 0.96, "grad_norm": 2.7522882705179637, "learning_rate": 4.116401536385539e-08, "loss": 0.3438, "step": 11749 }, { "epoch": 0.96, "grad_norm": 3.4657812590362957, "learning_rate": 4.099467394738543e-08, "loss": 0.4506, "step": 11750 }, { "epoch": 0.96, "grad_norm": 4.639754963246002, "learning_rate": 4.0825680135606615e-08, "loss": 0.6189, "step": 11751 }, { "epoch": 0.96, "grad_norm": 2.5670399308099583, "learning_rate": 4.065703394036613e-08, "loss": 0.2676, "step": 11752 }, { "epoch": 0.96, "grad_norm": 3.113158281399538, "learning_rate": 4.048873537348341e-08, "loss": 0.5267, "step": 11753 }, { "epoch": 0.96, "grad_norm": 4.481364819272389, "learning_rate": 4.03207844467568e-08, "loss": 0.9964, "step": 11754 }, { "epoch": 0.96, "grad_norm": 4.975723737572638, "learning_rate": 4.01531811719591e-08, "loss": 0.8212, "step": 11755 }, { "epoch": 0.96, "grad_norm": 4.07136840966976, "learning_rate": 3.998592556083758e-08, "loss": 0.7558, "step": 11756 }, { "epoch": 0.96, "grad_norm": 3.977434357886183, "learning_rate": 3.981901762511675e-08, "loss": 0.6283, "step": 11757 }, { "epoch": 0.96, "grad_norm": 6.1372750896314745, "learning_rate": 3.9652457376496146e-08, "loss": 0.9247, "step": 11758 }, { "epoch": 0.96, "grad_norm": 3.6768346894998594, "learning_rate": 3.9486244826650865e-08, "loss": 0.7823, "step": 11759 }, { "epoch": 0.96, "grad_norm": 5.162561476866158, "learning_rate": 3.932037998723104e-08, "loss": 1.0312, "step": 11760 }, { "epoch": 0.96, "grad_norm": 3.231638084068061, "learning_rate": 3.915486286986403e-08, "loss": 0.718, "step": 11761 }, { "epoch": 0.96, "grad_norm": 2.7585378451458094, "learning_rate": 3.898969348615167e-08, "loss": 0.2716, "step": 11762 }, { "epoch": 0.96, "grad_norm": 2.8033127495889065, "learning_rate": 3.8824871847671366e-08, "loss": 0.6429, "step": 11763 }, { "epoch": 0.96, "grad_norm": 3.7423527907068834, "learning_rate": 3.8660397965976094e-08, "loss": 0.4373, "step": 11764 }, { "epoch": 0.96, "grad_norm": 4.069044778469548, "learning_rate": 3.849627185259497e-08, "loss": 0.6415, "step": 11765 }, { "epoch": 0.96, "grad_norm": 4.315625982864917, "learning_rate": 3.833249351903268e-08, "loss": 0.6324, "step": 11766 }, { "epoch": 0.96, "grad_norm": 2.7279005421132565, "learning_rate": 3.816906297676948e-08, "loss": 0.3445, "step": 11767 }, { "epoch": 0.96, "grad_norm": 3.50701363886008, "learning_rate": 3.800598023726121e-08, "loss": 0.6086, "step": 11768 }, { "epoch": 0.96, "grad_norm": 4.296092547140078, "learning_rate": 3.784324531193928e-08, "loss": 0.7383, "step": 11769 }, { "epoch": 0.96, "grad_norm": 5.23251361838104, "learning_rate": 3.768085821221013e-08, "loss": 0.9903, "step": 11770 }, { "epoch": 0.96, "grad_norm": 4.213264983725306, "learning_rate": 3.7518818949456305e-08, "loss": 1.123, "step": 11771 }, { "epoch": 0.96, "grad_norm": 4.368028614421407, "learning_rate": 3.735712753503706e-08, "loss": 0.9072, "step": 11772 }, { "epoch": 0.96, "grad_norm": 1.8344672689359063, "learning_rate": 3.719578398028556e-08, "loss": 0.2159, "step": 11773 }, { "epoch": 0.96, "grad_norm": 3.012385244807552, "learning_rate": 3.703478829651164e-08, "loss": 0.5381, "step": 11774 }, { "epoch": 0.96, "grad_norm": 5.864676780705077, "learning_rate": 3.687414049500015e-08, "loss": 0.9533, "step": 11775 }, { "epoch": 0.96, "grad_norm": 3.4633608720572053, "learning_rate": 3.671384058701155e-08, "loss": 0.6767, "step": 11776 }, { "epoch": 0.96, "grad_norm": 4.1948794508399985, "learning_rate": 3.6553888583782395e-08, "loss": 0.7211, "step": 11777 }, { "epoch": 0.96, "grad_norm": 2.889285302320259, "learning_rate": 3.639428449652427e-08, "loss": 0.5263, "step": 11778 }, { "epoch": 0.96, "grad_norm": 3.5537407969712893, "learning_rate": 3.6235028336426004e-08, "loss": 0.2516, "step": 11779 }, { "epoch": 0.96, "grad_norm": 4.2204192477410345, "learning_rate": 3.607612011464923e-08, "loss": 0.9395, "step": 11780 }, { "epoch": 0.96, "grad_norm": 3.598581066594799, "learning_rate": 3.591755984233391e-08, "loss": 0.8617, "step": 11781 }, { "epoch": 0.96, "grad_norm": 4.756522623237823, "learning_rate": 3.57593475305934e-08, "loss": 0.9931, "step": 11782 }, { "epoch": 0.96, "grad_norm": 4.064404256654831, "learning_rate": 3.560148319051826e-08, "loss": 0.97, "step": 11783 }, { "epoch": 0.96, "grad_norm": 2.465086785120551, "learning_rate": 3.5443966833174084e-08, "loss": 0.3152, "step": 11784 }, { "epoch": 0.96, "grad_norm": 4.096692457761413, "learning_rate": 3.528679846960148e-08, "loss": 0.4706, "step": 11785 }, { "epoch": 0.96, "grad_norm": 5.195789170395069, "learning_rate": 3.5129978110818866e-08, "loss": 0.6458, "step": 11786 }, { "epoch": 0.96, "grad_norm": 5.942434317814537, "learning_rate": 3.497350576781688e-08, "loss": 1.2318, "step": 11787 }, { "epoch": 0.96, "grad_norm": 3.358805952965998, "learning_rate": 3.4817381451564546e-08, "loss": 0.2734, "step": 11788 }, { "epoch": 0.96, "grad_norm": 4.925661326114637, "learning_rate": 3.466160517300532e-08, "loss": 0.7324, "step": 11789 }, { "epoch": 0.96, "grad_norm": 3.084076068790197, "learning_rate": 3.450617694305825e-08, "loss": 0.5408, "step": 11790 }, { "epoch": 0.96, "grad_norm": 4.839331507480073, "learning_rate": 3.4351096772617945e-08, "loss": 0.9241, "step": 11791 }, { "epoch": 0.96, "grad_norm": 3.8008598230611255, "learning_rate": 3.4196364672555715e-08, "loss": 0.6092, "step": 11792 }, { "epoch": 0.96, "grad_norm": 4.8795245977568715, "learning_rate": 3.4041980653716777e-08, "loss": 0.4539, "step": 11793 }, { "epoch": 0.96, "grad_norm": 4.536553255378163, "learning_rate": 3.388794472692303e-08, "loss": 0.4606, "step": 11794 }, { "epoch": 0.96, "grad_norm": 4.871365365644232, "learning_rate": 3.3734256902971385e-08, "loss": 0.7989, "step": 11795 }, { "epoch": 0.96, "grad_norm": 5.1551404757420975, "learning_rate": 3.3580917192635454e-08, "loss": 1.1861, "step": 11796 }, { "epoch": 0.96, "grad_norm": 5.287953974934314, "learning_rate": 3.3427925606663856e-08, "loss": 0.8849, "step": 11797 }, { "epoch": 0.96, "grad_norm": 3.5919220106556877, "learning_rate": 3.327528215577913e-08, "loss": 0.5035, "step": 11798 }, { "epoch": 0.96, "grad_norm": 4.393547504328156, "learning_rate": 3.3122986850682713e-08, "loss": 0.8699, "step": 11799 }, { "epoch": 0.96, "grad_norm": 5.847976191247303, "learning_rate": 3.297103970204829e-08, "loss": 1.0686, "step": 11800 }, { "epoch": 0.96, "grad_norm": 5.3101053476366795, "learning_rate": 3.2819440720527894e-08, "loss": 1.2983, "step": 11801 }, { "epoch": 0.96, "grad_norm": 4.348969443173985, "learning_rate": 3.266818991674692e-08, "loss": 1.0874, "step": 11802 }, { "epoch": 0.96, "grad_norm": 5.313600503412035, "learning_rate": 3.251728730130854e-08, "loss": 1.148, "step": 11803 }, { "epoch": 0.96, "grad_norm": 4.772193377266182, "learning_rate": 3.236673288478931e-08, "loss": 0.8395, "step": 11804 }, { "epoch": 0.96, "grad_norm": 4.48591309072955, "learning_rate": 3.221652667774355e-08, "loss": 1.0058, "step": 11805 }, { "epoch": 0.96, "grad_norm": 3.960003907617976, "learning_rate": 3.2066668690698967e-08, "loss": 0.608, "step": 11806 }, { "epoch": 0.97, "grad_norm": 1.563836151855298, "learning_rate": 3.191715893415992e-08, "loss": 0.1871, "step": 11807 }, { "epoch": 0.97, "grad_norm": 3.8170741667588723, "learning_rate": 3.1767997418607474e-08, "loss": 0.5567, "step": 11808 }, { "epoch": 0.97, "grad_norm": 3.9128601595015318, "learning_rate": 3.1619184154496605e-08, "loss": 0.5273, "step": 11809 }, { "epoch": 0.97, "grad_norm": 3.4878065354273864, "learning_rate": 3.1470719152257856e-08, "loss": 0.5814, "step": 11810 }, { "epoch": 0.97, "grad_norm": 3.383885581930084, "learning_rate": 3.132260242229901e-08, "loss": 0.6354, "step": 11811 }, { "epoch": 0.97, "grad_norm": 2.9485465857088755, "learning_rate": 3.117483397500232e-08, "loss": 0.2627, "step": 11812 }, { "epoch": 0.97, "grad_norm": 3.9127081237186885, "learning_rate": 3.1027413820724494e-08, "loss": 0.8579, "step": 11813 }, { "epoch": 0.97, "grad_norm": 1.7947974661861645, "learning_rate": 3.0880341969801164e-08, "loss": 0.2722, "step": 11814 }, { "epoch": 0.97, "grad_norm": 4.421281975902339, "learning_rate": 3.073361843253908e-08, "loss": 0.7727, "step": 11815 }, { "epoch": 0.97, "grad_norm": 3.7079889954384253, "learning_rate": 3.058724321922446e-08, "loss": 0.5543, "step": 11816 }, { "epoch": 0.97, "grad_norm": 4.573949392109921, "learning_rate": 3.044121634011687e-08, "loss": 0.893, "step": 11817 }, { "epoch": 0.97, "grad_norm": 4.674279380148187, "learning_rate": 3.029553780545258e-08, "loss": 0.9186, "step": 11818 }, { "epoch": 0.97, "grad_norm": 3.3034868267727884, "learning_rate": 3.015020762544341e-08, "loss": 0.4583, "step": 11819 }, { "epoch": 0.97, "grad_norm": 4.894623775380931, "learning_rate": 3.00052258102751e-08, "loss": 1.0015, "step": 11820 }, { "epoch": 0.97, "grad_norm": 3.603660981984285, "learning_rate": 2.9860592370111186e-08, "loss": 0.361, "step": 11821 }, { "epoch": 0.97, "grad_norm": 4.915259342684215, "learning_rate": 2.9716307315089677e-08, "loss": 0.7958, "step": 11822 }, { "epoch": 0.97, "grad_norm": 4.8168026583412065, "learning_rate": 2.9572370655324146e-08, "loss": 1.1639, "step": 11823 }, { "epoch": 0.97, "grad_norm": 4.374519272465158, "learning_rate": 2.94287824009043e-08, "loss": 0.89, "step": 11824 }, { "epoch": 0.97, "grad_norm": 2.663285053546188, "learning_rate": 2.928554256189431e-08, "loss": 0.5214, "step": 11825 }, { "epoch": 0.97, "grad_norm": 5.9762625834375305, "learning_rate": 2.914265114833614e-08, "loss": 1.0695, "step": 11826 }, { "epoch": 0.97, "grad_norm": 5.332668720167278, "learning_rate": 2.9000108170244013e-08, "loss": 1.2487, "step": 11827 }, { "epoch": 0.97, "grad_norm": 4.408214880538728, "learning_rate": 2.8857913637610478e-08, "loss": 0.7891, "step": 11828 }, { "epoch": 0.97, "grad_norm": 5.41942697624612, "learning_rate": 2.8716067560403128e-08, "loss": 1.0684, "step": 11829 }, { "epoch": 0.97, "grad_norm": 2.8164144094370407, "learning_rate": 2.8574569948564002e-08, "loss": 0.4722, "step": 11830 }, { "epoch": 0.97, "grad_norm": 5.132366332899768, "learning_rate": 2.8433420812011836e-08, "loss": 1.1149, "step": 11831 }, { "epoch": 0.97, "grad_norm": 4.1411737141967455, "learning_rate": 2.829262016064094e-08, "loss": 1.1777, "step": 11832 }, { "epoch": 0.97, "grad_norm": 4.309970324128132, "learning_rate": 2.815216800432008e-08, "loss": 0.6603, "step": 11833 }, { "epoch": 0.97, "grad_norm": 4.351849238006262, "learning_rate": 2.8012064352894718e-08, "loss": 0.8311, "step": 11834 }, { "epoch": 0.97, "grad_norm": 3.1894191347071508, "learning_rate": 2.7872309216185333e-08, "loss": 0.6127, "step": 11835 }, { "epoch": 0.97, "grad_norm": 2.6702761891664815, "learning_rate": 2.7732902603988532e-08, "loss": 0.5173, "step": 11836 }, { "epoch": 0.97, "grad_norm": 2.9034952122764306, "learning_rate": 2.7593844526075943e-08, "loss": 0.349, "step": 11837 }, { "epoch": 0.97, "grad_norm": 4.847761447024961, "learning_rate": 2.7455134992194767e-08, "loss": 0.7418, "step": 11838 }, { "epoch": 0.97, "grad_norm": 4.856028845939515, "learning_rate": 2.7316774012068337e-08, "loss": 0.6664, "step": 11839 }, { "epoch": 0.97, "grad_norm": 3.4415462519630657, "learning_rate": 2.7178761595394455e-08, "loss": 0.6076, "step": 11840 }, { "epoch": 0.97, "grad_norm": 3.7737666124134774, "learning_rate": 2.70410977518476e-08, "loss": 0.4558, "step": 11841 }, { "epoch": 0.97, "grad_norm": 3.920737183462944, "learning_rate": 2.6903782491077278e-08, "loss": 0.7898, "step": 11842 }, { "epoch": 0.97, "grad_norm": 3.068358658790362, "learning_rate": 2.6766815822709124e-08, "loss": 0.7369, "step": 11843 }, { "epoch": 0.97, "grad_norm": 4.147692732508822, "learning_rate": 2.663019775634379e-08, "loss": 0.7402, "step": 11844 }, { "epoch": 0.97, "grad_norm": 2.802750877458447, "learning_rate": 2.6493928301556947e-08, "loss": 0.5201, "step": 11845 }, { "epoch": 0.97, "grad_norm": 5.292352851159922, "learning_rate": 2.635800746790096e-08, "loss": 1.2759, "step": 11846 }, { "epoch": 0.97, "grad_norm": 4.2793226440870615, "learning_rate": 2.62224352649032e-08, "loss": 0.5542, "step": 11847 }, { "epoch": 0.97, "grad_norm": 2.7325544908920576, "learning_rate": 2.6087211702067184e-08, "loss": 0.465, "step": 11848 }, { "epoch": 0.97, "grad_norm": 4.104911052916324, "learning_rate": 2.5952336788871434e-08, "loss": 0.758, "step": 11849 }, { "epoch": 0.97, "grad_norm": 3.1921776183039756, "learning_rate": 2.581781053476895e-08, "loss": 0.3688, "step": 11850 }, { "epoch": 0.97, "grad_norm": 5.523292465051399, "learning_rate": 2.568363294919052e-08, "loss": 1.1649, "step": 11851 }, { "epoch": 0.97, "grad_norm": 2.707201016531321, "learning_rate": 2.5549804041541392e-08, "loss": 0.3458, "step": 11852 }, { "epoch": 0.97, "grad_norm": 4.421203294947658, "learning_rate": 2.5416323821201848e-08, "loss": 0.9934, "step": 11853 }, { "epoch": 0.97, "grad_norm": 3.8331440448136003, "learning_rate": 2.5283192297528846e-08, "loss": 0.5116, "step": 11854 }, { "epoch": 0.97, "grad_norm": 2.4544071763317814, "learning_rate": 2.5150409479853255e-08, "loss": 0.277, "step": 11855 }, { "epoch": 0.97, "grad_norm": 4.6384962008616775, "learning_rate": 2.5017975377483738e-08, "loss": 0.7894, "step": 11856 }, { "epoch": 0.97, "grad_norm": 4.104411309353446, "learning_rate": 2.4885889999703426e-08, "loss": 0.5235, "step": 11857 }, { "epoch": 0.97, "grad_norm": 3.8549440430266007, "learning_rate": 2.4754153355769915e-08, "loss": 0.5376, "step": 11858 }, { "epoch": 0.97, "grad_norm": 4.0554960882424425, "learning_rate": 2.462276545491804e-08, "loss": 0.8901, "step": 11859 }, { "epoch": 0.97, "grad_norm": 4.691121017551824, "learning_rate": 2.4491726306357656e-08, "loss": 0.9459, "step": 11860 }, { "epoch": 0.97, "grad_norm": 5.946935112710814, "learning_rate": 2.4361035919273635e-08, "loss": 0.8441, "step": 11861 }, { "epoch": 0.97, "grad_norm": 4.272086858888599, "learning_rate": 2.423069430282643e-08, "loss": 1.0533, "step": 11862 }, { "epoch": 0.97, "grad_norm": 2.4291959785483113, "learning_rate": 2.4100701466153177e-08, "loss": 0.3649, "step": 11863 }, { "epoch": 0.97, "grad_norm": 3.7287457594289273, "learning_rate": 2.397105741836603e-08, "loss": 0.6391, "step": 11864 }, { "epoch": 0.97, "grad_norm": 3.531476649737334, "learning_rate": 2.384176216855161e-08, "loss": 0.5818, "step": 11865 }, { "epoch": 0.97, "grad_norm": 3.020496239571027, "learning_rate": 2.3712815725773774e-08, "loss": 0.4824, "step": 11866 }, { "epoch": 0.97, "grad_norm": 4.094119632365212, "learning_rate": 2.3584218099070298e-08, "loss": 0.8664, "step": 11867 }, { "epoch": 0.97, "grad_norm": 3.9308390645019133, "learning_rate": 2.345596929745564e-08, "loss": 0.5896, "step": 11868 }, { "epoch": 0.97, "grad_norm": 4.0067100581001664, "learning_rate": 2.3328069329919824e-08, "loss": 0.7413, "step": 11869 }, { "epoch": 0.97, "grad_norm": 3.6962664252409225, "learning_rate": 2.3200518205427346e-08, "loss": 0.6535, "step": 11870 }, { "epoch": 0.97, "grad_norm": 4.581880556701163, "learning_rate": 2.307331593291995e-08, "loss": 0.9012, "step": 11871 }, { "epoch": 0.97, "grad_norm": 2.5539149399642733, "learning_rate": 2.2946462521313274e-08, "loss": 0.6333, "step": 11872 }, { "epoch": 0.97, "grad_norm": 3.5346921718157773, "learning_rate": 2.2819957979499098e-08, "loss": 0.6966, "step": 11873 }, { "epoch": 0.97, "grad_norm": 4.8985294569505085, "learning_rate": 2.2693802316345327e-08, "loss": 1.0231, "step": 11874 }, { "epoch": 0.97, "grad_norm": 3.3983434931825447, "learning_rate": 2.2567995540694888e-08, "loss": 0.3441, "step": 11875 }, { "epoch": 0.97, "grad_norm": 3.1909594209394427, "learning_rate": 2.2442537661365727e-08, "loss": 0.6087, "step": 11876 }, { "epoch": 0.97, "grad_norm": 4.214137835453607, "learning_rate": 2.231742868715303e-08, "loss": 0.7074, "step": 11877 }, { "epoch": 0.97, "grad_norm": 5.373568865436762, "learning_rate": 2.2192668626824788e-08, "loss": 0.9279, "step": 11878 }, { "epoch": 0.97, "grad_norm": 3.858297833004091, "learning_rate": 2.206825748912733e-08, "loss": 0.5545, "step": 11879 }, { "epoch": 0.97, "grad_norm": 3.673507367762111, "learning_rate": 2.194419528278091e-08, "loss": 0.8805, "step": 11880 }, { "epoch": 0.97, "grad_norm": 6.24616288686617, "learning_rate": 2.1820482016481902e-08, "loss": 0.9465, "step": 11881 }, { "epoch": 0.97, "grad_norm": 5.86202337498881, "learning_rate": 2.1697117698901704e-08, "loss": 1.1141, "step": 11882 }, { "epoch": 0.97, "grad_norm": 3.3885582585985783, "learning_rate": 2.1574102338688395e-08, "loss": 0.4595, "step": 11883 }, { "epoch": 0.97, "grad_norm": 5.144123928509865, "learning_rate": 2.1451435944464528e-08, "loss": 1.4252, "step": 11884 }, { "epoch": 0.97, "grad_norm": 3.7105695583681575, "learning_rate": 2.1329118524827662e-08, "loss": 0.586, "step": 11885 }, { "epoch": 0.97, "grad_norm": 3.9768130214957003, "learning_rate": 2.120715008835261e-08, "loss": 0.7267, "step": 11886 }, { "epoch": 0.97, "grad_norm": 4.60573974763432, "learning_rate": 2.1085530643588094e-08, "loss": 0.8107, "step": 11887 }, { "epoch": 0.97, "grad_norm": 5.176063171160444, "learning_rate": 2.096426019906006e-08, "loss": 0.7135, "step": 11888 }, { "epoch": 0.97, "grad_norm": 3.689431942878935, "learning_rate": 2.0843338763268382e-08, "loss": 0.6004, "step": 11889 }, { "epoch": 0.97, "grad_norm": 4.014500092531755, "learning_rate": 2.0722766344689617e-08, "loss": 0.7066, "step": 11890 }, { "epoch": 0.97, "grad_norm": 3.82800488707636, "learning_rate": 2.0602542951774774e-08, "loss": 0.6034, "step": 11891 }, { "epoch": 0.97, "grad_norm": 5.390492523335453, "learning_rate": 2.0482668592951004e-08, "loss": 0.8957, "step": 11892 }, { "epoch": 0.97, "grad_norm": 4.226787649241726, "learning_rate": 2.036314327662159e-08, "loss": 0.8212, "step": 11893 }, { "epoch": 0.97, "grad_norm": 2.9940600042133187, "learning_rate": 2.0243967011164267e-08, "loss": 0.37, "step": 11894 }, { "epoch": 0.97, "grad_norm": 4.627394164356139, "learning_rate": 2.0125139804932913e-08, "loss": 0.602, "step": 11895 }, { "epoch": 0.97, "grad_norm": 4.748931715639397, "learning_rate": 2.0006661666256978e-08, "loss": 0.8945, "step": 11896 }, { "epoch": 0.97, "grad_norm": 3.1576488933848617, "learning_rate": 1.988853260344037e-08, "loss": 0.3595, "step": 11897 }, { "epoch": 0.97, "grad_norm": 3.3844736354884932, "learning_rate": 1.97707526247648e-08, "loss": 0.4471, "step": 11898 }, { "epoch": 0.97, "grad_norm": 5.928263498979704, "learning_rate": 1.965332173848533e-08, "loss": 1.2007, "step": 11899 }, { "epoch": 0.97, "grad_norm": 1.678241608241835, "learning_rate": 1.9536239952833712e-08, "loss": 0.2836, "step": 11900 }, { "epoch": 0.97, "grad_norm": 5.065948311688887, "learning_rate": 1.9419507276016158e-08, "loss": 1.2764, "step": 11901 }, { "epoch": 0.97, "grad_norm": 5.294378649306096, "learning_rate": 1.9303123716215565e-08, "loss": 0.9947, "step": 11902 }, { "epoch": 0.97, "grad_norm": 3.096427690802625, "learning_rate": 1.9187089281589853e-08, "loss": 0.6188, "step": 11903 }, { "epoch": 0.97, "grad_norm": 5.080728240667454, "learning_rate": 1.9071403980273075e-08, "loss": 1.1303, "step": 11904 }, { "epoch": 0.97, "grad_norm": 3.347288640554572, "learning_rate": 1.895606782037318e-08, "loss": 0.6736, "step": 11905 }, { "epoch": 0.97, "grad_norm": 4.780075138837884, "learning_rate": 1.8841080809975933e-08, "loss": 1.0143, "step": 11906 }, { "epoch": 0.97, "grad_norm": 5.762028599734933, "learning_rate": 1.872644295714099e-08, "loss": 0.9305, "step": 11907 }, { "epoch": 0.97, "grad_norm": 5.049603974319265, "learning_rate": 1.8612154269903036e-08, "loss": 1.0608, "step": 11908 }, { "epoch": 0.97, "grad_norm": 3.939565444899833, "learning_rate": 1.8498214756274558e-08, "loss": 0.5482, "step": 11909 }, { "epoch": 0.97, "grad_norm": 4.353598562575978, "learning_rate": 1.8384624424241383e-08, "loss": 0.9264, "step": 11910 }, { "epoch": 0.97, "grad_norm": 3.210083125387604, "learning_rate": 1.827138328176603e-08, "loss": 0.6805, "step": 11911 }, { "epoch": 0.97, "grad_norm": 3.789070033516132, "learning_rate": 1.81584913367866e-08, "loss": 0.6795, "step": 11912 }, { "epoch": 0.97, "grad_norm": 4.278633192332255, "learning_rate": 1.8045948597215646e-08, "loss": 1.0238, "step": 11913 }, { "epoch": 0.97, "grad_norm": 1.605182109465508, "learning_rate": 1.793375507094186e-08, "loss": 0.3206, "step": 11914 }, { "epoch": 0.97, "grad_norm": 3.003408022361503, "learning_rate": 1.7821910765830063e-08, "loss": 0.6041, "step": 11915 }, { "epoch": 0.97, "grad_norm": 2.765548228462366, "learning_rate": 1.771041568971954e-08, "loss": 0.4168, "step": 11916 }, { "epoch": 0.97, "grad_norm": 4.456842463420886, "learning_rate": 1.7599269850426258e-08, "loss": 0.8319, "step": 11917 }, { "epoch": 0.97, "grad_norm": 3.731562967711699, "learning_rate": 1.7488473255740657e-08, "loss": 0.653, "step": 11918 }, { "epoch": 0.97, "grad_norm": 3.1815817245847033, "learning_rate": 1.7378025913428743e-08, "loss": 0.6323, "step": 11919 }, { "epoch": 0.97, "grad_norm": 3.659768289388934, "learning_rate": 1.726792783123321e-08, "loss": 0.5598, "step": 11920 }, { "epoch": 0.97, "grad_norm": 3.5609989582963357, "learning_rate": 1.7158179016870668e-08, "loss": 0.5553, "step": 11921 }, { "epoch": 0.97, "grad_norm": 3.873352263562927, "learning_rate": 1.7048779478034404e-08, "loss": 0.8343, "step": 11922 }, { "epoch": 0.97, "grad_norm": 2.0772548699872506, "learning_rate": 1.6939729222393286e-08, "loss": 0.279, "step": 11923 }, { "epoch": 0.97, "grad_norm": 3.0376698788409873, "learning_rate": 1.6831028257590087e-08, "loss": 0.6518, "step": 11924 }, { "epoch": 0.97, "grad_norm": 3.657201488738241, "learning_rate": 1.6722676591245378e-08, "loss": 0.4616, "step": 11925 }, { "epoch": 0.97, "grad_norm": 3.8920629960410293, "learning_rate": 1.6614674230953643e-08, "loss": 0.751, "step": 11926 }, { "epoch": 0.97, "grad_norm": 3.7380583054259944, "learning_rate": 1.6507021184285488e-08, "loss": 0.5969, "step": 11927 }, { "epoch": 0.97, "grad_norm": 4.484805408914432, "learning_rate": 1.63997174587871e-08, "loss": 0.6674, "step": 11928 }, { "epoch": 0.98, "grad_norm": 2.708051571600095, "learning_rate": 1.629276306197969e-08, "loss": 0.4126, "step": 11929 }, { "epoch": 0.98, "grad_norm": 4.009169489358039, "learning_rate": 1.6186158001360587e-08, "loss": 0.9243, "step": 11930 }, { "epoch": 0.98, "grad_norm": 3.5079588041639775, "learning_rate": 1.607990228440215e-08, "loss": 0.6558, "step": 11931 }, { "epoch": 0.98, "grad_norm": 6.309796315389396, "learning_rate": 1.597399591855231e-08, "loss": 1.3317, "step": 11932 }, { "epoch": 0.98, "grad_norm": 4.191601799634067, "learning_rate": 1.5868438911234575e-08, "loss": 0.7678, "step": 11933 }, { "epoch": 0.98, "grad_norm": 5.033040535587328, "learning_rate": 1.5763231269848578e-08, "loss": 1.1157, "step": 11934 }, { "epoch": 0.98, "grad_norm": 5.441359331862082, "learning_rate": 1.5658373001768423e-08, "loss": 1.1209, "step": 11935 }, { "epoch": 0.98, "grad_norm": 0.9344080602485448, "learning_rate": 1.555386411434434e-08, "loss": 0.1267, "step": 11936 }, { "epoch": 0.98, "grad_norm": 5.224522930216481, "learning_rate": 1.544970461490214e-08, "loss": 0.7561, "step": 11937 }, { "epoch": 0.98, "grad_norm": 4.551766070858134, "learning_rate": 1.53458945107432e-08, "loss": 1.2067, "step": 11938 }, { "epoch": 0.98, "grad_norm": 3.9293824274572704, "learning_rate": 1.5242433809143364e-08, "loss": 0.2524, "step": 11939 }, { "epoch": 0.98, "grad_norm": 4.711783064616934, "learning_rate": 1.5139322517355172e-08, "loss": 0.9472, "step": 11940 }, { "epoch": 0.98, "grad_norm": 3.415126915218515, "learning_rate": 1.5036560642606167e-08, "loss": 0.8046, "step": 11941 }, { "epoch": 0.98, "grad_norm": 3.426248150142898, "learning_rate": 1.493414819210004e-08, "loss": 0.5639, "step": 11942 }, { "epoch": 0.98, "grad_norm": 3.3045330296380113, "learning_rate": 1.4832085173014376e-08, "loss": 0.6792, "step": 11943 }, { "epoch": 0.98, "grad_norm": 1.062424557004166, "learning_rate": 1.4730371592504567e-08, "loss": 0.1663, "step": 11944 }, { "epoch": 0.98, "grad_norm": 3.925225735951944, "learning_rate": 1.4629007457699906e-08, "loss": 0.6722, "step": 11945 }, { "epoch": 0.98, "grad_norm": 3.3487778120695566, "learning_rate": 1.4527992775704713e-08, "loss": 0.8364, "step": 11946 }, { "epoch": 0.98, "grad_norm": 4.472139605321025, "learning_rate": 1.4427327553601101e-08, "loss": 0.8356, "step": 11947 }, { "epoch": 0.98, "grad_norm": 3.172637141606483, "learning_rate": 1.432701179844398e-08, "loss": 0.495, "step": 11948 }, { "epoch": 0.98, "grad_norm": 4.643966694879904, "learning_rate": 1.4227045517266059e-08, "loss": 1.0396, "step": 11949 }, { "epoch": 0.98, "grad_norm": 5.667096334034253, "learning_rate": 1.4127428717073955e-08, "loss": 1.1284, "step": 11950 }, { "epoch": 0.98, "grad_norm": 3.9324324700908826, "learning_rate": 1.4028161404850415e-08, "loss": 0.5657, "step": 11951 }, { "epoch": 0.98, "grad_norm": 4.679804891627546, "learning_rate": 1.3929243587553764e-08, "loss": 0.6958, "step": 11952 }, { "epoch": 0.98, "grad_norm": 3.805587188154656, "learning_rate": 1.3830675272117344e-08, "loss": 0.8619, "step": 11953 }, { "epoch": 0.98, "grad_norm": 2.6120518053429453, "learning_rate": 1.3732456465451182e-08, "loss": 0.3556, "step": 11954 }, { "epoch": 0.98, "grad_norm": 5.866311449363066, "learning_rate": 1.3634587174439218e-08, "loss": 0.8505, "step": 11955 }, { "epoch": 0.98, "grad_norm": 3.881058075921292, "learning_rate": 1.3537067405942072e-08, "loss": 1.0058, "step": 11956 }, { "epoch": 0.98, "grad_norm": 1.8398806888597605, "learning_rate": 1.3439897166795945e-08, "loss": 0.318, "step": 11957 }, { "epoch": 0.98, "grad_norm": 4.063571392378166, "learning_rate": 1.3343076463810389e-08, "loss": 0.5925, "step": 11958 }, { "epoch": 0.98, "grad_norm": 5.298045597186789, "learning_rate": 1.3246605303773864e-08, "loss": 0.6629, "step": 11959 }, { "epoch": 0.98, "grad_norm": 6.473056012845691, "learning_rate": 1.3150483693447625e-08, "loss": 1.2219, "step": 11960 }, { "epoch": 0.98, "grad_norm": 5.532689657101384, "learning_rate": 1.3054711639569616e-08, "loss": 1.2582, "step": 11961 }, { "epoch": 0.98, "grad_norm": 3.7416802434167455, "learning_rate": 1.295928914885336e-08, "loss": 0.8745, "step": 11962 }, { "epoch": 0.98, "grad_norm": 3.9350085760977156, "learning_rate": 1.2864216227986837e-08, "loss": 0.6069, "step": 11963 }, { "epoch": 0.98, "grad_norm": 4.849630921846114, "learning_rate": 1.276949288363527e-08, "loss": 1.3068, "step": 11964 }, { "epoch": 0.98, "grad_norm": 4.206955644824944, "learning_rate": 1.267511912243724e-08, "loss": 0.8276, "step": 11965 }, { "epoch": 0.98, "grad_norm": 3.423765874126548, "learning_rate": 1.2581094951008566e-08, "loss": 0.5732, "step": 11966 }, { "epoch": 0.98, "grad_norm": 4.300025809881357, "learning_rate": 1.2487420375939529e-08, "loss": 0.8077, "step": 11967 }, { "epoch": 0.98, "grad_norm": 6.206757278466938, "learning_rate": 1.2394095403797102e-08, "loss": 1.3847, "step": 11968 }, { "epoch": 0.98, "grad_norm": 3.9055794737494822, "learning_rate": 1.2301120041122161e-08, "loss": 0.6262, "step": 11969 }, { "epoch": 0.98, "grad_norm": 5.613323997082934, "learning_rate": 1.2208494294432272e-08, "loss": 0.8649, "step": 11970 }, { "epoch": 0.98, "grad_norm": 2.230109954266526, "learning_rate": 1.2116218170220018e-08, "loss": 0.2765, "step": 11971 }, { "epoch": 0.98, "grad_norm": 2.2792223501490074, "learning_rate": 1.202429167495356e-08, "loss": 0.3214, "step": 11972 }, { "epoch": 0.98, "grad_norm": 4.2794069348955475, "learning_rate": 1.1932714815076075e-08, "loss": 0.7824, "step": 11973 }, { "epoch": 0.98, "grad_norm": 4.515920252114767, "learning_rate": 1.1841487597007983e-08, "loss": 0.7136, "step": 11974 }, { "epoch": 0.98, "grad_norm": 4.47317136412364, "learning_rate": 1.1750610027142506e-08, "loss": 1.1016, "step": 11975 }, { "epoch": 0.98, "grad_norm": 1.1656497913356112, "learning_rate": 1.1660082111850101e-08, "loss": 0.1311, "step": 11976 }, { "epoch": 0.98, "grad_norm": 3.428046457044096, "learning_rate": 1.156990385747736e-08, "loss": 0.5926, "step": 11977 }, { "epoch": 0.98, "grad_norm": 4.010542686065303, "learning_rate": 1.1480075270343671e-08, "loss": 0.4662, "step": 11978 }, { "epoch": 0.98, "grad_norm": 4.1502107158997195, "learning_rate": 1.139059635674733e-08, "loss": 0.6396, "step": 11979 }, { "epoch": 0.98, "grad_norm": 3.514111467989537, "learning_rate": 1.1301467122959432e-08, "loss": 0.6339, "step": 11980 }, { "epoch": 0.98, "grad_norm": 4.807595894519607, "learning_rate": 1.1212687575227754e-08, "loss": 0.7422, "step": 11981 }, { "epoch": 0.98, "grad_norm": 1.8296572392135173, "learning_rate": 1.1124257719775655e-08, "loss": 0.2994, "step": 11982 }, { "epoch": 0.98, "grad_norm": 4.495040332096613, "learning_rate": 1.1036177562800954e-08, "loss": 0.5596, "step": 11983 }, { "epoch": 0.98, "grad_norm": 5.374313945016398, "learning_rate": 1.0948447110478711e-08, "loss": 0.8237, "step": 11984 }, { "epoch": 0.98, "grad_norm": 3.3603395917008236, "learning_rate": 1.0861066368957341e-08, "loss": 0.5336, "step": 11985 }, { "epoch": 0.98, "grad_norm": 5.797164024142589, "learning_rate": 1.0774035344363054e-08, "loss": 1.0428, "step": 11986 }, { "epoch": 0.98, "grad_norm": 4.798474908659315, "learning_rate": 1.0687354042795417e-08, "loss": 0.5348, "step": 11987 }, { "epoch": 0.98, "grad_norm": 3.653075763774985, "learning_rate": 1.060102247033068e-08, "loss": 0.8898, "step": 11988 }, { "epoch": 0.98, "grad_norm": 3.7221055998469206, "learning_rate": 1.0515040633020112e-08, "loss": 0.8894, "step": 11989 }, { "epoch": 0.98, "grad_norm": 5.239262202588881, "learning_rate": 1.0429408536891117e-08, "loss": 1.143, "step": 11990 }, { "epoch": 0.98, "grad_norm": 1.4338166321686046, "learning_rate": 1.0344126187946113e-08, "loss": 0.1791, "step": 11991 }, { "epoch": 0.98, "grad_norm": 2.6841008740597605, "learning_rate": 1.0259193592162541e-08, "loss": 0.5626, "step": 11992 }, { "epoch": 0.98, "grad_norm": 2.378483590578005, "learning_rate": 1.0174610755493974e-08, "loss": 0.4065, "step": 11993 }, { "epoch": 0.98, "grad_norm": 2.2204144413208717, "learning_rate": 1.0090377683869557e-08, "loss": 0.2904, "step": 11994 }, { "epoch": 0.98, "grad_norm": 5.9902759711671125, "learning_rate": 1.0006494383193454e-08, "loss": 1.1846, "step": 11995 }, { "epoch": 0.98, "grad_norm": 3.853469311831628, "learning_rate": 9.92296085934541e-09, "loss": 0.547, "step": 11996 }, { "epoch": 0.98, "grad_norm": 5.9650594451607555, "learning_rate": 9.839777118181293e-09, "loss": 1.2478, "step": 11997 }, { "epoch": 0.98, "grad_norm": 5.338080966871579, "learning_rate": 9.756943165531441e-09, "loss": 0.9709, "step": 11998 }, { "epoch": 0.98, "grad_norm": 5.606574446922868, "learning_rate": 9.67445900720232e-09, "loss": 1.0099, "step": 11999 }, { "epoch": 0.98, "grad_norm": 3.2828035934487065, "learning_rate": 9.592324648975415e-09, "loss": 0.714, "step": 12000 }, { "epoch": 0.98, "grad_norm": 6.215669569185245, "learning_rate": 9.510540096608345e-09, "loss": 1.2572, "step": 12001 }, { "epoch": 0.98, "grad_norm": 3.2673720077363075, "learning_rate": 9.429105355833745e-09, "loss": 0.5131, "step": 12002 }, { "epoch": 0.98, "grad_norm": 2.8685929308413742, "learning_rate": 9.348020432359829e-09, "loss": 0.7717, "step": 12003 }, { "epoch": 0.98, "grad_norm": 4.6612516918354405, "learning_rate": 9.267285331870378e-09, "loss": 0.8762, "step": 12004 }, { "epoch": 0.98, "grad_norm": 4.835309486902613, "learning_rate": 9.186900060024207e-09, "loss": 1.3228, "step": 12005 }, { "epoch": 0.98, "grad_norm": 5.1110886609000055, "learning_rate": 9.106864622456246e-09, "loss": 0.6116, "step": 12006 }, { "epoch": 0.98, "grad_norm": 3.398732853475271, "learning_rate": 9.02717902477701e-09, "loss": 0.7167, "step": 12007 }, { "epoch": 0.98, "grad_norm": 6.905729799829959, "learning_rate": 8.947843272571477e-09, "loss": 1.4806, "step": 12008 }, { "epoch": 0.98, "grad_norm": 2.3584098306988754, "learning_rate": 8.868857371401306e-09, "loss": 0.3806, "step": 12009 }, { "epoch": 0.98, "grad_norm": 3.295833479211921, "learning_rate": 8.790221326802074e-09, "loss": 0.7821, "step": 12010 }, { "epoch": 0.98, "grad_norm": 6.056794579846256, "learning_rate": 8.711935144287142e-09, "loss": 1.2853, "step": 12011 }, { "epoch": 0.98, "grad_norm": 4.741309227622882, "learning_rate": 8.633998829343237e-09, "loss": 1.0193, "step": 12012 }, { "epoch": 0.98, "grad_norm": 4.416962423605944, "learning_rate": 8.55641238743321e-09, "loss": 1.1095, "step": 12013 }, { "epoch": 0.98, "grad_norm": 4.034344568422561, "learning_rate": 8.479175823996044e-09, "loss": 0.5427, "step": 12014 }, { "epoch": 0.98, "grad_norm": 3.6788069740696177, "learning_rate": 8.40228914444574e-09, "loss": 0.6993, "step": 12015 }, { "epoch": 0.98, "grad_norm": 4.450727521986115, "learning_rate": 8.325752354171324e-09, "loss": 0.3584, "step": 12016 }, { "epoch": 0.98, "grad_norm": 2.6095034716190466, "learning_rate": 8.24956545853739e-09, "loss": 0.5702, "step": 12017 }, { "epoch": 0.98, "grad_norm": 4.655746956771417, "learning_rate": 8.173728462885222e-09, "loss": 0.8922, "step": 12018 }, { "epoch": 0.98, "grad_norm": 4.668643716701874, "learning_rate": 8.098241372530013e-09, "loss": 0.5593, "step": 12019 }, { "epoch": 0.98, "grad_norm": 3.4627075621636396, "learning_rate": 8.023104192763642e-09, "loss": 0.7913, "step": 12020 }, { "epoch": 0.98, "grad_norm": 3.3071089435334793, "learning_rate": 7.948316928851896e-09, "loss": 0.5397, "step": 12021 }, { "epoch": 0.98, "grad_norm": 3.7342020527729853, "learning_rate": 7.873879586037803e-09, "loss": 0.7649, "step": 12022 }, { "epoch": 0.98, "grad_norm": 5.573511773337518, "learning_rate": 7.79979216953941e-09, "loss": 1.1509, "step": 12023 }, { "epoch": 0.98, "grad_norm": 5.709589592626596, "learning_rate": 7.726054684549234e-09, "loss": 1.1941, "step": 12024 }, { "epoch": 0.98, "grad_norm": 3.209516616837038, "learning_rate": 7.652667136235914e-09, "loss": 0.3777, "step": 12025 }, { "epoch": 0.98, "grad_norm": 2.3845853169481668, "learning_rate": 7.579629529744225e-09, "loss": 0.2514, "step": 12026 }, { "epoch": 0.98, "grad_norm": 3.636504614812306, "learning_rate": 7.506941870192851e-09, "loss": 0.6073, "step": 12027 }, { "epoch": 0.98, "grad_norm": 2.594125764328588, "learning_rate": 7.434604162678271e-09, "loss": 0.5927, "step": 12028 }, { "epoch": 0.98, "grad_norm": 5.09639929471377, "learning_rate": 7.362616412269763e-09, "loss": 0.8389, "step": 12029 }, { "epoch": 0.98, "grad_norm": 4.47282169012807, "learning_rate": 7.290978624013289e-09, "loss": 0.6142, "step": 12030 }, { "epoch": 0.98, "grad_norm": 2.471227365977711, "learning_rate": 7.2196908029315e-09, "loss": 0.3131, "step": 12031 }, { "epoch": 0.98, "grad_norm": 3.8664298739220238, "learning_rate": 7.148752954020955e-09, "loss": 0.6902, "step": 12032 }, { "epoch": 0.98, "grad_norm": 3.0530976836411043, "learning_rate": 7.07816508225323e-09, "loss": 0.4628, "step": 12033 }, { "epoch": 0.98, "grad_norm": 5.484740333351431, "learning_rate": 7.0079271925771465e-09, "loss": 1.124, "step": 12034 }, { "epoch": 0.98, "grad_norm": 4.338984250732091, "learning_rate": 6.9380392899159875e-09, "loss": 0.9777, "step": 12035 }, { "epoch": 0.98, "grad_norm": 4.42961326301565, "learning_rate": 6.868501379168058e-09, "loss": 0.6223, "step": 12036 }, { "epoch": 0.98, "grad_norm": 5.52526406643918, "learning_rate": 6.799313465208346e-09, "loss": 1.2402, "step": 12037 }, { "epoch": 0.98, "grad_norm": 3.624715613096466, "learning_rate": 6.730475552886306e-09, "loss": 0.6295, "step": 12038 }, { "epoch": 0.98, "grad_norm": 2.777159478225706, "learning_rate": 6.661987647026969e-09, "loss": 0.6173, "step": 12039 }, { "epoch": 0.98, "grad_norm": 2.5679183930589797, "learning_rate": 6.593849752430936e-09, "loss": 0.4285, "step": 12040 }, { "epoch": 0.98, "grad_norm": 4.2585865016766355, "learning_rate": 6.5260618738749445e-09, "loss": 0.7382, "step": 12041 }, { "epoch": 0.98, "grad_norm": 4.215975875018411, "learning_rate": 6.458624016110193e-09, "loss": 0.9977, "step": 12042 }, { "epoch": 0.98, "grad_norm": 2.232271902640726, "learning_rate": 6.391536183864566e-09, "loss": 0.3883, "step": 12043 }, { "epoch": 0.98, "grad_norm": 4.153310257696214, "learning_rate": 6.324798381839303e-09, "loss": 0.7757, "step": 12044 }, { "epoch": 0.98, "grad_norm": 2.174932807336339, "learning_rate": 6.2584106147134395e-09, "loss": 0.3164, "step": 12045 }, { "epoch": 0.98, "grad_norm": 2.3192495238076245, "learning_rate": 6.192372887139919e-09, "loss": 0.3615, "step": 12046 }, { "epoch": 0.98, "grad_norm": 4.130341919061836, "learning_rate": 6.126685203747818e-09, "loss": 0.802, "step": 12047 }, { "epoch": 0.98, "grad_norm": 4.352963632177688, "learning_rate": 6.0613475691417845e-09, "loss": 0.5643, "step": 12048 }, { "epoch": 0.98, "grad_norm": 5.739231463377654, "learning_rate": 5.996359987902045e-09, "loss": 1.1208, "step": 12049 }, { "epoch": 0.98, "grad_norm": 4.948863433441496, "learning_rate": 5.931722464583289e-09, "loss": 1.0449, "step": 12050 }, { "epoch": 0.99, "grad_norm": 1.2005746125622068, "learning_rate": 5.8674350037163374e-09, "loss": 0.1879, "step": 12051 }, { "epoch": 0.99, "grad_norm": 2.313853268223101, "learning_rate": 5.803497609807585e-09, "loss": 0.3264, "step": 12052 }, { "epoch": 0.99, "grad_norm": 5.948738610094623, "learning_rate": 5.7399102873390015e-09, "loss": 1.0435, "step": 12053 }, { "epoch": 0.99, "grad_norm": 4.410587302649346, "learning_rate": 5.676673040767578e-09, "loss": 0.6349, "step": 12054 }, { "epoch": 0.99, "grad_norm": 3.5198431546681044, "learning_rate": 5.613785874525879e-09, "loss": 0.8272, "step": 12055 }, { "epoch": 0.99, "grad_norm": 3.5320677154198457, "learning_rate": 5.551248793022601e-09, "loss": 0.707, "step": 12056 }, { "epoch": 0.99, "grad_norm": 2.365378882592431, "learning_rate": 5.48906180064035e-09, "loss": 0.5722, "step": 12057 }, { "epoch": 0.99, "grad_norm": 4.508363246514844, "learning_rate": 5.42722490173897e-09, "loss": 0.6041, "step": 12058 }, { "epoch": 0.99, "grad_norm": 3.7126384252880693, "learning_rate": 5.365738100652773e-09, "loss": 0.7763, "step": 12059 }, { "epoch": 0.99, "grad_norm": 3.957741692123336, "learning_rate": 5.304601401691089e-09, "loss": 0.64, "step": 12060 }, { "epoch": 0.99, "grad_norm": 5.367737865698479, "learning_rate": 5.243814809140491e-09, "loss": 0.8158, "step": 12061 }, { "epoch": 0.99, "grad_norm": 5.098217074943541, "learning_rate": 5.1833783272609016e-09, "loss": 1.0765, "step": 12062 }, { "epoch": 0.99, "grad_norm": 3.065663853354703, "learning_rate": 5.123291960288934e-09, "loss": 0.8675, "step": 12063 }, { "epoch": 0.99, "grad_norm": 3.3357442866701796, "learning_rate": 5.0635557124362185e-09, "loss": 0.6963, "step": 12064 }, { "epoch": 0.99, "grad_norm": 3.910208697172912, "learning_rate": 5.0041695878905175e-09, "loss": 0.9939, "step": 12065 }, { "epoch": 0.99, "grad_norm": 4.52745161151713, "learning_rate": 4.94513359081461e-09, "loss": 0.7236, "step": 12066 }, { "epoch": 0.99, "grad_norm": 2.697380862613846, "learning_rate": 4.886447725345744e-09, "loss": 0.3768, "step": 12067 }, { "epoch": 0.99, "grad_norm": 4.151388769159717, "learning_rate": 4.82811199559785e-09, "loss": 0.8545, "step": 12068 }, { "epoch": 0.99, "grad_norm": 7.4866699720702625, "learning_rate": 4.7701264056609905e-09, "loss": 1.1383, "step": 12069 }, { "epoch": 0.99, "grad_norm": 2.836390930087791, "learning_rate": 4.712490959598581e-09, "loss": 0.4125, "step": 12070 }, { "epoch": 0.99, "grad_norm": 3.1532673518670102, "learning_rate": 4.655205661450724e-09, "loss": 0.7576, "step": 12071 }, { "epoch": 0.99, "grad_norm": 1.4577000518900594, "learning_rate": 4.5982705152336496e-09, "loss": 0.1866, "step": 12072 }, { "epoch": 0.99, "grad_norm": 3.3940924734291844, "learning_rate": 4.5416855249375e-09, "loss": 0.4667, "step": 12073 }, { "epoch": 0.99, "grad_norm": 4.407259559105291, "learning_rate": 4.485450694528548e-09, "loss": 0.8662, "step": 12074 }, { "epoch": 0.99, "grad_norm": 3.0699053562622516, "learning_rate": 4.42956602794975e-09, "loss": 0.441, "step": 12075 }, { "epoch": 0.99, "grad_norm": 4.38530006511475, "learning_rate": 4.374031529116862e-09, "loss": 0.7571, "step": 12076 }, { "epoch": 0.99, "grad_norm": 4.387214073840642, "learning_rate": 4.318847201923437e-09, "loss": 0.9429, "step": 12077 }, { "epoch": 0.99, "grad_norm": 4.66135024157567, "learning_rate": 4.264013050238047e-09, "loss": 0.801, "step": 12078 }, { "epoch": 0.99, "grad_norm": 4.0667162608964755, "learning_rate": 4.209529077903174e-09, "loss": 0.5635, "step": 12079 }, { "epoch": 0.99, "grad_norm": 4.742693783361723, "learning_rate": 4.155395288739095e-09, "loss": 0.6254, "step": 12080 }, { "epoch": 0.99, "grad_norm": 3.044281661809891, "learning_rate": 4.101611686539442e-09, "loss": 0.6202, "step": 12081 }, { "epoch": 0.99, "grad_norm": 3.0761978720826253, "learning_rate": 4.0481782750745325e-09, "loss": 0.4201, "step": 12082 }, { "epoch": 0.99, "grad_norm": 6.880557286207846, "learning_rate": 3.995095058090259e-09, "loss": 0.9233, "step": 12083 }, { "epoch": 0.99, "grad_norm": 3.5296076657355235, "learning_rate": 3.942362039306979e-09, "loss": 0.5142, "step": 12084 }, { "epoch": 0.99, "grad_norm": 4.464720030419047, "learning_rate": 3.889979222421181e-09, "loss": 1.1719, "step": 12085 }, { "epoch": 0.99, "grad_norm": 4.1649747010164475, "learning_rate": 3.837946611104926e-09, "loss": 0.7354, "step": 12086 }, { "epoch": 0.99, "grad_norm": 2.6497758735676253, "learning_rate": 3.786264209004742e-09, "loss": 0.5491, "step": 12087 }, { "epoch": 0.99, "grad_norm": 6.349024582490853, "learning_rate": 3.7349320197443974e-09, "loss": 1.4597, "step": 12088 }, { "epoch": 0.99, "grad_norm": 1.7430878185833116, "learning_rate": 3.6839500469210145e-09, "loss": 0.3025, "step": 12089 }, { "epoch": 0.99, "grad_norm": 5.420295160858151, "learning_rate": 3.633318294108956e-09, "loss": 0.7601, "step": 12090 }, { "epoch": 0.99, "grad_norm": 3.9294396445199298, "learning_rate": 3.583036764857051e-09, "loss": 0.8451, "step": 12091 }, { "epoch": 0.99, "grad_norm": 4.166301839906999, "learning_rate": 3.533105462689701e-09, "loss": 0.774, "step": 12092 }, { "epoch": 0.99, "grad_norm": 4.400574956124744, "learning_rate": 3.483524391106885e-09, "loss": 0.9232, "step": 12093 }, { "epoch": 0.99, "grad_norm": 2.33060608225683, "learning_rate": 3.434293553584156e-09, "loss": 0.3535, "step": 12094 }, { "epoch": 0.99, "grad_norm": 1.7566506342748722, "learning_rate": 3.385412953572087e-09, "loss": 0.2392, "step": 12095 }, { "epoch": 0.99, "grad_norm": 3.964425428089187, "learning_rate": 3.3368825944973813e-09, "loss": 0.6337, "step": 12096 }, { "epoch": 0.99, "grad_norm": 3.5144354601565175, "learning_rate": 3.2887024797617628e-09, "loss": 0.4264, "step": 12097 }, { "epoch": 0.99, "grad_norm": 4.3015414214756476, "learning_rate": 3.2408726127425294e-09, "loss": 0.7685, "step": 12098 }, { "epoch": 0.99, "grad_norm": 3.7046508179692763, "learning_rate": 3.1933929967919996e-09, "loss": 0.6863, "step": 12099 }, { "epoch": 0.99, "grad_norm": 3.050043842814273, "learning_rate": 3.146263635238622e-09, "loss": 0.498, "step": 12100 }, { "epoch": 0.99, "grad_norm": 5.431672901040743, "learning_rate": 3.0994845313853106e-09, "loss": 0.87, "step": 12101 }, { "epoch": 0.99, "grad_norm": 6.753347493145347, "learning_rate": 3.0530556885116637e-09, "loss": 1.0715, "step": 12102 }, { "epoch": 0.99, "grad_norm": 2.7226135852669464, "learning_rate": 3.0069771098723e-09, "loss": 0.4195, "step": 12103 }, { "epoch": 0.99, "grad_norm": 3.2460739822213736, "learning_rate": 2.9612487986968587e-09, "loss": 0.8109, "step": 12104 }, { "epoch": 0.99, "grad_norm": 2.845247910493663, "learning_rate": 2.915870758190553e-09, "loss": 0.4189, "step": 12105 }, { "epoch": 0.99, "grad_norm": 3.5210622289945293, "learning_rate": 2.870842991534173e-09, "loss": 0.3681, "step": 12106 }, { "epoch": 0.99, "grad_norm": 3.3116643272956425, "learning_rate": 2.826165501884082e-09, "loss": 0.5471, "step": 12107 }, { "epoch": 0.99, "grad_norm": 5.084274314757648, "learning_rate": 2.7818382923722188e-09, "loss": 0.7128, "step": 12108 }, { "epoch": 0.99, "grad_norm": 5.071137499790908, "learning_rate": 2.737861366105543e-09, "loss": 0.831, "step": 12109 }, { "epoch": 0.99, "grad_norm": 3.2311637576709096, "learning_rate": 2.694234726166589e-09, "loss": 0.8248, "step": 12110 }, { "epoch": 0.99, "grad_norm": 3.6235213770575205, "learning_rate": 2.650958375613466e-09, "loss": 0.7375, "step": 12111 }, { "epoch": 0.99, "grad_norm": 1.0996566294527779, "learning_rate": 2.6080323174798583e-09, "loss": 0.152, "step": 12112 }, { "epoch": 0.99, "grad_norm": 4.692264412669275, "learning_rate": 2.565456554773915e-09, "loss": 0.6277, "step": 12113 }, { "epoch": 0.99, "grad_norm": 1.7889331061473897, "learning_rate": 2.5232310904810265e-09, "loss": 0.2284, "step": 12114 }, { "epoch": 0.99, "grad_norm": 4.218512258355676, "learning_rate": 2.4813559275604914e-09, "loss": 0.7349, "step": 12115 }, { "epoch": 0.99, "grad_norm": 4.812025075684706, "learning_rate": 2.439831068947185e-09, "loss": 1.0505, "step": 12116 }, { "epoch": 0.99, "grad_norm": 2.8313037967846735, "learning_rate": 2.3986565175526665e-09, "loss": 0.6363, "step": 12117 }, { "epoch": 0.99, "grad_norm": 4.111197016156909, "learning_rate": 2.357832276262961e-09, "loss": 0.6512, "step": 12118 }, { "epoch": 0.99, "grad_norm": 3.696129752437114, "learning_rate": 2.3173583479391137e-09, "loss": 0.5707, "step": 12119 }, { "epoch": 0.99, "grad_norm": 3.2821224263699524, "learning_rate": 2.2772347354182987e-09, "loss": 0.4001, "step": 12120 }, { "epoch": 0.99, "grad_norm": 3.429749999447198, "learning_rate": 2.2374614415132666e-09, "loss": 1.0118, "step": 12121 }, { "epoch": 0.99, "grad_norm": 5.773818785295129, "learning_rate": 2.198038469011787e-09, "loss": 1.1201, "step": 12122 }, { "epoch": 0.99, "grad_norm": 1.1309515054933859, "learning_rate": 2.1589658206772058e-09, "loss": 0.1553, "step": 12123 }, { "epoch": 0.99, "grad_norm": 3.181621946353914, "learning_rate": 2.1202434992484423e-09, "loss": 0.6917, "step": 12124 }, { "epoch": 0.99, "grad_norm": 5.798752576064015, "learning_rate": 2.081871507439992e-09, "loss": 1.1969, "step": 12125 }, { "epoch": 0.99, "grad_norm": 3.898025030454768, "learning_rate": 2.0438498479413705e-09, "loss": 0.5628, "step": 12126 }, { "epoch": 0.99, "grad_norm": 4.041043124480218, "learning_rate": 2.0061785234176677e-09, "loss": 0.6858, "step": 12127 }, { "epoch": 0.99, "grad_norm": 4.066576743807657, "learning_rate": 1.9688575365095497e-09, "loss": 0.8998, "step": 12128 }, { "epoch": 0.99, "grad_norm": 3.3224466226674085, "learning_rate": 1.9318868898327015e-09, "loss": 0.4893, "step": 12129 }, { "epoch": 0.99, "grad_norm": 4.3158964199414225, "learning_rate": 1.8952665859789387e-09, "loss": 1.0153, "step": 12130 }, { "epoch": 0.99, "grad_norm": 3.705210911064366, "learning_rate": 1.8589966275156523e-09, "loss": 0.7379, "step": 12131 }, { "epoch": 0.99, "grad_norm": 3.579219121921822, "learning_rate": 1.8230770169841427e-09, "loss": 0.7464, "step": 12132 }, { "epoch": 0.99, "grad_norm": 3.598790107251219, "learning_rate": 1.787507756903506e-09, "loss": 0.3411, "step": 12133 }, { "epoch": 0.99, "grad_norm": 3.074528102971161, "learning_rate": 1.7522888497656376e-09, "loss": 0.6873, "step": 12134 }, { "epoch": 0.99, "grad_norm": 2.343070224980005, "learning_rate": 1.7174202980402287e-09, "loss": 0.483, "step": 12135 }, { "epoch": 0.99, "grad_norm": 4.61630229921253, "learning_rate": 1.6829021041708805e-09, "loss": 0.8581, "step": 12136 }, { "epoch": 0.99, "grad_norm": 5.0913027516957134, "learning_rate": 1.6487342705773234e-09, "loss": 1.0862, "step": 12137 }, { "epoch": 0.99, "grad_norm": 4.793490104705858, "learning_rate": 1.6149167996548643e-09, "loss": 0.9786, "step": 12138 }, { "epoch": 0.99, "grad_norm": 4.979654398550657, "learning_rate": 1.5814496937732737e-09, "loss": 1.2805, "step": 12139 }, { "epoch": 0.99, "grad_norm": 6.044335416386995, "learning_rate": 1.5483329552790082e-09, "loss": 1.0259, "step": 12140 }, { "epoch": 0.99, "grad_norm": 4.232417148180656, "learning_rate": 1.5155665864935442e-09, "loss": 0.9337, "step": 12141 }, { "epoch": 0.99, "grad_norm": 3.1515921609154787, "learning_rate": 1.4831505897128229e-09, "loss": 0.5646, "step": 12142 }, { "epoch": 0.99, "grad_norm": 3.4521180420535695, "learning_rate": 1.4510849672100258e-09, "loss": 0.3731, "step": 12143 }, { "epoch": 0.99, "grad_norm": 2.650689960042224, "learning_rate": 1.4193697212322444e-09, "loss": 0.5591, "step": 12144 }, { "epoch": 0.99, "grad_norm": 3.795854212893908, "learning_rate": 1.3880048540032554e-09, "loss": 0.5542, "step": 12145 }, { "epoch": 0.99, "grad_norm": 4.290467557286918, "learning_rate": 1.3569903677207453e-09, "loss": 1.0103, "step": 12146 }, { "epoch": 0.99, "grad_norm": 2.233485159890757, "learning_rate": 1.3263262645585307e-09, "loss": 0.3186, "step": 12147 }, { "epoch": 0.99, "grad_norm": 3.040748043743469, "learning_rate": 1.2960125466671137e-09, "loss": 0.4931, "step": 12148 }, { "epoch": 0.99, "grad_norm": 4.1337209065940765, "learning_rate": 1.266049216170906e-09, "loss": 0.9532, "step": 12149 }, { "epoch": 0.99, "grad_norm": 4.040915504923403, "learning_rate": 1.2364362751698944e-09, "loss": 0.9432, "step": 12150 }, { "epoch": 0.99, "grad_norm": 3.61108572071553, "learning_rate": 1.2071737257401962e-09, "loss": 0.6336, "step": 12151 }, { "epoch": 0.99, "grad_norm": 4.652113726957336, "learning_rate": 1.1782615699323929e-09, "loss": 0.5952, "step": 12152 }, { "epoch": 0.99, "grad_norm": 3.98305392012479, "learning_rate": 1.149699809773752e-09, "loss": 0.5316, "step": 12153 }, { "epoch": 0.99, "grad_norm": 4.605235076813467, "learning_rate": 1.1214884472660059e-09, "loss": 0.6964, "step": 12154 }, { "epoch": 0.99, "grad_norm": 4.000185190848004, "learning_rate": 1.0936274843864615e-09, "loss": 0.992, "step": 12155 }, { "epoch": 0.99, "grad_norm": 5.933712871831323, "learning_rate": 1.0661169230891111e-09, "loss": 0.8956, "step": 12156 }, { "epoch": 0.99, "grad_norm": 3.976258849223066, "learning_rate": 1.038956765300747e-09, "loss": 1.0503, "step": 12157 }, { "epoch": 0.99, "grad_norm": 2.8920392286889363, "learning_rate": 1.012147012926512e-09, "loss": 0.361, "step": 12158 }, { "epoch": 0.99, "grad_norm": 0.9254614358134056, "learning_rate": 9.856876678443484e-10, "loss": 0.1275, "step": 12159 }, { "epoch": 0.99, "grad_norm": 6.667265819278876, "learning_rate": 9.595787319105488e-10, "loss": 0.814, "step": 12160 }, { "epoch": 0.99, "grad_norm": 6.063537108631886, "learning_rate": 9.338202069536506e-10, "loss": 1.0902, "step": 12161 }, { "epoch": 0.99, "grad_norm": 5.345372693382865, "learning_rate": 9.084120947805419e-10, "loss": 0.9118, "step": 12162 }, { "epoch": 0.99, "grad_norm": 5.53785749891009, "learning_rate": 8.833543971714653e-10, "loss": 0.6818, "step": 12163 }, { "epoch": 0.99, "grad_norm": 5.580761358394674, "learning_rate": 8.586471158827936e-10, "loss": 1.1031, "step": 12164 }, { "epoch": 0.99, "grad_norm": 2.9874127649923854, "learning_rate": 8.342902526470298e-10, "loss": 0.7016, "step": 12165 }, { "epoch": 0.99, "grad_norm": 2.721465501571412, "learning_rate": 8.102838091705867e-10, "loss": 0.4219, "step": 12166 }, { "epoch": 0.99, "grad_norm": 3.6672290208821887, "learning_rate": 7.866277871371175e-10, "loss": 1.019, "step": 12167 }, { "epoch": 0.99, "grad_norm": 3.720348100268207, "learning_rate": 7.633221882041852e-10, "loss": 0.903, "step": 12168 }, { "epoch": 0.99, "grad_norm": 4.33209776877341, "learning_rate": 7.403670140054831e-10, "loss": 0.9556, "step": 12169 }, { "epoch": 0.99, "grad_norm": 1.9504423045579515, "learning_rate": 7.177622661508343e-10, "loss": 0.3021, "step": 12170 }, { "epoch": 0.99, "grad_norm": 6.979290745231007, "learning_rate": 6.955079462234171e-10, "loss": 1.5872, "step": 12171 }, { "epoch": 0.99, "grad_norm": 3.9990523954197768, "learning_rate": 6.73604055784205e-10, "loss": 0.449, "step": 12172 }, { "epoch": 0.99, "grad_norm": 4.5592680367105105, "learning_rate": 6.520505963680813e-10, "loss": 0.7497, "step": 12173 }, { "epoch": 1.0, "grad_norm": 3.9589191050959562, "learning_rate": 6.308475694860594e-10, "loss": 0.6861, "step": 12174 }, { "epoch": 1.0, "grad_norm": 3.6985531257099646, "learning_rate": 6.099949766241731e-10, "loss": 0.5641, "step": 12175 }, { "epoch": 1.0, "grad_norm": 5.523605378907456, "learning_rate": 5.894928192440308e-10, "loss": 0.9872, "step": 12176 }, { "epoch": 1.0, "grad_norm": 3.1178306652394356, "learning_rate": 5.693410987833714e-10, "loss": 0.7559, "step": 12177 }, { "epoch": 1.0, "grad_norm": 3.862073416113283, "learning_rate": 5.495398166538435e-10, "loss": 0.6149, "step": 12178 }, { "epoch": 1.0, "grad_norm": 5.5432532289577665, "learning_rate": 5.30088974244336e-10, "loss": 0.9842, "step": 12179 }, { "epoch": 1.0, "grad_norm": 2.801296327518294, "learning_rate": 5.109885729176478e-10, "loss": 0.5074, "step": 12180 }, { "epoch": 1.0, "grad_norm": 5.111438497992996, "learning_rate": 4.922386140127078e-10, "loss": 1.0625, "step": 12181 }, { "epoch": 1.0, "grad_norm": 3.462694911081913, "learning_rate": 4.738390988440201e-10, "loss": 0.8326, "step": 12182 }, { "epoch": 1.0, "grad_norm": 3.6998639465625174, "learning_rate": 4.5579002870110854e-10, "loss": 0.6114, "step": 12183 }, { "epoch": 1.0, "grad_norm": 6.221654366729723, "learning_rate": 4.380914048490725e-10, "loss": 0.9554, "step": 12184 }, { "epoch": 1.0, "grad_norm": 1.3629454830687455, "learning_rate": 4.207432285291413e-10, "loss": 0.1495, "step": 12185 }, { "epoch": 1.0, "grad_norm": 3.8014041633328066, "learning_rate": 4.0374550095645394e-10, "loss": 0.7206, "step": 12186 }, { "epoch": 1.0, "grad_norm": 4.268004847733571, "learning_rate": 3.8709822332339e-10, "loss": 0.6349, "step": 12187 }, { "epoch": 1.0, "grad_norm": 5.32831186500912, "learning_rate": 3.708013967956836e-10, "loss": 0.9325, "step": 12188 }, { "epoch": 1.0, "grad_norm": 2.2594682309992455, "learning_rate": 3.5485502251686457e-10, "loss": 0.326, "step": 12189 }, { "epoch": 1.0, "grad_norm": 1.1085646358781025, "learning_rate": 3.3925910160381713e-10, "loss": 0.1365, "step": 12190 }, { "epoch": 1.0, "grad_norm": 4.098294810681433, "learning_rate": 3.2401363515011106e-10, "loss": 0.7592, "step": 12191 }, { "epoch": 1.0, "grad_norm": 3.3957241265330365, "learning_rate": 3.091186242248911e-10, "loss": 0.6154, "step": 12192 }, { "epoch": 1.0, "grad_norm": 4.451104193699946, "learning_rate": 2.9457406987121184e-10, "loss": 0.788, "step": 12193 }, { "epoch": 1.0, "grad_norm": 3.835245723800428, "learning_rate": 2.8037997310936814e-10, "loss": 0.5518, "step": 12194 }, { "epoch": 1.0, "grad_norm": 4.925595085825402, "learning_rate": 2.6653633493411993e-10, "loss": 0.9264, "step": 12195 }, { "epoch": 1.0, "grad_norm": 5.9349646188925025, "learning_rate": 2.530431563152469e-10, "loss": 0.8856, "step": 12196 }, { "epoch": 1.0, "grad_norm": 3.2103321274378227, "learning_rate": 2.3990043819976937e-10, "loss": 0.5727, "step": 12197 }, { "epoch": 1.0, "grad_norm": 5.350349197288943, "learning_rate": 2.2710818150750713e-10, "loss": 0.981, "step": 12198 }, { "epoch": 1.0, "grad_norm": 3.35746844741046, "learning_rate": 2.1466638713663057e-10, "loss": 0.764, "step": 12199 }, { "epoch": 1.0, "grad_norm": 4.045052932146058, "learning_rate": 2.0257505595810966e-10, "loss": 0.7412, "step": 12200 }, { "epoch": 1.0, "grad_norm": 2.933136007973078, "learning_rate": 1.908341888195997e-10, "loss": 0.3473, "step": 12201 }, { "epoch": 1.0, "grad_norm": 4.204389975478312, "learning_rate": 1.7944378654488616e-10, "loss": 0.9525, "step": 12202 }, { "epoch": 1.0, "grad_norm": 5.066516739160856, "learning_rate": 1.6840384993166425e-10, "loss": 0.7245, "step": 12203 }, { "epoch": 1.0, "grad_norm": 4.003356870050669, "learning_rate": 1.5771437975375948e-10, "loss": 0.8505, "step": 12204 }, { "epoch": 1.0, "grad_norm": 3.6243068643927847, "learning_rate": 1.4737537676112745e-10, "loss": 0.561, "step": 12205 }, { "epoch": 1.0, "grad_norm": 4.980907710831678, "learning_rate": 1.373868416776336e-10, "loss": 0.7577, "step": 12206 }, { "epoch": 1.0, "grad_norm": 3.7313240058721955, "learning_rate": 1.277487752043838e-10, "loss": 0.6469, "step": 12207 }, { "epoch": 1.0, "grad_norm": 3.0864475947641568, "learning_rate": 1.1846117801583846e-10, "loss": 0.5824, "step": 12208 }, { "epoch": 1.0, "grad_norm": 4.742322727664848, "learning_rate": 1.0952405076425365e-10, "loss": 0.9012, "step": 12209 }, { "epoch": 1.0, "grad_norm": 4.228041571087677, "learning_rate": 1.0093739407468495e-10, "loss": 0.4204, "step": 12210 }, { "epoch": 1.0, "grad_norm": 3.0323303040850136, "learning_rate": 9.270120855053855e-11, "loss": 0.6893, "step": 12211 }, { "epoch": 1.0, "grad_norm": 2.983008160472688, "learning_rate": 8.48154947680202e-11, "loss": 0.6281, "step": 12212 }, { "epoch": 1.0, "grad_norm": 4.856370516222304, "learning_rate": 7.728025328002098e-11, "loss": 0.6949, "step": 12213 }, { "epoch": 1.0, "grad_norm": 3.861057508428135, "learning_rate": 7.009548461500704e-11, "loss": 0.5709, "step": 12214 }, { "epoch": 1.0, "grad_norm": 2.8994326876419683, "learning_rate": 6.326118927646452e-11, "loss": 0.3508, "step": 12215 }, { "epoch": 1.0, "grad_norm": 5.741387773543702, "learning_rate": 5.677736774400977e-11, "loss": 0.845, "step": 12216 }, { "epoch": 1.0, "grad_norm": 3.373884783945141, "learning_rate": 5.0644020471168894e-11, "loss": 0.5915, "step": 12217 }, { "epoch": 1.0, "grad_norm": 5.499579017923998, "learning_rate": 4.4861147888708436e-11, "loss": 1.2168, "step": 12218 }, { "epoch": 1.0, "grad_norm": 3.0255508426308597, "learning_rate": 3.942875040130467e-11, "loss": 0.4464, "step": 12219 }, { "epoch": 1.0, "grad_norm": 5.220051245827825, "learning_rate": 3.4346828389764106e-11, "loss": 0.8531, "step": 12220 }, { "epoch": 1.0, "grad_norm": 4.085008211334056, "learning_rate": 2.961538221102345e-11, "loss": 0.5081, "step": 12221 }, { "epoch": 1.0, "grad_norm": 3.9561086440767137, "learning_rate": 2.523441219648426e-11, "loss": 0.7413, "step": 12222 }, { "epoch": 1.0, "grad_norm": 3.4472799777661978, "learning_rate": 2.1203918652568102e-11, "loss": 0.5137, "step": 12223 }, { "epoch": 1.0, "grad_norm": 5.6581789617909575, "learning_rate": 1.7523901862381844e-11, "loss": 0.9722, "step": 12224 }, { "epoch": 1.0, "grad_norm": 5.314810540569065, "learning_rate": 1.4194362084052338e-11, "loss": 0.9416, "step": 12225 }, { "epoch": 1.0, "grad_norm": 3.561922239934296, "learning_rate": 1.121529955017131e-11, "loss": 0.4867, "step": 12226 }, { "epoch": 1.0, "grad_norm": 4.230862525679776, "learning_rate": 8.586714470570912e-12, "loss": 0.717, "step": 12227 }, { "epoch": 1.0, "grad_norm": 3.8264697118005886, "learning_rate": 6.308607028993052e-12, "loss": 0.8789, "step": 12228 }, { "epoch": 1.0, "grad_norm": 3.792263429259282, "learning_rate": 4.3809773847547365e-12, "loss": 0.7892, "step": 12229 }, { "epoch": 1.0, "grad_norm": 5.202397881453768, "learning_rate": 2.803825673858285e-12, "loss": 1.2797, "step": 12230 }, { "epoch": 1.0, "grad_norm": 3.416111011046913, "learning_rate": 1.5771520062157764e-12, "loss": 0.3239, "step": 12231 }, { "epoch": 1.0, "grad_norm": 4.859482561826479, "learning_rate": 7.009564673143843e-13, "loss": 1.079, "step": 12232 }, { "epoch": 1.0, "grad_norm": 4.819431466634126, "learning_rate": 1.7523911988170939e-13, "loss": 0.7518, "step": 12233 }, { "epoch": 1.0, "grad_norm": 6.337511901978712, "learning_rate": 0.0, "loss": 1.3796, "step": 12234 }, { "epoch": 1.0, "step": 12234, "total_flos": 1636528356546560.0, "train_loss": 0.783344195976315, "train_runtime": 55679.5489, "train_samples_per_second": 1.758, "train_steps_per_second": 0.22 } ], "logging_steps": 1.0, "max_steps": 12234, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "total_flos": 1636528356546560.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }