diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,85668 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9999591319628918, + "eval_steps": 500, + "global_step": 12234, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "grad_norm": 10.763204990230978, + "learning_rate": 2.7173913043478262e-08, + "loss": 1.9124, + "step": 1 + }, + { + "epoch": 0.0, + "grad_norm": 10.252056662172615, + "learning_rate": 5.4347826086956524e-08, + "loss": 1.2475, + "step": 2 + }, + { + "epoch": 0.0, + "grad_norm": 12.5031240421653, + "learning_rate": 8.152173913043479e-08, + "loss": 1.8097, + "step": 3 + }, + { + "epoch": 0.0, + "grad_norm": 11.19022777352965, + "learning_rate": 1.0869565217391305e-07, + "loss": 1.7408, + "step": 4 + }, + { + "epoch": 0.0, + "grad_norm": 8.74911389200089, + "learning_rate": 1.3586956521739132e-07, + "loss": 1.3384, + "step": 5 + }, + { + "epoch": 0.0, + "grad_norm": 16.414301597091637, + "learning_rate": 1.6304347826086958e-07, + "loss": 2.1608, + "step": 6 + }, + { + "epoch": 0.0, + "grad_norm": 9.441165175742352, + "learning_rate": 1.9021739130434786e-07, + "loss": 1.8799, + "step": 7 + }, + { + "epoch": 0.0, + "grad_norm": 9.77584594887577, + "learning_rate": 2.173913043478261e-07, + "loss": 1.8447, + "step": 8 + }, + { + "epoch": 0.0, + "grad_norm": 10.111184232798522, + "learning_rate": 2.445652173913044e-07, + "loss": 1.4988, + "step": 9 + }, + { + "epoch": 0.0, + "grad_norm": 9.567135461407071, + "learning_rate": 2.7173913043478264e-07, + "loss": 1.5362, + "step": 10 + }, + { + "epoch": 0.0, + "grad_norm": 10.00231116510922, + "learning_rate": 2.989130434782609e-07, + "loss": 1.2534, + "step": 11 + }, + { + "epoch": 0.0, + "grad_norm": 7.806732616267708, + "learning_rate": 3.2608695652173915e-07, + "loss": 1.4756, + "step": 12 + }, + { + "epoch": 0.0, + "grad_norm": 10.617002504828708, + "learning_rate": 3.532608695652174e-07, + "loss": 1.7337, + "step": 13 + }, + { + "epoch": 0.0, + "grad_norm": 13.54765846286333, + "learning_rate": 3.804347826086957e-07, + "loss": 2.1608, + "step": 14 + }, + { + "epoch": 0.0, + "grad_norm": 11.123744709657245, + "learning_rate": 4.0760869565217393e-07, + "loss": 1.8026, + "step": 15 + }, + { + "epoch": 0.0, + "grad_norm": 7.142452879045726, + "learning_rate": 4.347826086956522e-07, + "loss": 1.38, + "step": 16 + }, + { + "epoch": 0.0, + "grad_norm": 10.641416240703439, + "learning_rate": 4.6195652173913045e-07, + "loss": 1.439, + "step": 17 + }, + { + "epoch": 0.0, + "grad_norm": 9.110065588139014, + "learning_rate": 4.891304347826088e-07, + "loss": 1.8888, + "step": 18 + }, + { + "epoch": 0.0, + "grad_norm": 10.116752917147823, + "learning_rate": 5.16304347826087e-07, + "loss": 1.4248, + "step": 19 + }, + { + "epoch": 0.0, + "grad_norm": 10.439796920171236, + "learning_rate": 5.434782608695653e-07, + "loss": 1.5432, + "step": 20 + }, + { + "epoch": 0.0, + "grad_norm": 8.992746653280529, + "learning_rate": 5.706521739130435e-07, + "loss": 1.7672, + "step": 21 + }, + { + "epoch": 0.0, + "grad_norm": 11.46654871784486, + "learning_rate": 5.978260869565218e-07, + "loss": 1.66, + "step": 22 + }, + { + "epoch": 0.0, + "grad_norm": 10.103379665707928, + "learning_rate": 6.25e-07, + "loss": 1.5059, + "step": 23 + }, + { + "epoch": 0.0, + "grad_norm": 8.922521662725512, + "learning_rate": 6.521739130434783e-07, + "loss": 1.3644, + "step": 24 + }, + { + "epoch": 0.0, + "grad_norm": 9.482787698784223, + "learning_rate": 6.793478260869566e-07, + "loss": 1.7136, + "step": 25 + }, + { + "epoch": 0.0, + "grad_norm": 10.131149963147784, + "learning_rate": 7.065217391304348e-07, + "loss": 1.5865, + "step": 26 + }, + { + "epoch": 0.0, + "grad_norm": 7.553296695300296, + "learning_rate": 7.336956521739132e-07, + "loss": 1.0809, + "step": 27 + }, + { + "epoch": 0.0, + "grad_norm": 7.447837089466901, + "learning_rate": 7.608695652173914e-07, + "loss": 1.3542, + "step": 28 + }, + { + "epoch": 0.0, + "grad_norm": 6.646186450438408, + "learning_rate": 7.880434782608697e-07, + "loss": 1.3759, + "step": 29 + }, + { + "epoch": 0.0, + "grad_norm": 11.363825771345514, + "learning_rate": 8.152173913043479e-07, + "loss": 1.3261, + "step": 30 + }, + { + "epoch": 0.0, + "grad_norm": 6.707159536720378, + "learning_rate": 8.423913043478261e-07, + "loss": 1.2247, + "step": 31 + }, + { + "epoch": 0.0, + "grad_norm": 7.5094610599711125, + "learning_rate": 8.695652173913044e-07, + "loss": 1.4841, + "step": 32 + }, + { + "epoch": 0.0, + "grad_norm": 7.783281889567137, + "learning_rate": 8.967391304347826e-07, + "loss": 1.5589, + "step": 33 + }, + { + "epoch": 0.0, + "grad_norm": 10.324049872893895, + "learning_rate": 9.239130434782609e-07, + "loss": 1.402, + "step": 34 + }, + { + "epoch": 0.0, + "grad_norm": 8.307774271557626, + "learning_rate": 9.510869565217393e-07, + "loss": 1.4388, + "step": 35 + }, + { + "epoch": 0.0, + "grad_norm": 8.48905313529863, + "learning_rate": 9.782608695652175e-07, + "loss": 1.5702, + "step": 36 + }, + { + "epoch": 0.0, + "grad_norm": 8.834200227156316, + "learning_rate": 1.0054347826086958e-06, + "loss": 1.3819, + "step": 37 + }, + { + "epoch": 0.0, + "grad_norm": 7.867659654983689, + "learning_rate": 1.032608695652174e-06, + "loss": 1.5146, + "step": 38 + }, + { + "epoch": 0.0, + "grad_norm": 7.104395892275559, + "learning_rate": 1.0597826086956523e-06, + "loss": 1.2098, + "step": 39 + }, + { + "epoch": 0.0, + "grad_norm": 8.485504914038025, + "learning_rate": 1.0869565217391306e-06, + "loss": 1.3955, + "step": 40 + }, + { + "epoch": 0.0, + "grad_norm": 10.13078856345608, + "learning_rate": 1.1141304347826088e-06, + "loss": 1.4476, + "step": 41 + }, + { + "epoch": 0.0, + "grad_norm": 8.66501688643045, + "learning_rate": 1.141304347826087e-06, + "loss": 1.1798, + "step": 42 + }, + { + "epoch": 0.0, + "grad_norm": 7.056438781274291, + "learning_rate": 1.1684782608695653e-06, + "loss": 1.1171, + "step": 43 + }, + { + "epoch": 0.0, + "grad_norm": 9.524989559260273, + "learning_rate": 1.1956521739130436e-06, + "loss": 1.2799, + "step": 44 + }, + { + "epoch": 0.0, + "grad_norm": 5.207602666230073, + "learning_rate": 1.2228260869565218e-06, + "loss": 1.1379, + "step": 45 + }, + { + "epoch": 0.0, + "grad_norm": 10.741407362172229, + "learning_rate": 1.25e-06, + "loss": 1.5437, + "step": 46 + }, + { + "epoch": 0.0, + "grad_norm": 5.112828564926632, + "learning_rate": 1.2771739130434786e-06, + "loss": 0.9981, + "step": 47 + }, + { + "epoch": 0.0, + "grad_norm": 5.503495238241442, + "learning_rate": 1.3043478260869566e-06, + "loss": 1.0219, + "step": 48 + }, + { + "epoch": 0.0, + "grad_norm": 8.465625542087716, + "learning_rate": 1.3315217391304349e-06, + "loss": 1.4285, + "step": 49 + }, + { + "epoch": 0.0, + "grad_norm": 7.756922552940579, + "learning_rate": 1.3586956521739131e-06, + "loss": 1.298, + "step": 50 + }, + { + "epoch": 0.0, + "grad_norm": 6.191522642983566, + "learning_rate": 1.3858695652173914e-06, + "loss": 1.2136, + "step": 51 + }, + { + "epoch": 0.0, + "grad_norm": 7.345904517953232, + "learning_rate": 1.4130434782608697e-06, + "loss": 1.2073, + "step": 52 + }, + { + "epoch": 0.0, + "grad_norm": 6.782042045608577, + "learning_rate": 1.440217391304348e-06, + "loss": 1.197, + "step": 53 + }, + { + "epoch": 0.0, + "grad_norm": 7.103653842504592, + "learning_rate": 1.4673913043478264e-06, + "loss": 1.1927, + "step": 54 + }, + { + "epoch": 0.0, + "grad_norm": 9.288565917912903, + "learning_rate": 1.4945652173913044e-06, + "loss": 1.728, + "step": 55 + }, + { + "epoch": 0.0, + "grad_norm": 6.496033995601524, + "learning_rate": 1.521739130434783e-06, + "loss": 1.1772, + "step": 56 + }, + { + "epoch": 0.0, + "grad_norm": 8.70760468187945, + "learning_rate": 1.548913043478261e-06, + "loss": 1.0784, + "step": 57 + }, + { + "epoch": 0.0, + "grad_norm": 5.164233229137625, + "learning_rate": 1.5760869565217394e-06, + "loss": 0.8166, + "step": 58 + }, + { + "epoch": 0.0, + "grad_norm": 6.153545718589251, + "learning_rate": 1.6032608695652175e-06, + "loss": 1.0213, + "step": 59 + }, + { + "epoch": 0.0, + "grad_norm": 9.224145677156148, + "learning_rate": 1.6304347826086957e-06, + "loss": 1.2201, + "step": 60 + }, + { + "epoch": 0.0, + "grad_norm": 5.929420117992498, + "learning_rate": 1.657608695652174e-06, + "loss": 1.0981, + "step": 61 + }, + { + "epoch": 0.01, + "grad_norm": 6.832090681262447, + "learning_rate": 1.6847826086956522e-06, + "loss": 1.1829, + "step": 62 + }, + { + "epoch": 0.01, + "grad_norm": 7.827788334408938, + "learning_rate": 1.7119565217391307e-06, + "loss": 1.1005, + "step": 63 + }, + { + "epoch": 0.01, + "grad_norm": 7.5134565802844415, + "learning_rate": 1.7391304347826088e-06, + "loss": 0.8607, + "step": 64 + }, + { + "epoch": 0.01, + "grad_norm": 6.975177473268082, + "learning_rate": 1.7663043478260872e-06, + "loss": 1.0803, + "step": 65 + }, + { + "epoch": 0.01, + "grad_norm": 8.425166183961684, + "learning_rate": 1.7934782608695653e-06, + "loss": 1.1596, + "step": 66 + }, + { + "epoch": 0.01, + "grad_norm": 8.723229692609811, + "learning_rate": 1.8206521739130437e-06, + "loss": 1.2265, + "step": 67 + }, + { + "epoch": 0.01, + "grad_norm": 6.7467229371397694, + "learning_rate": 1.8478260869565218e-06, + "loss": 0.7996, + "step": 68 + }, + { + "epoch": 0.01, + "grad_norm": 3.146784934298589, + "learning_rate": 1.8750000000000003e-06, + "loss": 0.3544, + "step": 69 + }, + { + "epoch": 0.01, + "grad_norm": 10.983495052283711, + "learning_rate": 1.9021739130434785e-06, + "loss": 1.5448, + "step": 70 + }, + { + "epoch": 0.01, + "grad_norm": 6.528084910725228, + "learning_rate": 1.9293478260869568e-06, + "loss": 0.9641, + "step": 71 + }, + { + "epoch": 0.01, + "grad_norm": 4.772154961625803, + "learning_rate": 1.956521739130435e-06, + "loss": 0.6387, + "step": 72 + }, + { + "epoch": 0.01, + "grad_norm": 5.372610251132539, + "learning_rate": 1.9836956521739133e-06, + "loss": 0.7883, + "step": 73 + }, + { + "epoch": 0.01, + "grad_norm": 5.3183594429603716, + "learning_rate": 2.0108695652173916e-06, + "loss": 0.6512, + "step": 74 + }, + { + "epoch": 0.01, + "grad_norm": 5.804934889637244, + "learning_rate": 2.03804347826087e-06, + "loss": 0.8766, + "step": 75 + }, + { + "epoch": 0.01, + "grad_norm": 6.42791079405579, + "learning_rate": 2.065217391304348e-06, + "loss": 0.8556, + "step": 76 + }, + { + "epoch": 0.01, + "grad_norm": 8.519309928339856, + "learning_rate": 2.0923913043478263e-06, + "loss": 1.5128, + "step": 77 + }, + { + "epoch": 0.01, + "grad_norm": 7.595715930891681, + "learning_rate": 2.1195652173913046e-06, + "loss": 1.3523, + "step": 78 + }, + { + "epoch": 0.01, + "grad_norm": 5.709132076023693, + "learning_rate": 2.146739130434783e-06, + "loss": 0.8167, + "step": 79 + }, + { + "epoch": 0.01, + "grad_norm": 5.721035794036643, + "learning_rate": 2.173913043478261e-06, + "loss": 0.8613, + "step": 80 + }, + { + "epoch": 0.01, + "grad_norm": 9.955864530501376, + "learning_rate": 2.2010869565217394e-06, + "loss": 1.4923, + "step": 81 + }, + { + "epoch": 0.01, + "grad_norm": 5.226117141588304, + "learning_rate": 2.2282608695652176e-06, + "loss": 1.0378, + "step": 82 + }, + { + "epoch": 0.01, + "grad_norm": 6.578554182689265, + "learning_rate": 2.255434782608696e-06, + "loss": 1.2069, + "step": 83 + }, + { + "epoch": 0.01, + "grad_norm": 4.854813831700824, + "learning_rate": 2.282608695652174e-06, + "loss": 0.8649, + "step": 84 + }, + { + "epoch": 0.01, + "grad_norm": 9.154347006791653, + "learning_rate": 2.3097826086956524e-06, + "loss": 0.8471, + "step": 85 + }, + { + "epoch": 0.01, + "grad_norm": 7.017133522659245, + "learning_rate": 2.3369565217391307e-06, + "loss": 1.1938, + "step": 86 + }, + { + "epoch": 0.01, + "grad_norm": 6.026983949592643, + "learning_rate": 2.364130434782609e-06, + "loss": 0.8922, + "step": 87 + }, + { + "epoch": 0.01, + "grad_norm": 8.405981979152875, + "learning_rate": 2.391304347826087e-06, + "loss": 1.4834, + "step": 88 + }, + { + "epoch": 0.01, + "grad_norm": 6.5999817340984075, + "learning_rate": 2.4184782608695654e-06, + "loss": 1.0044, + "step": 89 + }, + { + "epoch": 0.01, + "grad_norm": 9.162604562149298, + "learning_rate": 2.4456521739130437e-06, + "loss": 1.3329, + "step": 90 + }, + { + "epoch": 0.01, + "grad_norm": 7.074359325975014, + "learning_rate": 2.472826086956522e-06, + "loss": 1.5377, + "step": 91 + }, + { + "epoch": 0.01, + "grad_norm": 9.41639145189624, + "learning_rate": 2.5e-06, + "loss": 1.0232, + "step": 92 + }, + { + "epoch": 0.01, + "grad_norm": 6.4783028052120395, + "learning_rate": 2.5271739130434785e-06, + "loss": 0.955, + "step": 93 + }, + { + "epoch": 0.01, + "grad_norm": 5.739755004143212, + "learning_rate": 2.554347826086957e-06, + "loss": 0.753, + "step": 94 + }, + { + "epoch": 0.01, + "grad_norm": 5.632644951561571, + "learning_rate": 2.581521739130435e-06, + "loss": 1.1089, + "step": 95 + }, + { + "epoch": 0.01, + "grad_norm": 8.707986315608768, + "learning_rate": 2.6086956521739132e-06, + "loss": 1.6927, + "step": 96 + }, + { + "epoch": 0.01, + "grad_norm": 5.928093931905623, + "learning_rate": 2.6358695652173915e-06, + "loss": 0.7765, + "step": 97 + }, + { + "epoch": 0.01, + "grad_norm": 8.761801614748697, + "learning_rate": 2.6630434782608698e-06, + "loss": 1.3454, + "step": 98 + }, + { + "epoch": 0.01, + "grad_norm": 8.71085682910734, + "learning_rate": 2.6902173913043476e-06, + "loss": 1.2272, + "step": 99 + }, + { + "epoch": 0.01, + "grad_norm": 5.743239729296844, + "learning_rate": 2.7173913043478263e-06, + "loss": 0.8723, + "step": 100 + }, + { + "epoch": 0.01, + "grad_norm": 6.8842539643647065, + "learning_rate": 2.7445652173913045e-06, + "loss": 1.1771, + "step": 101 + }, + { + "epoch": 0.01, + "grad_norm": 6.6027040012838265, + "learning_rate": 2.771739130434783e-06, + "loss": 1.1343, + "step": 102 + }, + { + "epoch": 0.01, + "grad_norm": 5.93038824774127, + "learning_rate": 2.7989130434782615e-06, + "loss": 0.8708, + "step": 103 + }, + { + "epoch": 0.01, + "grad_norm": 5.992654839808304, + "learning_rate": 2.8260869565217393e-06, + "loss": 0.7075, + "step": 104 + }, + { + "epoch": 0.01, + "grad_norm": 6.638844600072942, + "learning_rate": 2.8532608695652176e-06, + "loss": 0.8676, + "step": 105 + }, + { + "epoch": 0.01, + "grad_norm": 6.919202754473433, + "learning_rate": 2.880434782608696e-06, + "loss": 1.1083, + "step": 106 + }, + { + "epoch": 0.01, + "grad_norm": 5.808777508326425, + "learning_rate": 2.9076086956521745e-06, + "loss": 0.8974, + "step": 107 + }, + { + "epoch": 0.01, + "grad_norm": 3.049341085789842, + "learning_rate": 2.9347826086956528e-06, + "loss": 0.4606, + "step": 108 + }, + { + "epoch": 0.01, + "grad_norm": 5.734898712066466, + "learning_rate": 2.9619565217391306e-06, + "loss": 0.7338, + "step": 109 + }, + { + "epoch": 0.01, + "grad_norm": 7.308385711610393, + "learning_rate": 2.989130434782609e-06, + "loss": 1.2931, + "step": 110 + }, + { + "epoch": 0.01, + "grad_norm": 8.37792345474363, + "learning_rate": 3.016304347826087e-06, + "loss": 1.3357, + "step": 111 + }, + { + "epoch": 0.01, + "grad_norm": 4.207211713951564, + "learning_rate": 3.043478260869566e-06, + "loss": 0.4874, + "step": 112 + }, + { + "epoch": 0.01, + "grad_norm": 8.291411813399872, + "learning_rate": 3.0706521739130436e-06, + "loss": 1.3978, + "step": 113 + }, + { + "epoch": 0.01, + "grad_norm": 8.685180426322534, + "learning_rate": 3.097826086956522e-06, + "loss": 0.8724, + "step": 114 + }, + { + "epoch": 0.01, + "grad_norm": 6.104442131372095, + "learning_rate": 3.125e-06, + "loss": 1.0527, + "step": 115 + }, + { + "epoch": 0.01, + "grad_norm": 7.478605850436171, + "learning_rate": 3.152173913043479e-06, + "loss": 1.4413, + "step": 116 + }, + { + "epoch": 0.01, + "grad_norm": 7.531709196936938, + "learning_rate": 3.179347826086957e-06, + "loss": 1.6845, + "step": 117 + }, + { + "epoch": 0.01, + "grad_norm": 5.4759958206666175, + "learning_rate": 3.206521739130435e-06, + "loss": 1.0112, + "step": 118 + }, + { + "epoch": 0.01, + "grad_norm": 9.049621112602324, + "learning_rate": 3.233695652173913e-06, + "loss": 1.3741, + "step": 119 + }, + { + "epoch": 0.01, + "grad_norm": 3.5817184153763173, + "learning_rate": 3.2608695652173914e-06, + "loss": 0.7584, + "step": 120 + }, + { + "epoch": 0.01, + "grad_norm": 6.404142910694771, + "learning_rate": 3.28804347826087e-06, + "loss": 1.1528, + "step": 121 + }, + { + "epoch": 0.01, + "grad_norm": 3.906408150238798, + "learning_rate": 3.315217391304348e-06, + "loss": 0.512, + "step": 122 + }, + { + "epoch": 0.01, + "grad_norm": 6.488336599440538, + "learning_rate": 3.3423913043478262e-06, + "loss": 1.018, + "step": 123 + }, + { + "epoch": 0.01, + "grad_norm": 6.981920682664463, + "learning_rate": 3.3695652173913045e-06, + "loss": 0.8915, + "step": 124 + }, + { + "epoch": 0.01, + "grad_norm": 8.05588044254937, + "learning_rate": 3.396739130434783e-06, + "loss": 0.8899, + "step": 125 + }, + { + "epoch": 0.01, + "grad_norm": 5.854430041120995, + "learning_rate": 3.4239130434782614e-06, + "loss": 1.0416, + "step": 126 + }, + { + "epoch": 0.01, + "grad_norm": 10.556756679163271, + "learning_rate": 3.4510869565217393e-06, + "loss": 1.6121, + "step": 127 + }, + { + "epoch": 0.01, + "grad_norm": 7.0566625292176495, + "learning_rate": 3.4782608695652175e-06, + "loss": 1.4485, + "step": 128 + }, + { + "epoch": 0.01, + "grad_norm": 3.883945959657133, + "learning_rate": 3.5054347826086958e-06, + "loss": 0.6082, + "step": 129 + }, + { + "epoch": 0.01, + "grad_norm": 7.701849857905996, + "learning_rate": 3.5326086956521745e-06, + "loss": 1.3073, + "step": 130 + }, + { + "epoch": 0.01, + "grad_norm": 5.27587282635949, + "learning_rate": 3.5597826086956527e-06, + "loss": 0.6537, + "step": 131 + }, + { + "epoch": 0.01, + "grad_norm": 3.3323783890401635, + "learning_rate": 3.5869565217391305e-06, + "loss": 0.5372, + "step": 132 + }, + { + "epoch": 0.01, + "grad_norm": 7.843012301008066, + "learning_rate": 3.614130434782609e-06, + "loss": 1.243, + "step": 133 + }, + { + "epoch": 0.01, + "grad_norm": 7.200673205505264, + "learning_rate": 3.6413043478260875e-06, + "loss": 1.2031, + "step": 134 + }, + { + "epoch": 0.01, + "grad_norm": 4.178859040391603, + "learning_rate": 3.6684782608695657e-06, + "loss": 0.7575, + "step": 135 + }, + { + "epoch": 0.01, + "grad_norm": 8.493390921146402, + "learning_rate": 3.6956521739130436e-06, + "loss": 1.2118, + "step": 136 + }, + { + "epoch": 0.01, + "grad_norm": 6.517598073215942, + "learning_rate": 3.722826086956522e-06, + "loss": 1.0804, + "step": 137 + }, + { + "epoch": 0.01, + "grad_norm": 5.872001208747307, + "learning_rate": 3.7500000000000005e-06, + "loss": 0.8877, + "step": 138 + }, + { + "epoch": 0.01, + "grad_norm": 5.751199032083687, + "learning_rate": 3.7771739130434788e-06, + "loss": 1.0684, + "step": 139 + }, + { + "epoch": 0.01, + "grad_norm": 8.814160757707059, + "learning_rate": 3.804347826086957e-06, + "loss": 1.4598, + "step": 140 + }, + { + "epoch": 0.01, + "grad_norm": 2.7161018974223925, + "learning_rate": 3.831521739130435e-06, + "loss": 0.2583, + "step": 141 + }, + { + "epoch": 0.01, + "grad_norm": 4.528220495890977, + "learning_rate": 3.8586956521739136e-06, + "loss": 0.9408, + "step": 142 + }, + { + "epoch": 0.01, + "grad_norm": 3.7952554346766822, + "learning_rate": 3.885869565217392e-06, + "loss": 0.4543, + "step": 143 + }, + { + "epoch": 0.01, + "grad_norm": 6.48531466674868, + "learning_rate": 3.91304347826087e-06, + "loss": 0.8372, + "step": 144 + }, + { + "epoch": 0.01, + "grad_norm": 6.993848127713756, + "learning_rate": 3.9402173913043475e-06, + "loss": 1.1149, + "step": 145 + }, + { + "epoch": 0.01, + "grad_norm": 7.60199739787131, + "learning_rate": 3.967391304347827e-06, + "loss": 1.4652, + "step": 146 + }, + { + "epoch": 0.01, + "grad_norm": 2.180849053327101, + "learning_rate": 3.994565217391305e-06, + "loss": 0.3403, + "step": 147 + }, + { + "epoch": 0.01, + "grad_norm": 6.675303114346352, + "learning_rate": 4.021739130434783e-06, + "loss": 1.1365, + "step": 148 + }, + { + "epoch": 0.01, + "grad_norm": 5.618577228665263, + "learning_rate": 4.048913043478261e-06, + "loss": 1.067, + "step": 149 + }, + { + "epoch": 0.01, + "grad_norm": 6.078694781710514, + "learning_rate": 4.07608695652174e-06, + "loss": 1.0104, + "step": 150 + }, + { + "epoch": 0.01, + "grad_norm": 7.767941978563566, + "learning_rate": 4.103260869565218e-06, + "loss": 1.3224, + "step": 151 + }, + { + "epoch": 0.01, + "grad_norm": 5.885594442243772, + "learning_rate": 4.130434782608696e-06, + "loss": 0.7112, + "step": 152 + }, + { + "epoch": 0.01, + "grad_norm": 10.457674878448332, + "learning_rate": 4.157608695652174e-06, + "loss": 1.8655, + "step": 153 + }, + { + "epoch": 0.01, + "grad_norm": 9.904533607873724, + "learning_rate": 4.184782608695653e-06, + "loss": 1.4322, + "step": 154 + }, + { + "epoch": 0.01, + "grad_norm": 8.049829569078577, + "learning_rate": 4.211956521739131e-06, + "loss": 1.1242, + "step": 155 + }, + { + "epoch": 0.01, + "grad_norm": 5.5836948972226566, + "learning_rate": 4.239130434782609e-06, + "loss": 0.6328, + "step": 156 + }, + { + "epoch": 0.01, + "grad_norm": 6.511416011994131, + "learning_rate": 4.2663043478260874e-06, + "loss": 0.7455, + "step": 157 + }, + { + "epoch": 0.01, + "grad_norm": 5.415113124345645, + "learning_rate": 4.293478260869566e-06, + "loss": 0.8246, + "step": 158 + }, + { + "epoch": 0.01, + "grad_norm": 7.563836391840908, + "learning_rate": 4.320652173913044e-06, + "loss": 1.2982, + "step": 159 + }, + { + "epoch": 0.01, + "grad_norm": 5.255558685888354, + "learning_rate": 4.347826086956522e-06, + "loss": 0.922, + "step": 160 + }, + { + "epoch": 0.01, + "grad_norm": 5.8007257579161235, + "learning_rate": 4.3750000000000005e-06, + "loss": 0.6111, + "step": 161 + }, + { + "epoch": 0.01, + "grad_norm": 6.138614187559548, + "learning_rate": 4.402173913043479e-06, + "loss": 1.0949, + "step": 162 + }, + { + "epoch": 0.01, + "grad_norm": 4.9221676977686535, + "learning_rate": 4.429347826086957e-06, + "loss": 0.909, + "step": 163 + }, + { + "epoch": 0.01, + "grad_norm": 4.942921539191711, + "learning_rate": 4.456521739130435e-06, + "loss": 0.7227, + "step": 164 + }, + { + "epoch": 0.01, + "grad_norm": 5.900295438543103, + "learning_rate": 4.4836956521739135e-06, + "loss": 0.6749, + "step": 165 + }, + { + "epoch": 0.01, + "grad_norm": 4.789778017522469, + "learning_rate": 4.510869565217392e-06, + "loss": 0.9947, + "step": 166 + }, + { + "epoch": 0.01, + "grad_norm": 8.048409907918103, + "learning_rate": 4.53804347826087e-06, + "loss": 1.2421, + "step": 167 + }, + { + "epoch": 0.01, + "grad_norm": 5.367898594382595, + "learning_rate": 4.565217391304348e-06, + "loss": 0.5863, + "step": 168 + }, + { + "epoch": 0.01, + "grad_norm": 5.038484443485752, + "learning_rate": 4.5923913043478265e-06, + "loss": 0.8006, + "step": 169 + }, + { + "epoch": 0.01, + "grad_norm": 6.980449471454865, + "learning_rate": 4.619565217391305e-06, + "loss": 1.2171, + "step": 170 + }, + { + "epoch": 0.01, + "grad_norm": 6.291826025186352, + "learning_rate": 4.646739130434783e-06, + "loss": 0.969, + "step": 171 + }, + { + "epoch": 0.01, + "grad_norm": 7.18831539912512, + "learning_rate": 4.673913043478261e-06, + "loss": 0.6138, + "step": 172 + }, + { + "epoch": 0.01, + "grad_norm": 4.608150004360503, + "learning_rate": 4.7010869565217396e-06, + "loss": 0.7879, + "step": 173 + }, + { + "epoch": 0.01, + "grad_norm": 2.410996394740663, + "learning_rate": 4.728260869565218e-06, + "loss": 0.4698, + "step": 174 + }, + { + "epoch": 0.01, + "grad_norm": 6.1740205070337355, + "learning_rate": 4.755434782608696e-06, + "loss": 1.0853, + "step": 175 + }, + { + "epoch": 0.01, + "grad_norm": 5.13726770207731, + "learning_rate": 4.782608695652174e-06, + "loss": 0.8261, + "step": 176 + }, + { + "epoch": 0.01, + "grad_norm": 7.361680179795122, + "learning_rate": 4.809782608695653e-06, + "loss": 1.1934, + "step": 177 + }, + { + "epoch": 0.01, + "grad_norm": 7.67123136273917, + "learning_rate": 4.836956521739131e-06, + "loss": 1.1739, + "step": 178 + }, + { + "epoch": 0.01, + "grad_norm": 5.2229598582844945, + "learning_rate": 4.864130434782609e-06, + "loss": 1.0926, + "step": 179 + }, + { + "epoch": 0.01, + "grad_norm": 4.7046608872567734, + "learning_rate": 4.891304347826087e-06, + "loss": 0.8426, + "step": 180 + }, + { + "epoch": 0.01, + "grad_norm": 6.198449576398156, + "learning_rate": 4.918478260869566e-06, + "loss": 1.1743, + "step": 181 + }, + { + "epoch": 0.01, + "grad_norm": 8.525933837362583, + "learning_rate": 4.945652173913044e-06, + "loss": 0.8965, + "step": 182 + }, + { + "epoch": 0.01, + "grad_norm": 5.102006691809892, + "learning_rate": 4.972826086956522e-06, + "loss": 0.4219, + "step": 183 + }, + { + "epoch": 0.02, + "grad_norm": 6.716964784019454, + "learning_rate": 5e-06, + "loss": 1.3908, + "step": 184 + }, + { + "epoch": 0.02, + "grad_norm": 5.812408168037797, + "learning_rate": 5.027173913043478e-06, + "loss": 0.9765, + "step": 185 + }, + { + "epoch": 0.02, + "grad_norm": 5.7300425111293345, + "learning_rate": 5.054347826086957e-06, + "loss": 0.7135, + "step": 186 + }, + { + "epoch": 0.02, + "grad_norm": 5.29453599820772, + "learning_rate": 5.081521739130435e-06, + "loss": 0.6838, + "step": 187 + }, + { + "epoch": 0.02, + "grad_norm": 4.3872345244405215, + "learning_rate": 5.108695652173914e-06, + "loss": 0.7755, + "step": 188 + }, + { + "epoch": 0.02, + "grad_norm": 6.350885267174286, + "learning_rate": 5.135869565217392e-06, + "loss": 0.7441, + "step": 189 + }, + { + "epoch": 0.02, + "grad_norm": 5.705837768030783, + "learning_rate": 5.16304347826087e-06, + "loss": 0.9734, + "step": 190 + }, + { + "epoch": 0.02, + "grad_norm": 4.031565248567177, + "learning_rate": 5.190217391304348e-06, + "loss": 0.521, + "step": 191 + }, + { + "epoch": 0.02, + "grad_norm": 9.231182683688848, + "learning_rate": 5.2173913043478265e-06, + "loss": 1.2869, + "step": 192 + }, + { + "epoch": 0.02, + "grad_norm": 8.081212572731191, + "learning_rate": 5.244565217391306e-06, + "loss": 1.1, + "step": 193 + }, + { + "epoch": 0.02, + "grad_norm": 1.923947857423653, + "learning_rate": 5.271739130434783e-06, + "loss": 0.1881, + "step": 194 + }, + { + "epoch": 0.02, + "grad_norm": 5.654283650048554, + "learning_rate": 5.298913043478261e-06, + "loss": 0.9192, + "step": 195 + }, + { + "epoch": 0.02, + "grad_norm": 6.772455621286309, + "learning_rate": 5.3260869565217395e-06, + "loss": 0.7107, + "step": 196 + }, + { + "epoch": 0.02, + "grad_norm": 2.8759240797330703, + "learning_rate": 5.353260869565218e-06, + "loss": 0.3039, + "step": 197 + }, + { + "epoch": 0.02, + "grad_norm": 4.517060510758465, + "learning_rate": 5.380434782608695e-06, + "loss": 0.8079, + "step": 198 + }, + { + "epoch": 0.02, + "grad_norm": 6.702676517665291, + "learning_rate": 5.407608695652174e-06, + "loss": 1.3456, + "step": 199 + }, + { + "epoch": 0.02, + "grad_norm": 6.5636098368903895, + "learning_rate": 5.4347826086956525e-06, + "loss": 0.9768, + "step": 200 + }, + { + "epoch": 0.02, + "grad_norm": 6.958333903626813, + "learning_rate": 5.461956521739132e-06, + "loss": 0.7762, + "step": 201 + }, + { + "epoch": 0.02, + "grad_norm": 5.581754010001346, + "learning_rate": 5.489130434782609e-06, + "loss": 0.7291, + "step": 202 + }, + { + "epoch": 0.02, + "grad_norm": 5.923986836029897, + "learning_rate": 5.516304347826087e-06, + "loss": 0.9687, + "step": 203 + }, + { + "epoch": 0.02, + "grad_norm": 6.081234676189072, + "learning_rate": 5.543478260869566e-06, + "loss": 0.8964, + "step": 204 + }, + { + "epoch": 0.02, + "grad_norm": 2.390859736544384, + "learning_rate": 5.570652173913044e-06, + "loss": 0.3286, + "step": 205 + }, + { + "epoch": 0.02, + "grad_norm": 6.417543485088989, + "learning_rate": 5.597826086956523e-06, + "loss": 1.6375, + "step": 206 + }, + { + "epoch": 0.02, + "grad_norm": 5.992210187529088, + "learning_rate": 5.625e-06, + "loss": 0.8719, + "step": 207 + }, + { + "epoch": 0.02, + "grad_norm": 6.392475147231553, + "learning_rate": 5.652173913043479e-06, + "loss": 0.9143, + "step": 208 + }, + { + "epoch": 0.02, + "grad_norm": 6.174327311086821, + "learning_rate": 5.679347826086957e-06, + "loss": 0.6731, + "step": 209 + }, + { + "epoch": 0.02, + "grad_norm": 3.743161076840916, + "learning_rate": 5.706521739130435e-06, + "loss": 0.6002, + "step": 210 + }, + { + "epoch": 0.02, + "grad_norm": 6.316271470573057, + "learning_rate": 5.733695652173914e-06, + "loss": 0.9979, + "step": 211 + }, + { + "epoch": 0.02, + "grad_norm": 5.871275447806914, + "learning_rate": 5.760869565217392e-06, + "loss": 0.8193, + "step": 212 + }, + { + "epoch": 0.02, + "grad_norm": 7.000709896186412, + "learning_rate": 5.78804347826087e-06, + "loss": 0.8606, + "step": 213 + }, + { + "epoch": 0.02, + "grad_norm": 4.24528787539437, + "learning_rate": 5.815217391304349e-06, + "loss": 0.7, + "step": 214 + }, + { + "epoch": 0.02, + "grad_norm": 7.942273062178936, + "learning_rate": 5.842391304347826e-06, + "loss": 0.9734, + "step": 215 + }, + { + "epoch": 0.02, + "grad_norm": 6.013164920105653, + "learning_rate": 5.8695652173913055e-06, + "loss": 0.9056, + "step": 216 + }, + { + "epoch": 0.02, + "grad_norm": 6.385221534082321, + "learning_rate": 5.896739130434783e-06, + "loss": 0.6014, + "step": 217 + }, + { + "epoch": 0.02, + "grad_norm": 8.202981310804969, + "learning_rate": 5.923913043478261e-06, + "loss": 1.2524, + "step": 218 + }, + { + "epoch": 0.02, + "grad_norm": 7.673247533397, + "learning_rate": 5.95108695652174e-06, + "loss": 1.2265, + "step": 219 + }, + { + "epoch": 0.02, + "grad_norm": 5.655756570467079, + "learning_rate": 5.978260869565218e-06, + "loss": 1.0721, + "step": 220 + }, + { + "epoch": 0.02, + "grad_norm": 6.234835376562726, + "learning_rate": 6.005434782608696e-06, + "loss": 1.1082, + "step": 221 + }, + { + "epoch": 0.02, + "grad_norm": 6.974427392129233, + "learning_rate": 6.032608695652174e-06, + "loss": 1.0017, + "step": 222 + }, + { + "epoch": 0.02, + "grad_norm": 7.916925594285953, + "learning_rate": 6.0597826086956525e-06, + "loss": 1.1169, + "step": 223 + }, + { + "epoch": 0.02, + "grad_norm": 7.069754222166293, + "learning_rate": 6.086956521739132e-06, + "loss": 1.1844, + "step": 224 + }, + { + "epoch": 0.02, + "grad_norm": 7.5925567538816, + "learning_rate": 6.114130434782609e-06, + "loss": 1.3222, + "step": 225 + }, + { + "epoch": 0.02, + "grad_norm": 6.406797145497836, + "learning_rate": 6.141304347826087e-06, + "loss": 0.7072, + "step": 226 + }, + { + "epoch": 0.02, + "grad_norm": 5.960839328635132, + "learning_rate": 6.1684782608695655e-06, + "loss": 1.0292, + "step": 227 + }, + { + "epoch": 0.02, + "grad_norm": 3.2287351299289333, + "learning_rate": 6.195652173913044e-06, + "loss": 0.4187, + "step": 228 + }, + { + "epoch": 0.02, + "grad_norm": 5.6174718743187375, + "learning_rate": 6.222826086956523e-06, + "loss": 0.9114, + "step": 229 + }, + { + "epoch": 0.02, + "grad_norm": 4.422614534330612, + "learning_rate": 6.25e-06, + "loss": 0.8558, + "step": 230 + }, + { + "epoch": 0.02, + "grad_norm": 5.2228528011626585, + "learning_rate": 6.2771739130434786e-06, + "loss": 0.8636, + "step": 231 + }, + { + "epoch": 0.02, + "grad_norm": 5.503777571503567, + "learning_rate": 6.304347826086958e-06, + "loss": 0.8099, + "step": 232 + }, + { + "epoch": 0.02, + "grad_norm": 6.906673443489322, + "learning_rate": 6.331521739130435e-06, + "loss": 0.7918, + "step": 233 + }, + { + "epoch": 0.02, + "grad_norm": 7.857207855313775, + "learning_rate": 6.358695652173914e-06, + "loss": 1.8075, + "step": 234 + }, + { + "epoch": 0.02, + "grad_norm": 8.148563326223139, + "learning_rate": 6.385869565217392e-06, + "loss": 1.1465, + "step": 235 + }, + { + "epoch": 0.02, + "grad_norm": 6.4249316447648654, + "learning_rate": 6.41304347826087e-06, + "loss": 1.1498, + "step": 236 + }, + { + "epoch": 0.02, + "grad_norm": 6.644600409673028, + "learning_rate": 6.440217391304349e-06, + "loss": 0.9854, + "step": 237 + }, + { + "epoch": 0.02, + "grad_norm": 4.008422155861959, + "learning_rate": 6.467391304347826e-06, + "loss": 0.4489, + "step": 238 + }, + { + "epoch": 0.02, + "grad_norm": 7.686369840867863, + "learning_rate": 6.4945652173913055e-06, + "loss": 1.2196, + "step": 239 + }, + { + "epoch": 0.02, + "grad_norm": 7.389822453643525, + "learning_rate": 6.521739130434783e-06, + "loss": 1.5096, + "step": 240 + }, + { + "epoch": 0.02, + "grad_norm": 6.40452020190104, + "learning_rate": 6.548913043478261e-06, + "loss": 1.0686, + "step": 241 + }, + { + "epoch": 0.02, + "grad_norm": 3.8513034466412925, + "learning_rate": 6.57608695652174e-06, + "loss": 0.5596, + "step": 242 + }, + { + "epoch": 0.02, + "grad_norm": 7.0985386159169215, + "learning_rate": 6.603260869565218e-06, + "loss": 0.8382, + "step": 243 + }, + { + "epoch": 0.02, + "grad_norm": 6.486981558596191, + "learning_rate": 6.630434782608696e-06, + "loss": 1.1326, + "step": 244 + }, + { + "epoch": 0.02, + "grad_norm": 7.04189008786626, + "learning_rate": 6.657608695652175e-06, + "loss": 1.0986, + "step": 245 + }, + { + "epoch": 0.02, + "grad_norm": 7.826697566070404, + "learning_rate": 6.6847826086956524e-06, + "loss": 1.224, + "step": 246 + }, + { + "epoch": 0.02, + "grad_norm": 7.079863511831709, + "learning_rate": 6.7119565217391315e-06, + "loss": 0.9992, + "step": 247 + }, + { + "epoch": 0.02, + "grad_norm": 5.103408839897494, + "learning_rate": 6.739130434782609e-06, + "loss": 0.71, + "step": 248 + }, + { + "epoch": 0.02, + "grad_norm": 7.1228953020068, + "learning_rate": 6.766304347826087e-06, + "loss": 0.9402, + "step": 249 + }, + { + "epoch": 0.02, + "grad_norm": 9.976070490177033, + "learning_rate": 6.793478260869566e-06, + "loss": 1.4335, + "step": 250 + }, + { + "epoch": 0.02, + "grad_norm": 5.589589798539868, + "learning_rate": 6.820652173913044e-06, + "loss": 0.7684, + "step": 251 + }, + { + "epoch": 0.02, + "grad_norm": 6.862103581613901, + "learning_rate": 6.847826086956523e-06, + "loss": 1.3673, + "step": 252 + }, + { + "epoch": 0.02, + "grad_norm": 5.107711867291614, + "learning_rate": 6.875e-06, + "loss": 0.7849, + "step": 253 + }, + { + "epoch": 0.02, + "grad_norm": 5.781420070769819, + "learning_rate": 6.9021739130434785e-06, + "loss": 1.1199, + "step": 254 + }, + { + "epoch": 0.02, + "grad_norm": 7.074452327593561, + "learning_rate": 6.929347826086958e-06, + "loss": 0.9971, + "step": 255 + }, + { + "epoch": 0.02, + "grad_norm": 4.882879030652116, + "learning_rate": 6.956521739130435e-06, + "loss": 0.7556, + "step": 256 + }, + { + "epoch": 0.02, + "grad_norm": 5.997966863791385, + "learning_rate": 6.983695652173914e-06, + "loss": 1.1353, + "step": 257 + }, + { + "epoch": 0.02, + "grad_norm": 7.651101842854983, + "learning_rate": 7.0108695652173915e-06, + "loss": 1.2194, + "step": 258 + }, + { + "epoch": 0.02, + "grad_norm": 4.586971534165365, + "learning_rate": 7.03804347826087e-06, + "loss": 0.8826, + "step": 259 + }, + { + "epoch": 0.02, + "grad_norm": 8.149993046803752, + "learning_rate": 7.065217391304349e-06, + "loss": 1.2882, + "step": 260 + }, + { + "epoch": 0.02, + "grad_norm": 5.5061265280242075, + "learning_rate": 7.092391304347826e-06, + "loss": 0.625, + "step": 261 + }, + { + "epoch": 0.02, + "grad_norm": 4.478712447315659, + "learning_rate": 7.119565217391305e-06, + "loss": 0.6542, + "step": 262 + }, + { + "epoch": 0.02, + "grad_norm": 7.837884654347885, + "learning_rate": 7.146739130434784e-06, + "loss": 1.0407, + "step": 263 + }, + { + "epoch": 0.02, + "grad_norm": 3.821659738606539, + "learning_rate": 7.173913043478261e-06, + "loss": 0.4997, + "step": 264 + }, + { + "epoch": 0.02, + "grad_norm": 7.876378892024102, + "learning_rate": 7.20108695652174e-06, + "loss": 1.266, + "step": 265 + }, + { + "epoch": 0.02, + "grad_norm": 4.167619207924342, + "learning_rate": 7.228260869565218e-06, + "loss": 0.5682, + "step": 266 + }, + { + "epoch": 0.02, + "grad_norm": 6.240061974639415, + "learning_rate": 7.255434782608696e-06, + "loss": 0.9576, + "step": 267 + }, + { + "epoch": 0.02, + "grad_norm": 3.287843463237732, + "learning_rate": 7.282608695652175e-06, + "loss": 0.4013, + "step": 268 + }, + { + "epoch": 0.02, + "grad_norm": 5.593555393743031, + "learning_rate": 7.309782608695652e-06, + "loss": 0.799, + "step": 269 + }, + { + "epoch": 0.02, + "grad_norm": 7.661077596444041, + "learning_rate": 7.3369565217391315e-06, + "loss": 1.6408, + "step": 270 + }, + { + "epoch": 0.02, + "grad_norm": 5.307588962336696, + "learning_rate": 7.364130434782609e-06, + "loss": 0.749, + "step": 271 + }, + { + "epoch": 0.02, + "grad_norm": 8.37410791120305, + "learning_rate": 7.391304347826087e-06, + "loss": 0.8649, + "step": 272 + }, + { + "epoch": 0.02, + "grad_norm": 9.370566236174257, + "learning_rate": 7.418478260869566e-06, + "loss": 1.5156, + "step": 273 + }, + { + "epoch": 0.02, + "grad_norm": 5.110290671960013, + "learning_rate": 7.445652173913044e-06, + "loss": 0.9931, + "step": 274 + }, + { + "epoch": 0.02, + "grad_norm": 7.699925092422563, + "learning_rate": 7.472826086956523e-06, + "loss": 1.0836, + "step": 275 + }, + { + "epoch": 0.02, + "grad_norm": 5.236535462488547, + "learning_rate": 7.500000000000001e-06, + "loss": 0.6872, + "step": 276 + }, + { + "epoch": 0.02, + "grad_norm": 8.182701055682035, + "learning_rate": 7.5271739130434784e-06, + "loss": 1.3735, + "step": 277 + }, + { + "epoch": 0.02, + "grad_norm": 5.376412752164515, + "learning_rate": 7.5543478260869576e-06, + "loss": 0.8981, + "step": 278 + }, + { + "epoch": 0.02, + "grad_norm": 3.9740375817891804, + "learning_rate": 7.581521739130435e-06, + "loss": 0.5276, + "step": 279 + }, + { + "epoch": 0.02, + "grad_norm": 6.909735996024911, + "learning_rate": 7.608695652173914e-06, + "loss": 0.8678, + "step": 280 + }, + { + "epoch": 0.02, + "grad_norm": 5.243795496334634, + "learning_rate": 7.635869565217392e-06, + "loss": 0.8842, + "step": 281 + }, + { + "epoch": 0.02, + "grad_norm": 2.985454133185386, + "learning_rate": 7.66304347826087e-06, + "loss": 0.3469, + "step": 282 + }, + { + "epoch": 0.02, + "grad_norm": 4.329242088544714, + "learning_rate": 7.690217391304349e-06, + "loss": 0.8734, + "step": 283 + }, + { + "epoch": 0.02, + "grad_norm": 6.438245468838116, + "learning_rate": 7.717391304347827e-06, + "loss": 1.1468, + "step": 284 + }, + { + "epoch": 0.02, + "grad_norm": 4.965606920686939, + "learning_rate": 7.744565217391305e-06, + "loss": 0.9502, + "step": 285 + }, + { + "epoch": 0.02, + "grad_norm": 7.078318625119426, + "learning_rate": 7.771739130434784e-06, + "loss": 1.2333, + "step": 286 + }, + { + "epoch": 0.02, + "grad_norm": 6.360334548540601, + "learning_rate": 7.798913043478262e-06, + "loss": 0.7249, + "step": 287 + }, + { + "epoch": 0.02, + "grad_norm": 2.3335049330687503, + "learning_rate": 7.82608695652174e-06, + "loss": 0.2457, + "step": 288 + }, + { + "epoch": 0.02, + "grad_norm": 3.7362915593195667, + "learning_rate": 7.853260869565218e-06, + "loss": 0.516, + "step": 289 + }, + { + "epoch": 0.02, + "grad_norm": 2.8781325692758477, + "learning_rate": 7.880434782608695e-06, + "loss": 0.3987, + "step": 290 + }, + { + "epoch": 0.02, + "grad_norm": 3.936096915610338, + "learning_rate": 7.907608695652175e-06, + "loss": 0.6694, + "step": 291 + }, + { + "epoch": 0.02, + "grad_norm": 5.903385405984172, + "learning_rate": 7.934782608695653e-06, + "loss": 0.9925, + "step": 292 + }, + { + "epoch": 0.02, + "grad_norm": 2.4190886985795634, + "learning_rate": 7.961956521739131e-06, + "loss": 0.2049, + "step": 293 + }, + { + "epoch": 0.02, + "grad_norm": 6.404488252552992, + "learning_rate": 7.98913043478261e-06, + "loss": 0.7957, + "step": 294 + }, + { + "epoch": 0.02, + "grad_norm": 6.097551330154639, + "learning_rate": 8.016304347826088e-06, + "loss": 0.966, + "step": 295 + }, + { + "epoch": 0.02, + "grad_norm": 7.976238038990829, + "learning_rate": 8.043478260869566e-06, + "loss": 1.1818, + "step": 296 + }, + { + "epoch": 0.02, + "grad_norm": 8.421521826437415, + "learning_rate": 8.070652173913044e-06, + "loss": 1.4973, + "step": 297 + }, + { + "epoch": 0.02, + "grad_norm": 4.077754124133221, + "learning_rate": 8.097826086956523e-06, + "loss": 0.5261, + "step": 298 + }, + { + "epoch": 0.02, + "grad_norm": 6.4070308899500175, + "learning_rate": 8.125000000000001e-06, + "loss": 1.1101, + "step": 299 + }, + { + "epoch": 0.02, + "grad_norm": 5.318753575824465, + "learning_rate": 8.15217391304348e-06, + "loss": 1.104, + "step": 300 + }, + { + "epoch": 0.02, + "grad_norm": 4.284465274777319, + "learning_rate": 8.179347826086957e-06, + "loss": 0.7641, + "step": 301 + }, + { + "epoch": 0.02, + "grad_norm": 4.625176035390117, + "learning_rate": 8.206521739130436e-06, + "loss": 0.8921, + "step": 302 + }, + { + "epoch": 0.02, + "grad_norm": 6.339376984014242, + "learning_rate": 8.233695652173914e-06, + "loss": 1.2197, + "step": 303 + }, + { + "epoch": 0.02, + "grad_norm": 6.157859884255341, + "learning_rate": 8.260869565217392e-06, + "loss": 0.9333, + "step": 304 + }, + { + "epoch": 0.02, + "grad_norm": 6.3629598823237234, + "learning_rate": 8.28804347826087e-06, + "loss": 0.9097, + "step": 305 + }, + { + "epoch": 0.03, + "grad_norm": 4.868071909896863, + "learning_rate": 8.315217391304349e-06, + "loss": 0.9008, + "step": 306 + }, + { + "epoch": 0.03, + "grad_norm": 3.978604409945964, + "learning_rate": 8.342391304347827e-06, + "loss": 0.5327, + "step": 307 + }, + { + "epoch": 0.03, + "grad_norm": 5.7437093095606695, + "learning_rate": 8.369565217391305e-06, + "loss": 0.8238, + "step": 308 + }, + { + "epoch": 0.03, + "grad_norm": 3.454985051623668, + "learning_rate": 8.396739130434784e-06, + "loss": 0.6295, + "step": 309 + }, + { + "epoch": 0.03, + "grad_norm": 6.614788356863233, + "learning_rate": 8.423913043478262e-06, + "loss": 0.9974, + "step": 310 + }, + { + "epoch": 0.03, + "grad_norm": 5.608916913600715, + "learning_rate": 8.45108695652174e-06, + "loss": 1.1464, + "step": 311 + }, + { + "epoch": 0.03, + "grad_norm": 4.436932595933803, + "learning_rate": 8.478260869565218e-06, + "loss": 0.4595, + "step": 312 + }, + { + "epoch": 0.03, + "grad_norm": 4.57722024795812, + "learning_rate": 8.505434782608697e-06, + "loss": 0.5938, + "step": 313 + }, + { + "epoch": 0.03, + "grad_norm": 6.677248597091288, + "learning_rate": 8.532608695652175e-06, + "loss": 1.1063, + "step": 314 + }, + { + "epoch": 0.03, + "grad_norm": 8.034438340002367, + "learning_rate": 8.559782608695653e-06, + "loss": 1.6156, + "step": 315 + }, + { + "epoch": 0.03, + "grad_norm": 3.2062511410316326, + "learning_rate": 8.586956521739131e-06, + "loss": 0.6021, + "step": 316 + }, + { + "epoch": 0.03, + "grad_norm": 6.145723079176569, + "learning_rate": 8.61413043478261e-06, + "loss": 1.6035, + "step": 317 + }, + { + "epoch": 0.03, + "grad_norm": 5.34202953981997, + "learning_rate": 8.641304347826088e-06, + "loss": 0.8125, + "step": 318 + }, + { + "epoch": 0.03, + "grad_norm": 6.337229311201793, + "learning_rate": 8.668478260869566e-06, + "loss": 1.1929, + "step": 319 + }, + { + "epoch": 0.03, + "grad_norm": 6.225481053597104, + "learning_rate": 8.695652173913044e-06, + "loss": 1.1157, + "step": 320 + }, + { + "epoch": 0.03, + "grad_norm": 5.017710989732211, + "learning_rate": 8.722826086956523e-06, + "loss": 0.8339, + "step": 321 + }, + { + "epoch": 0.03, + "grad_norm": 7.882373233059433, + "learning_rate": 8.750000000000001e-06, + "loss": 1.262, + "step": 322 + }, + { + "epoch": 0.03, + "grad_norm": 4.066556353039955, + "learning_rate": 8.77717391304348e-06, + "loss": 0.2942, + "step": 323 + }, + { + "epoch": 0.03, + "grad_norm": 7.770860273190021, + "learning_rate": 8.804347826086957e-06, + "loss": 1.4823, + "step": 324 + }, + { + "epoch": 0.03, + "grad_norm": 6.7343550772185505, + "learning_rate": 8.831521739130436e-06, + "loss": 0.7482, + "step": 325 + }, + { + "epoch": 0.03, + "grad_norm": 5.639168375286053, + "learning_rate": 8.858695652173914e-06, + "loss": 0.9131, + "step": 326 + }, + { + "epoch": 0.03, + "grad_norm": 7.518462985960601, + "learning_rate": 8.885869565217392e-06, + "loss": 1.058, + "step": 327 + }, + { + "epoch": 0.03, + "grad_norm": 7.54762146148649, + "learning_rate": 8.91304347826087e-06, + "loss": 1.2194, + "step": 328 + }, + { + "epoch": 0.03, + "grad_norm": 7.871385299233604, + "learning_rate": 8.940217391304349e-06, + "loss": 1.0078, + "step": 329 + }, + { + "epoch": 0.03, + "grad_norm": 4.359117961957575, + "learning_rate": 8.967391304347827e-06, + "loss": 0.7769, + "step": 330 + }, + { + "epoch": 0.03, + "grad_norm": 5.0982811956203244, + "learning_rate": 8.994565217391305e-06, + "loss": 0.7377, + "step": 331 + }, + { + "epoch": 0.03, + "grad_norm": 5.830234028030018, + "learning_rate": 9.021739130434784e-06, + "loss": 0.9978, + "step": 332 + }, + { + "epoch": 0.03, + "grad_norm": 5.8181359163099655, + "learning_rate": 9.048913043478262e-06, + "loss": 0.5928, + "step": 333 + }, + { + "epoch": 0.03, + "grad_norm": 5.681560068759461, + "learning_rate": 9.07608695652174e-06, + "loss": 1.0175, + "step": 334 + }, + { + "epoch": 0.03, + "grad_norm": 6.80680243793632, + "learning_rate": 9.103260869565218e-06, + "loss": 1.1993, + "step": 335 + }, + { + "epoch": 0.03, + "grad_norm": 6.186937644327905, + "learning_rate": 9.130434782608697e-06, + "loss": 0.6584, + "step": 336 + }, + { + "epoch": 0.03, + "grad_norm": 6.560489553639137, + "learning_rate": 9.157608695652175e-06, + "loss": 1.1444, + "step": 337 + }, + { + "epoch": 0.03, + "grad_norm": 8.058229643385323, + "learning_rate": 9.184782608695653e-06, + "loss": 1.1921, + "step": 338 + }, + { + "epoch": 0.03, + "grad_norm": 4.668465254356496, + "learning_rate": 9.211956521739131e-06, + "loss": 0.7071, + "step": 339 + }, + { + "epoch": 0.03, + "grad_norm": 6.35169558250226, + "learning_rate": 9.23913043478261e-06, + "loss": 0.9809, + "step": 340 + }, + { + "epoch": 0.03, + "grad_norm": 4.837242529338085, + "learning_rate": 9.266304347826088e-06, + "loss": 0.9961, + "step": 341 + }, + { + "epoch": 0.03, + "grad_norm": 6.198233376887303, + "learning_rate": 9.293478260869566e-06, + "loss": 1.0778, + "step": 342 + }, + { + "epoch": 0.03, + "grad_norm": 2.4647916149997653, + "learning_rate": 9.320652173913044e-06, + "loss": 0.3007, + "step": 343 + }, + { + "epoch": 0.03, + "grad_norm": 6.261137583211198, + "learning_rate": 9.347826086956523e-06, + "loss": 1.3224, + "step": 344 + }, + { + "epoch": 0.03, + "grad_norm": 6.556407019061477, + "learning_rate": 9.375000000000001e-06, + "loss": 1.2377, + "step": 345 + }, + { + "epoch": 0.03, + "grad_norm": 8.46885744874352, + "learning_rate": 9.402173913043479e-06, + "loss": 1.5947, + "step": 346 + }, + { + "epoch": 0.03, + "grad_norm": 5.194630590437151, + "learning_rate": 9.429347826086957e-06, + "loss": 0.8361, + "step": 347 + }, + { + "epoch": 0.03, + "grad_norm": 5.769530302655123, + "learning_rate": 9.456521739130436e-06, + "loss": 1.2915, + "step": 348 + }, + { + "epoch": 0.03, + "grad_norm": 3.27229215981989, + "learning_rate": 9.483695652173914e-06, + "loss": 0.4573, + "step": 349 + }, + { + "epoch": 0.03, + "grad_norm": 6.0281628916208145, + "learning_rate": 9.510869565217392e-06, + "loss": 0.9685, + "step": 350 + }, + { + "epoch": 0.03, + "grad_norm": 7.614906153133044, + "learning_rate": 9.53804347826087e-06, + "loss": 2.14, + "step": 351 + }, + { + "epoch": 0.03, + "grad_norm": 3.132861967845814, + "learning_rate": 9.565217391304349e-06, + "loss": 0.4094, + "step": 352 + }, + { + "epoch": 0.03, + "grad_norm": 7.769971710510702, + "learning_rate": 9.592391304347827e-06, + "loss": 1.5559, + "step": 353 + }, + { + "epoch": 0.03, + "grad_norm": 4.214541771357837, + "learning_rate": 9.619565217391305e-06, + "loss": 0.6542, + "step": 354 + }, + { + "epoch": 0.03, + "grad_norm": 2.368108862701037, + "learning_rate": 9.646739130434783e-06, + "loss": 0.3184, + "step": 355 + }, + { + "epoch": 0.03, + "grad_norm": 9.467183830886526, + "learning_rate": 9.673913043478262e-06, + "loss": 2.0, + "step": 356 + }, + { + "epoch": 0.03, + "grad_norm": 4.148717928615864, + "learning_rate": 9.70108695652174e-06, + "loss": 0.7562, + "step": 357 + }, + { + "epoch": 0.03, + "grad_norm": 4.699142596546084, + "learning_rate": 9.728260869565218e-06, + "loss": 0.5602, + "step": 358 + }, + { + "epoch": 0.03, + "grad_norm": 5.778849120224696, + "learning_rate": 9.755434782608696e-06, + "loss": 1.285, + "step": 359 + }, + { + "epoch": 0.03, + "grad_norm": 6.325812744530062, + "learning_rate": 9.782608695652175e-06, + "loss": 1.035, + "step": 360 + }, + { + "epoch": 0.03, + "grad_norm": 6.270446437848442, + "learning_rate": 9.809782608695653e-06, + "loss": 1.1036, + "step": 361 + }, + { + "epoch": 0.03, + "grad_norm": 5.098595435383664, + "learning_rate": 9.836956521739131e-06, + "loss": 0.7118, + "step": 362 + }, + { + "epoch": 0.03, + "grad_norm": 5.120947794599988, + "learning_rate": 9.86413043478261e-06, + "loss": 1.0194, + "step": 363 + }, + { + "epoch": 0.03, + "grad_norm": 4.753519153874003, + "learning_rate": 9.891304347826088e-06, + "loss": 0.8027, + "step": 364 + }, + { + "epoch": 0.03, + "grad_norm": 5.54491484144688, + "learning_rate": 9.918478260869566e-06, + "loss": 0.9281, + "step": 365 + }, + { + "epoch": 0.03, + "grad_norm": 4.88514814649314, + "learning_rate": 9.945652173913044e-06, + "loss": 0.8455, + "step": 366 + }, + { + "epoch": 0.03, + "grad_norm": 5.574575608802646, + "learning_rate": 9.972826086956523e-06, + "loss": 1.2401, + "step": 367 + }, + { + "epoch": 0.03, + "grad_norm": 5.295452016714822, + "learning_rate": 1e-05, + "loss": 0.7394, + "step": 368 + }, + { + "epoch": 0.03, + "grad_norm": 5.827174479039339, + "learning_rate": 9.999999824760881e-06, + "loss": 0.912, + "step": 369 + }, + { + "epoch": 0.03, + "grad_norm": 3.053210078293678, + "learning_rate": 9.999999299043533e-06, + "loss": 0.455, + "step": 370 + }, + { + "epoch": 0.03, + "grad_norm": 4.67096244105842, + "learning_rate": 9.999998422847994e-06, + "loss": 0.8681, + "step": 371 + }, + { + "epoch": 0.03, + "grad_norm": 5.554637130438726, + "learning_rate": 9.999997196174327e-06, + "loss": 0.7649, + "step": 372 + }, + { + "epoch": 0.03, + "grad_norm": 3.5853043492657246, + "learning_rate": 9.999995619022615e-06, + "loss": 0.6937, + "step": 373 + }, + { + "epoch": 0.03, + "grad_norm": 4.6670712091283875, + "learning_rate": 9.999993691392973e-06, + "loss": 0.8806, + "step": 374 + }, + { + "epoch": 0.03, + "grad_norm": 5.168060014150088, + "learning_rate": 9.99999141328553e-06, + "loss": 0.8426, + "step": 375 + }, + { + "epoch": 0.03, + "grad_norm": 7.843194545607991, + "learning_rate": 9.999988784700451e-06, + "loss": 1.1695, + "step": 376 + }, + { + "epoch": 0.03, + "grad_norm": 5.168568665105547, + "learning_rate": 9.999985805637917e-06, + "loss": 0.7686, + "step": 377 + }, + { + "epoch": 0.03, + "grad_norm": 3.894165959519665, + "learning_rate": 9.999982476098138e-06, + "loss": 0.7126, + "step": 378 + }, + { + "epoch": 0.03, + "grad_norm": 9.08781693842811, + "learning_rate": 9.99997879608135e-06, + "loss": 0.9496, + "step": 379 + }, + { + "epoch": 0.03, + "grad_norm": 6.7907899593817636, + "learning_rate": 9.999974765587805e-06, + "loss": 1.0241, + "step": 380 + }, + { + "epoch": 0.03, + "grad_norm": 6.14564735190624, + "learning_rate": 9.99997038461779e-06, + "loss": 1.084, + "step": 381 + }, + { + "epoch": 0.03, + "grad_norm": 4.632103552875158, + "learning_rate": 9.99996565317161e-06, + "loss": 0.6177, + "step": 382 + }, + { + "epoch": 0.03, + "grad_norm": 7.06828195446152, + "learning_rate": 9.999960571249599e-06, + "loss": 1.2956, + "step": 383 + }, + { + "epoch": 0.03, + "grad_norm": 5.651102377791331, + "learning_rate": 9.999955138852111e-06, + "loss": 1.3542, + "step": 384 + }, + { + "epoch": 0.03, + "grad_norm": 4.042294337025638, + "learning_rate": 9.99994935597953e-06, + "loss": 0.6006, + "step": 385 + }, + { + "epoch": 0.03, + "grad_norm": 3.402994414654537, + "learning_rate": 9.999943222632258e-06, + "loss": 0.4791, + "step": 386 + }, + { + "epoch": 0.03, + "grad_norm": 4.965056543386612, + "learning_rate": 9.999936738810725e-06, + "loss": 1.0879, + "step": 387 + }, + { + "epoch": 0.03, + "grad_norm": 6.1713896321431605, + "learning_rate": 9.999929904515386e-06, + "loss": 1.053, + "step": 388 + }, + { + "epoch": 0.03, + "grad_norm": 6.145030010793328, + "learning_rate": 9.99992271974672e-06, + "loss": 0.7253, + "step": 389 + }, + { + "epoch": 0.03, + "grad_norm": 5.336075295397067, + "learning_rate": 9.999915184505233e-06, + "loss": 0.7442, + "step": 390 + }, + { + "epoch": 0.03, + "grad_norm": 5.5450996060354, + "learning_rate": 9.99990729879145e-06, + "loss": 1.277, + "step": 391 + }, + { + "epoch": 0.03, + "grad_norm": 7.636158624112354, + "learning_rate": 9.999899062605928e-06, + "loss": 1.239, + "step": 392 + }, + { + "epoch": 0.03, + "grad_norm": 5.477488726410057, + "learning_rate": 9.999890475949236e-06, + "loss": 0.9772, + "step": 393 + }, + { + "epoch": 0.03, + "grad_norm": 5.051495747823013, + "learning_rate": 9.999881538821985e-06, + "loss": 0.5469, + "step": 394 + }, + { + "epoch": 0.03, + "grad_norm": 4.300752608556988, + "learning_rate": 9.999872251224796e-06, + "loss": 0.7399, + "step": 395 + }, + { + "epoch": 0.03, + "grad_norm": 3.5526737909232016, + "learning_rate": 9.999862613158323e-06, + "loss": 0.513, + "step": 396 + }, + { + "epoch": 0.03, + "grad_norm": 4.933537207255718, + "learning_rate": 9.99985262462324e-06, + "loss": 0.8352, + "step": 397 + }, + { + "epoch": 0.03, + "grad_norm": 4.738769227354006, + "learning_rate": 9.999842285620247e-06, + "loss": 1.0434, + "step": 398 + }, + { + "epoch": 0.03, + "grad_norm": 2.762740426870926, + "learning_rate": 9.999831596150069e-06, + "loss": 0.3968, + "step": 399 + }, + { + "epoch": 0.03, + "grad_norm": 4.1264125436257375, + "learning_rate": 9.999820556213455e-06, + "loss": 0.7019, + "step": 400 + }, + { + "epoch": 0.03, + "grad_norm": 6.500334497583717, + "learning_rate": 9.99980916581118e-06, + "loss": 0.8607, + "step": 401 + }, + { + "epoch": 0.03, + "grad_norm": 4.481620748046511, + "learning_rate": 9.999797424944041e-06, + "loss": 0.8718, + "step": 402 + }, + { + "epoch": 0.03, + "grad_norm": 3.151238330741133, + "learning_rate": 9.999785333612863e-06, + "loss": 0.7545, + "step": 403 + }, + { + "epoch": 0.03, + "grad_norm": 5.976320787775345, + "learning_rate": 9.999772891818493e-06, + "loss": 1.2427, + "step": 404 + }, + { + "epoch": 0.03, + "grad_norm": 5.000150498697293, + "learning_rate": 9.999760099561802e-06, + "loss": 0.9778, + "step": 405 + }, + { + "epoch": 0.03, + "grad_norm": 3.6502724932396453, + "learning_rate": 9.999746956843685e-06, + "loss": 0.6976, + "step": 406 + }, + { + "epoch": 0.03, + "grad_norm": 5.193776525977321, + "learning_rate": 9.999733463665067e-06, + "loss": 0.8279, + "step": 407 + }, + { + "epoch": 0.03, + "grad_norm": 7.239212491469085, + "learning_rate": 9.999719620026891e-06, + "loss": 1.3266, + "step": 408 + }, + { + "epoch": 0.03, + "grad_norm": 5.596325626752294, + "learning_rate": 9.99970542593013e-06, + "loss": 1.1974, + "step": 409 + }, + { + "epoch": 0.03, + "grad_norm": 6.706543574475022, + "learning_rate": 9.999690881375777e-06, + "loss": 1.2101, + "step": 410 + }, + { + "epoch": 0.03, + "grad_norm": 4.644646579385012, + "learning_rate": 9.99967598636485e-06, + "loss": 0.9091, + "step": 411 + }, + { + "epoch": 0.03, + "grad_norm": 3.9175230022782546, + "learning_rate": 9.999660740898397e-06, + "loss": 0.498, + "step": 412 + }, + { + "epoch": 0.03, + "grad_norm": 5.173890194845122, + "learning_rate": 9.999645144977483e-06, + "loss": 0.7663, + "step": 413 + }, + { + "epoch": 0.03, + "grad_norm": 4.748467260869798, + "learning_rate": 9.999629198603205e-06, + "loss": 0.7728, + "step": 414 + }, + { + "epoch": 0.03, + "grad_norm": 5.778886530785687, + "learning_rate": 9.999612901776678e-06, + "loss": 1.2248, + "step": 415 + }, + { + "epoch": 0.03, + "grad_norm": 4.935476197785821, + "learning_rate": 9.999596254499044e-06, + "loss": 0.6129, + "step": 416 + }, + { + "epoch": 0.03, + "grad_norm": 4.02337258831855, + "learning_rate": 9.999579256771473e-06, + "loss": 0.6155, + "step": 417 + }, + { + "epoch": 0.03, + "grad_norm": 3.8062484181428986, + "learning_rate": 9.999561908595153e-06, + "loss": 0.6679, + "step": 418 + }, + { + "epoch": 0.03, + "grad_norm": 4.8954362755783265, + "learning_rate": 9.999544209971299e-06, + "loss": 0.66, + "step": 419 + }, + { + "epoch": 0.03, + "grad_norm": 4.099290865501664, + "learning_rate": 9.999526160901156e-06, + "loss": 0.8223, + "step": 420 + }, + { + "epoch": 0.03, + "grad_norm": 4.863034209422073, + "learning_rate": 9.999507761385989e-06, + "loss": 0.6422, + "step": 421 + }, + { + "epoch": 0.03, + "grad_norm": 5.083975787293613, + "learning_rate": 9.999489011427084e-06, + "loss": 0.7041, + "step": 422 + }, + { + "epoch": 0.03, + "grad_norm": 3.3178956260126076, + "learning_rate": 9.999469911025756e-06, + "loss": 0.5422, + "step": 423 + }, + { + "epoch": 0.03, + "grad_norm": 6.650247498520858, + "learning_rate": 9.999450460183347e-06, + "loss": 1.0725, + "step": 424 + }, + { + "epoch": 0.03, + "grad_norm": 5.95259074049211, + "learning_rate": 9.999430658901217e-06, + "loss": 0.9106, + "step": 425 + }, + { + "epoch": 0.03, + "grad_norm": 7.71544477110711, + "learning_rate": 9.999410507180757e-06, + "loss": 1.443, + "step": 426 + }, + { + "epoch": 0.03, + "grad_norm": 5.346969103609945, + "learning_rate": 9.999390005023377e-06, + "loss": 1.1114, + "step": 427 + }, + { + "epoch": 0.03, + "grad_norm": 6.434687806214507, + "learning_rate": 9.999369152430514e-06, + "loss": 1.0029, + "step": 428 + }, + { + "epoch": 0.04, + "grad_norm": 6.2244364829348955, + "learning_rate": 9.999347949403633e-06, + "loss": 0.8434, + "step": 429 + }, + { + "epoch": 0.04, + "grad_norm": 5.439228351059763, + "learning_rate": 9.999326395944217e-06, + "loss": 1.0175, + "step": 430 + }, + { + "epoch": 0.04, + "grad_norm": 5.899604465621711, + "learning_rate": 9.999304492053777e-06, + "loss": 0.9335, + "step": 431 + }, + { + "epoch": 0.04, + "grad_norm": 5.3167350747899755, + "learning_rate": 9.999282237733849e-06, + "loss": 1.1625, + "step": 432 + }, + { + "epoch": 0.04, + "grad_norm": 4.770326297294352, + "learning_rate": 9.999259632985996e-06, + "loss": 0.896, + "step": 433 + }, + { + "epoch": 0.04, + "grad_norm": 5.427169696584599, + "learning_rate": 9.999236677811796e-06, + "loss": 0.8969, + "step": 434 + }, + { + "epoch": 0.04, + "grad_norm": 6.297151047850789, + "learning_rate": 9.999213372212863e-06, + "loss": 0.9043, + "step": 435 + }, + { + "epoch": 0.04, + "grad_norm": 6.20610963603229, + "learning_rate": 9.99918971619083e-06, + "loss": 1.2859, + "step": 436 + }, + { + "epoch": 0.04, + "grad_norm": 4.8465537937699406, + "learning_rate": 9.999165709747353e-06, + "loss": 1.0852, + "step": 437 + }, + { + "epoch": 0.04, + "grad_norm": 4.657766614343768, + "learning_rate": 9.999141352884118e-06, + "loss": 1.0622, + "step": 438 + }, + { + "epoch": 0.04, + "grad_norm": 4.9847198110116775, + "learning_rate": 9.999116645602828e-06, + "loss": 0.9139, + "step": 439 + }, + { + "epoch": 0.04, + "grad_norm": 4.4219398024801, + "learning_rate": 9.99909158790522e-06, + "loss": 0.8118, + "step": 440 + }, + { + "epoch": 0.04, + "grad_norm": 6.208092284451877, + "learning_rate": 9.999066179793047e-06, + "loss": 1.0925, + "step": 441 + }, + { + "epoch": 0.04, + "grad_norm": 5.381473249926374, + "learning_rate": 9.99904042126809e-06, + "loss": 1.1494, + "step": 442 + }, + { + "epoch": 0.04, + "grad_norm": 5.171395550274936, + "learning_rate": 9.999014312332156e-06, + "loss": 1.1686, + "step": 443 + }, + { + "epoch": 0.04, + "grad_norm": 4.07792792749062, + "learning_rate": 9.998987852987074e-06, + "loss": 0.9116, + "step": 444 + }, + { + "epoch": 0.04, + "grad_norm": 7.609250844455039, + "learning_rate": 9.9989610432347e-06, + "loss": 1.7275, + "step": 445 + }, + { + "epoch": 0.04, + "grad_norm": 6.042050660855812, + "learning_rate": 9.998933883076912e-06, + "loss": 1.1827, + "step": 446 + }, + { + "epoch": 0.04, + "grad_norm": 4.387281639470219, + "learning_rate": 9.998906372515615e-06, + "loss": 0.9077, + "step": 447 + }, + { + "epoch": 0.04, + "grad_norm": 4.807479350295363, + "learning_rate": 9.998878511552734e-06, + "loss": 0.9013, + "step": 448 + }, + { + "epoch": 0.04, + "grad_norm": 4.673383829189851, + "learning_rate": 9.998850300190226e-06, + "loss": 0.7067, + "step": 449 + }, + { + "epoch": 0.04, + "grad_norm": 5.482918184763173, + "learning_rate": 9.99882173843007e-06, + "loss": 1.2614, + "step": 450 + }, + { + "epoch": 0.04, + "grad_norm": 4.972131746168448, + "learning_rate": 9.99879282627426e-06, + "loss": 0.911, + "step": 451 + }, + { + "epoch": 0.04, + "grad_norm": 7.2324493611028275, + "learning_rate": 9.998763563724831e-06, + "loss": 1.1946, + "step": 452 + }, + { + "epoch": 0.04, + "grad_norm": 5.36145192963037, + "learning_rate": 9.99873395078383e-06, + "loss": 1.0718, + "step": 453 + }, + { + "epoch": 0.04, + "grad_norm": 6.139122292070844, + "learning_rate": 9.998703987453334e-06, + "loss": 0.8382, + "step": 454 + }, + { + "epoch": 0.04, + "grad_norm": 6.657219504198628, + "learning_rate": 9.998673673735442e-06, + "loss": 1.268, + "step": 455 + }, + { + "epoch": 0.04, + "grad_norm": 6.009202914261718, + "learning_rate": 9.998643009632281e-06, + "loss": 0.7224, + "step": 456 + }, + { + "epoch": 0.04, + "grad_norm": 4.475889755505141, + "learning_rate": 9.998611995145997e-06, + "loss": 0.7266, + "step": 457 + }, + { + "epoch": 0.04, + "grad_norm": 1.9204812272815377, + "learning_rate": 9.99858063027877e-06, + "loss": 0.2579, + "step": 458 + }, + { + "epoch": 0.04, + "grad_norm": 5.499067475167554, + "learning_rate": 9.99854891503279e-06, + "loss": 1.1072, + "step": 459 + }, + { + "epoch": 0.04, + "grad_norm": 2.41932921975513, + "learning_rate": 9.998516849410287e-06, + "loss": 0.4482, + "step": 460 + }, + { + "epoch": 0.04, + "grad_norm": 4.714106357668273, + "learning_rate": 9.998484433413507e-06, + "loss": 0.7622, + "step": 461 + }, + { + "epoch": 0.04, + "grad_norm": 5.043566688600208, + "learning_rate": 9.998451667044721e-06, + "loss": 0.6495, + "step": 462 + }, + { + "epoch": 0.04, + "grad_norm": 5.919820905006889, + "learning_rate": 9.998418550306228e-06, + "loss": 0.8238, + "step": 463 + }, + { + "epoch": 0.04, + "grad_norm": 5.294308724711821, + "learning_rate": 9.998385083200346e-06, + "loss": 0.5751, + "step": 464 + }, + { + "epoch": 0.04, + "grad_norm": 4.301981205204581, + "learning_rate": 9.998351265729423e-06, + "loss": 0.4563, + "step": 465 + }, + { + "epoch": 0.04, + "grad_norm": 6.89628096529308, + "learning_rate": 9.99831709789583e-06, + "loss": 1.39, + "step": 466 + }, + { + "epoch": 0.04, + "grad_norm": 4.64203438591841, + "learning_rate": 9.99828257970196e-06, + "loss": 0.8958, + "step": 467 + }, + { + "epoch": 0.04, + "grad_norm": 4.151009132010121, + "learning_rate": 9.998247711150235e-06, + "loss": 0.4676, + "step": 468 + }, + { + "epoch": 0.04, + "grad_norm": 6.055988336093534, + "learning_rate": 9.998212492243099e-06, + "loss": 1.0388, + "step": 469 + }, + { + "epoch": 0.04, + "grad_norm": 4.290773029422374, + "learning_rate": 9.998176922983017e-06, + "loss": 1.0765, + "step": 470 + }, + { + "epoch": 0.04, + "grad_norm": 5.447842072761543, + "learning_rate": 9.998141003372486e-06, + "loss": 1.2072, + "step": 471 + }, + { + "epoch": 0.04, + "grad_norm": 5.586951220708749, + "learning_rate": 9.998104733414022e-06, + "loss": 1.0186, + "step": 472 + }, + { + "epoch": 0.04, + "grad_norm": 5.485052880067488, + "learning_rate": 9.998068113110168e-06, + "loss": 0.9249, + "step": 473 + }, + { + "epoch": 0.04, + "grad_norm": 6.381424161053315, + "learning_rate": 9.99803114246349e-06, + "loss": 1.4598, + "step": 474 + }, + { + "epoch": 0.04, + "grad_norm": 3.656368392426574, + "learning_rate": 9.997993821476583e-06, + "loss": 0.7038, + "step": 475 + }, + { + "epoch": 0.04, + "grad_norm": 4.572194725600882, + "learning_rate": 9.99795615015206e-06, + "loss": 1.0368, + "step": 476 + }, + { + "epoch": 0.04, + "grad_norm": 4.633066802629584, + "learning_rate": 9.99791812849256e-06, + "loss": 1.0351, + "step": 477 + }, + { + "epoch": 0.04, + "grad_norm": 5.647468770382145, + "learning_rate": 9.997879756500752e-06, + "loss": 1.2678, + "step": 478 + }, + { + "epoch": 0.04, + "grad_norm": 4.661062102561084, + "learning_rate": 9.997841034179323e-06, + "loss": 0.879, + "step": 479 + }, + { + "epoch": 0.04, + "grad_norm": 4.5448288381588355, + "learning_rate": 9.997801961530989e-06, + "loss": 0.8834, + "step": 480 + }, + { + "epoch": 0.04, + "grad_norm": 6.746344735056418, + "learning_rate": 9.997762538558488e-06, + "loss": 1.4655, + "step": 481 + }, + { + "epoch": 0.04, + "grad_norm": 3.7462834947998007, + "learning_rate": 9.997722765264582e-06, + "loss": 0.7281, + "step": 482 + }, + { + "epoch": 0.04, + "grad_norm": 5.7286846769815325, + "learning_rate": 9.997682641652062e-06, + "loss": 1.1582, + "step": 483 + }, + { + "epoch": 0.04, + "grad_norm": 5.03167073385437, + "learning_rate": 9.997642167723737e-06, + "loss": 0.9327, + "step": 484 + }, + { + "epoch": 0.04, + "grad_norm": 6.232339007741685, + "learning_rate": 9.997601343482448e-06, + "loss": 1.1108, + "step": 485 + }, + { + "epoch": 0.04, + "grad_norm": 6.245419578495987, + "learning_rate": 9.997560168931053e-06, + "loss": 0.8701, + "step": 486 + }, + { + "epoch": 0.04, + "grad_norm": 6.9573609801548555, + "learning_rate": 9.99751864407244e-06, + "loss": 1.6929, + "step": 487 + }, + { + "epoch": 0.04, + "grad_norm": 4.6903216396074745, + "learning_rate": 9.99747676890952e-06, + "loss": 1.0227, + "step": 488 + }, + { + "epoch": 0.04, + "grad_norm": 5.879451561124423, + "learning_rate": 9.997434543445227e-06, + "loss": 1.1881, + "step": 489 + }, + { + "epoch": 0.04, + "grad_norm": 4.016960042499101, + "learning_rate": 9.997391967682522e-06, + "loss": 0.6582, + "step": 490 + }, + { + "epoch": 0.04, + "grad_norm": 6.015495969337913, + "learning_rate": 9.997349041624387e-06, + "loss": 1.2102, + "step": 491 + }, + { + "epoch": 0.04, + "grad_norm": 4.627930447954648, + "learning_rate": 9.997305765273834e-06, + "loss": 0.6894, + "step": 492 + }, + { + "epoch": 0.04, + "grad_norm": 4.403979572529196, + "learning_rate": 9.997262138633895e-06, + "loss": 0.9797, + "step": 493 + }, + { + "epoch": 0.04, + "grad_norm": 5.264874126549467, + "learning_rate": 9.99721816170763e-06, + "loss": 1.0349, + "step": 494 + }, + { + "epoch": 0.04, + "grad_norm": 2.456314950766531, + "learning_rate": 9.997173834498118e-06, + "loss": 0.2876, + "step": 495 + }, + { + "epoch": 0.04, + "grad_norm": 7.390226223155865, + "learning_rate": 9.997129157008467e-06, + "loss": 0.9932, + "step": 496 + }, + { + "epoch": 0.04, + "grad_norm": 5.220155764196953, + "learning_rate": 9.99708412924181e-06, + "loss": 0.9571, + "step": 497 + }, + { + "epoch": 0.04, + "grad_norm": 5.090257690615265, + "learning_rate": 9.997038751201305e-06, + "loss": 0.6411, + "step": 498 + }, + { + "epoch": 0.04, + "grad_norm": 3.4198073238913955, + "learning_rate": 9.996993022890129e-06, + "loss": 0.4335, + "step": 499 + }, + { + "epoch": 0.04, + "grad_norm": 2.9754266714771402, + "learning_rate": 9.99694694431149e-06, + "loss": 0.5786, + "step": 500 + }, + { + "epoch": 0.04, + "grad_norm": 3.862299420547906, + "learning_rate": 9.996900515468614e-06, + "loss": 0.4861, + "step": 501 + }, + { + "epoch": 0.04, + "grad_norm": 4.749216334168513, + "learning_rate": 9.996853736364763e-06, + "loss": 1.0879, + "step": 502 + }, + { + "epoch": 0.04, + "grad_norm": 5.357431072223263, + "learning_rate": 9.996806607003209e-06, + "loss": 1.2146, + "step": 503 + }, + { + "epoch": 0.04, + "grad_norm": 3.591655212241847, + "learning_rate": 9.996759127387259e-06, + "loss": 0.7087, + "step": 504 + }, + { + "epoch": 0.04, + "grad_norm": 6.45088704039877, + "learning_rate": 9.996711297520238e-06, + "loss": 1.3822, + "step": 505 + }, + { + "epoch": 0.04, + "grad_norm": 7.018084335327446, + "learning_rate": 9.996663117405503e-06, + "loss": 1.5407, + "step": 506 + }, + { + "epoch": 0.04, + "grad_norm": 4.427680298582249, + "learning_rate": 9.99661458704643e-06, + "loss": 0.5793, + "step": 507 + }, + { + "epoch": 0.04, + "grad_norm": 7.765130560248742, + "learning_rate": 9.996565706446418e-06, + "loss": 1.67, + "step": 508 + }, + { + "epoch": 0.04, + "grad_norm": 5.8745700844606406, + "learning_rate": 9.996516475608894e-06, + "loss": 1.421, + "step": 509 + }, + { + "epoch": 0.04, + "grad_norm": 6.075923799198178, + "learning_rate": 9.996466894537311e-06, + "loss": 1.1038, + "step": 510 + }, + { + "epoch": 0.04, + "grad_norm": 2.259111833152261, + "learning_rate": 9.996416963235144e-06, + "loss": 0.2819, + "step": 511 + }, + { + "epoch": 0.04, + "grad_norm": 1.9609336099359, + "learning_rate": 9.996366681705892e-06, + "loss": 0.4535, + "step": 512 + }, + { + "epoch": 0.04, + "grad_norm": 4.9779750310059985, + "learning_rate": 9.99631604995308e-06, + "loss": 0.9532, + "step": 513 + }, + { + "epoch": 0.04, + "grad_norm": 3.5643470817068112, + "learning_rate": 9.996265067980256e-06, + "loss": 0.4561, + "step": 514 + }, + { + "epoch": 0.04, + "grad_norm": 6.865293599732065, + "learning_rate": 9.996213735790995e-06, + "loss": 1.4386, + "step": 515 + }, + { + "epoch": 0.04, + "grad_norm": 6.698458945782838, + "learning_rate": 9.996162053388895e-06, + "loss": 1.2189, + "step": 516 + }, + { + "epoch": 0.04, + "grad_norm": 5.947033920225605, + "learning_rate": 9.996110020777579e-06, + "loss": 0.9818, + "step": 517 + }, + { + "epoch": 0.04, + "grad_norm": 5.38030957812177, + "learning_rate": 9.996057637960694e-06, + "loss": 0.8517, + "step": 518 + }, + { + "epoch": 0.04, + "grad_norm": 5.3778991466873185, + "learning_rate": 9.996004904941911e-06, + "loss": 1.012, + "step": 519 + }, + { + "epoch": 0.04, + "grad_norm": 5.495684553725476, + "learning_rate": 9.995951821724926e-06, + "loss": 1.1764, + "step": 520 + }, + { + "epoch": 0.04, + "grad_norm": 6.828192139209808, + "learning_rate": 9.99589838831346e-06, + "loss": 1.6443, + "step": 521 + }, + { + "epoch": 0.04, + "grad_norm": 5.584025659582305, + "learning_rate": 9.995844604711262e-06, + "loss": 0.8156, + "step": 522 + }, + { + "epoch": 0.04, + "grad_norm": 3.378898352225901, + "learning_rate": 9.995790470922098e-06, + "loss": 0.7585, + "step": 523 + }, + { + "epoch": 0.04, + "grad_norm": 4.815193156427421, + "learning_rate": 9.995735986949763e-06, + "loss": 1.1676, + "step": 524 + }, + { + "epoch": 0.04, + "grad_norm": 4.110317832939963, + "learning_rate": 9.995681152798079e-06, + "loss": 0.6391, + "step": 525 + }, + { + "epoch": 0.04, + "grad_norm": 3.39861470191864, + "learning_rate": 9.995625968470883e-06, + "loss": 0.7007, + "step": 526 + }, + { + "epoch": 0.04, + "grad_norm": 7.833803015864922, + "learning_rate": 9.995570433972051e-06, + "loss": 1.3191, + "step": 527 + }, + { + "epoch": 0.04, + "grad_norm": 6.314688180821015, + "learning_rate": 9.995514549305472e-06, + "loss": 1.2388, + "step": 528 + }, + { + "epoch": 0.04, + "grad_norm": 6.017090408912602, + "learning_rate": 9.995458314475064e-06, + "loss": 0.9468, + "step": 529 + }, + { + "epoch": 0.04, + "grad_norm": 5.001208522010045, + "learning_rate": 9.995401729484768e-06, + "loss": 0.831, + "step": 530 + }, + { + "epoch": 0.04, + "grad_norm": 4.782985672211555, + "learning_rate": 9.995344794338551e-06, + "loss": 0.8729, + "step": 531 + }, + { + "epoch": 0.04, + "grad_norm": 4.543829103793221, + "learning_rate": 9.995287509040403e-06, + "loss": 1.1005, + "step": 532 + }, + { + "epoch": 0.04, + "grad_norm": 4.598299872795798, + "learning_rate": 9.99522987359434e-06, + "loss": 1.0555, + "step": 533 + }, + { + "epoch": 0.04, + "grad_norm": 4.2787368892690845, + "learning_rate": 9.995171888004403e-06, + "loss": 1.141, + "step": 534 + }, + { + "epoch": 0.04, + "grad_norm": 2.8012577004433523, + "learning_rate": 9.995113552274656e-06, + "loss": 0.7279, + "step": 535 + }, + { + "epoch": 0.04, + "grad_norm": 5.805953330549673, + "learning_rate": 9.995054866409186e-06, + "loss": 1.3504, + "step": 536 + }, + { + "epoch": 0.04, + "grad_norm": 5.725355607677452, + "learning_rate": 9.99499583041211e-06, + "loss": 1.1677, + "step": 537 + }, + { + "epoch": 0.04, + "grad_norm": 5.409679139961245, + "learning_rate": 9.994936444287565e-06, + "loss": 1.1266, + "step": 538 + }, + { + "epoch": 0.04, + "grad_norm": 6.340503058191312, + "learning_rate": 9.994876708039712e-06, + "loss": 0.924, + "step": 539 + }, + { + "epoch": 0.04, + "grad_norm": 3.635724815694596, + "learning_rate": 9.99481662167274e-06, + "loss": 0.6758, + "step": 540 + }, + { + "epoch": 0.04, + "grad_norm": 5.137018574224754, + "learning_rate": 9.99475618519086e-06, + "loss": 1.488, + "step": 541 + }, + { + "epoch": 0.04, + "grad_norm": 4.69956991439474, + "learning_rate": 9.99469539859831e-06, + "loss": 0.823, + "step": 542 + }, + { + "epoch": 0.04, + "grad_norm": 5.019347119841303, + "learning_rate": 9.994634261899347e-06, + "loss": 0.9059, + "step": 543 + }, + { + "epoch": 0.04, + "grad_norm": 6.7141592038245035, + "learning_rate": 9.994572775098262e-06, + "loss": 1.0794, + "step": 544 + }, + { + "epoch": 0.04, + "grad_norm": 7.810662155227189, + "learning_rate": 9.99451093819936e-06, + "loss": 1.2191, + "step": 545 + }, + { + "epoch": 0.04, + "grad_norm": 1.6379710591200758, + "learning_rate": 9.994448751206978e-06, + "loss": 0.2519, + "step": 546 + }, + { + "epoch": 0.04, + "grad_norm": 6.276706835772696, + "learning_rate": 9.994386214125476e-06, + "loss": 1.4129, + "step": 547 + }, + { + "epoch": 0.04, + "grad_norm": 7.014552652694709, + "learning_rate": 9.994323326959234e-06, + "loss": 1.2461, + "step": 548 + }, + { + "epoch": 0.04, + "grad_norm": 4.620540826198909, + "learning_rate": 9.994260089712662e-06, + "loss": 1.0425, + "step": 549 + }, + { + "epoch": 0.04, + "grad_norm": 8.080536553202029, + "learning_rate": 9.994196502390194e-06, + "loss": 1.7665, + "step": 550 + }, + { + "epoch": 0.05, + "grad_norm": 6.637864424017921, + "learning_rate": 9.994132564996284e-06, + "loss": 0.9307, + "step": 551 + }, + { + "epoch": 0.05, + "grad_norm": 4.781137741702556, + "learning_rate": 9.994068277535418e-06, + "loss": 0.8607, + "step": 552 + }, + { + "epoch": 0.05, + "grad_norm": 1.1510246907020736, + "learning_rate": 9.994003640012099e-06, + "loss": 0.2683, + "step": 553 + }, + { + "epoch": 0.05, + "grad_norm": 4.250424983427129, + "learning_rate": 9.99393865243086e-06, + "loss": 0.6378, + "step": 554 + }, + { + "epoch": 0.05, + "grad_norm": 6.319957939313974, + "learning_rate": 9.993873314796253e-06, + "loss": 0.8753, + "step": 555 + }, + { + "epoch": 0.05, + "grad_norm": 6.405556982487025, + "learning_rate": 9.993807627112862e-06, + "loss": 1.3312, + "step": 556 + }, + { + "epoch": 0.05, + "grad_norm": 4.160310787271504, + "learning_rate": 9.993741589385287e-06, + "loss": 0.7164, + "step": 557 + }, + { + "epoch": 0.05, + "grad_norm": 4.749100097319363, + "learning_rate": 9.993675201618162e-06, + "loss": 1.0449, + "step": 558 + }, + { + "epoch": 0.05, + "grad_norm": 6.796749991766679, + "learning_rate": 9.993608463816137e-06, + "loss": 1.0364, + "step": 559 + }, + { + "epoch": 0.05, + "grad_norm": 3.0955792124834423, + "learning_rate": 9.99354137598389e-06, + "loss": 0.5138, + "step": 560 + }, + { + "epoch": 0.05, + "grad_norm": 5.177971692772812, + "learning_rate": 9.993473938126126e-06, + "loss": 1.0678, + "step": 561 + }, + { + "epoch": 0.05, + "grad_norm": 5.299457668753542, + "learning_rate": 9.99340615024757e-06, + "loss": 1.4612, + "step": 562 + }, + { + "epoch": 0.05, + "grad_norm": 4.471041279551884, + "learning_rate": 9.993338012352973e-06, + "loss": 0.8609, + "step": 563 + }, + { + "epoch": 0.05, + "grad_norm": 4.131374831065728, + "learning_rate": 9.993269524447115e-06, + "loss": 0.8387, + "step": 564 + }, + { + "epoch": 0.05, + "grad_norm": 5.95767345172936, + "learning_rate": 9.993200686534793e-06, + "loss": 1.1389, + "step": 565 + }, + { + "epoch": 0.05, + "grad_norm": 4.787905925023107, + "learning_rate": 9.993131498620833e-06, + "loss": 1.0385, + "step": 566 + }, + { + "epoch": 0.05, + "grad_norm": 5.709708416684444, + "learning_rate": 9.993061960710084e-06, + "loss": 1.262, + "step": 567 + }, + { + "epoch": 0.05, + "grad_norm": 4.7600247581497, + "learning_rate": 9.992992072807424e-06, + "loss": 0.7766, + "step": 568 + }, + { + "epoch": 0.05, + "grad_norm": 6.040306502808157, + "learning_rate": 9.992921834917748e-06, + "loss": 1.0075, + "step": 569 + }, + { + "epoch": 0.05, + "grad_norm": 5.765888597714929, + "learning_rate": 9.99285124704598e-06, + "loss": 1.1774, + "step": 570 + }, + { + "epoch": 0.05, + "grad_norm": 4.251268554699921, + "learning_rate": 9.99278030919707e-06, + "loss": 0.8978, + "step": 571 + }, + { + "epoch": 0.05, + "grad_norm": 4.687606443004777, + "learning_rate": 9.992709021375987e-06, + "loss": 0.6764, + "step": 572 + }, + { + "epoch": 0.05, + "grad_norm": 5.7975790585142155, + "learning_rate": 9.992637383587731e-06, + "loss": 1.3707, + "step": 573 + }, + { + "epoch": 0.05, + "grad_norm": 6.216275180993614, + "learning_rate": 9.992565395837323e-06, + "loss": 1.0971, + "step": 574 + }, + { + "epoch": 0.05, + "grad_norm": 5.248741788140497, + "learning_rate": 9.992493058129808e-06, + "loss": 0.7766, + "step": 575 + }, + { + "epoch": 0.05, + "grad_norm": 5.605246423497219, + "learning_rate": 9.992420370470257e-06, + "loss": 0.7862, + "step": 576 + }, + { + "epoch": 0.05, + "grad_norm": 5.2123803620145885, + "learning_rate": 9.992347332863766e-06, + "loss": 1.1643, + "step": 577 + }, + { + "epoch": 0.05, + "grad_norm": 5.017266738904277, + "learning_rate": 9.992273945315451e-06, + "loss": 0.9691, + "step": 578 + }, + { + "epoch": 0.05, + "grad_norm": 3.856181346386722, + "learning_rate": 9.992200207830461e-06, + "loss": 0.6863, + "step": 579 + }, + { + "epoch": 0.05, + "grad_norm": 4.208324579382292, + "learning_rate": 9.992126120413963e-06, + "loss": 0.8127, + "step": 580 + }, + { + "epoch": 0.05, + "grad_norm": 4.758991816274172, + "learning_rate": 9.99205168307115e-06, + "loss": 0.7493, + "step": 581 + }, + { + "epoch": 0.05, + "grad_norm": 4.657302028990941, + "learning_rate": 9.991976895807237e-06, + "loss": 0.9181, + "step": 582 + }, + { + "epoch": 0.05, + "grad_norm": 3.7383748965946557, + "learning_rate": 9.99190175862747e-06, + "loss": 0.5278, + "step": 583 + }, + { + "epoch": 0.05, + "grad_norm": 6.017967974387647, + "learning_rate": 9.991826271537115e-06, + "loss": 1.0367, + "step": 584 + }, + { + "epoch": 0.05, + "grad_norm": 5.830582460586334, + "learning_rate": 9.991750434541463e-06, + "loss": 1.2415, + "step": 585 + }, + { + "epoch": 0.05, + "grad_norm": 4.414455961267636, + "learning_rate": 9.99167424764583e-06, + "loss": 0.823, + "step": 586 + }, + { + "epoch": 0.05, + "grad_norm": 6.008113109219425, + "learning_rate": 9.991597710855555e-06, + "loss": 1.4785, + "step": 587 + }, + { + "epoch": 0.05, + "grad_norm": 5.6219971020775015, + "learning_rate": 9.991520824176004e-06, + "loss": 1.1196, + "step": 588 + }, + { + "epoch": 0.05, + "grad_norm": 3.2924239603883967, + "learning_rate": 9.991443587612568e-06, + "loss": 0.4934, + "step": 589 + }, + { + "epoch": 0.05, + "grad_norm": 5.8783937859474875, + "learning_rate": 9.991366001170656e-06, + "loss": 1.0913, + "step": 590 + }, + { + "epoch": 0.05, + "grad_norm": 4.6596365717238735, + "learning_rate": 9.991288064855713e-06, + "loss": 0.8888, + "step": 591 + }, + { + "epoch": 0.05, + "grad_norm": 1.9574292806593845, + "learning_rate": 9.991209778673199e-06, + "loss": 0.241, + "step": 592 + }, + { + "epoch": 0.05, + "grad_norm": 4.958682017079736, + "learning_rate": 9.991131142628601e-06, + "loss": 1.2773, + "step": 593 + }, + { + "epoch": 0.05, + "grad_norm": 4.497872038290533, + "learning_rate": 9.99105215672743e-06, + "loss": 0.9163, + "step": 594 + }, + { + "epoch": 0.05, + "grad_norm": 4.356815585248249, + "learning_rate": 9.990972820975224e-06, + "loss": 0.9554, + "step": 595 + }, + { + "epoch": 0.05, + "grad_norm": 3.823751138948768, + "learning_rate": 9.990893135377544e-06, + "loss": 0.6938, + "step": 596 + }, + { + "epoch": 0.05, + "grad_norm": 5.869332541879883, + "learning_rate": 9.990813099939977e-06, + "loss": 0.9904, + "step": 597 + }, + { + "epoch": 0.05, + "grad_norm": 2.557426669936456, + "learning_rate": 9.990732714668132e-06, + "loss": 0.4088, + "step": 598 + }, + { + "epoch": 0.05, + "grad_norm": 4.994165849060579, + "learning_rate": 9.99065197956764e-06, + "loss": 0.5122, + "step": 599 + }, + { + "epoch": 0.05, + "grad_norm": 5.441289707355455, + "learning_rate": 9.990570894644168e-06, + "loss": 1.1079, + "step": 600 + }, + { + "epoch": 0.05, + "grad_norm": 4.361083611031531, + "learning_rate": 9.990489459903391e-06, + "loss": 0.7912, + "step": 601 + }, + { + "epoch": 0.05, + "grad_norm": 4.907837876833044, + "learning_rate": 9.990407675351027e-06, + "loss": 1.0607, + "step": 602 + }, + { + "epoch": 0.05, + "grad_norm": 4.583072805757161, + "learning_rate": 9.990325540992798e-06, + "loss": 1.0303, + "step": 603 + }, + { + "epoch": 0.05, + "grad_norm": 6.063504134304614, + "learning_rate": 9.99024305683447e-06, + "loss": 0.9656, + "step": 604 + }, + { + "epoch": 0.05, + "grad_norm": 6.3739718312431455, + "learning_rate": 9.99016022288182e-06, + "loss": 1.9083, + "step": 605 + }, + { + "epoch": 0.05, + "grad_norm": 3.3717803890143556, + "learning_rate": 9.990077039140655e-06, + "loss": 0.6516, + "step": 606 + }, + { + "epoch": 0.05, + "grad_norm": 3.8385892151528815, + "learning_rate": 9.989993505616807e-06, + "loss": 0.7832, + "step": 607 + }, + { + "epoch": 0.05, + "grad_norm": 6.415013525607286, + "learning_rate": 9.989909622316132e-06, + "loss": 0.9748, + "step": 608 + }, + { + "epoch": 0.05, + "grad_norm": 5.30838058965553, + "learning_rate": 9.989825389244508e-06, + "loss": 1.1293, + "step": 609 + }, + { + "epoch": 0.05, + "grad_norm": 3.9641337437991773, + "learning_rate": 9.989740806407839e-06, + "loss": 0.7073, + "step": 610 + }, + { + "epoch": 0.05, + "grad_norm": 4.539025450895296, + "learning_rate": 9.989655873812054e-06, + "loss": 0.4529, + "step": 611 + }, + { + "epoch": 0.05, + "grad_norm": 8.404028963526692, + "learning_rate": 9.98957059146311e-06, + "loss": 1.2211, + "step": 612 + }, + { + "epoch": 0.05, + "grad_norm": 5.464389130013617, + "learning_rate": 9.989484959366981e-06, + "loss": 0.9611, + "step": 613 + }, + { + "epoch": 0.05, + "grad_norm": 6.268794174288258, + "learning_rate": 9.98939897752967e-06, + "loss": 1.5645, + "step": 614 + }, + { + "epoch": 0.05, + "grad_norm": 6.190325579863303, + "learning_rate": 9.989312645957206e-06, + "loss": 0.9776, + "step": 615 + }, + { + "epoch": 0.05, + "grad_norm": 4.519508259471203, + "learning_rate": 9.989225964655638e-06, + "loss": 0.7953, + "step": 616 + }, + { + "epoch": 0.05, + "grad_norm": 3.031059724485583, + "learning_rate": 9.989138933631042e-06, + "loss": 0.5061, + "step": 617 + }, + { + "epoch": 0.05, + "grad_norm": 6.168095468868191, + "learning_rate": 9.989051552889521e-06, + "loss": 1.3652, + "step": 618 + }, + { + "epoch": 0.05, + "grad_norm": 4.7765541732269625, + "learning_rate": 9.9889638224372e-06, + "loss": 0.8034, + "step": 619 + }, + { + "epoch": 0.05, + "grad_norm": 5.109554857741848, + "learning_rate": 9.988875742280225e-06, + "loss": 0.9353, + "step": 620 + }, + { + "epoch": 0.05, + "grad_norm": 4.082910298532423, + "learning_rate": 9.988787312424773e-06, + "loss": 0.8639, + "step": 621 + }, + { + "epoch": 0.05, + "grad_norm": 2.8718090115890407, + "learning_rate": 9.98869853287704e-06, + "loss": 0.6346, + "step": 622 + }, + { + "epoch": 0.05, + "grad_norm": 5.209133554282209, + "learning_rate": 9.988609403643254e-06, + "loss": 1.0097, + "step": 623 + }, + { + "epoch": 0.05, + "grad_norm": 4.2825195486259995, + "learning_rate": 9.988519924729658e-06, + "loss": 1.0552, + "step": 624 + }, + { + "epoch": 0.05, + "grad_norm": 4.090137938891286, + "learning_rate": 9.988430096142523e-06, + "loss": 0.9436, + "step": 625 + }, + { + "epoch": 0.05, + "grad_norm": 2.669478879594225, + "learning_rate": 9.98833991788815e-06, + "loss": 0.4304, + "step": 626 + }, + { + "epoch": 0.05, + "grad_norm": 2.7927076917738867, + "learning_rate": 9.988249389972859e-06, + "loss": 0.4305, + "step": 627 + }, + { + "epoch": 0.05, + "grad_norm": 5.556812199763877, + "learning_rate": 9.988158512402993e-06, + "loss": 1.3901, + "step": 628 + }, + { + "epoch": 0.05, + "grad_norm": 2.615049583677811, + "learning_rate": 9.988067285184924e-06, + "loss": 0.5666, + "step": 629 + }, + { + "epoch": 0.05, + "grad_norm": 4.2011693520645546, + "learning_rate": 9.987975708325048e-06, + "loss": 0.7677, + "step": 630 + }, + { + "epoch": 0.05, + "grad_norm": 3.2099903765384887, + "learning_rate": 9.98788378182978e-06, + "loss": 0.5586, + "step": 631 + }, + { + "epoch": 0.05, + "grad_norm": 1.2163063910389378, + "learning_rate": 9.987791505705568e-06, + "loss": 0.2413, + "step": 632 + }, + { + "epoch": 0.05, + "grad_norm": 4.488636294376988, + "learning_rate": 9.987698879958879e-06, + "loss": 0.9507, + "step": 633 + }, + { + "epoch": 0.05, + "grad_norm": 2.9033659480857534, + "learning_rate": 9.987605904596203e-06, + "loss": 0.2726, + "step": 634 + }, + { + "epoch": 0.05, + "grad_norm": 4.480796139908746, + "learning_rate": 9.987512579624061e-06, + "loss": 0.8255, + "step": 635 + }, + { + "epoch": 0.05, + "grad_norm": 5.527278591142934, + "learning_rate": 9.987418905048993e-06, + "loss": 0.9837, + "step": 636 + }, + { + "epoch": 0.05, + "grad_norm": 3.120787858966511, + "learning_rate": 9.987324880877564e-06, + "loss": 0.6009, + "step": 637 + }, + { + "epoch": 0.05, + "grad_norm": 4.656164287100769, + "learning_rate": 9.987230507116366e-06, + "loss": 0.951, + "step": 638 + }, + { + "epoch": 0.05, + "grad_norm": 4.721155595685185, + "learning_rate": 9.987135783772014e-06, + "loss": 1.1902, + "step": 639 + }, + { + "epoch": 0.05, + "grad_norm": 5.837561241031265, + "learning_rate": 9.987040710851148e-06, + "loss": 1.3766, + "step": 640 + }, + { + "epoch": 0.05, + "grad_norm": 3.4364747725629443, + "learning_rate": 9.986945288360431e-06, + "loss": 0.5502, + "step": 641 + }, + { + "epoch": 0.05, + "grad_norm": 6.505635828322947, + "learning_rate": 9.986849516306554e-06, + "loss": 1.3267, + "step": 642 + }, + { + "epoch": 0.05, + "grad_norm": 4.644333592153027, + "learning_rate": 9.986753394696227e-06, + "loss": 0.6654, + "step": 643 + }, + { + "epoch": 0.05, + "grad_norm": 5.0669290688594195, + "learning_rate": 9.98665692353619e-06, + "loss": 1.1344, + "step": 644 + }, + { + "epoch": 0.05, + "grad_norm": 4.344739819105405, + "learning_rate": 9.986560102833206e-06, + "loss": 0.9141, + "step": 645 + }, + { + "epoch": 0.05, + "grad_norm": 3.8933191624197305, + "learning_rate": 9.986462932594059e-06, + "loss": 0.487, + "step": 646 + }, + { + "epoch": 0.05, + "grad_norm": 4.096586790266863, + "learning_rate": 9.986365412825562e-06, + "loss": 0.7367, + "step": 647 + }, + { + "epoch": 0.05, + "grad_norm": 5.303554708517804, + "learning_rate": 9.98626754353455e-06, + "loss": 0.8811, + "step": 648 + }, + { + "epoch": 0.05, + "grad_norm": 3.3709015554748842, + "learning_rate": 9.986169324727883e-06, + "loss": 0.617, + "step": 649 + }, + { + "epoch": 0.05, + "grad_norm": 2.4762846409644337, + "learning_rate": 9.986070756412447e-06, + "loss": 0.4911, + "step": 650 + }, + { + "epoch": 0.05, + "grad_norm": 4.601479155779438, + "learning_rate": 9.98597183859515e-06, + "loss": 0.6638, + "step": 651 + }, + { + "epoch": 0.05, + "grad_norm": 4.004344661451242, + "learning_rate": 9.985872571282927e-06, + "loss": 0.6485, + "step": 652 + }, + { + "epoch": 0.05, + "grad_norm": 6.256974623442099, + "learning_rate": 9.985772954482736e-06, + "loss": 1.076, + "step": 653 + }, + { + "epoch": 0.05, + "grad_norm": 5.072351574263503, + "learning_rate": 9.985672988201557e-06, + "loss": 1.0198, + "step": 654 + }, + { + "epoch": 0.05, + "grad_norm": 3.2702241501346734, + "learning_rate": 9.9855726724464e-06, + "loss": 0.6186, + "step": 655 + }, + { + "epoch": 0.05, + "grad_norm": 5.057842319740918, + "learning_rate": 9.985472007224296e-06, + "loss": 1.0415, + "step": 656 + }, + { + "epoch": 0.05, + "grad_norm": 6.149812927136185, + "learning_rate": 9.9853709925423e-06, + "loss": 1.3504, + "step": 657 + }, + { + "epoch": 0.05, + "grad_norm": 6.477356519779247, + "learning_rate": 9.985269628407497e-06, + "loss": 0.957, + "step": 658 + }, + { + "epoch": 0.05, + "grad_norm": 4.9499863604689995, + "learning_rate": 9.985167914826986e-06, + "loss": 0.9287, + "step": 659 + }, + { + "epoch": 0.05, + "grad_norm": 3.666449716748098, + "learning_rate": 9.9850658518079e-06, + "loss": 1.204, + "step": 660 + }, + { + "epoch": 0.05, + "grad_norm": 3.5396153481196504, + "learning_rate": 9.984963439357395e-06, + "loss": 0.4128, + "step": 661 + }, + { + "epoch": 0.05, + "grad_norm": 3.9862693153077973, + "learning_rate": 9.984860677482646e-06, + "loss": 0.531, + "step": 662 + }, + { + "epoch": 0.05, + "grad_norm": 3.3886584701639064, + "learning_rate": 9.984757566190856e-06, + "loss": 0.5735, + "step": 663 + }, + { + "epoch": 0.05, + "grad_norm": 6.718073410688344, + "learning_rate": 9.984654105489258e-06, + "loss": 1.4752, + "step": 664 + }, + { + "epoch": 0.05, + "grad_norm": 1.6470470212912942, + "learning_rate": 9.984550295385097e-06, + "loss": 0.1962, + "step": 665 + }, + { + "epoch": 0.05, + "grad_norm": 4.677375978952525, + "learning_rate": 9.984446135885657e-06, + "loss": 1.2061, + "step": 666 + }, + { + "epoch": 0.05, + "grad_norm": 6.1751126586821545, + "learning_rate": 9.984341626998234e-06, + "loss": 1.6522, + "step": 667 + }, + { + "epoch": 0.05, + "grad_norm": 4.198772872513048, + "learning_rate": 9.984236768730152e-06, + "loss": 0.9486, + "step": 668 + }, + { + "epoch": 0.05, + "grad_norm": 4.550318005535857, + "learning_rate": 9.984131561088766e-06, + "loss": 0.8527, + "step": 669 + }, + { + "epoch": 0.05, + "grad_norm": 3.8438636208886927, + "learning_rate": 9.98402600408145e-06, + "loss": 0.6577, + "step": 670 + }, + { + "epoch": 0.05, + "grad_norm": 4.408862532659198, + "learning_rate": 9.983920097715599e-06, + "loss": 0.9216, + "step": 671 + }, + { + "epoch": 0.05, + "grad_norm": 4.077809477522914, + "learning_rate": 9.983813841998639e-06, + "loss": 1.0509, + "step": 672 + }, + { + "epoch": 0.06, + "grad_norm": 3.2653328768983725, + "learning_rate": 9.98370723693802e-06, + "loss": 0.567, + "step": 673 + }, + { + "epoch": 0.06, + "grad_norm": 5.705621516826731, + "learning_rate": 9.983600282541213e-06, + "loss": 1.3581, + "step": 674 + }, + { + "epoch": 0.06, + "grad_norm": 4.5134736946043725, + "learning_rate": 9.983492978815716e-06, + "loss": 0.7735, + "step": 675 + }, + { + "epoch": 0.06, + "grad_norm": 3.973311993907203, + "learning_rate": 9.983385325769047e-06, + "loss": 0.7616, + "step": 676 + }, + { + "epoch": 0.06, + "grad_norm": 6.666889227432155, + "learning_rate": 9.983277323408755e-06, + "loss": 1.2404, + "step": 677 + }, + { + "epoch": 0.06, + "grad_norm": 5.986813884345034, + "learning_rate": 9.983168971742411e-06, + "loss": 1.295, + "step": 678 + }, + { + "epoch": 0.06, + "grad_norm": 5.530210032015996, + "learning_rate": 9.983060270777607e-06, + "loss": 1.124, + "step": 679 + }, + { + "epoch": 0.06, + "grad_norm": 5.079685519249418, + "learning_rate": 9.982951220521965e-06, + "loss": 1.0623, + "step": 680 + }, + { + "epoch": 0.06, + "grad_norm": 6.617538825588403, + "learning_rate": 9.98284182098313e-06, + "loss": 1.4321, + "step": 681 + }, + { + "epoch": 0.06, + "grad_norm": 4.736195546177727, + "learning_rate": 9.982732072168768e-06, + "loss": 0.9926, + "step": 682 + }, + { + "epoch": 0.06, + "grad_norm": 4.911523379676589, + "learning_rate": 9.982621974086572e-06, + "loss": 1.1862, + "step": 683 + }, + { + "epoch": 0.06, + "grad_norm": 6.679514520970694, + "learning_rate": 9.98251152674426e-06, + "loss": 1.266, + "step": 684 + }, + { + "epoch": 0.06, + "grad_norm": 5.072991628720345, + "learning_rate": 9.982400730149574e-06, + "loss": 0.9329, + "step": 685 + }, + { + "epoch": 0.06, + "grad_norm": 4.69136136575507, + "learning_rate": 9.98228958431028e-06, + "loss": 0.9406, + "step": 686 + }, + { + "epoch": 0.06, + "grad_norm": 5.591915346611839, + "learning_rate": 9.982178089234171e-06, + "loss": 1.3627, + "step": 687 + }, + { + "epoch": 0.06, + "grad_norm": 5.747909856781231, + "learning_rate": 9.982066244929058e-06, + "loss": 1.2625, + "step": 688 + }, + { + "epoch": 0.06, + "grad_norm": 7.065528326230627, + "learning_rate": 9.981954051402785e-06, + "loss": 1.0663, + "step": 689 + }, + { + "epoch": 0.06, + "grad_norm": 4.0001909990159215, + "learning_rate": 9.981841508663214e-06, + "loss": 0.7058, + "step": 690 + }, + { + "epoch": 0.06, + "grad_norm": 3.3998445578928598, + "learning_rate": 9.981728616718234e-06, + "loss": 0.7326, + "step": 691 + }, + { + "epoch": 0.06, + "grad_norm": 3.855144959078903, + "learning_rate": 9.98161537557576e-06, + "loss": 0.6568, + "step": 692 + }, + { + "epoch": 0.06, + "grad_norm": 5.066439170079921, + "learning_rate": 9.981501785243725e-06, + "loss": 1.1099, + "step": 693 + }, + { + "epoch": 0.06, + "grad_norm": 4.380398469657443, + "learning_rate": 9.981387845730097e-06, + "loss": 1.0579, + "step": 694 + }, + { + "epoch": 0.06, + "grad_norm": 4.897195008908236, + "learning_rate": 9.981273557042861e-06, + "loss": 1.1187, + "step": 695 + }, + { + "epoch": 0.06, + "grad_norm": 6.252493751213305, + "learning_rate": 9.981158919190024e-06, + "loss": 1.4001, + "step": 696 + }, + { + "epoch": 0.06, + "grad_norm": 3.075321407345318, + "learning_rate": 9.981043932179629e-06, + "loss": 0.654, + "step": 697 + }, + { + "epoch": 0.06, + "grad_norm": 4.737293080752598, + "learning_rate": 9.980928596019727e-06, + "loss": 0.9636, + "step": 698 + }, + { + "epoch": 0.06, + "grad_norm": 5.220734829615724, + "learning_rate": 9.98081291071841e-06, + "loss": 1.1473, + "step": 699 + }, + { + "epoch": 0.06, + "grad_norm": 4.338659674445185, + "learning_rate": 9.980696876283785e-06, + "loss": 0.7959, + "step": 700 + }, + { + "epoch": 0.06, + "grad_norm": 3.4732476751320323, + "learning_rate": 9.980580492723984e-06, + "loss": 0.6809, + "step": 701 + }, + { + "epoch": 0.06, + "grad_norm": 4.506473008904325, + "learning_rate": 9.980463760047167e-06, + "loss": 0.853, + "step": 702 + }, + { + "epoch": 0.06, + "grad_norm": 5.132017256774828, + "learning_rate": 9.980346678261515e-06, + "loss": 0.9536, + "step": 703 + }, + { + "epoch": 0.06, + "grad_norm": 2.583142094061532, + "learning_rate": 9.980229247375236e-06, + "loss": 0.447, + "step": 704 + }, + { + "epoch": 0.06, + "grad_norm": 3.83052530122639, + "learning_rate": 9.980111467396561e-06, + "loss": 0.389, + "step": 705 + }, + { + "epoch": 0.06, + "grad_norm": 6.794608506821916, + "learning_rate": 9.979993338333745e-06, + "loss": 1.4973, + "step": 706 + }, + { + "epoch": 0.06, + "grad_norm": 4.703224751249668, + "learning_rate": 9.979874860195068e-06, + "loss": 0.8389, + "step": 707 + }, + { + "epoch": 0.06, + "grad_norm": 3.1532072810420435, + "learning_rate": 9.979756032988837e-06, + "loss": 0.6571, + "step": 708 + }, + { + "epoch": 0.06, + "grad_norm": 6.015780174494101, + "learning_rate": 9.979636856723379e-06, + "loss": 1.3718, + "step": 709 + }, + { + "epoch": 0.06, + "grad_norm": 5.2193398459983555, + "learning_rate": 9.97951733140705e-06, + "loss": 0.8092, + "step": 710 + }, + { + "epoch": 0.06, + "grad_norm": 4.705786850338479, + "learning_rate": 9.979397457048226e-06, + "loss": 0.9143, + "step": 711 + }, + { + "epoch": 0.06, + "grad_norm": 3.9202053826916483, + "learning_rate": 9.97927723365531e-06, + "loss": 0.7494, + "step": 712 + }, + { + "epoch": 0.06, + "grad_norm": 4.633324076574433, + "learning_rate": 9.979156661236733e-06, + "loss": 0.8238, + "step": 713 + }, + { + "epoch": 0.06, + "grad_norm": 2.2712391089999926, + "learning_rate": 9.97903573980094e-06, + "loss": 0.4017, + "step": 714 + }, + { + "epoch": 0.06, + "grad_norm": 4.71236457496792, + "learning_rate": 9.978914469356413e-06, + "loss": 0.8248, + "step": 715 + }, + { + "epoch": 0.06, + "grad_norm": 5.412765845449585, + "learning_rate": 9.97879284991165e-06, + "loss": 1.0794, + "step": 716 + }, + { + "epoch": 0.06, + "grad_norm": 3.969742110862581, + "learning_rate": 9.978670881475173e-06, + "loss": 0.832, + "step": 717 + }, + { + "epoch": 0.06, + "grad_norm": 5.931801381819527, + "learning_rate": 9.978548564055537e-06, + "loss": 0.9876, + "step": 718 + }, + { + "epoch": 0.06, + "grad_norm": 5.316852797200995, + "learning_rate": 9.978425897661312e-06, + "loss": 0.8552, + "step": 719 + }, + { + "epoch": 0.06, + "grad_norm": 1.2826681827328124, + "learning_rate": 9.978302882301098e-06, + "loss": 0.1575, + "step": 720 + }, + { + "epoch": 0.06, + "grad_norm": 6.760416074010551, + "learning_rate": 9.97817951798352e-06, + "loss": 1.1668, + "step": 721 + }, + { + "epoch": 0.06, + "grad_norm": 4.310256577274185, + "learning_rate": 9.97805580471722e-06, + "loss": 1.0728, + "step": 722 + }, + { + "epoch": 0.06, + "grad_norm": 6.802609025604089, + "learning_rate": 9.977931742510873e-06, + "loss": 1.4633, + "step": 723 + }, + { + "epoch": 0.06, + "grad_norm": 1.8533623374374455, + "learning_rate": 9.977807331373176e-06, + "loss": 0.2901, + "step": 724 + }, + { + "epoch": 0.06, + "grad_norm": 2.6339167914487835, + "learning_rate": 9.977682571312847e-06, + "loss": 0.5014, + "step": 725 + }, + { + "epoch": 0.06, + "grad_norm": 4.062861416110086, + "learning_rate": 9.977557462338635e-06, + "loss": 0.7564, + "step": 726 + }, + { + "epoch": 0.06, + "grad_norm": 3.059168335509616, + "learning_rate": 9.977432004459306e-06, + "loss": 0.549, + "step": 727 + }, + { + "epoch": 0.06, + "grad_norm": 5.702152763867874, + "learning_rate": 9.977306197683656e-06, + "loss": 1.2333, + "step": 728 + }, + { + "epoch": 0.06, + "grad_norm": 6.630310813593966, + "learning_rate": 9.977180042020502e-06, + "loss": 1.1659, + "step": 729 + }, + { + "epoch": 0.06, + "grad_norm": 5.511468366268989, + "learning_rate": 9.977053537478686e-06, + "loss": 1.0232, + "step": 730 + }, + { + "epoch": 0.06, + "grad_norm": 3.0047996416749823, + "learning_rate": 9.976926684067082e-06, + "loss": 0.4844, + "step": 731 + }, + { + "epoch": 0.06, + "grad_norm": 3.247586868009353, + "learning_rate": 9.976799481794573e-06, + "loss": 0.4085, + "step": 732 + }, + { + "epoch": 0.06, + "grad_norm": 3.131277338264275, + "learning_rate": 9.976671930670081e-06, + "loss": 0.6309, + "step": 733 + }, + { + "epoch": 0.06, + "grad_norm": 4.2916927804561515, + "learning_rate": 9.976544030702546e-06, + "loss": 0.8517, + "step": 734 + }, + { + "epoch": 0.06, + "grad_norm": 4.487087260109036, + "learning_rate": 9.97641578190093e-06, + "loss": 0.7722, + "step": 735 + }, + { + "epoch": 0.06, + "grad_norm": 3.2674515774054473, + "learning_rate": 9.976287184274228e-06, + "loss": 0.5029, + "step": 736 + }, + { + "epoch": 0.06, + "grad_norm": 3.887167598931923, + "learning_rate": 9.976158237831449e-06, + "loss": 0.5151, + "step": 737 + }, + { + "epoch": 0.06, + "grad_norm": 4.117472870245781, + "learning_rate": 9.976028942581636e-06, + "loss": 0.9479, + "step": 738 + }, + { + "epoch": 0.06, + "grad_norm": 4.499321395252827, + "learning_rate": 9.975899298533848e-06, + "loss": 0.5442, + "step": 739 + }, + { + "epoch": 0.06, + "grad_norm": 2.571042528541903, + "learning_rate": 9.975769305697174e-06, + "loss": 0.5613, + "step": 740 + }, + { + "epoch": 0.06, + "grad_norm": 4.101831596434222, + "learning_rate": 9.975638964080727e-06, + "loss": 0.9055, + "step": 741 + }, + { + "epoch": 0.06, + "grad_norm": 6.5100041104182385, + "learning_rate": 9.975508273693643e-06, + "loss": 1.3315, + "step": 742 + }, + { + "epoch": 0.06, + "grad_norm": 3.0554279292222346, + "learning_rate": 9.975377234545083e-06, + "loss": 0.6267, + "step": 743 + }, + { + "epoch": 0.06, + "grad_norm": 5.89859289725527, + "learning_rate": 9.97524584664423e-06, + "loss": 1.6799, + "step": 744 + }, + { + "epoch": 0.06, + "grad_norm": 4.430271664446078, + "learning_rate": 9.975114110000297e-06, + "loss": 0.8808, + "step": 745 + }, + { + "epoch": 0.06, + "grad_norm": 5.731882166706876, + "learning_rate": 9.974982024622517e-06, + "loss": 1.1066, + "step": 746 + }, + { + "epoch": 0.06, + "grad_norm": 3.459257443847804, + "learning_rate": 9.974849590520148e-06, + "loss": 0.7659, + "step": 747 + }, + { + "epoch": 0.06, + "grad_norm": 4.939537799631192, + "learning_rate": 9.974716807702473e-06, + "loss": 0.8695, + "step": 748 + }, + { + "epoch": 0.06, + "grad_norm": 4.520153583674207, + "learning_rate": 9.9745836761788e-06, + "loss": 0.8502, + "step": 749 + }, + { + "epoch": 0.06, + "grad_norm": 3.8020858847345793, + "learning_rate": 9.974450195958459e-06, + "loss": 0.6382, + "step": 750 + }, + { + "epoch": 0.06, + "grad_norm": 3.8753527561070307, + "learning_rate": 9.97431636705081e-06, + "loss": 0.7334, + "step": 751 + }, + { + "epoch": 0.06, + "grad_norm": 4.723888177512531, + "learning_rate": 9.974182189465232e-06, + "loss": 0.7886, + "step": 752 + }, + { + "epoch": 0.06, + "grad_norm": 6.128138154428111, + "learning_rate": 9.974047663211131e-06, + "loss": 1.7984, + "step": 753 + }, + { + "epoch": 0.06, + "grad_norm": 5.773109744828223, + "learning_rate": 9.973912788297933e-06, + "loss": 1.306, + "step": 754 + }, + { + "epoch": 0.06, + "grad_norm": 4.00723529489966, + "learning_rate": 9.973777564735097e-06, + "loss": 0.8355, + "step": 755 + }, + { + "epoch": 0.06, + "grad_norm": 2.9820207602050672, + "learning_rate": 9.973641992532099e-06, + "loss": 0.4745, + "step": 756 + }, + { + "epoch": 0.06, + "grad_norm": 4.715125249193091, + "learning_rate": 9.973506071698444e-06, + "loss": 1.0481, + "step": 757 + }, + { + "epoch": 0.06, + "grad_norm": 2.439607792847339, + "learning_rate": 9.973369802243658e-06, + "loss": 0.4661, + "step": 758 + }, + { + "epoch": 0.06, + "grad_norm": 4.530386457711915, + "learning_rate": 9.97323318417729e-06, + "loss": 0.9343, + "step": 759 + }, + { + "epoch": 0.06, + "grad_norm": 3.740555661029211, + "learning_rate": 9.973096217508925e-06, + "loss": 0.7943, + "step": 760 + }, + { + "epoch": 0.06, + "grad_norm": 2.7744732026579766, + "learning_rate": 9.972958902248153e-06, + "loss": 0.6054, + "step": 761 + }, + { + "epoch": 0.06, + "grad_norm": 1.929079530607702, + "learning_rate": 9.972821238404607e-06, + "loss": 0.3151, + "step": 762 + }, + { + "epoch": 0.06, + "grad_norm": 4.495765793088585, + "learning_rate": 9.972683225987933e-06, + "loss": 1.0715, + "step": 763 + }, + { + "epoch": 0.06, + "grad_norm": 5.467686535685932, + "learning_rate": 9.972544865007807e-06, + "loss": 0.6673, + "step": 764 + }, + { + "epoch": 0.06, + "grad_norm": 4.146818085401882, + "learning_rate": 9.972406155473925e-06, + "loss": 0.8161, + "step": 765 + }, + { + "epoch": 0.06, + "grad_norm": 5.526183507517289, + "learning_rate": 9.972267097396013e-06, + "loss": 1.1339, + "step": 766 + }, + { + "epoch": 0.06, + "grad_norm": 5.391115260744661, + "learning_rate": 9.972127690783815e-06, + "loss": 1.6191, + "step": 767 + }, + { + "epoch": 0.06, + "grad_norm": 5.089685092999949, + "learning_rate": 9.971987935647106e-06, + "loss": 1.0668, + "step": 768 + }, + { + "epoch": 0.06, + "grad_norm": 5.520289400789311, + "learning_rate": 9.97184783199568e-06, + "loss": 0.8992, + "step": 769 + }, + { + "epoch": 0.06, + "grad_norm": 5.625631151177899, + "learning_rate": 9.97170737983936e-06, + "loss": 1.1493, + "step": 770 + }, + { + "epoch": 0.06, + "grad_norm": 4.97478303005252, + "learning_rate": 9.971566579187988e-06, + "loss": 1.1023, + "step": 771 + }, + { + "epoch": 0.06, + "grad_norm": 4.547320317403259, + "learning_rate": 9.971425430051437e-06, + "loss": 1.0619, + "step": 772 + }, + { + "epoch": 0.06, + "grad_norm": 5.636700625448609, + "learning_rate": 9.971283932439597e-06, + "loss": 1.1449, + "step": 773 + }, + { + "epoch": 0.06, + "grad_norm": 5.769386076301568, + "learning_rate": 9.971142086362392e-06, + "loss": 1.3077, + "step": 774 + }, + { + "epoch": 0.06, + "grad_norm": 4.635243672503049, + "learning_rate": 9.970999891829757e-06, + "loss": 1.1862, + "step": 775 + }, + { + "epoch": 0.06, + "grad_norm": 5.848083854458284, + "learning_rate": 9.970857348851667e-06, + "loss": 0.9362, + "step": 776 + }, + { + "epoch": 0.06, + "grad_norm": 3.4745444676936006, + "learning_rate": 9.970714457438106e-06, + "loss": 0.5107, + "step": 777 + }, + { + "epoch": 0.06, + "grad_norm": 5.489989337493255, + "learning_rate": 9.970571217599096e-06, + "loss": 0.9107, + "step": 778 + }, + { + "epoch": 0.06, + "grad_norm": 3.096443541651896, + "learning_rate": 9.970427629344676e-06, + "loss": 0.4118, + "step": 779 + }, + { + "epoch": 0.06, + "grad_norm": 4.530946936104318, + "learning_rate": 9.970283692684911e-06, + "loss": 0.8377, + "step": 780 + }, + { + "epoch": 0.06, + "grad_norm": 5.22289394368659, + "learning_rate": 9.97013940762989e-06, + "loss": 1.3088, + "step": 781 + }, + { + "epoch": 0.06, + "grad_norm": 5.453490008200965, + "learning_rate": 9.969994774189726e-06, + "loss": 1.451, + "step": 782 + }, + { + "epoch": 0.06, + "grad_norm": 4.76093404920054, + "learning_rate": 9.969849792374558e-06, + "loss": 1.0252, + "step": 783 + }, + { + "epoch": 0.06, + "grad_norm": 5.57953489925236, + "learning_rate": 9.969704462194549e-06, + "loss": 1.4084, + "step": 784 + }, + { + "epoch": 0.06, + "grad_norm": 3.4399668508195123, + "learning_rate": 9.969558783659884e-06, + "loss": 0.5534, + "step": 785 + }, + { + "epoch": 0.06, + "grad_norm": 4.7564780956274015, + "learning_rate": 9.969412756780776e-06, + "loss": 1.0238, + "step": 786 + }, + { + "epoch": 0.06, + "grad_norm": 4.826815046400798, + "learning_rate": 9.969266381567462e-06, + "loss": 0.94, + "step": 787 + }, + { + "epoch": 0.06, + "grad_norm": 4.080648998198349, + "learning_rate": 9.9691196580302e-06, + "loss": 0.897, + "step": 788 + }, + { + "epoch": 0.06, + "grad_norm": 3.6470320822489994, + "learning_rate": 9.968972586179275e-06, + "loss": 0.8945, + "step": 789 + }, + { + "epoch": 0.06, + "grad_norm": 3.408120703606715, + "learning_rate": 9.968825166024999e-06, + "loss": 0.4984, + "step": 790 + }, + { + "epoch": 0.06, + "grad_norm": 7.859349302027108, + "learning_rate": 9.968677397577701e-06, + "loss": 1.8087, + "step": 791 + }, + { + "epoch": 0.06, + "grad_norm": 2.3390940932775894, + "learning_rate": 9.968529280847743e-06, + "loss": 0.474, + "step": 792 + }, + { + "epoch": 0.06, + "grad_norm": 3.1126987657491423, + "learning_rate": 9.968380815845504e-06, + "loss": 0.6885, + "step": 793 + }, + { + "epoch": 0.06, + "grad_norm": 4.074268950114531, + "learning_rate": 9.968232002581394e-06, + "loss": 0.8573, + "step": 794 + }, + { + "epoch": 0.06, + "grad_norm": 5.5693261133819005, + "learning_rate": 9.96808284106584e-06, + "loss": 1.0474, + "step": 795 + }, + { + "epoch": 0.07, + "grad_norm": 4.392596789543054, + "learning_rate": 9.967933331309302e-06, + "loss": 0.7573, + "step": 796 + }, + { + "epoch": 0.07, + "grad_norm": 3.7715217973867903, + "learning_rate": 9.967783473322258e-06, + "loss": 0.6199, + "step": 797 + }, + { + "epoch": 0.07, + "grad_norm": 4.074185896638036, + "learning_rate": 9.96763326711521e-06, + "loss": 0.4578, + "step": 798 + }, + { + "epoch": 0.07, + "grad_norm": 4.176116951214148, + "learning_rate": 9.967482712698694e-06, + "loss": 0.6773, + "step": 799 + }, + { + "epoch": 0.07, + "grad_norm": 4.76439860919025, + "learning_rate": 9.967331810083254e-06, + "loss": 1.1874, + "step": 800 + }, + { + "epoch": 0.07, + "grad_norm": 3.582020589446579, + "learning_rate": 9.967180559279472e-06, + "loss": 0.9202, + "step": 801 + }, + { + "epoch": 0.07, + "grad_norm": 2.0977116361481984, + "learning_rate": 9.967028960297954e-06, + "loss": 0.4643, + "step": 802 + }, + { + "epoch": 0.07, + "grad_norm": 3.098250568478063, + "learning_rate": 9.966877013149319e-06, + "loss": 0.4353, + "step": 803 + }, + { + "epoch": 0.07, + "grad_norm": 4.337788120873333, + "learning_rate": 9.966724717844222e-06, + "loss": 0.7149, + "step": 804 + }, + { + "epoch": 0.07, + "grad_norm": 5.694222423633388, + "learning_rate": 9.966572074393337e-06, + "loss": 1.0502, + "step": 805 + }, + { + "epoch": 0.07, + "grad_norm": 2.776286287183273, + "learning_rate": 9.966419082807366e-06, + "loss": 0.5987, + "step": 806 + }, + { + "epoch": 0.07, + "grad_norm": 3.21966864717937, + "learning_rate": 9.96626574309703e-06, + "loss": 0.4904, + "step": 807 + }, + { + "epoch": 0.07, + "grad_norm": 5.924492528369482, + "learning_rate": 9.966112055273078e-06, + "loss": 1.1346, + "step": 808 + }, + { + "epoch": 0.07, + "grad_norm": 4.1712724961958045, + "learning_rate": 9.965958019346284e-06, + "loss": 0.6334, + "step": 809 + }, + { + "epoch": 0.07, + "grad_norm": 3.8522795318151353, + "learning_rate": 9.965803635327445e-06, + "loss": 0.7694, + "step": 810 + }, + { + "epoch": 0.07, + "grad_norm": 1.574605370309623, + "learning_rate": 9.965648903227383e-06, + "loss": 0.261, + "step": 811 + }, + { + "epoch": 0.07, + "grad_norm": 4.705029403781401, + "learning_rate": 9.965493823056943e-06, + "loss": 0.6301, + "step": 812 + }, + { + "epoch": 0.07, + "grad_norm": 5.734011167965443, + "learning_rate": 9.965338394826995e-06, + "loss": 1.5186, + "step": 813 + }, + { + "epoch": 0.07, + "grad_norm": 2.9008709218185853, + "learning_rate": 9.965182618548437e-06, + "loss": 0.5122, + "step": 814 + }, + { + "epoch": 0.07, + "grad_norm": 2.156045034777844, + "learning_rate": 9.965026494232184e-06, + "loss": 0.2935, + "step": 815 + }, + { + "epoch": 0.07, + "grad_norm": 4.339119067796793, + "learning_rate": 9.964870021889181e-06, + "loss": 1.0165, + "step": 816 + }, + { + "epoch": 0.07, + "grad_norm": 5.134637446860939, + "learning_rate": 9.964713201530399e-06, + "loss": 0.7808, + "step": 817 + }, + { + "epoch": 0.07, + "grad_norm": 5.597641511494216, + "learning_rate": 9.964556033166826e-06, + "loss": 0.9558, + "step": 818 + }, + { + "epoch": 0.07, + "grad_norm": 3.3933383102397845, + "learning_rate": 9.964398516809482e-06, + "loss": 0.7344, + "step": 819 + }, + { + "epoch": 0.07, + "grad_norm": 5.0015608056776495, + "learning_rate": 9.964240652469408e-06, + "loss": 0.6732, + "step": 820 + }, + { + "epoch": 0.07, + "grad_norm": 3.065452808847677, + "learning_rate": 9.964082440157668e-06, + "loss": 0.6751, + "step": 821 + }, + { + "epoch": 0.07, + "grad_norm": 3.916773694484184, + "learning_rate": 9.963923879885351e-06, + "loss": 0.6768, + "step": 822 + }, + { + "epoch": 0.07, + "grad_norm": 2.630978787822194, + "learning_rate": 9.963764971663575e-06, + "loss": 0.4979, + "step": 823 + }, + { + "epoch": 0.07, + "grad_norm": 6.170772583289991, + "learning_rate": 9.963605715503477e-06, + "loss": 1.2307, + "step": 824 + }, + { + "epoch": 0.07, + "grad_norm": 4.211658909815121, + "learning_rate": 9.96344611141622e-06, + "loss": 0.9501, + "step": 825 + }, + { + "epoch": 0.07, + "grad_norm": 6.606969981147839, + "learning_rate": 9.96328615941299e-06, + "loss": 1.4042, + "step": 826 + }, + { + "epoch": 0.07, + "grad_norm": 3.4391829172628925, + "learning_rate": 9.963125859505e-06, + "loss": 0.604, + "step": 827 + }, + { + "epoch": 0.07, + "grad_norm": 2.6553164080327796, + "learning_rate": 9.96296521170349e-06, + "loss": 0.4192, + "step": 828 + }, + { + "epoch": 0.07, + "grad_norm": 2.426383397582067, + "learning_rate": 9.962804216019715e-06, + "loss": 0.3705, + "step": 829 + }, + { + "epoch": 0.07, + "grad_norm": 5.210417920800824, + "learning_rate": 9.962642872464964e-06, + "loss": 1.218, + "step": 830 + }, + { + "epoch": 0.07, + "grad_norm": 5.627215980781415, + "learning_rate": 9.962481181050544e-06, + "loss": 1.1948, + "step": 831 + }, + { + "epoch": 0.07, + "grad_norm": 4.6291433620745694, + "learning_rate": 9.96231914178779e-06, + "loss": 0.9308, + "step": 832 + }, + { + "epoch": 0.07, + "grad_norm": 5.467086133481989, + "learning_rate": 9.962156754688062e-06, + "loss": 1.166, + "step": 833 + }, + { + "epoch": 0.07, + "grad_norm": 3.5665160649933267, + "learning_rate": 9.96199401976274e-06, + "loss": 0.7913, + "step": 834 + }, + { + "epoch": 0.07, + "grad_norm": 3.3983297626025517, + "learning_rate": 9.96183093702323e-06, + "loss": 0.6424, + "step": 835 + }, + { + "epoch": 0.07, + "grad_norm": 6.069701322695037, + "learning_rate": 9.961667506480967e-06, + "loss": 1.5727, + "step": 836 + }, + { + "epoch": 0.07, + "grad_norm": 6.402442563901685, + "learning_rate": 9.961503728147405e-06, + "loss": 1.4001, + "step": 837 + }, + { + "epoch": 0.07, + "grad_norm": 4.720680137374831, + "learning_rate": 9.961339602034026e-06, + "loss": 1.0963, + "step": 838 + }, + { + "epoch": 0.07, + "grad_norm": 3.0981566758358903, + "learning_rate": 9.96117512815233e-06, + "loss": 0.4581, + "step": 839 + }, + { + "epoch": 0.07, + "grad_norm": 3.2511679821777086, + "learning_rate": 9.96101030651385e-06, + "loss": 0.4581, + "step": 840 + }, + { + "epoch": 0.07, + "grad_norm": 4.790266578683795, + "learning_rate": 9.960845137130137e-06, + "loss": 0.7372, + "step": 841 + }, + { + "epoch": 0.07, + "grad_norm": 4.982491338777422, + "learning_rate": 9.96067962001277e-06, + "loss": 0.972, + "step": 842 + }, + { + "epoch": 0.07, + "grad_norm": 3.7105151457719914, + "learning_rate": 9.96051375517335e-06, + "loss": 0.8763, + "step": 843 + }, + { + "epoch": 0.07, + "grad_norm": 4.185155104774775, + "learning_rate": 9.960347542623506e-06, + "loss": 0.5442, + "step": 844 + }, + { + "epoch": 0.07, + "grad_norm": 3.156382246620666, + "learning_rate": 9.960180982374884e-06, + "loss": 0.4527, + "step": 845 + }, + { + "epoch": 0.07, + "grad_norm": 3.4217843784477244, + "learning_rate": 9.960014074439164e-06, + "loss": 0.5007, + "step": 846 + }, + { + "epoch": 0.07, + "grad_norm": 5.391806637780469, + "learning_rate": 9.959846818828041e-06, + "loss": 1.3905, + "step": 847 + }, + { + "epoch": 0.07, + "grad_norm": 3.459019607298156, + "learning_rate": 9.959679215553244e-06, + "loss": 0.6361, + "step": 848 + }, + { + "epoch": 0.07, + "grad_norm": 4.619323467927718, + "learning_rate": 9.959511264626518e-06, + "loss": 0.7784, + "step": 849 + }, + { + "epoch": 0.07, + "grad_norm": 4.728782464117148, + "learning_rate": 9.959342966059636e-06, + "loss": 1.2141, + "step": 850 + }, + { + "epoch": 0.07, + "grad_norm": 4.738622637108753, + "learning_rate": 9.959174319864395e-06, + "loss": 1.1925, + "step": 851 + }, + { + "epoch": 0.07, + "grad_norm": 5.479658138287649, + "learning_rate": 9.959005326052615e-06, + "loss": 1.0201, + "step": 852 + }, + { + "epoch": 0.07, + "grad_norm": 5.614085673476612, + "learning_rate": 9.958835984636146e-06, + "loss": 1.165, + "step": 853 + }, + { + "epoch": 0.07, + "grad_norm": 3.5184482099574366, + "learning_rate": 9.958666295626854e-06, + "loss": 0.8741, + "step": 854 + }, + { + "epoch": 0.07, + "grad_norm": 7.138268357473538, + "learning_rate": 9.958496259036635e-06, + "loss": 1.638, + "step": 855 + }, + { + "epoch": 0.07, + "grad_norm": 3.042464483331498, + "learning_rate": 9.958325874877408e-06, + "loss": 0.7941, + "step": 856 + }, + { + "epoch": 0.07, + "grad_norm": 4.184549738302962, + "learning_rate": 9.958155143161115e-06, + "loss": 0.915, + "step": 857 + }, + { + "epoch": 0.07, + "grad_norm": 5.664162701019237, + "learning_rate": 9.957984063899727e-06, + "loss": 1.1962, + "step": 858 + }, + { + "epoch": 0.07, + "grad_norm": 1.8646671334584424, + "learning_rate": 9.95781263710523e-06, + "loss": 0.308, + "step": 859 + }, + { + "epoch": 0.07, + "grad_norm": 6.639081617480649, + "learning_rate": 9.957640862789644e-06, + "loss": 1.092, + "step": 860 + }, + { + "epoch": 0.07, + "grad_norm": 3.5841789111561875, + "learning_rate": 9.95746874096501e-06, + "loss": 0.5315, + "step": 861 + }, + { + "epoch": 0.07, + "grad_norm": 4.234477795410627, + "learning_rate": 9.957296271643393e-06, + "loss": 0.769, + "step": 862 + }, + { + "epoch": 0.07, + "grad_norm": 5.313150775350866, + "learning_rate": 9.957123454836882e-06, + "loss": 0.9755, + "step": 863 + }, + { + "epoch": 0.07, + "grad_norm": 5.115863019680228, + "learning_rate": 9.95695029055759e-06, + "loss": 1.3551, + "step": 864 + }, + { + "epoch": 0.07, + "grad_norm": 5.635816693720637, + "learning_rate": 9.956776778817654e-06, + "loss": 1.222, + "step": 865 + }, + { + "epoch": 0.07, + "grad_norm": 3.4986707461344753, + "learning_rate": 9.956602919629239e-06, + "loss": 0.6475, + "step": 866 + }, + { + "epoch": 0.07, + "grad_norm": 3.7358230723201142, + "learning_rate": 9.956428713004529e-06, + "loss": 0.5906, + "step": 867 + }, + { + "epoch": 0.07, + "grad_norm": 6.528042168147081, + "learning_rate": 9.956254158955738e-06, + "loss": 1.3471, + "step": 868 + }, + { + "epoch": 0.07, + "grad_norm": 4.613230901572128, + "learning_rate": 9.9560792574951e-06, + "loss": 0.9629, + "step": 869 + }, + { + "epoch": 0.07, + "grad_norm": 5.86962822781077, + "learning_rate": 9.955904008634876e-06, + "loss": 1.3635, + "step": 870 + }, + { + "epoch": 0.07, + "grad_norm": 5.695620277128768, + "learning_rate": 9.955728412387347e-06, + "loss": 1.0614, + "step": 871 + }, + { + "epoch": 0.07, + "grad_norm": 4.794859671216474, + "learning_rate": 9.955552468764825e-06, + "loss": 1.0326, + "step": 872 + }, + { + "epoch": 0.07, + "grad_norm": 5.291780152477322, + "learning_rate": 9.955376177779641e-06, + "loss": 0.6374, + "step": 873 + }, + { + "epoch": 0.07, + "grad_norm": 3.777384653007664, + "learning_rate": 9.955199539444154e-06, + "loss": 0.5032, + "step": 874 + }, + { + "epoch": 0.07, + "grad_norm": 4.269996080253249, + "learning_rate": 9.955022553770743e-06, + "loss": 0.7474, + "step": 875 + }, + { + "epoch": 0.07, + "grad_norm": 4.790840369737783, + "learning_rate": 9.954845220771816e-06, + "loss": 0.9783, + "step": 876 + }, + { + "epoch": 0.07, + "grad_norm": 6.356400418330578, + "learning_rate": 9.954667540459802e-06, + "loss": 1.2314, + "step": 877 + }, + { + "epoch": 0.07, + "grad_norm": 5.2532838304563505, + "learning_rate": 9.954489512847156e-06, + "loss": 0.8081, + "step": 878 + }, + { + "epoch": 0.07, + "grad_norm": 3.461855774922967, + "learning_rate": 9.954311137946358e-06, + "loss": 0.5668, + "step": 879 + }, + { + "epoch": 0.07, + "grad_norm": 4.000711677503919, + "learning_rate": 9.954132415769911e-06, + "loss": 0.8234, + "step": 880 + }, + { + "epoch": 0.07, + "grad_norm": 4.01348735820718, + "learning_rate": 9.95395334633034e-06, + "loss": 0.8112, + "step": 881 + }, + { + "epoch": 0.07, + "grad_norm": 4.796895609166545, + "learning_rate": 9.953773929640202e-06, + "loss": 0.9285, + "step": 882 + }, + { + "epoch": 0.07, + "grad_norm": 3.523348062987123, + "learning_rate": 9.953594165712068e-06, + "loss": 0.3327, + "step": 883 + }, + { + "epoch": 0.07, + "grad_norm": 4.060091186298477, + "learning_rate": 9.953414054558543e-06, + "loss": 0.7849, + "step": 884 + }, + { + "epoch": 0.07, + "grad_norm": 5.531720184448185, + "learning_rate": 9.95323359619225e-06, + "loss": 1.4627, + "step": 885 + }, + { + "epoch": 0.07, + "grad_norm": 5.124767255143433, + "learning_rate": 9.953052790625835e-06, + "loss": 1.147, + "step": 886 + }, + { + "epoch": 0.07, + "grad_norm": 4.174673608668578, + "learning_rate": 9.95287163787198e-06, + "loss": 1.0221, + "step": 887 + }, + { + "epoch": 0.07, + "grad_norm": 4.759573418537057, + "learning_rate": 9.952690137943374e-06, + "loss": 0.7192, + "step": 888 + }, + { + "epoch": 0.07, + "grad_norm": 4.32649581631083, + "learning_rate": 9.952508290852746e-06, + "loss": 0.9132, + "step": 889 + }, + { + "epoch": 0.07, + "grad_norm": 4.49609553767445, + "learning_rate": 9.95232609661284e-06, + "loss": 0.9195, + "step": 890 + }, + { + "epoch": 0.07, + "grad_norm": 4.744183694556523, + "learning_rate": 9.952143555236426e-06, + "loss": 0.5019, + "step": 891 + }, + { + "epoch": 0.07, + "grad_norm": 5.346790453736447, + "learning_rate": 9.9519606667363e-06, + "loss": 1.061, + "step": 892 + }, + { + "epoch": 0.07, + "grad_norm": 3.8526268520642506, + "learning_rate": 9.951777431125285e-06, + "loss": 0.7299, + "step": 893 + }, + { + "epoch": 0.07, + "grad_norm": 4.280723702096715, + "learning_rate": 9.95159384841622e-06, + "loss": 0.8639, + "step": 894 + }, + { + "epoch": 0.07, + "grad_norm": 5.5124974801196025, + "learning_rate": 9.951409918621977e-06, + "loss": 1.5656, + "step": 895 + }, + { + "epoch": 0.07, + "grad_norm": 4.493827302226007, + "learning_rate": 9.951225641755447e-06, + "loss": 1.2438, + "step": 896 + }, + { + "epoch": 0.07, + "grad_norm": 4.500693687023412, + "learning_rate": 9.951041017829546e-06, + "loss": 1.3041, + "step": 897 + }, + { + "epoch": 0.07, + "grad_norm": 5.256144142745983, + "learning_rate": 9.950856046857218e-06, + "loss": 1.1518, + "step": 898 + }, + { + "epoch": 0.07, + "grad_norm": 5.0082769764561474, + "learning_rate": 9.950670728851428e-06, + "loss": 1.2247, + "step": 899 + }, + { + "epoch": 0.07, + "grad_norm": 3.9888071975117203, + "learning_rate": 9.950485063825164e-06, + "loss": 0.8726, + "step": 900 + }, + { + "epoch": 0.07, + "grad_norm": 3.6636039422830637, + "learning_rate": 9.950299051791442e-06, + "loss": 0.6408, + "step": 901 + }, + { + "epoch": 0.07, + "grad_norm": 5.611758379413739, + "learning_rate": 9.9501126927633e-06, + "loss": 1.0135, + "step": 902 + }, + { + "epoch": 0.07, + "grad_norm": 5.193649777684185, + "learning_rate": 9.949925986753801e-06, + "loss": 0.975, + "step": 903 + }, + { + "epoch": 0.07, + "grad_norm": 1.2652496595795037, + "learning_rate": 9.949738933776034e-06, + "loss": 0.2191, + "step": 904 + }, + { + "epoch": 0.07, + "grad_norm": 5.00394153281254, + "learning_rate": 9.949551533843108e-06, + "loss": 1.1315, + "step": 905 + }, + { + "epoch": 0.07, + "grad_norm": 4.376078409437867, + "learning_rate": 9.949363786968161e-06, + "loss": 0.8514, + "step": 906 + }, + { + "epoch": 0.07, + "grad_norm": 3.0947758925959357, + "learning_rate": 9.94917569316435e-06, + "loss": 0.4318, + "step": 907 + }, + { + "epoch": 0.07, + "grad_norm": 3.8579514061113698, + "learning_rate": 9.948987252444863e-06, + "loss": 0.8145, + "step": 908 + }, + { + "epoch": 0.07, + "grad_norm": 4.341532052086995, + "learning_rate": 9.948798464822908e-06, + "loss": 1.0982, + "step": 909 + }, + { + "epoch": 0.07, + "grad_norm": 5.061424905844848, + "learning_rate": 9.948609330311717e-06, + "loss": 1.2016, + "step": 910 + }, + { + "epoch": 0.07, + "grad_norm": 6.975840773315984, + "learning_rate": 9.94841984892455e-06, + "loss": 1.241, + "step": 911 + }, + { + "epoch": 0.07, + "grad_norm": 2.617213604441644, + "learning_rate": 9.948230020674685e-06, + "loss": 0.409, + "step": 912 + }, + { + "epoch": 0.07, + "grad_norm": 5.751113151861608, + "learning_rate": 9.948039845575433e-06, + "loss": 1.0532, + "step": 913 + }, + { + "epoch": 0.07, + "grad_norm": 4.045895048439603, + "learning_rate": 9.947849323640119e-06, + "loss": 1.2309, + "step": 914 + }, + { + "epoch": 0.07, + "grad_norm": 5.5548904890189315, + "learning_rate": 9.947658454882102e-06, + "loss": 1.2756, + "step": 915 + }, + { + "epoch": 0.07, + "grad_norm": 5.567909285488109, + "learning_rate": 9.947467239314759e-06, + "loss": 1.6017, + "step": 916 + }, + { + "epoch": 0.07, + "grad_norm": 4.686842726080642, + "learning_rate": 9.947275676951493e-06, + "loss": 0.9778, + "step": 917 + }, + { + "epoch": 0.08, + "grad_norm": 6.261416529520622, + "learning_rate": 9.947083767805736e-06, + "loss": 1.2024, + "step": 918 + }, + { + "epoch": 0.08, + "grad_norm": 2.1007870673806903, + "learning_rate": 9.946891511890934e-06, + "loss": 0.3884, + "step": 919 + }, + { + "epoch": 0.08, + "grad_norm": 3.098588168379796, + "learning_rate": 9.946698909220567e-06, + "loss": 0.5807, + "step": 920 + }, + { + "epoch": 0.08, + "grad_norm": 4.367683093526055, + "learning_rate": 9.946505959808133e-06, + "loss": 0.9718, + "step": 921 + }, + { + "epoch": 0.08, + "grad_norm": 3.9092380880050563, + "learning_rate": 9.94631266366716e-06, + "loss": 0.6939, + "step": 922 + }, + { + "epoch": 0.08, + "grad_norm": 6.2437644983208935, + "learning_rate": 9.946119020811196e-06, + "loss": 1.6135, + "step": 923 + }, + { + "epoch": 0.08, + "grad_norm": 3.559272956140806, + "learning_rate": 9.945925031253814e-06, + "loss": 0.5448, + "step": 924 + }, + { + "epoch": 0.08, + "grad_norm": 3.824143064798661, + "learning_rate": 9.945730695008611e-06, + "loss": 0.9135, + "step": 925 + }, + { + "epoch": 0.08, + "grad_norm": 4.722676137253339, + "learning_rate": 9.94553601208921e-06, + "loss": 1.153, + "step": 926 + }, + { + "epoch": 0.08, + "grad_norm": 3.396639886547055, + "learning_rate": 9.94534098250926e-06, + "loss": 0.6553, + "step": 927 + }, + { + "epoch": 0.08, + "grad_norm": 5.255260531499106, + "learning_rate": 9.945145606282427e-06, + "loss": 1.4183, + "step": 928 + }, + { + "epoch": 0.08, + "grad_norm": 3.9560741446234613, + "learning_rate": 9.944949883422409e-06, + "loss": 0.8848, + "step": 929 + }, + { + "epoch": 0.08, + "grad_norm": 4.44099530624096, + "learning_rate": 9.944753813942924e-06, + "loss": 1.0808, + "step": 930 + }, + { + "epoch": 0.08, + "grad_norm": 5.612367341298937, + "learning_rate": 9.944557397857717e-06, + "loss": 1.7301, + "step": 931 + }, + { + "epoch": 0.08, + "grad_norm": 5.774377825851068, + "learning_rate": 9.944360635180554e-06, + "loss": 1.2681, + "step": 932 + }, + { + "epoch": 0.08, + "grad_norm": 4.4851455553756505, + "learning_rate": 9.94416352592523e-06, + "loss": 1.0088, + "step": 933 + }, + { + "epoch": 0.08, + "grad_norm": 3.6727752379053444, + "learning_rate": 9.94396607010556e-06, + "loss": 0.799, + "step": 934 + }, + { + "epoch": 0.08, + "grad_norm": 5.661571569893697, + "learning_rate": 9.943768267735384e-06, + "loss": 1.2836, + "step": 935 + }, + { + "epoch": 0.08, + "grad_norm": 4.73506630653504, + "learning_rate": 9.943570118828569e-06, + "loss": 1.107, + "step": 936 + }, + { + "epoch": 0.08, + "grad_norm": 5.216817841660681, + "learning_rate": 9.943371623399001e-06, + "loss": 1.3673, + "step": 937 + }, + { + "epoch": 0.08, + "grad_norm": 4.588521825500617, + "learning_rate": 9.943172781460596e-06, + "loss": 0.9086, + "step": 938 + }, + { + "epoch": 0.08, + "grad_norm": 4.840878788819319, + "learning_rate": 9.942973593027295e-06, + "loss": 0.8751, + "step": 939 + }, + { + "epoch": 0.08, + "grad_norm": 5.159323698725976, + "learning_rate": 9.942774058113053e-06, + "loss": 0.969, + "step": 940 + }, + { + "epoch": 0.08, + "grad_norm": 4.500036698511603, + "learning_rate": 9.942574176731863e-06, + "loss": 0.9783, + "step": 941 + }, + { + "epoch": 0.08, + "grad_norm": 4.637897868620929, + "learning_rate": 9.942373948897732e-06, + "loss": 1.1139, + "step": 942 + }, + { + "epoch": 0.08, + "grad_norm": 5.260102017631009, + "learning_rate": 9.942173374624697e-06, + "loss": 1.0704, + "step": 943 + }, + { + "epoch": 0.08, + "grad_norm": 2.6440686740791235, + "learning_rate": 9.941972453926817e-06, + "loss": 0.3392, + "step": 944 + }, + { + "epoch": 0.08, + "grad_norm": 3.4619149305092707, + "learning_rate": 9.941771186818176e-06, + "loss": 0.7033, + "step": 945 + }, + { + "epoch": 0.08, + "grad_norm": 4.2853503596861815, + "learning_rate": 9.941569573312882e-06, + "loss": 0.7557, + "step": 946 + }, + { + "epoch": 0.08, + "grad_norm": 3.4535644407032398, + "learning_rate": 9.941367613425064e-06, + "loss": 0.5132, + "step": 947 + }, + { + "epoch": 0.08, + "grad_norm": 7.988273824309304, + "learning_rate": 9.941165307168883e-06, + "loss": 1.0222, + "step": 948 + }, + { + "epoch": 0.08, + "grad_norm": 4.797267330365143, + "learning_rate": 9.94096265455852e-06, + "loss": 0.8054, + "step": 949 + }, + { + "epoch": 0.08, + "grad_norm": 5.451973933847118, + "learning_rate": 9.940759655608174e-06, + "loss": 1.2013, + "step": 950 + }, + { + "epoch": 0.08, + "grad_norm": 4.937150092892459, + "learning_rate": 9.940556310332081e-06, + "loss": 1.0607, + "step": 951 + }, + { + "epoch": 0.08, + "grad_norm": 5.48017657805159, + "learning_rate": 9.94035261874449e-06, + "loss": 1.2165, + "step": 952 + }, + { + "epoch": 0.08, + "grad_norm": 6.6979240049013375, + "learning_rate": 9.940148580859684e-06, + "loss": 1.169, + "step": 953 + }, + { + "epoch": 0.08, + "grad_norm": 5.048990796591552, + "learning_rate": 9.93994419669196e-06, + "loss": 1.193, + "step": 954 + }, + { + "epoch": 0.08, + "grad_norm": 7.120682486408936, + "learning_rate": 9.939739466255646e-06, + "loss": 1.006, + "step": 955 + }, + { + "epoch": 0.08, + "grad_norm": 4.25295719086572, + "learning_rate": 9.939534389565096e-06, + "loss": 0.5601, + "step": 956 + }, + { + "epoch": 0.08, + "grad_norm": 2.9677365404338754, + "learning_rate": 9.939328966634679e-06, + "loss": 0.5047, + "step": 957 + }, + { + "epoch": 0.08, + "grad_norm": 4.720941728762174, + "learning_rate": 9.9391231974788e-06, + "loss": 1.2215, + "step": 958 + }, + { + "epoch": 0.08, + "grad_norm": 1.7910568012463464, + "learning_rate": 9.938917082111878e-06, + "loss": 0.2296, + "step": 959 + }, + { + "epoch": 0.08, + "grad_norm": 4.461023834809202, + "learning_rate": 9.938710620548363e-06, + "loss": 0.8519, + "step": 960 + }, + { + "epoch": 0.08, + "grad_norm": 3.880190533803183, + "learning_rate": 9.938503812802726e-06, + "loss": 0.8242, + "step": 961 + }, + { + "epoch": 0.08, + "grad_norm": 3.93656669188537, + "learning_rate": 9.938296658889467e-06, + "loss": 0.7574, + "step": 962 + }, + { + "epoch": 0.08, + "grad_norm": 5.2002274052516855, + "learning_rate": 9.938089158823101e-06, + "loss": 0.9766, + "step": 963 + }, + { + "epoch": 0.08, + "grad_norm": 3.8287118106163005, + "learning_rate": 9.937881312618178e-06, + "loss": 0.984, + "step": 964 + }, + { + "epoch": 0.08, + "grad_norm": 6.032433365734891, + "learning_rate": 9.937673120289264e-06, + "loss": 1.3364, + "step": 965 + }, + { + "epoch": 0.08, + "grad_norm": 1.624076949651301, + "learning_rate": 9.937464581850952e-06, + "loss": 0.2272, + "step": 966 + }, + { + "epoch": 0.08, + "grad_norm": 2.46448601887881, + "learning_rate": 9.937255697317862e-06, + "loss": 0.6506, + "step": 967 + }, + { + "epoch": 0.08, + "grad_norm": 4.687183703503713, + "learning_rate": 9.937046466704635e-06, + "loss": 1.1185, + "step": 968 + }, + { + "epoch": 0.08, + "grad_norm": 3.12344443076618, + "learning_rate": 9.936836890025934e-06, + "loss": 0.6552, + "step": 969 + }, + { + "epoch": 0.08, + "grad_norm": 4.008246852671144, + "learning_rate": 9.936626967296454e-06, + "loss": 0.3855, + "step": 970 + }, + { + "epoch": 0.08, + "grad_norm": 5.727741706894561, + "learning_rate": 9.936416698530908e-06, + "loss": 1.3848, + "step": 971 + }, + { + "epoch": 0.08, + "grad_norm": 3.0058232533086335, + "learning_rate": 9.936206083744036e-06, + "loss": 0.607, + "step": 972 + }, + { + "epoch": 0.08, + "grad_norm": 4.474463900134665, + "learning_rate": 9.935995122950597e-06, + "loss": 0.9998, + "step": 973 + }, + { + "epoch": 0.08, + "grad_norm": 2.71840698955997, + "learning_rate": 9.935783816165384e-06, + "loss": 0.4255, + "step": 974 + }, + { + "epoch": 0.08, + "grad_norm": 4.69033891376098, + "learning_rate": 9.935572163403205e-06, + "loss": 1.203, + "step": 975 + }, + { + "epoch": 0.08, + "grad_norm": 4.058363801460739, + "learning_rate": 9.935360164678897e-06, + "loss": 0.9688, + "step": 976 + }, + { + "epoch": 0.08, + "grad_norm": 4.1298057644289985, + "learning_rate": 9.93514782000732e-06, + "loss": 0.8337, + "step": 977 + }, + { + "epoch": 0.08, + "grad_norm": 3.248683450242094, + "learning_rate": 9.93493512940336e-06, + "loss": 0.718, + "step": 978 + }, + { + "epoch": 0.08, + "grad_norm": 3.004927639727986, + "learning_rate": 9.934722092881923e-06, + "loss": 0.3778, + "step": 979 + }, + { + "epoch": 0.08, + "grad_norm": 6.349465302935186, + "learning_rate": 9.934508710457944e-06, + "loss": 1.3507, + "step": 980 + }, + { + "epoch": 0.08, + "grad_norm": 3.6202710526830284, + "learning_rate": 9.934294982146379e-06, + "loss": 0.5398, + "step": 981 + }, + { + "epoch": 0.08, + "grad_norm": 4.969030239131703, + "learning_rate": 9.93408090796221e-06, + "loss": 1.1796, + "step": 982 + }, + { + "epoch": 0.08, + "grad_norm": 5.911435850540964, + "learning_rate": 9.933866487920443e-06, + "loss": 1.5327, + "step": 983 + }, + { + "epoch": 0.08, + "grad_norm": 5.304625028599958, + "learning_rate": 9.933651722036106e-06, + "loss": 1.1508, + "step": 984 + }, + { + "epoch": 0.08, + "grad_norm": 3.8855327016276147, + "learning_rate": 9.933436610324256e-06, + "loss": 0.7818, + "step": 985 + }, + { + "epoch": 0.08, + "grad_norm": 2.284180123451619, + "learning_rate": 9.93322115279997e-06, + "loss": 0.505, + "step": 986 + }, + { + "epoch": 0.08, + "grad_norm": 3.1541005528884596, + "learning_rate": 9.93300534947835e-06, + "loss": 0.7007, + "step": 987 + }, + { + "epoch": 0.08, + "grad_norm": 4.915895095443689, + "learning_rate": 9.932789200374525e-06, + "loss": 1.5247, + "step": 988 + }, + { + "epoch": 0.08, + "grad_norm": 4.442887963856919, + "learning_rate": 9.93257270550364e-06, + "loss": 0.6883, + "step": 989 + }, + { + "epoch": 0.08, + "grad_norm": 4.345430763632468, + "learning_rate": 9.93235586488088e-06, + "loss": 1.083, + "step": 990 + }, + { + "epoch": 0.08, + "grad_norm": 5.6329279194096324, + "learning_rate": 9.932138678521438e-06, + "loss": 1.4216, + "step": 991 + }, + { + "epoch": 0.08, + "grad_norm": 3.8404128106622193, + "learning_rate": 9.93192114644054e-06, + "loss": 0.5213, + "step": 992 + }, + { + "epoch": 0.08, + "grad_norm": 3.829742066244959, + "learning_rate": 9.931703268653431e-06, + "loss": 0.9371, + "step": 993 + }, + { + "epoch": 0.08, + "grad_norm": 5.9845704625784375, + "learning_rate": 9.931485045175388e-06, + "loss": 1.1885, + "step": 994 + }, + { + "epoch": 0.08, + "grad_norm": 3.3190081677989216, + "learning_rate": 9.931266476021704e-06, + "loss": 0.6295, + "step": 995 + }, + { + "epoch": 0.08, + "grad_norm": 5.910239429035196, + "learning_rate": 9.931047561207702e-06, + "loss": 1.2699, + "step": 996 + }, + { + "epoch": 0.08, + "grad_norm": 3.619849252844896, + "learning_rate": 9.930828300748726e-06, + "loss": 0.7838, + "step": 997 + }, + { + "epoch": 0.08, + "grad_norm": 5.235519631028344, + "learning_rate": 9.930608694660144e-06, + "loss": 1.2416, + "step": 998 + }, + { + "epoch": 0.08, + "grad_norm": 4.353341612052121, + "learning_rate": 9.930388742957351e-06, + "loss": 0.939, + "step": 999 + }, + { + "epoch": 0.08, + "grad_norm": 3.028762046178202, + "learning_rate": 9.930168445655766e-06, + "loss": 0.4832, + "step": 1000 + }, + { + "epoch": 0.08, + "grad_norm": 5.591371052059989, + "learning_rate": 9.929947802770827e-06, + "loss": 1.2762, + "step": 1001 + }, + { + "epoch": 0.08, + "grad_norm": 2.697433120159279, + "learning_rate": 9.929726814318004e-06, + "loss": 0.2835, + "step": 1002 + }, + { + "epoch": 0.08, + "grad_norm": 4.835924666363637, + "learning_rate": 9.929505480312785e-06, + "loss": 0.9394, + "step": 1003 + }, + { + "epoch": 0.08, + "grad_norm": 2.3460503065278155, + "learning_rate": 9.929283800770684e-06, + "loss": 0.4421, + "step": 1004 + }, + { + "epoch": 0.08, + "grad_norm": 4.235110616209546, + "learning_rate": 9.92906177570724e-06, + "loss": 0.7821, + "step": 1005 + }, + { + "epoch": 0.08, + "grad_norm": 5.546859033042787, + "learning_rate": 9.92883940513802e-06, + "loss": 1.023, + "step": 1006 + }, + { + "epoch": 0.08, + "grad_norm": 4.881188064635342, + "learning_rate": 9.928616689078605e-06, + "loss": 1.1696, + "step": 1007 + }, + { + "epoch": 0.08, + "grad_norm": 4.414790877336852, + "learning_rate": 9.928393627544612e-06, + "loss": 0.9386, + "step": 1008 + }, + { + "epoch": 0.08, + "grad_norm": 5.553507370975231, + "learning_rate": 9.928170220551671e-06, + "loss": 1.222, + "step": 1009 + }, + { + "epoch": 0.08, + "grad_norm": 3.6732592042961465, + "learning_rate": 9.927946468115448e-06, + "loss": 0.7668, + "step": 1010 + }, + { + "epoch": 0.08, + "grad_norm": 5.3890264197841145, + "learning_rate": 9.927722370251623e-06, + "loss": 0.8403, + "step": 1011 + }, + { + "epoch": 0.08, + "grad_norm": 5.415266088346729, + "learning_rate": 9.927497926975906e-06, + "loss": 1.3521, + "step": 1012 + }, + { + "epoch": 0.08, + "grad_norm": 3.0549480173712524, + "learning_rate": 9.927273138304028e-06, + "loss": 0.691, + "step": 1013 + }, + { + "epoch": 0.08, + "grad_norm": 4.4185227553692865, + "learning_rate": 9.927048004251748e-06, + "loss": 0.7587, + "step": 1014 + }, + { + "epoch": 0.08, + "grad_norm": 1.5284307873609142, + "learning_rate": 9.926822524834845e-06, + "loss": 0.2633, + "step": 1015 + }, + { + "epoch": 0.08, + "grad_norm": 5.855098239343265, + "learning_rate": 9.926596700069122e-06, + "loss": 0.8446, + "step": 1016 + }, + { + "epoch": 0.08, + "grad_norm": 4.706309854028427, + "learning_rate": 9.926370529970414e-06, + "loss": 1.1894, + "step": 1017 + }, + { + "epoch": 0.08, + "grad_norm": 4.751806675384341, + "learning_rate": 9.92614401455457e-06, + "loss": 0.7973, + "step": 1018 + }, + { + "epoch": 0.08, + "grad_norm": 3.9276380261145714, + "learning_rate": 9.925917153837469e-06, + "loss": 0.792, + "step": 1019 + }, + { + "epoch": 0.08, + "grad_norm": 2.120171889646719, + "learning_rate": 9.925689947835015e-06, + "loss": 0.412, + "step": 1020 + }, + { + "epoch": 0.08, + "grad_norm": 3.509134466109772, + "learning_rate": 9.925462396563131e-06, + "loss": 0.7342, + "step": 1021 + }, + { + "epoch": 0.08, + "grad_norm": 5.275264743662667, + "learning_rate": 9.925234500037768e-06, + "loss": 1.1055, + "step": 1022 + }, + { + "epoch": 0.08, + "grad_norm": 3.706494826186237, + "learning_rate": 9.925006258274903e-06, + "loss": 0.6293, + "step": 1023 + }, + { + "epoch": 0.08, + "grad_norm": 4.601024212740757, + "learning_rate": 9.924777671290532e-06, + "loss": 1.1678, + "step": 1024 + }, + { + "epoch": 0.08, + "grad_norm": 3.604621793451188, + "learning_rate": 9.92454873910068e-06, + "loss": 0.443, + "step": 1025 + }, + { + "epoch": 0.08, + "grad_norm": 3.552121826306486, + "learning_rate": 9.924319461721391e-06, + "loss": 0.7045, + "step": 1026 + }, + { + "epoch": 0.08, + "grad_norm": 2.5575856771387238, + "learning_rate": 9.92408983916874e-06, + "loss": 0.4696, + "step": 1027 + }, + { + "epoch": 0.08, + "grad_norm": 5.289654417823295, + "learning_rate": 9.92385987145882e-06, + "loss": 1.1612, + "step": 1028 + }, + { + "epoch": 0.08, + "grad_norm": 4.014493264742926, + "learning_rate": 9.923629558607753e-06, + "loss": 1.0631, + "step": 1029 + }, + { + "epoch": 0.08, + "grad_norm": 2.69656964091769, + "learning_rate": 9.923398900631681e-06, + "loss": 0.7831, + "step": 1030 + }, + { + "epoch": 0.08, + "grad_norm": 1.7779160202979185, + "learning_rate": 9.923167897546773e-06, + "loss": 0.4389, + "step": 1031 + }, + { + "epoch": 0.08, + "grad_norm": 4.123388231016368, + "learning_rate": 9.92293654936922e-06, + "loss": 1.0849, + "step": 1032 + }, + { + "epoch": 0.08, + "grad_norm": 4.652202727799504, + "learning_rate": 9.922704856115241e-06, + "loss": 0.7509, + "step": 1033 + }, + { + "epoch": 0.08, + "grad_norm": 1.9835718893039749, + "learning_rate": 9.922472817801075e-06, + "loss": 0.4422, + "step": 1034 + }, + { + "epoch": 0.08, + "grad_norm": 5.67324732639257, + "learning_rate": 9.922240434442988e-06, + "loss": 1.484, + "step": 1035 + }, + { + "epoch": 0.08, + "grad_norm": 3.1649733381598733, + "learning_rate": 9.922007706057266e-06, + "loss": 0.4445, + "step": 1036 + }, + { + "epoch": 0.08, + "grad_norm": 2.977160755506409, + "learning_rate": 9.921774632660226e-06, + "loss": 0.6295, + "step": 1037 + }, + { + "epoch": 0.08, + "grad_norm": 2.9528745949766493, + "learning_rate": 9.921541214268202e-06, + "loss": 0.5489, + "step": 1038 + }, + { + "epoch": 0.08, + "grad_norm": 3.978548604532141, + "learning_rate": 9.92130745089756e-06, + "loss": 0.5643, + "step": 1039 + }, + { + "epoch": 0.09, + "grad_norm": 3.542451724141814, + "learning_rate": 9.921073342564681e-06, + "loss": 0.781, + "step": 1040 + }, + { + "epoch": 0.09, + "grad_norm": 3.3721141526352976, + "learning_rate": 9.920838889285979e-06, + "loss": 0.5458, + "step": 1041 + }, + { + "epoch": 0.09, + "grad_norm": 2.7847159815128384, + "learning_rate": 9.920604091077886e-06, + "loss": 0.6578, + "step": 1042 + }, + { + "epoch": 0.09, + "grad_norm": 2.9754391055259912, + "learning_rate": 9.92036894795686e-06, + "loss": 0.613, + "step": 1043 + }, + { + "epoch": 0.09, + "grad_norm": 4.030356464555068, + "learning_rate": 9.920133459939385e-06, + "loss": 0.6361, + "step": 1044 + }, + { + "epoch": 0.09, + "grad_norm": 4.880834558249092, + "learning_rate": 9.919897627041967e-06, + "loss": 0.8937, + "step": 1045 + }, + { + "epoch": 0.09, + "grad_norm": 1.2906676140310624, + "learning_rate": 9.919661449281136e-06, + "loss": 0.288, + "step": 1046 + }, + { + "epoch": 0.09, + "grad_norm": 3.284278659461712, + "learning_rate": 9.919424926673449e-06, + "loss": 0.635, + "step": 1047 + }, + { + "epoch": 0.09, + "grad_norm": 4.82577780947523, + "learning_rate": 9.919188059235483e-06, + "loss": 0.9913, + "step": 1048 + }, + { + "epoch": 0.09, + "grad_norm": 1.758298021782454, + "learning_rate": 9.918950846983844e-06, + "loss": 0.4224, + "step": 1049 + }, + { + "epoch": 0.09, + "grad_norm": 5.663385005526883, + "learning_rate": 9.918713289935156e-06, + "loss": 1.3562, + "step": 1050 + }, + { + "epoch": 0.09, + "grad_norm": 3.746255654862295, + "learning_rate": 9.918475388106075e-06, + "loss": 0.7948, + "step": 1051 + }, + { + "epoch": 0.09, + "grad_norm": 4.1445840998076156, + "learning_rate": 9.918237141513272e-06, + "loss": 0.687, + "step": 1052 + }, + { + "epoch": 0.09, + "grad_norm": 2.729431954155433, + "learning_rate": 9.917998550173451e-06, + "loss": 0.6632, + "step": 1053 + }, + { + "epoch": 0.09, + "grad_norm": 3.7846370807915215, + "learning_rate": 9.917759614103335e-06, + "loss": 0.6811, + "step": 1054 + }, + { + "epoch": 0.09, + "grad_norm": 5.817056381788165, + "learning_rate": 9.917520333319671e-06, + "loss": 0.9256, + "step": 1055 + }, + { + "epoch": 0.09, + "grad_norm": 2.366125843476706, + "learning_rate": 9.917280707839235e-06, + "loss": 0.4559, + "step": 1056 + }, + { + "epoch": 0.09, + "grad_norm": 2.2462937271026755, + "learning_rate": 9.91704073767882e-06, + "loss": 0.5, + "step": 1057 + }, + { + "epoch": 0.09, + "grad_norm": 4.598722852680578, + "learning_rate": 9.916800422855247e-06, + "loss": 0.8063, + "step": 1058 + }, + { + "epoch": 0.09, + "grad_norm": 6.463919076864924, + "learning_rate": 9.916559763385364e-06, + "loss": 1.3674, + "step": 1059 + }, + { + "epoch": 0.09, + "grad_norm": 3.0754104305945087, + "learning_rate": 9.916318759286039e-06, + "loss": 0.5969, + "step": 1060 + }, + { + "epoch": 0.09, + "grad_norm": 5.634585435044843, + "learning_rate": 9.916077410574163e-06, + "loss": 1.1031, + "step": 1061 + }, + { + "epoch": 0.09, + "grad_norm": 3.7528878203710736, + "learning_rate": 9.915835717266658e-06, + "loss": 0.7968, + "step": 1062 + }, + { + "epoch": 0.09, + "grad_norm": 4.66701414858373, + "learning_rate": 9.91559367938046e-06, + "loss": 0.8395, + "step": 1063 + }, + { + "epoch": 0.09, + "grad_norm": 6.168129947730295, + "learning_rate": 9.91535129693254e-06, + "loss": 1.4167, + "step": 1064 + }, + { + "epoch": 0.09, + "grad_norm": 3.6400228917682234, + "learning_rate": 9.915108569939884e-06, + "loss": 0.8876, + "step": 1065 + }, + { + "epoch": 0.09, + "grad_norm": 2.3615172096238357, + "learning_rate": 9.91486549841951e-06, + "loss": 0.3196, + "step": 1066 + }, + { + "epoch": 0.09, + "grad_norm": 6.1590255485854595, + "learning_rate": 9.914622082388452e-06, + "loss": 1.7367, + "step": 1067 + }, + { + "epoch": 0.09, + "grad_norm": 1.5833506445418588, + "learning_rate": 9.914378321863776e-06, + "loss": 0.2484, + "step": 1068 + }, + { + "epoch": 0.09, + "grad_norm": 3.9244870590910623, + "learning_rate": 9.914134216862568e-06, + "loss": 0.9296, + "step": 1069 + }, + { + "epoch": 0.09, + "grad_norm": 4.118313589125725, + "learning_rate": 9.913889767401935e-06, + "loss": 1.171, + "step": 1070 + }, + { + "epoch": 0.09, + "grad_norm": 4.112181123473643, + "learning_rate": 9.913644973499017e-06, + "loss": 0.726, + "step": 1071 + }, + { + "epoch": 0.09, + "grad_norm": 2.9211444194643703, + "learning_rate": 9.913399835170969e-06, + "loss": 0.4475, + "step": 1072 + }, + { + "epoch": 0.09, + "grad_norm": 3.2639064043344446, + "learning_rate": 9.913154352434977e-06, + "loss": 0.7201, + "step": 1073 + }, + { + "epoch": 0.09, + "grad_norm": 2.9042396813762323, + "learning_rate": 9.912908525308246e-06, + "loss": 0.6094, + "step": 1074 + }, + { + "epoch": 0.09, + "grad_norm": 4.129947465502224, + "learning_rate": 9.912662353808009e-06, + "loss": 0.783, + "step": 1075 + }, + { + "epoch": 0.09, + "grad_norm": 5.217328334333304, + "learning_rate": 9.912415837951522e-06, + "loss": 0.9254, + "step": 1076 + }, + { + "epoch": 0.09, + "grad_norm": 5.371558982797147, + "learning_rate": 9.912168977756061e-06, + "loss": 0.9842, + "step": 1077 + }, + { + "epoch": 0.09, + "grad_norm": 4.104153333889138, + "learning_rate": 9.911921773238935e-06, + "loss": 0.9036, + "step": 1078 + }, + { + "epoch": 0.09, + "grad_norm": 5.151522115800109, + "learning_rate": 9.911674224417469e-06, + "loss": 1.0193, + "step": 1079 + }, + { + "epoch": 0.09, + "grad_norm": 5.746469156002981, + "learning_rate": 9.911426331309013e-06, + "loss": 1.1992, + "step": 1080 + }, + { + "epoch": 0.09, + "grad_norm": 5.258407588455397, + "learning_rate": 9.911178093930949e-06, + "loss": 1.3186, + "step": 1081 + }, + { + "epoch": 0.09, + "grad_norm": 4.235760757939858, + "learning_rate": 9.910929512300673e-06, + "loss": 1.0851, + "step": 1082 + }, + { + "epoch": 0.09, + "grad_norm": 4.453081187996927, + "learning_rate": 9.91068058643561e-06, + "loss": 0.737, + "step": 1083 + }, + { + "epoch": 0.09, + "grad_norm": 4.71536401568929, + "learning_rate": 9.91043131635321e-06, + "loss": 1.3866, + "step": 1084 + }, + { + "epoch": 0.09, + "grad_norm": 5.547313676110811, + "learning_rate": 9.910181702070944e-06, + "loss": 1.1275, + "step": 1085 + }, + { + "epoch": 0.09, + "grad_norm": 3.2128405071826625, + "learning_rate": 9.90993174360631e-06, + "loss": 0.5183, + "step": 1086 + }, + { + "epoch": 0.09, + "grad_norm": 4.711441991976482, + "learning_rate": 9.90968144097683e-06, + "loss": 0.9171, + "step": 1087 + }, + { + "epoch": 0.09, + "grad_norm": 3.8161551402219795, + "learning_rate": 9.909430794200047e-06, + "loss": 0.6247, + "step": 1088 + }, + { + "epoch": 0.09, + "grad_norm": 4.613553679871445, + "learning_rate": 9.909179803293532e-06, + "loss": 0.9455, + "step": 1089 + }, + { + "epoch": 0.09, + "grad_norm": 4.1355714333717275, + "learning_rate": 9.90892846827488e-06, + "loss": 1.2161, + "step": 1090 + }, + { + "epoch": 0.09, + "grad_norm": 5.0312869401343745, + "learning_rate": 9.908676789161701e-06, + "loss": 1.0728, + "step": 1091 + }, + { + "epoch": 0.09, + "grad_norm": 2.8283986450010477, + "learning_rate": 9.908424765971644e-06, + "loss": 0.5282, + "step": 1092 + }, + { + "epoch": 0.09, + "grad_norm": 2.571969875090206, + "learning_rate": 9.908172398722374e-06, + "loss": 0.518, + "step": 1093 + }, + { + "epoch": 0.09, + "grad_norm": 4.920075402725496, + "learning_rate": 9.907919687431578e-06, + "loss": 0.8735, + "step": 1094 + }, + { + "epoch": 0.09, + "grad_norm": 3.4311968729395437, + "learning_rate": 9.90766663211697e-06, + "loss": 0.8832, + "step": 1095 + }, + { + "epoch": 0.09, + "grad_norm": 4.73982103420756, + "learning_rate": 9.90741323279629e-06, + "loss": 1.2395, + "step": 1096 + }, + { + "epoch": 0.09, + "grad_norm": 4.256664032664819, + "learning_rate": 9.907159489487301e-06, + "loss": 0.913, + "step": 1097 + }, + { + "epoch": 0.09, + "grad_norm": 5.182530238530806, + "learning_rate": 9.906905402207786e-06, + "loss": 1.1338, + "step": 1098 + }, + { + "epoch": 0.09, + "grad_norm": 5.10551835294458, + "learning_rate": 9.90665097097556e-06, + "loss": 0.9437, + "step": 1099 + }, + { + "epoch": 0.09, + "grad_norm": 2.9631556450062604, + "learning_rate": 9.906396195808452e-06, + "loss": 0.6628, + "step": 1100 + }, + { + "epoch": 0.09, + "grad_norm": 4.946634427167888, + "learning_rate": 9.906141076724324e-06, + "loss": 1.1265, + "step": 1101 + }, + { + "epoch": 0.09, + "grad_norm": 4.369504579349011, + "learning_rate": 9.905885613741058e-06, + "loss": 0.6546, + "step": 1102 + }, + { + "epoch": 0.09, + "grad_norm": 3.9024786940759517, + "learning_rate": 9.905629806876562e-06, + "loss": 0.6797, + "step": 1103 + }, + { + "epoch": 0.09, + "grad_norm": 2.6359571441599225, + "learning_rate": 9.905373656148765e-06, + "loss": 0.3359, + "step": 1104 + }, + { + "epoch": 0.09, + "grad_norm": 5.629817569105054, + "learning_rate": 9.905117161575622e-06, + "loss": 1.1071, + "step": 1105 + }, + { + "epoch": 0.09, + "grad_norm": 4.820538418080594, + "learning_rate": 9.904860323175114e-06, + "loss": 0.6526, + "step": 1106 + }, + { + "epoch": 0.09, + "grad_norm": 2.639546998032372, + "learning_rate": 9.904603140965244e-06, + "loss": 0.3731, + "step": 1107 + }, + { + "epoch": 0.09, + "grad_norm": 5.60234746760394, + "learning_rate": 9.904345614964039e-06, + "loss": 1.2051, + "step": 1108 + }, + { + "epoch": 0.09, + "grad_norm": 3.467763997576738, + "learning_rate": 9.90408774518955e-06, + "loss": 0.498, + "step": 1109 + }, + { + "epoch": 0.09, + "grad_norm": 3.706513283245458, + "learning_rate": 9.903829531659853e-06, + "loss": 0.6948, + "step": 1110 + }, + { + "epoch": 0.09, + "grad_norm": 5.228780493202378, + "learning_rate": 9.903570974393044e-06, + "loss": 0.6231, + "step": 1111 + }, + { + "epoch": 0.09, + "grad_norm": 4.23269091182088, + "learning_rate": 9.903312073407255e-06, + "loss": 0.808, + "step": 1112 + }, + { + "epoch": 0.09, + "grad_norm": 3.5909195757076335, + "learning_rate": 9.903052828720626e-06, + "loss": 0.9084, + "step": 1113 + }, + { + "epoch": 0.09, + "grad_norm": 4.665407647763041, + "learning_rate": 9.902793240351332e-06, + "loss": 1.209, + "step": 1114 + }, + { + "epoch": 0.09, + "grad_norm": 4.181500368016984, + "learning_rate": 9.90253330831757e-06, + "loss": 0.9321, + "step": 1115 + }, + { + "epoch": 0.09, + "grad_norm": 4.007003219308571, + "learning_rate": 9.902273032637558e-06, + "loss": 0.6731, + "step": 1116 + }, + { + "epoch": 0.09, + "grad_norm": 5.249991934849754, + "learning_rate": 9.902012413329541e-06, + "loss": 0.9785, + "step": 1117 + }, + { + "epoch": 0.09, + "grad_norm": 3.636337888119685, + "learning_rate": 9.901751450411787e-06, + "loss": 0.5601, + "step": 1118 + }, + { + "epoch": 0.09, + "grad_norm": 3.845419166719933, + "learning_rate": 9.90149014390259e-06, + "loss": 0.9011, + "step": 1119 + }, + { + "epoch": 0.09, + "grad_norm": 4.201928266545957, + "learning_rate": 9.901228493820265e-06, + "loss": 1.3249, + "step": 1120 + }, + { + "epoch": 0.09, + "grad_norm": 4.578795797853542, + "learning_rate": 9.900966500183153e-06, + "loss": 1.1478, + "step": 1121 + }, + { + "epoch": 0.09, + "grad_norm": 4.6095756033428, + "learning_rate": 9.90070416300962e-06, + "loss": 0.6756, + "step": 1122 + }, + { + "epoch": 0.09, + "grad_norm": 4.911041891172558, + "learning_rate": 9.900441482318051e-06, + "loss": 1.011, + "step": 1123 + }, + { + "epoch": 0.09, + "grad_norm": 2.284294682149408, + "learning_rate": 9.900178458126862e-06, + "loss": 0.4008, + "step": 1124 + }, + { + "epoch": 0.09, + "grad_norm": 3.7326206756730653, + "learning_rate": 9.899915090454487e-06, + "loss": 0.6352, + "step": 1125 + }, + { + "epoch": 0.09, + "grad_norm": 5.428099213081668, + "learning_rate": 9.89965137931939e-06, + "loss": 0.8726, + "step": 1126 + }, + { + "epoch": 0.09, + "grad_norm": 3.3340338283313966, + "learning_rate": 9.899387324740053e-06, + "loss": 0.813, + "step": 1127 + }, + { + "epoch": 0.09, + "grad_norm": 2.746073944845481, + "learning_rate": 9.899122926734988e-06, + "loss": 0.6223, + "step": 1128 + }, + { + "epoch": 0.09, + "grad_norm": 4.590965204756769, + "learning_rate": 9.898858185322728e-06, + "loss": 0.8611, + "step": 1129 + }, + { + "epoch": 0.09, + "grad_norm": 5.894240050324802, + "learning_rate": 9.898593100521828e-06, + "loss": 0.8581, + "step": 1130 + }, + { + "epoch": 0.09, + "grad_norm": 3.7572285946477177, + "learning_rate": 9.898327672350871e-06, + "loss": 0.6341, + "step": 1131 + }, + { + "epoch": 0.09, + "grad_norm": 3.4296680237638593, + "learning_rate": 9.898061900828461e-06, + "loss": 0.7285, + "step": 1132 + }, + { + "epoch": 0.09, + "grad_norm": 4.7157870658361665, + "learning_rate": 9.897795785973227e-06, + "loss": 0.9815, + "step": 1133 + }, + { + "epoch": 0.09, + "grad_norm": 4.270557295796642, + "learning_rate": 9.897529327803825e-06, + "loss": 0.7319, + "step": 1134 + }, + { + "epoch": 0.09, + "grad_norm": 2.906704857168554, + "learning_rate": 9.897262526338933e-06, + "loss": 0.436, + "step": 1135 + }, + { + "epoch": 0.09, + "grad_norm": 2.3758294790859398, + "learning_rate": 9.896995381597248e-06, + "loss": 0.4856, + "step": 1136 + }, + { + "epoch": 0.09, + "grad_norm": 3.349959256999474, + "learning_rate": 9.8967278935975e-06, + "loss": 0.6012, + "step": 1137 + }, + { + "epoch": 0.09, + "grad_norm": 2.3521814313627614, + "learning_rate": 9.896460062358437e-06, + "loss": 0.3664, + "step": 1138 + }, + { + "epoch": 0.09, + "grad_norm": 4.938674501026468, + "learning_rate": 9.896191887898833e-06, + "loss": 0.7762, + "step": 1139 + }, + { + "epoch": 0.09, + "grad_norm": 4.476274030084168, + "learning_rate": 9.895923370237487e-06, + "loss": 0.8053, + "step": 1140 + }, + { + "epoch": 0.09, + "grad_norm": 5.477353094729364, + "learning_rate": 9.895654509393219e-06, + "loss": 1.5004, + "step": 1141 + }, + { + "epoch": 0.09, + "grad_norm": 3.7789096830346423, + "learning_rate": 9.895385305384875e-06, + "loss": 0.9327, + "step": 1142 + }, + { + "epoch": 0.09, + "grad_norm": 4.052117060974299, + "learning_rate": 9.895115758231327e-06, + "loss": 0.7655, + "step": 1143 + }, + { + "epoch": 0.09, + "grad_norm": 4.311851068484635, + "learning_rate": 9.894845867951468e-06, + "loss": 0.7396, + "step": 1144 + }, + { + "epoch": 0.09, + "grad_norm": 3.7923414529988455, + "learning_rate": 9.894575634564216e-06, + "loss": 0.7855, + "step": 1145 + }, + { + "epoch": 0.09, + "grad_norm": 3.945249613145775, + "learning_rate": 9.894305058088513e-06, + "loss": 0.9153, + "step": 1146 + }, + { + "epoch": 0.09, + "grad_norm": 4.7840576172347395, + "learning_rate": 9.894034138543325e-06, + "loss": 1.0292, + "step": 1147 + }, + { + "epoch": 0.09, + "grad_norm": 2.93236236975966, + "learning_rate": 9.893762875947643e-06, + "loss": 0.4059, + "step": 1148 + }, + { + "epoch": 0.09, + "grad_norm": 4.58890168616701, + "learning_rate": 9.893491270320482e-06, + "loss": 1.0852, + "step": 1149 + }, + { + "epoch": 0.09, + "grad_norm": 4.559899603280683, + "learning_rate": 9.89321932168088e-06, + "loss": 1.1237, + "step": 1150 + }, + { + "epoch": 0.09, + "grad_norm": 4.60214704397996, + "learning_rate": 9.892947030047897e-06, + "loss": 0.9579, + "step": 1151 + }, + { + "epoch": 0.09, + "grad_norm": 3.435288516889513, + "learning_rate": 9.892674395440623e-06, + "loss": 0.7672, + "step": 1152 + }, + { + "epoch": 0.09, + "grad_norm": 3.966391987672413, + "learning_rate": 9.892401417878166e-06, + "loss": 0.494, + "step": 1153 + }, + { + "epoch": 0.09, + "grad_norm": 3.5165349113611164, + "learning_rate": 9.89212809737966e-06, + "loss": 0.6433, + "step": 1154 + }, + { + "epoch": 0.09, + "grad_norm": 4.964943785418872, + "learning_rate": 9.891854433964268e-06, + "loss": 1.0597, + "step": 1155 + }, + { + "epoch": 0.09, + "grad_norm": 2.7599503203593763, + "learning_rate": 9.891580427651169e-06, + "loss": 0.6986, + "step": 1156 + }, + { + "epoch": 0.09, + "grad_norm": 4.787335060739045, + "learning_rate": 9.891306078459569e-06, + "loss": 1.4073, + "step": 1157 + }, + { + "epoch": 0.09, + "grad_norm": 4.2838187991127645, + "learning_rate": 9.8910313864087e-06, + "loss": 0.7886, + "step": 1158 + }, + { + "epoch": 0.09, + "grad_norm": 3.4466029728678587, + "learning_rate": 9.890756351517816e-06, + "loss": 0.6352, + "step": 1159 + }, + { + "epoch": 0.09, + "grad_norm": 4.956320751045181, + "learning_rate": 9.890480973806198e-06, + "loss": 1.3651, + "step": 1160 + }, + { + "epoch": 0.09, + "grad_norm": 5.405188553619671, + "learning_rate": 9.890205253293145e-06, + "loss": 0.6927, + "step": 1161 + }, + { + "epoch": 0.09, + "grad_norm": 4.331448218013707, + "learning_rate": 9.889929189997988e-06, + "loss": 1.1553, + "step": 1162 + }, + { + "epoch": 0.1, + "grad_norm": 4.717470241828111, + "learning_rate": 9.889652783940075e-06, + "loss": 1.2707, + "step": 1163 + }, + { + "epoch": 0.1, + "grad_norm": 3.3074133176826, + "learning_rate": 9.889376035138782e-06, + "loss": 0.8255, + "step": 1164 + }, + { + "epoch": 0.1, + "grad_norm": 2.8692669892049047, + "learning_rate": 9.889098943613508e-06, + "loss": 0.669, + "step": 1165 + }, + { + "epoch": 0.1, + "grad_norm": 3.0717586645649155, + "learning_rate": 9.888821509383676e-06, + "loss": 0.6628, + "step": 1166 + }, + { + "epoch": 0.1, + "grad_norm": 4.781072878569787, + "learning_rate": 9.888543732468732e-06, + "loss": 1.0705, + "step": 1167 + }, + { + "epoch": 0.1, + "grad_norm": 2.952219904396231, + "learning_rate": 9.888265612888145e-06, + "loss": 0.5253, + "step": 1168 + }, + { + "epoch": 0.1, + "grad_norm": 5.000881351619064, + "learning_rate": 9.887987150661415e-06, + "loss": 1.2732, + "step": 1169 + }, + { + "epoch": 0.1, + "grad_norm": 4.308052200362575, + "learning_rate": 9.887708345808059e-06, + "loss": 0.8583, + "step": 1170 + }, + { + "epoch": 0.1, + "grad_norm": 4.8577523911579465, + "learning_rate": 9.887429198347617e-06, + "loss": 1.3483, + "step": 1171 + }, + { + "epoch": 0.1, + "grad_norm": 3.898494233387562, + "learning_rate": 9.88714970829966e-06, + "loss": 0.9511, + "step": 1172 + }, + { + "epoch": 0.1, + "grad_norm": 4.794041871561278, + "learning_rate": 9.886869875683776e-06, + "loss": 0.6661, + "step": 1173 + }, + { + "epoch": 0.1, + "grad_norm": 2.9150265779956146, + "learning_rate": 9.886589700519583e-06, + "loss": 0.3034, + "step": 1174 + }, + { + "epoch": 0.1, + "grad_norm": 4.368498537449657, + "learning_rate": 9.886309182826717e-06, + "loss": 0.5748, + "step": 1175 + }, + { + "epoch": 0.1, + "grad_norm": 4.713244358718991, + "learning_rate": 9.886028322624843e-06, + "loss": 1.1596, + "step": 1176 + }, + { + "epoch": 0.1, + "grad_norm": 4.28797089150776, + "learning_rate": 9.885747119933648e-06, + "loss": 1.0055, + "step": 1177 + }, + { + "epoch": 0.1, + "grad_norm": 5.465500959031329, + "learning_rate": 9.885465574772842e-06, + "loss": 1.1441, + "step": 1178 + }, + { + "epoch": 0.1, + "grad_norm": 3.163076994144076, + "learning_rate": 9.885183687162162e-06, + "loss": 0.4571, + "step": 1179 + }, + { + "epoch": 0.1, + "grad_norm": 3.7490054918364697, + "learning_rate": 9.884901457121366e-06, + "loss": 0.6162, + "step": 1180 + }, + { + "epoch": 0.1, + "grad_norm": 4.299431696370699, + "learning_rate": 9.884618884670236e-06, + "loss": 0.9722, + "step": 1181 + }, + { + "epoch": 0.1, + "grad_norm": 3.29584811568989, + "learning_rate": 9.88433596982858e-06, + "loss": 0.6991, + "step": 1182 + }, + { + "epoch": 0.1, + "grad_norm": 4.759772526614289, + "learning_rate": 9.88405271261623e-06, + "loss": 1.0675, + "step": 1183 + }, + { + "epoch": 0.1, + "grad_norm": 1.3720315128058174, + "learning_rate": 9.883769113053039e-06, + "loss": 0.149, + "step": 1184 + }, + { + "epoch": 0.1, + "grad_norm": 3.66518376225917, + "learning_rate": 9.883485171158889e-06, + "loss": 0.8875, + "step": 1185 + }, + { + "epoch": 0.1, + "grad_norm": 5.227971159935246, + "learning_rate": 9.883200886953682e-06, + "loss": 1.3924, + "step": 1186 + }, + { + "epoch": 0.1, + "grad_norm": 6.02754405998497, + "learning_rate": 9.882916260457343e-06, + "loss": 1.6309, + "step": 1187 + }, + { + "epoch": 0.1, + "grad_norm": 4.07747958412001, + "learning_rate": 9.882631291689823e-06, + "loss": 0.8492, + "step": 1188 + }, + { + "epoch": 0.1, + "grad_norm": 3.2134197448319526, + "learning_rate": 9.882345980671102e-06, + "loss": 0.6647, + "step": 1189 + }, + { + "epoch": 0.1, + "grad_norm": 5.346689217592907, + "learning_rate": 9.882060327421174e-06, + "loss": 1.1335, + "step": 1190 + }, + { + "epoch": 0.1, + "grad_norm": 2.263765075179125, + "learning_rate": 9.881774331960065e-06, + "loss": 0.3391, + "step": 1191 + }, + { + "epoch": 0.1, + "grad_norm": 2.641436335366062, + "learning_rate": 9.88148799430782e-06, + "loss": 0.4934, + "step": 1192 + }, + { + "epoch": 0.1, + "grad_norm": 6.032362831368957, + "learning_rate": 9.881201314484513e-06, + "loss": 0.8404, + "step": 1193 + }, + { + "epoch": 0.1, + "grad_norm": 2.9748639666149304, + "learning_rate": 9.880914292510233e-06, + "loss": 0.7072, + "step": 1194 + }, + { + "epoch": 0.1, + "grad_norm": 3.8610638245991957, + "learning_rate": 9.880626928405106e-06, + "loss": 1.1508, + "step": 1195 + }, + { + "epoch": 0.1, + "grad_norm": 2.3376495404426785, + "learning_rate": 9.88033922218927e-06, + "loss": 0.4453, + "step": 1196 + }, + { + "epoch": 0.1, + "grad_norm": 5.859625847530836, + "learning_rate": 9.880051173882896e-06, + "loss": 1.4039, + "step": 1197 + }, + { + "epoch": 0.1, + "grad_norm": 4.202714929903976, + "learning_rate": 9.879762783506172e-06, + "loss": 0.9029, + "step": 1198 + }, + { + "epoch": 0.1, + "grad_norm": 4.083474294001203, + "learning_rate": 9.879474051079312e-06, + "loss": 0.9305, + "step": 1199 + }, + { + "epoch": 0.1, + "grad_norm": 4.199315543955551, + "learning_rate": 9.879184976622557e-06, + "loss": 1.0684, + "step": 1200 + }, + { + "epoch": 0.1, + "grad_norm": 4.131337594808835, + "learning_rate": 9.878895560156172e-06, + "loss": 0.9519, + "step": 1201 + }, + { + "epoch": 0.1, + "grad_norm": 3.1636870069489524, + "learning_rate": 9.87860580170044e-06, + "loss": 0.4102, + "step": 1202 + }, + { + "epoch": 0.1, + "grad_norm": 5.486130674054726, + "learning_rate": 9.878315701275671e-06, + "loss": 1.1242, + "step": 1203 + }, + { + "epoch": 0.1, + "grad_norm": 4.040979726468128, + "learning_rate": 9.878025258902204e-06, + "loss": 0.8364, + "step": 1204 + }, + { + "epoch": 0.1, + "grad_norm": 5.454378469994424, + "learning_rate": 9.877734474600395e-06, + "loss": 1.3415, + "step": 1205 + }, + { + "epoch": 0.1, + "grad_norm": 5.1153828727308985, + "learning_rate": 9.877443348390627e-06, + "loss": 1.0923, + "step": 1206 + }, + { + "epoch": 0.1, + "grad_norm": 5.157714411270862, + "learning_rate": 9.877151880293307e-06, + "loss": 1.1689, + "step": 1207 + }, + { + "epoch": 0.1, + "grad_norm": 3.205116285973389, + "learning_rate": 9.876860070328867e-06, + "loss": 0.7162, + "step": 1208 + }, + { + "epoch": 0.1, + "grad_norm": 3.125812556804762, + "learning_rate": 9.876567918517759e-06, + "loss": 0.6812, + "step": 1209 + }, + { + "epoch": 0.1, + "grad_norm": 2.136997839119193, + "learning_rate": 9.876275424880463e-06, + "loss": 0.3998, + "step": 1210 + }, + { + "epoch": 0.1, + "grad_norm": 3.9726971596754406, + "learning_rate": 9.875982589437481e-06, + "loss": 0.9348, + "step": 1211 + }, + { + "epoch": 0.1, + "grad_norm": 3.295857980061875, + "learning_rate": 9.87568941220934e-06, + "loss": 0.5959, + "step": 1212 + }, + { + "epoch": 0.1, + "grad_norm": 6.049165049333135, + "learning_rate": 9.87539589321659e-06, + "loss": 1.2373, + "step": 1213 + }, + { + "epoch": 0.1, + "grad_norm": 4.855626558039712, + "learning_rate": 9.875102032479807e-06, + "loss": 0.7339, + "step": 1214 + }, + { + "epoch": 0.1, + "grad_norm": 3.826138284182142, + "learning_rate": 9.874807830019586e-06, + "loss": 0.6591, + "step": 1215 + }, + { + "epoch": 0.1, + "grad_norm": 5.83684207885743, + "learning_rate": 9.874513285856553e-06, + "loss": 0.7731, + "step": 1216 + }, + { + "epoch": 0.1, + "grad_norm": 3.933104564450693, + "learning_rate": 9.874218400011352e-06, + "loss": 0.8364, + "step": 1217 + }, + { + "epoch": 0.1, + "grad_norm": 1.4001799848197887, + "learning_rate": 9.873923172504653e-06, + "loss": 0.18, + "step": 1218 + }, + { + "epoch": 0.1, + "grad_norm": 5.252528244067028, + "learning_rate": 9.873627603357152e-06, + "loss": 0.8766, + "step": 1219 + }, + { + "epoch": 0.1, + "grad_norm": 3.988190929247905, + "learning_rate": 9.873331692589566e-06, + "loss": 0.8168, + "step": 1220 + }, + { + "epoch": 0.1, + "grad_norm": 4.0318100849354, + "learning_rate": 9.873035440222638e-06, + "loss": 0.8017, + "step": 1221 + }, + { + "epoch": 0.1, + "grad_norm": 2.756505224009058, + "learning_rate": 9.872738846277133e-06, + "loss": 0.6414, + "step": 1222 + }, + { + "epoch": 0.1, + "grad_norm": 4.069998346474113, + "learning_rate": 9.87244191077384e-06, + "loss": 0.7499, + "step": 1223 + }, + { + "epoch": 0.1, + "grad_norm": 2.5809666241236684, + "learning_rate": 9.872144633733573e-06, + "loss": 0.739, + "step": 1224 + }, + { + "epoch": 0.1, + "grad_norm": 3.0947168680195705, + "learning_rate": 9.871847015177173e-06, + "loss": 0.5898, + "step": 1225 + }, + { + "epoch": 0.1, + "grad_norm": 3.1358159711006333, + "learning_rate": 9.8715490551255e-06, + "loss": 0.4362, + "step": 1226 + }, + { + "epoch": 0.1, + "grad_norm": 5.202307254243295, + "learning_rate": 9.871250753599438e-06, + "loss": 0.8291, + "step": 1227 + }, + { + "epoch": 0.1, + "grad_norm": 4.560667754171791, + "learning_rate": 9.870952110619899e-06, + "loss": 1.2004, + "step": 1228 + }, + { + "epoch": 0.1, + "grad_norm": 4.839448570202385, + "learning_rate": 9.870653126207813e-06, + "loss": 1.1906, + "step": 1229 + }, + { + "epoch": 0.1, + "grad_norm": 4.587820228326079, + "learning_rate": 9.870353800384142e-06, + "loss": 1.1577, + "step": 1230 + }, + { + "epoch": 0.1, + "grad_norm": 2.369399402473251, + "learning_rate": 9.870054133169864e-06, + "loss": 0.3833, + "step": 1231 + }, + { + "epoch": 0.1, + "grad_norm": 4.332705145152846, + "learning_rate": 9.869754124585988e-06, + "loss": 0.9675, + "step": 1232 + }, + { + "epoch": 0.1, + "grad_norm": 5.892316115188248, + "learning_rate": 9.869453774653539e-06, + "loss": 1.351, + "step": 1233 + }, + { + "epoch": 0.1, + "grad_norm": 5.125532292329167, + "learning_rate": 9.869153083393573e-06, + "loss": 0.9729, + "step": 1234 + }, + { + "epoch": 0.1, + "grad_norm": 3.038649194505703, + "learning_rate": 9.868852050827167e-06, + "loss": 0.6525, + "step": 1235 + }, + { + "epoch": 0.1, + "grad_norm": 2.5708776653671337, + "learning_rate": 9.868550676975422e-06, + "loss": 0.7557, + "step": 1236 + }, + { + "epoch": 0.1, + "grad_norm": 2.9247024667032946, + "learning_rate": 9.86824896185946e-06, + "loss": 0.6781, + "step": 1237 + }, + { + "epoch": 0.1, + "grad_norm": 3.0777651806845143, + "learning_rate": 9.867946905500437e-06, + "loss": 0.3866, + "step": 1238 + }, + { + "epoch": 0.1, + "grad_norm": 3.8429779511171898, + "learning_rate": 9.867644507919518e-06, + "loss": 0.7838, + "step": 1239 + }, + { + "epoch": 0.1, + "grad_norm": 2.9761613135796563, + "learning_rate": 9.867341769137902e-06, + "loss": 0.7101, + "step": 1240 + }, + { + "epoch": 0.1, + "grad_norm": 6.379783725511916, + "learning_rate": 9.867038689176814e-06, + "loss": 1.2627, + "step": 1241 + }, + { + "epoch": 0.1, + "grad_norm": 2.9441494398725743, + "learning_rate": 9.866735268057492e-06, + "loss": 0.6645, + "step": 1242 + }, + { + "epoch": 0.1, + "grad_norm": 4.3048297328129035, + "learning_rate": 9.86643150580121e-06, + "loss": 0.7912, + "step": 1243 + }, + { + "epoch": 0.1, + "grad_norm": 4.212674300762759, + "learning_rate": 9.866127402429257e-06, + "loss": 0.5092, + "step": 1244 + }, + { + "epoch": 0.1, + "grad_norm": 4.867708233325816, + "learning_rate": 9.865822957962952e-06, + "loss": 1.2898, + "step": 1245 + }, + { + "epoch": 0.1, + "grad_norm": 4.9895126564047425, + "learning_rate": 9.865518172423634e-06, + "loss": 0.8157, + "step": 1246 + }, + { + "epoch": 0.1, + "grad_norm": 3.1574728111342276, + "learning_rate": 9.865213045832664e-06, + "loss": 0.613, + "step": 1247 + }, + { + "epoch": 0.1, + "grad_norm": 4.308032016001108, + "learning_rate": 9.864907578211436e-06, + "loss": 0.9943, + "step": 1248 + }, + { + "epoch": 0.1, + "grad_norm": 3.978772964634741, + "learning_rate": 9.864601769581357e-06, + "loss": 0.5542, + "step": 1249 + }, + { + "epoch": 0.1, + "grad_norm": 3.5859393692610575, + "learning_rate": 9.864295619963866e-06, + "loss": 0.708, + "step": 1250 + }, + { + "epoch": 0.1, + "grad_norm": 3.2253481480588952, + "learning_rate": 9.863989129380421e-06, + "loss": 0.5619, + "step": 1251 + }, + { + "epoch": 0.1, + "grad_norm": 5.248573746610271, + "learning_rate": 9.863682297852506e-06, + "loss": 1.1753, + "step": 1252 + }, + { + "epoch": 0.1, + "grad_norm": 3.7692726514908133, + "learning_rate": 9.86337512540163e-06, + "loss": 0.785, + "step": 1253 + }, + { + "epoch": 0.1, + "grad_norm": 4.140073443579133, + "learning_rate": 9.863067612049321e-06, + "loss": 0.7547, + "step": 1254 + }, + { + "epoch": 0.1, + "grad_norm": 1.219012600204615, + "learning_rate": 9.862759757817138e-06, + "loss": 0.2145, + "step": 1255 + }, + { + "epoch": 0.1, + "grad_norm": 4.692923749310751, + "learning_rate": 9.862451562726659e-06, + "loss": 1.0327, + "step": 1256 + }, + { + "epoch": 0.1, + "grad_norm": 5.142416334886868, + "learning_rate": 9.862143026799486e-06, + "loss": 1.1972, + "step": 1257 + }, + { + "epoch": 0.1, + "grad_norm": 2.215708570787776, + "learning_rate": 9.861834150057247e-06, + "loss": 0.462, + "step": 1258 + }, + { + "epoch": 0.1, + "grad_norm": 4.209753111231955, + "learning_rate": 9.861524932521595e-06, + "loss": 0.9367, + "step": 1259 + }, + { + "epoch": 0.1, + "grad_norm": 3.4564385364514285, + "learning_rate": 9.8612153742142e-06, + "loss": 0.701, + "step": 1260 + }, + { + "epoch": 0.1, + "grad_norm": 3.7263112565077634, + "learning_rate": 9.860905475156765e-06, + "loss": 0.8455, + "step": 1261 + }, + { + "epoch": 0.1, + "grad_norm": 4.164659038297447, + "learning_rate": 9.86059523537101e-06, + "loss": 0.7103, + "step": 1262 + }, + { + "epoch": 0.1, + "grad_norm": 3.4221185950858892, + "learning_rate": 9.860284654878682e-06, + "loss": 0.5031, + "step": 1263 + }, + { + "epoch": 0.1, + "grad_norm": 3.8565077383119695, + "learning_rate": 9.859973733701553e-06, + "loss": 0.7379, + "step": 1264 + }, + { + "epoch": 0.1, + "grad_norm": 5.610438004102319, + "learning_rate": 9.859662471861415e-06, + "loss": 1.1086, + "step": 1265 + }, + { + "epoch": 0.1, + "grad_norm": 3.0726742543457335, + "learning_rate": 9.859350869380086e-06, + "loss": 0.6462, + "step": 1266 + }, + { + "epoch": 0.1, + "grad_norm": 5.0570579775243845, + "learning_rate": 9.859038926279412e-06, + "loss": 1.3292, + "step": 1267 + }, + { + "epoch": 0.1, + "grad_norm": 3.549728041559905, + "learning_rate": 9.858726642581253e-06, + "loss": 0.7154, + "step": 1268 + }, + { + "epoch": 0.1, + "grad_norm": 5.013422156375804, + "learning_rate": 9.858414018307503e-06, + "loss": 1.2002, + "step": 1269 + }, + { + "epoch": 0.1, + "grad_norm": 3.2470568492629837, + "learning_rate": 9.858101053480074e-06, + "loss": 0.5817, + "step": 1270 + }, + { + "epoch": 0.1, + "grad_norm": 3.8005829520202603, + "learning_rate": 9.857787748120904e-06, + "loss": 0.9949, + "step": 1271 + }, + { + "epoch": 0.1, + "grad_norm": 4.094339621776428, + "learning_rate": 9.857474102251955e-06, + "loss": 0.7578, + "step": 1272 + }, + { + "epoch": 0.1, + "grad_norm": 4.525640812991137, + "learning_rate": 9.857160115895208e-06, + "loss": 0.7344, + "step": 1273 + }, + { + "epoch": 0.1, + "grad_norm": 4.002900024886897, + "learning_rate": 9.856845789072678e-06, + "loss": 0.9144, + "step": 1274 + }, + { + "epoch": 0.1, + "grad_norm": 4.72727273650834, + "learning_rate": 9.856531121806395e-06, + "loss": 1.2397, + "step": 1275 + }, + { + "epoch": 0.1, + "grad_norm": 3.5075716444438942, + "learning_rate": 9.856216114118416e-06, + "loss": 0.5811, + "step": 1276 + }, + { + "epoch": 0.1, + "grad_norm": 4.847247096507675, + "learning_rate": 9.855900766030819e-06, + "loss": 0.9566, + "step": 1277 + }, + { + "epoch": 0.1, + "grad_norm": 2.5594697153204096, + "learning_rate": 9.855585077565714e-06, + "loss": 0.4128, + "step": 1278 + }, + { + "epoch": 0.1, + "grad_norm": 4.821428414387075, + "learning_rate": 9.855269048745227e-06, + "loss": 0.9853, + "step": 1279 + }, + { + "epoch": 0.1, + "grad_norm": 2.722415247680657, + "learning_rate": 9.854952679591508e-06, + "loss": 0.4608, + "step": 1280 + }, + { + "epoch": 0.1, + "grad_norm": 3.2581275069199576, + "learning_rate": 9.854635970126738e-06, + "loss": 0.4616, + "step": 1281 + }, + { + "epoch": 0.1, + "grad_norm": 4.600979724613779, + "learning_rate": 9.854318920373111e-06, + "loss": 0.869, + "step": 1282 + }, + { + "epoch": 0.1, + "grad_norm": 3.5221907799442045, + "learning_rate": 9.854001530352855e-06, + "loss": 0.6486, + "step": 1283 + }, + { + "epoch": 0.1, + "grad_norm": 3.6404075289214157, + "learning_rate": 9.853683800088217e-06, + "loss": 0.7015, + "step": 1284 + }, + { + "epoch": 0.11, + "grad_norm": 3.650201614991193, + "learning_rate": 9.853365729601465e-06, + "loss": 0.7917, + "step": 1285 + }, + { + "epoch": 0.11, + "grad_norm": 4.977376505203249, + "learning_rate": 9.853047318914898e-06, + "loss": 0.9086, + "step": 1286 + }, + { + "epoch": 0.11, + "grad_norm": 4.186796749935245, + "learning_rate": 9.852728568050838e-06, + "loss": 0.7049, + "step": 1287 + }, + { + "epoch": 0.11, + "grad_norm": 5.395589551710405, + "learning_rate": 9.852409477031621e-06, + "loss": 1.4272, + "step": 1288 + }, + { + "epoch": 0.11, + "grad_norm": 5.224627049772095, + "learning_rate": 9.852090045879619e-06, + "loss": 0.9042, + "step": 1289 + }, + { + "epoch": 0.11, + "grad_norm": 3.4495366289399074, + "learning_rate": 9.85177027461722e-06, + "loss": 0.6737, + "step": 1290 + }, + { + "epoch": 0.11, + "grad_norm": 5.118977930027873, + "learning_rate": 9.851450163266843e-06, + "loss": 1.3165, + "step": 1291 + }, + { + "epoch": 0.11, + "grad_norm": 2.946791989038946, + "learning_rate": 9.85112971185092e-06, + "loss": 0.5781, + "step": 1292 + }, + { + "epoch": 0.11, + "grad_norm": 3.9952593447159943, + "learning_rate": 9.850808920391917e-06, + "loss": 0.9511, + "step": 1293 + }, + { + "epoch": 0.11, + "grad_norm": 4.293125196572581, + "learning_rate": 9.850487788912319e-06, + "loss": 0.8557, + "step": 1294 + }, + { + "epoch": 0.11, + "grad_norm": 3.410250168793054, + "learning_rate": 9.850166317434638e-06, + "loss": 0.6575, + "step": 1295 + }, + { + "epoch": 0.11, + "grad_norm": 3.481123343866197, + "learning_rate": 9.849844505981405e-06, + "loss": 0.5825, + "step": 1296 + }, + { + "epoch": 0.11, + "grad_norm": 5.3876454567069585, + "learning_rate": 9.849522354575178e-06, + "loss": 1.014, + "step": 1297 + }, + { + "epoch": 0.11, + "grad_norm": 2.192263677148237, + "learning_rate": 9.84919986323854e-06, + "loss": 0.3586, + "step": 1298 + }, + { + "epoch": 0.11, + "grad_norm": 3.3341819516293008, + "learning_rate": 9.848877031994095e-06, + "loss": 0.6102, + "step": 1299 + }, + { + "epoch": 0.11, + "grad_norm": 3.2551287600956305, + "learning_rate": 9.848553860864474e-06, + "loss": 0.5703, + "step": 1300 + }, + { + "epoch": 0.11, + "grad_norm": 3.7703916932902035, + "learning_rate": 9.848230349872326e-06, + "loss": 1.0168, + "step": 1301 + }, + { + "epoch": 0.11, + "grad_norm": 4.053425404359997, + "learning_rate": 9.847906499040332e-06, + "loss": 0.9807, + "step": 1302 + }, + { + "epoch": 0.11, + "grad_norm": 5.072379777925076, + "learning_rate": 9.847582308391189e-06, + "loss": 1.0596, + "step": 1303 + }, + { + "epoch": 0.11, + "grad_norm": 3.207348153568482, + "learning_rate": 9.847257777947624e-06, + "loss": 0.5372, + "step": 1304 + }, + { + "epoch": 0.11, + "grad_norm": 5.7962584417662635, + "learning_rate": 9.846932907732383e-06, + "loss": 1.4428, + "step": 1305 + }, + { + "epoch": 0.11, + "grad_norm": 3.9570045662665962, + "learning_rate": 9.84660769776824e-06, + "loss": 0.9904, + "step": 1306 + }, + { + "epoch": 0.11, + "grad_norm": 4.37422552493355, + "learning_rate": 9.84628214807799e-06, + "loss": 1.3554, + "step": 1307 + }, + { + "epoch": 0.11, + "grad_norm": 5.344126050620689, + "learning_rate": 9.845956258684453e-06, + "loss": 1.1511, + "step": 1308 + }, + { + "epoch": 0.11, + "grad_norm": 3.878686990867304, + "learning_rate": 9.84563002961047e-06, + "loss": 0.9766, + "step": 1309 + }, + { + "epoch": 0.11, + "grad_norm": 3.2087546006341934, + "learning_rate": 9.845303460878913e-06, + "loss": 0.5185, + "step": 1310 + }, + { + "epoch": 0.11, + "grad_norm": 6.443723713364844, + "learning_rate": 9.844976552512669e-06, + "loss": 2.064, + "step": 1311 + }, + { + "epoch": 0.11, + "grad_norm": 1.9195624220575043, + "learning_rate": 9.844649304534653e-06, + "loss": 0.2766, + "step": 1312 + }, + { + "epoch": 0.11, + "grad_norm": 3.073045313682807, + "learning_rate": 9.844321716967805e-06, + "loss": 0.6306, + "step": 1313 + }, + { + "epoch": 0.11, + "grad_norm": 3.6814448226780194, + "learning_rate": 9.843993789835088e-06, + "loss": 0.8284, + "step": 1314 + }, + { + "epoch": 0.11, + "grad_norm": 4.174265067657053, + "learning_rate": 9.843665523159488e-06, + "loss": 0.9003, + "step": 1315 + }, + { + "epoch": 0.11, + "grad_norm": 4.8093957573304404, + "learning_rate": 9.843336916964012e-06, + "loss": 1.1277, + "step": 1316 + }, + { + "epoch": 0.11, + "grad_norm": 4.1501702688347955, + "learning_rate": 9.8430079712717e-06, + "loss": 0.9939, + "step": 1317 + }, + { + "epoch": 0.11, + "grad_norm": 3.180878812579681, + "learning_rate": 9.842678686105603e-06, + "loss": 0.6883, + "step": 1318 + }, + { + "epoch": 0.11, + "grad_norm": 3.9089167772488325, + "learning_rate": 9.842349061488805e-06, + "loss": 1.0128, + "step": 1319 + }, + { + "epoch": 0.11, + "grad_norm": 4.7583039485954535, + "learning_rate": 9.842019097444414e-06, + "loss": 1.3914, + "step": 1320 + }, + { + "epoch": 0.11, + "grad_norm": 5.575696603618623, + "learning_rate": 9.841688793995556e-06, + "loss": 1.1302, + "step": 1321 + }, + { + "epoch": 0.11, + "grad_norm": 2.32329831084482, + "learning_rate": 9.841358151165385e-06, + "loss": 0.4465, + "step": 1322 + }, + { + "epoch": 0.11, + "grad_norm": 4.344382550224537, + "learning_rate": 9.841027168977078e-06, + "loss": 1.4557, + "step": 1323 + }, + { + "epoch": 0.11, + "grad_norm": 3.7431485492243355, + "learning_rate": 9.840695847453833e-06, + "loss": 0.901, + "step": 1324 + }, + { + "epoch": 0.11, + "grad_norm": 4.841450732154974, + "learning_rate": 9.840364186618876e-06, + "loss": 0.8751, + "step": 1325 + }, + { + "epoch": 0.11, + "grad_norm": 0.86898813254035, + "learning_rate": 9.840032186495457e-06, + "loss": 0.1148, + "step": 1326 + }, + { + "epoch": 0.11, + "grad_norm": 4.354391039173452, + "learning_rate": 9.839699847106843e-06, + "loss": 0.9702, + "step": 1327 + }, + { + "epoch": 0.11, + "grad_norm": 3.6741596542471933, + "learning_rate": 9.839367168476333e-06, + "loss": 0.7526, + "step": 1328 + }, + { + "epoch": 0.11, + "grad_norm": 4.16879096917253, + "learning_rate": 9.839034150627245e-06, + "loss": 0.7054, + "step": 1329 + }, + { + "epoch": 0.11, + "grad_norm": 4.765591481592283, + "learning_rate": 9.838700793582925e-06, + "loss": 0.8889, + "step": 1330 + }, + { + "epoch": 0.11, + "grad_norm": 3.38145209692892, + "learning_rate": 9.838367097366734e-06, + "loss": 0.6064, + "step": 1331 + }, + { + "epoch": 0.11, + "grad_norm": 4.854729782273505, + "learning_rate": 9.83803306200207e-06, + "loss": 1.133, + "step": 1332 + }, + { + "epoch": 0.11, + "grad_norm": 5.116468267131302, + "learning_rate": 9.83769868751234e-06, + "loss": 1.1587, + "step": 1333 + }, + { + "epoch": 0.11, + "grad_norm": 6.275829287323761, + "learning_rate": 9.837363973920989e-06, + "loss": 1.2242, + "step": 1334 + }, + { + "epoch": 0.11, + "grad_norm": 2.645549953945881, + "learning_rate": 9.837028921251472e-06, + "loss": 0.4298, + "step": 1335 + }, + { + "epoch": 0.11, + "grad_norm": 4.623616570720286, + "learning_rate": 9.836693529527281e-06, + "loss": 0.7962, + "step": 1336 + }, + { + "epoch": 0.11, + "grad_norm": 4.405795229808539, + "learning_rate": 9.836357798771922e-06, + "loss": 1.0292, + "step": 1337 + }, + { + "epoch": 0.11, + "grad_norm": 3.8284035944571864, + "learning_rate": 9.83602172900893e-06, + "loss": 0.7384, + "step": 1338 + }, + { + "epoch": 0.11, + "grad_norm": 4.405684722902709, + "learning_rate": 9.83568532026186e-06, + "loss": 1.0122, + "step": 1339 + }, + { + "epoch": 0.11, + "grad_norm": 4.241662354295038, + "learning_rate": 9.835348572554296e-06, + "loss": 0.8407, + "step": 1340 + }, + { + "epoch": 0.11, + "grad_norm": 4.2711045608163785, + "learning_rate": 9.835011485909837e-06, + "loss": 0.9705, + "step": 1341 + }, + { + "epoch": 0.11, + "grad_norm": 5.518128074176903, + "learning_rate": 9.834674060352119e-06, + "loss": 1.445, + "step": 1342 + }, + { + "epoch": 0.11, + "grad_norm": 3.9578130578482336, + "learning_rate": 9.834336295904787e-06, + "loss": 0.5462, + "step": 1343 + }, + { + "epoch": 0.11, + "grad_norm": 4.230932342502631, + "learning_rate": 9.83399819259152e-06, + "loss": 0.7917, + "step": 1344 + }, + { + "epoch": 0.11, + "grad_norm": 3.904182482171247, + "learning_rate": 9.83365975043602e-06, + "loss": 0.8205, + "step": 1345 + }, + { + "epoch": 0.11, + "grad_norm": 4.052625787870512, + "learning_rate": 9.833320969462006e-06, + "loss": 0.9524, + "step": 1346 + }, + { + "epoch": 0.11, + "grad_norm": 2.9363570141585504, + "learning_rate": 9.832981849693226e-06, + "loss": 0.6908, + "step": 1347 + }, + { + "epoch": 0.11, + "grad_norm": 4.845194021761832, + "learning_rate": 9.832642391153452e-06, + "loss": 0.9929, + "step": 1348 + }, + { + "epoch": 0.11, + "grad_norm": 4.250043298251886, + "learning_rate": 9.832302593866478e-06, + "loss": 1.3876, + "step": 1349 + }, + { + "epoch": 0.11, + "grad_norm": 3.9347958176876774, + "learning_rate": 9.831962457856124e-06, + "loss": 0.897, + "step": 1350 + }, + { + "epoch": 0.11, + "grad_norm": 5.7234753317121525, + "learning_rate": 9.831621983146227e-06, + "loss": 1.291, + "step": 1351 + }, + { + "epoch": 0.11, + "grad_norm": 2.657411952759672, + "learning_rate": 9.83128116976066e-06, + "loss": 0.6593, + "step": 1352 + }, + { + "epoch": 0.11, + "grad_norm": 3.4986134293129334, + "learning_rate": 9.830940017723308e-06, + "loss": 0.6853, + "step": 1353 + }, + { + "epoch": 0.11, + "grad_norm": 3.6022819684060194, + "learning_rate": 9.830598527058083e-06, + "loss": 0.9227, + "step": 1354 + }, + { + "epoch": 0.11, + "grad_norm": 3.0415847920356756, + "learning_rate": 9.830256697788924e-06, + "loss": 0.6046, + "step": 1355 + }, + { + "epoch": 0.11, + "grad_norm": 4.222301158214487, + "learning_rate": 9.829914529939794e-06, + "loss": 0.6136, + "step": 1356 + }, + { + "epoch": 0.11, + "grad_norm": 4.498417460325489, + "learning_rate": 9.829572023534675e-06, + "loss": 0.8575, + "step": 1357 + }, + { + "epoch": 0.11, + "grad_norm": 5.3827152069059965, + "learning_rate": 9.829229178597575e-06, + "loss": 0.9866, + "step": 1358 + }, + { + "epoch": 0.11, + "grad_norm": 4.531474859604405, + "learning_rate": 9.828885995152525e-06, + "loss": 1.1519, + "step": 1359 + }, + { + "epoch": 0.11, + "grad_norm": 3.3485362372809284, + "learning_rate": 9.828542473223586e-06, + "loss": 0.4419, + "step": 1360 + }, + { + "epoch": 0.11, + "grad_norm": 4.76206496778344, + "learning_rate": 9.82819861283483e-06, + "loss": 0.9233, + "step": 1361 + }, + { + "epoch": 0.11, + "grad_norm": 6.539558811679356, + "learning_rate": 9.827854414010366e-06, + "loss": 1.7233, + "step": 1362 + }, + { + "epoch": 0.11, + "grad_norm": 5.815566721141583, + "learning_rate": 9.827509876774315e-06, + "loss": 0.6861, + "step": 1363 + }, + { + "epoch": 0.11, + "grad_norm": 5.757901899383783, + "learning_rate": 9.827165001150834e-06, + "loss": 1.1095, + "step": 1364 + }, + { + "epoch": 0.11, + "grad_norm": 2.886305903447551, + "learning_rate": 9.826819787164095e-06, + "loss": 0.44, + "step": 1365 + }, + { + "epoch": 0.11, + "grad_norm": 2.926176324529388, + "learning_rate": 9.826474234838293e-06, + "loss": 0.6576, + "step": 1366 + }, + { + "epoch": 0.11, + "grad_norm": 5.459558687055677, + "learning_rate": 9.826128344197653e-06, + "loss": 1.2845, + "step": 1367 + }, + { + "epoch": 0.11, + "grad_norm": 4.384367792526328, + "learning_rate": 9.82578211526642e-06, + "loss": 0.9965, + "step": 1368 + }, + { + "epoch": 0.11, + "grad_norm": 3.3568468480970686, + "learning_rate": 9.825435548068862e-06, + "loss": 0.6548, + "step": 1369 + }, + { + "epoch": 0.11, + "grad_norm": 4.872095489295021, + "learning_rate": 9.825088642629271e-06, + "loss": 1.259, + "step": 1370 + }, + { + "epoch": 0.11, + "grad_norm": 4.260165583117883, + "learning_rate": 9.824741398971966e-06, + "loss": 0.9074, + "step": 1371 + }, + { + "epoch": 0.11, + "grad_norm": 5.568108760017305, + "learning_rate": 9.824393817121288e-06, + "loss": 1.2388, + "step": 1372 + }, + { + "epoch": 0.11, + "grad_norm": 3.952084072267941, + "learning_rate": 9.824045897101598e-06, + "loss": 0.7787, + "step": 1373 + }, + { + "epoch": 0.11, + "grad_norm": 6.423914169789909, + "learning_rate": 9.823697638937283e-06, + "loss": 1.3275, + "step": 1374 + }, + { + "epoch": 0.11, + "grad_norm": 3.7116454839421893, + "learning_rate": 9.82334904265276e-06, + "loss": 0.7016, + "step": 1375 + }, + { + "epoch": 0.11, + "grad_norm": 2.2828456033633255, + "learning_rate": 9.823000108272458e-06, + "loss": 0.397, + "step": 1376 + }, + { + "epoch": 0.11, + "grad_norm": 3.5598140162103995, + "learning_rate": 9.82265083582084e-06, + "loss": 0.7259, + "step": 1377 + }, + { + "epoch": 0.11, + "grad_norm": 4.4343896057203995, + "learning_rate": 9.822301225322384e-06, + "loss": 0.9835, + "step": 1378 + }, + { + "epoch": 0.11, + "grad_norm": 3.0649876892148185, + "learning_rate": 9.8219512768016e-06, + "loss": 0.6065, + "step": 1379 + }, + { + "epoch": 0.11, + "grad_norm": 2.373673238593863, + "learning_rate": 9.821600990283018e-06, + "loss": 0.3573, + "step": 1380 + }, + { + "epoch": 0.11, + "grad_norm": 4.275665587049736, + "learning_rate": 9.821250365791189e-06, + "loss": 1.1575, + "step": 1381 + }, + { + "epoch": 0.11, + "grad_norm": 4.8296355375812245, + "learning_rate": 9.820899403350693e-06, + "loss": 1.0401, + "step": 1382 + }, + { + "epoch": 0.11, + "grad_norm": 4.274330476790679, + "learning_rate": 9.820548102986126e-06, + "loss": 0.7298, + "step": 1383 + }, + { + "epoch": 0.11, + "grad_norm": 3.54174599316339, + "learning_rate": 9.820196464722118e-06, + "loss": 0.5905, + "step": 1384 + }, + { + "epoch": 0.11, + "grad_norm": 4.140626710176374, + "learning_rate": 9.819844488583316e-06, + "loss": 0.9191, + "step": 1385 + }, + { + "epoch": 0.11, + "grad_norm": 1.9979127187144647, + "learning_rate": 9.819492174594391e-06, + "loss": 0.4385, + "step": 1386 + }, + { + "epoch": 0.11, + "grad_norm": 3.5767283557263294, + "learning_rate": 9.819139522780038e-06, + "loss": 0.6833, + "step": 1387 + }, + { + "epoch": 0.11, + "grad_norm": 2.796622823599924, + "learning_rate": 9.81878653316498e-06, + "loss": 0.5222, + "step": 1388 + }, + { + "epoch": 0.11, + "grad_norm": 5.207140533365846, + "learning_rate": 9.818433205773957e-06, + "loss": 1.5635, + "step": 1389 + }, + { + "epoch": 0.11, + "grad_norm": 3.294209540551426, + "learning_rate": 9.818079540631732e-06, + "loss": 0.4936, + "step": 1390 + }, + { + "epoch": 0.11, + "grad_norm": 3.614697231475761, + "learning_rate": 9.817725537763105e-06, + "loss": 0.6669, + "step": 1391 + }, + { + "epoch": 0.11, + "grad_norm": 4.692084220618287, + "learning_rate": 9.817371197192883e-06, + "loss": 1.22, + "step": 1392 + }, + { + "epoch": 0.11, + "grad_norm": 3.752787361806504, + "learning_rate": 9.817016518945904e-06, + "loss": 0.7487, + "step": 1393 + }, + { + "epoch": 0.11, + "grad_norm": 4.355449957476081, + "learning_rate": 9.816661503047032e-06, + "loss": 0.8125, + "step": 1394 + }, + { + "epoch": 0.11, + "grad_norm": 4.128535214061091, + "learning_rate": 9.816306149521149e-06, + "loss": 0.8633, + "step": 1395 + }, + { + "epoch": 0.11, + "grad_norm": 3.5263385505708444, + "learning_rate": 9.815950458393166e-06, + "loss": 0.932, + "step": 1396 + }, + { + "epoch": 0.11, + "grad_norm": 4.112181680433037, + "learning_rate": 9.815594429688015e-06, + "loss": 0.9579, + "step": 1397 + }, + { + "epoch": 0.11, + "grad_norm": 6.645733663557989, + "learning_rate": 9.815238063430655e-06, + "loss": 1.6471, + "step": 1398 + }, + { + "epoch": 0.11, + "grad_norm": 3.1645593081286645, + "learning_rate": 9.81488135964606e-06, + "loss": 0.5587, + "step": 1399 + }, + { + "epoch": 0.11, + "grad_norm": 2.099895044129971, + "learning_rate": 9.814524318359235e-06, + "loss": 0.3865, + "step": 1400 + }, + { + "epoch": 0.11, + "grad_norm": 3.3226922818944016, + "learning_rate": 9.81416693959521e-06, + "loss": 0.6881, + "step": 1401 + }, + { + "epoch": 0.11, + "grad_norm": 4.234298859212859, + "learning_rate": 9.813809223379035e-06, + "loss": 0.7269, + "step": 1402 + }, + { + "epoch": 0.11, + "grad_norm": 4.8633112567954715, + "learning_rate": 9.813451169735781e-06, + "loss": 1.3484, + "step": 1403 + }, + { + "epoch": 0.11, + "grad_norm": 5.249634519658076, + "learning_rate": 9.813092778690549e-06, + "loss": 1.1644, + "step": 1404 + }, + { + "epoch": 0.11, + "grad_norm": 1.9572073819275848, + "learning_rate": 9.81273405026846e-06, + "loss": 0.3299, + "step": 1405 + }, + { + "epoch": 0.11, + "grad_norm": 3.362360907175296, + "learning_rate": 9.81237498449466e-06, + "loss": 0.374, + "step": 1406 + }, + { + "epoch": 0.12, + "grad_norm": 3.593558326449794, + "learning_rate": 9.812015581394316e-06, + "loss": 0.8593, + "step": 1407 + }, + { + "epoch": 0.12, + "grad_norm": 5.463677247987641, + "learning_rate": 9.811655840992621e-06, + "loss": 1.2366, + "step": 1408 + }, + { + "epoch": 0.12, + "grad_norm": 4.32660737602931, + "learning_rate": 9.811295763314793e-06, + "loss": 0.9649, + "step": 1409 + }, + { + "epoch": 0.12, + "grad_norm": 2.616667588532532, + "learning_rate": 9.810935348386071e-06, + "loss": 0.6619, + "step": 1410 + }, + { + "epoch": 0.12, + "grad_norm": 3.5831618295972625, + "learning_rate": 9.810574596231717e-06, + "loss": 0.9357, + "step": 1411 + }, + { + "epoch": 0.12, + "grad_norm": 3.107669813938388, + "learning_rate": 9.810213506877021e-06, + "loss": 0.4273, + "step": 1412 + }, + { + "epoch": 0.12, + "grad_norm": 4.507060032810869, + "learning_rate": 9.80985208034729e-06, + "loss": 1.0523, + "step": 1413 + }, + { + "epoch": 0.12, + "grad_norm": 4.778321036304939, + "learning_rate": 9.809490316667864e-06, + "loss": 1.1447, + "step": 1414 + }, + { + "epoch": 0.12, + "grad_norm": 3.34623077029894, + "learning_rate": 9.809128215864096e-06, + "loss": 0.4395, + "step": 1415 + }, + { + "epoch": 0.12, + "grad_norm": 5.176105641856664, + "learning_rate": 9.80876577796137e-06, + "loss": 1.2677, + "step": 1416 + }, + { + "epoch": 0.12, + "grad_norm": 2.7614486871129627, + "learning_rate": 9.808403002985089e-06, + "loss": 0.5742, + "step": 1417 + }, + { + "epoch": 0.12, + "grad_norm": 4.620567580821391, + "learning_rate": 9.808039890960687e-06, + "loss": 0.7539, + "step": 1418 + }, + { + "epoch": 0.12, + "grad_norm": 2.6874968173664113, + "learning_rate": 9.807676441913611e-06, + "loss": 0.4365, + "step": 1419 + }, + { + "epoch": 0.12, + "grad_norm": 4.541845640368986, + "learning_rate": 9.80731265586934e-06, + "loss": 0.9808, + "step": 1420 + }, + { + "epoch": 0.12, + "grad_norm": 7.123859209612732, + "learning_rate": 9.806948532853373e-06, + "loss": 1.6505, + "step": 1421 + }, + { + "epoch": 0.12, + "grad_norm": 4.068838188795062, + "learning_rate": 9.806584072891234e-06, + "loss": 0.8492, + "step": 1422 + }, + { + "epoch": 0.12, + "grad_norm": 4.347877420451936, + "learning_rate": 9.80621927600847e-06, + "loss": 0.7774, + "step": 1423 + }, + { + "epoch": 0.12, + "grad_norm": 5.895252251166327, + "learning_rate": 9.805854142230652e-06, + "loss": 1.3487, + "step": 1424 + }, + { + "epoch": 0.12, + "grad_norm": 2.267175221265885, + "learning_rate": 9.805488671583372e-06, + "loss": 0.3279, + "step": 1425 + }, + { + "epoch": 0.12, + "grad_norm": 5.513202645820931, + "learning_rate": 9.80512286409225e-06, + "loss": 0.8466, + "step": 1426 + }, + { + "epoch": 0.12, + "grad_norm": 4.423363671659631, + "learning_rate": 9.80475671978293e-06, + "loss": 0.7481, + "step": 1427 + }, + { + "epoch": 0.12, + "grad_norm": 5.215881653445727, + "learning_rate": 9.804390238681072e-06, + "loss": 1.2405, + "step": 1428 + }, + { + "epoch": 0.12, + "grad_norm": 1.3735270370900747, + "learning_rate": 9.804023420812368e-06, + "loss": 0.2282, + "step": 1429 + }, + { + "epoch": 0.12, + "grad_norm": 3.832715397250428, + "learning_rate": 9.803656266202528e-06, + "loss": 0.7381, + "step": 1430 + }, + { + "epoch": 0.12, + "grad_norm": 4.34847388319156, + "learning_rate": 9.80328877487729e-06, + "loss": 0.8616, + "step": 1431 + }, + { + "epoch": 0.12, + "grad_norm": 4.360354162722593, + "learning_rate": 9.802920946862413e-06, + "loss": 0.9632, + "step": 1432 + }, + { + "epoch": 0.12, + "grad_norm": 4.954599355658937, + "learning_rate": 9.802552782183682e-06, + "loss": 1.3467, + "step": 1433 + }, + { + "epoch": 0.12, + "grad_norm": 4.53958496323328, + "learning_rate": 9.802184280866898e-06, + "loss": 1.011, + "step": 1434 + }, + { + "epoch": 0.12, + "grad_norm": 5.434045092648887, + "learning_rate": 9.801815442937897e-06, + "loss": 1.3907, + "step": 1435 + }, + { + "epoch": 0.12, + "grad_norm": 3.6551014885191977, + "learning_rate": 9.80144626842253e-06, + "loss": 0.8479, + "step": 1436 + }, + { + "epoch": 0.12, + "grad_norm": 3.6799219829730974, + "learning_rate": 9.801076757346677e-06, + "loss": 0.9056, + "step": 1437 + }, + { + "epoch": 0.12, + "grad_norm": 5.763827428568593, + "learning_rate": 9.800706909736237e-06, + "loss": 1.3745, + "step": 1438 + }, + { + "epoch": 0.12, + "grad_norm": 4.907388733876458, + "learning_rate": 9.800336725617136e-06, + "loss": 1.1375, + "step": 1439 + }, + { + "epoch": 0.12, + "grad_norm": 5.367826033412014, + "learning_rate": 9.79996620501532e-06, + "loss": 1.2576, + "step": 1440 + }, + { + "epoch": 0.12, + "grad_norm": 5.256085637336182, + "learning_rate": 9.799595347956764e-06, + "loss": 1.3381, + "step": 1441 + }, + { + "epoch": 0.12, + "grad_norm": 3.1059074280871393, + "learning_rate": 9.79922415446746e-06, + "loss": 0.7916, + "step": 1442 + }, + { + "epoch": 0.12, + "grad_norm": 4.329508448979159, + "learning_rate": 9.798852624573432e-06, + "loss": 0.9223, + "step": 1443 + }, + { + "epoch": 0.12, + "grad_norm": 3.8386067688568892, + "learning_rate": 9.79848075830072e-06, + "loss": 0.5353, + "step": 1444 + }, + { + "epoch": 0.12, + "grad_norm": 4.926444301146794, + "learning_rate": 9.798108555675388e-06, + "loss": 0.9793, + "step": 1445 + }, + { + "epoch": 0.12, + "grad_norm": 5.056237294722397, + "learning_rate": 9.797736016723527e-06, + "loss": 0.8944, + "step": 1446 + }, + { + "epoch": 0.12, + "grad_norm": 2.7066633631885098, + "learning_rate": 9.797363141471252e-06, + "loss": 0.5836, + "step": 1447 + }, + { + "epoch": 0.12, + "grad_norm": 1.5653471821648113, + "learning_rate": 9.796989929944699e-06, + "loss": 0.2457, + "step": 1448 + }, + { + "epoch": 0.12, + "grad_norm": 4.171565444891341, + "learning_rate": 9.796616382170028e-06, + "loss": 1.3466, + "step": 1449 + }, + { + "epoch": 0.12, + "grad_norm": 5.8421857025707835, + "learning_rate": 9.796242498173425e-06, + "loss": 1.4725, + "step": 1450 + }, + { + "epoch": 0.12, + "grad_norm": 1.1240727792271181, + "learning_rate": 9.795868277981095e-06, + "loss": 0.1594, + "step": 1451 + }, + { + "epoch": 0.12, + "grad_norm": 3.2964525728779703, + "learning_rate": 9.795493721619271e-06, + "loss": 0.7353, + "step": 1452 + }, + { + "epoch": 0.12, + "grad_norm": 3.073169173071545, + "learning_rate": 9.795118829114205e-06, + "loss": 0.5664, + "step": 1453 + }, + { + "epoch": 0.12, + "grad_norm": 4.921321716329079, + "learning_rate": 9.79474360049218e-06, + "loss": 0.8704, + "step": 1454 + }, + { + "epoch": 0.12, + "grad_norm": 4.326213766913278, + "learning_rate": 9.794368035779496e-06, + "loss": 0.6958, + "step": 1455 + }, + { + "epoch": 0.12, + "grad_norm": 5.670985328015172, + "learning_rate": 9.793992135002476e-06, + "loss": 1.1243, + "step": 1456 + }, + { + "epoch": 0.12, + "grad_norm": 5.921221306493759, + "learning_rate": 9.793615898187473e-06, + "loss": 1.4096, + "step": 1457 + }, + { + "epoch": 0.12, + "grad_norm": 4.652832396557077, + "learning_rate": 9.793239325360855e-06, + "loss": 1.0637, + "step": 1458 + }, + { + "epoch": 0.12, + "grad_norm": 3.7588457089114455, + "learning_rate": 9.792862416549021e-06, + "loss": 0.6599, + "step": 1459 + }, + { + "epoch": 0.12, + "grad_norm": 2.13310023429643, + "learning_rate": 9.792485171778389e-06, + "loss": 0.3432, + "step": 1460 + }, + { + "epoch": 0.12, + "grad_norm": 3.7093931763306576, + "learning_rate": 9.792107591075406e-06, + "loss": 0.8885, + "step": 1461 + }, + { + "epoch": 0.12, + "grad_norm": 3.21724107443512, + "learning_rate": 9.791729674466534e-06, + "loss": 0.6405, + "step": 1462 + }, + { + "epoch": 0.12, + "grad_norm": 5.805812423017493, + "learning_rate": 9.791351421978269e-06, + "loss": 1.5855, + "step": 1463 + }, + { + "epoch": 0.12, + "grad_norm": 3.1123464014190056, + "learning_rate": 9.790972833637118e-06, + "loss": 0.3719, + "step": 1464 + }, + { + "epoch": 0.12, + "grad_norm": 2.7958239629043624, + "learning_rate": 9.790593909469623e-06, + "loss": 0.4876, + "step": 1465 + }, + { + "epoch": 0.12, + "grad_norm": 4.37017892442726, + "learning_rate": 9.790214649502343e-06, + "loss": 1.2543, + "step": 1466 + }, + { + "epoch": 0.12, + "grad_norm": 3.9239535160362804, + "learning_rate": 9.789835053761865e-06, + "loss": 0.8073, + "step": 1467 + }, + { + "epoch": 0.12, + "grad_norm": 4.555633739107286, + "learning_rate": 9.789455122274793e-06, + "loss": 1.0265, + "step": 1468 + }, + { + "epoch": 0.12, + "grad_norm": 4.869738418739143, + "learning_rate": 9.789074855067761e-06, + "loss": 1.1461, + "step": 1469 + }, + { + "epoch": 0.12, + "grad_norm": 3.8066477350594425, + "learning_rate": 9.788694252167424e-06, + "loss": 0.6731, + "step": 1470 + }, + { + "epoch": 0.12, + "grad_norm": 2.9441755106990506, + "learning_rate": 9.788313313600462e-06, + "loss": 0.6184, + "step": 1471 + }, + { + "epoch": 0.12, + "grad_norm": 3.9209845177925553, + "learning_rate": 9.787932039393574e-06, + "loss": 0.7527, + "step": 1472 + }, + { + "epoch": 0.12, + "grad_norm": 3.5411521898176197, + "learning_rate": 9.787550429573487e-06, + "loss": 0.6581, + "step": 1473 + }, + { + "epoch": 0.12, + "grad_norm": 4.588217832482472, + "learning_rate": 9.78716848416695e-06, + "loss": 1.0138, + "step": 1474 + }, + { + "epoch": 0.12, + "grad_norm": 3.908534892628834, + "learning_rate": 9.786786203200738e-06, + "loss": 1.1612, + "step": 1475 + }, + { + "epoch": 0.12, + "grad_norm": 3.6631338624314527, + "learning_rate": 9.786403586701643e-06, + "loss": 0.9183, + "step": 1476 + }, + { + "epoch": 0.12, + "grad_norm": 5.24287728720431, + "learning_rate": 9.786020634696489e-06, + "loss": 1.4494, + "step": 1477 + }, + { + "epoch": 0.12, + "grad_norm": 5.355388150801754, + "learning_rate": 9.785637347212117e-06, + "loss": 1.1414, + "step": 1478 + }, + { + "epoch": 0.12, + "grad_norm": 5.574465500450339, + "learning_rate": 9.785253724275394e-06, + "loss": 0.948, + "step": 1479 + }, + { + "epoch": 0.12, + "grad_norm": 3.8645555875140043, + "learning_rate": 9.78486976591321e-06, + "loss": 1.345, + "step": 1480 + }, + { + "epoch": 0.12, + "grad_norm": 4.0871937132066, + "learning_rate": 9.784485472152479e-06, + "loss": 0.7189, + "step": 1481 + }, + { + "epoch": 0.12, + "grad_norm": 4.754552849267925, + "learning_rate": 9.784100843020139e-06, + "loss": 1.1957, + "step": 1482 + }, + { + "epoch": 0.12, + "grad_norm": 3.231759982859807, + "learning_rate": 9.783715878543149e-06, + "loss": 0.5092, + "step": 1483 + }, + { + "epoch": 0.12, + "grad_norm": 3.9490285385860218, + "learning_rate": 9.783330578748497e-06, + "loss": 0.8589, + "step": 1484 + }, + { + "epoch": 0.12, + "grad_norm": 3.2352180597670688, + "learning_rate": 9.782944943663187e-06, + "loss": 0.6635, + "step": 1485 + }, + { + "epoch": 0.12, + "grad_norm": 2.184309454595811, + "learning_rate": 9.782558973314254e-06, + "loss": 0.41, + "step": 1486 + }, + { + "epoch": 0.12, + "grad_norm": 4.461287240576972, + "learning_rate": 9.782172667728748e-06, + "loss": 1.5705, + "step": 1487 + }, + { + "epoch": 0.12, + "grad_norm": 2.9641417501280753, + "learning_rate": 9.781786026933752e-06, + "loss": 0.3327, + "step": 1488 + }, + { + "epoch": 0.12, + "grad_norm": 2.7825304552740193, + "learning_rate": 9.781399050956364e-06, + "loss": 0.5386, + "step": 1489 + }, + { + "epoch": 0.12, + "grad_norm": 5.403847438502365, + "learning_rate": 9.781011739823715e-06, + "loss": 0.9521, + "step": 1490 + }, + { + "epoch": 0.12, + "grad_norm": 2.746341743522744, + "learning_rate": 9.780624093562944e-06, + "loss": 0.8428, + "step": 1491 + }, + { + "epoch": 0.12, + "grad_norm": 4.220237980809123, + "learning_rate": 9.780236112201235e-06, + "loss": 0.9644, + "step": 1492 + }, + { + "epoch": 0.12, + "grad_norm": 1.9812539161163594, + "learning_rate": 9.779847795765776e-06, + "loss": 0.3911, + "step": 1493 + }, + { + "epoch": 0.12, + "grad_norm": 3.3386624342418085, + "learning_rate": 9.779459144283788e-06, + "loss": 0.8402, + "step": 1494 + }, + { + "epoch": 0.12, + "grad_norm": 3.488310991588463, + "learning_rate": 9.779070157782515e-06, + "loss": 0.7609, + "step": 1495 + }, + { + "epoch": 0.12, + "grad_norm": 2.805387887932308, + "learning_rate": 9.778680836289222e-06, + "loss": 0.5685, + "step": 1496 + }, + { + "epoch": 0.12, + "grad_norm": 3.6968205267244985, + "learning_rate": 9.778291179831201e-06, + "loss": 0.9227, + "step": 1497 + }, + { + "epoch": 0.12, + "grad_norm": 3.1847305569872852, + "learning_rate": 9.777901188435762e-06, + "loss": 0.6676, + "step": 1498 + }, + { + "epoch": 0.12, + "grad_norm": 4.209245828758046, + "learning_rate": 9.777510862130242e-06, + "loss": 1.0329, + "step": 1499 + }, + { + "epoch": 0.12, + "grad_norm": 3.037052184090017, + "learning_rate": 9.777120200942004e-06, + "loss": 0.6037, + "step": 1500 + }, + { + "epoch": 0.12, + "grad_norm": 2.672518759031155, + "learning_rate": 9.77672920489843e-06, + "loss": 0.5179, + "step": 1501 + }, + { + "epoch": 0.12, + "grad_norm": 3.6466801675576104, + "learning_rate": 9.776337874026926e-06, + "loss": 0.7091, + "step": 1502 + }, + { + "epoch": 0.12, + "grad_norm": 5.411916495301089, + "learning_rate": 9.775946208354924e-06, + "loss": 1.0544, + "step": 1503 + }, + { + "epoch": 0.12, + "grad_norm": 4.060051829029946, + "learning_rate": 9.775554207909879e-06, + "loss": 0.9035, + "step": 1504 + }, + { + "epoch": 0.12, + "grad_norm": 3.07841372268714, + "learning_rate": 9.775161872719268e-06, + "loss": 0.7718, + "step": 1505 + }, + { + "epoch": 0.12, + "grad_norm": 3.4655063677047417, + "learning_rate": 9.77476920281059e-06, + "loss": 0.6952, + "step": 1506 + }, + { + "epoch": 0.12, + "grad_norm": 4.008834167325511, + "learning_rate": 9.77437619821137e-06, + "loss": 0.4992, + "step": 1507 + }, + { + "epoch": 0.12, + "grad_norm": 5.618330088193883, + "learning_rate": 9.773982858949157e-06, + "loss": 0.8979, + "step": 1508 + }, + { + "epoch": 0.12, + "grad_norm": 4.409746682340359, + "learning_rate": 9.773589185051522e-06, + "loss": 1.0297, + "step": 1509 + }, + { + "epoch": 0.12, + "grad_norm": 4.7310014040891994, + "learning_rate": 9.77319517654606e-06, + "loss": 1.2448, + "step": 1510 + }, + { + "epoch": 0.12, + "grad_norm": 3.9619728725901817, + "learning_rate": 9.77280083346039e-06, + "loss": 0.5431, + "step": 1511 + }, + { + "epoch": 0.12, + "grad_norm": 4.205388385519706, + "learning_rate": 9.772406155822152e-06, + "loss": 0.773, + "step": 1512 + }, + { + "epoch": 0.12, + "grad_norm": 3.5967486073724833, + "learning_rate": 9.772011143659013e-06, + "loss": 0.805, + "step": 1513 + }, + { + "epoch": 0.12, + "grad_norm": 3.3716225881025577, + "learning_rate": 9.77161579699866e-06, + "loss": 0.5813, + "step": 1514 + }, + { + "epoch": 0.12, + "grad_norm": 2.0722565727923925, + "learning_rate": 9.771220115868805e-06, + "loss": 0.3726, + "step": 1515 + }, + { + "epoch": 0.12, + "grad_norm": 5.1345323779270755, + "learning_rate": 9.770824100297185e-06, + "loss": 1.6538, + "step": 1516 + }, + { + "epoch": 0.12, + "grad_norm": 4.337949991423968, + "learning_rate": 9.770427750311557e-06, + "loss": 1.219, + "step": 1517 + }, + { + "epoch": 0.12, + "grad_norm": 4.0302519135261194, + "learning_rate": 9.770031065939707e-06, + "loss": 1.1707, + "step": 1518 + }, + { + "epoch": 0.12, + "grad_norm": 4.807159466088738, + "learning_rate": 9.769634047209437e-06, + "loss": 1.3163, + "step": 1519 + }, + { + "epoch": 0.12, + "grad_norm": 3.4439580054897303, + "learning_rate": 9.769236694148579e-06, + "loss": 0.4129, + "step": 1520 + }, + { + "epoch": 0.12, + "grad_norm": 4.4429376768979445, + "learning_rate": 9.768839006784983e-06, + "loss": 0.9111, + "step": 1521 + }, + { + "epoch": 0.12, + "grad_norm": 4.056315866869216, + "learning_rate": 9.768440985146529e-06, + "loss": 0.9465, + "step": 1522 + }, + { + "epoch": 0.12, + "grad_norm": 2.043077745088097, + "learning_rate": 9.768042629261112e-06, + "loss": 0.4399, + "step": 1523 + }, + { + "epoch": 0.12, + "grad_norm": 3.3192423340805197, + "learning_rate": 9.767643939156658e-06, + "loss": 0.5992, + "step": 1524 + }, + { + "epoch": 0.12, + "grad_norm": 2.724102578486323, + "learning_rate": 9.767244914861114e-06, + "loss": 0.7851, + "step": 1525 + }, + { + "epoch": 0.12, + "grad_norm": 3.960661209852135, + "learning_rate": 9.766845556402447e-06, + "loss": 0.853, + "step": 1526 + }, + { + "epoch": 0.12, + "grad_norm": 3.132502240121899, + "learning_rate": 9.766445863808652e-06, + "loss": 0.8542, + "step": 1527 + }, + { + "epoch": 0.12, + "grad_norm": 4.464818791787063, + "learning_rate": 9.766045837107745e-06, + "loss": 0.6607, + "step": 1528 + }, + { + "epoch": 0.12, + "grad_norm": 2.9151827316498458, + "learning_rate": 9.765645476327768e-06, + "loss": 0.6962, + "step": 1529 + }, + { + "epoch": 0.13, + "grad_norm": 5.397921765887368, + "learning_rate": 9.765244781496783e-06, + "loss": 1.2484, + "step": 1530 + }, + { + "epoch": 0.13, + "grad_norm": 4.647934850478616, + "learning_rate": 9.764843752642876e-06, + "loss": 1.2478, + "step": 1531 + }, + { + "epoch": 0.13, + "grad_norm": 2.3059946410621546, + "learning_rate": 9.76444238979416e-06, + "loss": 0.4285, + "step": 1532 + }, + { + "epoch": 0.13, + "grad_norm": 3.8796632727172904, + "learning_rate": 9.764040692978767e-06, + "loss": 1.0721, + "step": 1533 + }, + { + "epoch": 0.13, + "grad_norm": 4.532900000889822, + "learning_rate": 9.763638662224854e-06, + "loss": 0.8132, + "step": 1534 + }, + { + "epoch": 0.13, + "grad_norm": 2.640399343376495, + "learning_rate": 9.763236297560603e-06, + "loss": 0.7806, + "step": 1535 + }, + { + "epoch": 0.13, + "grad_norm": 3.1418755348948055, + "learning_rate": 9.762833599014219e-06, + "loss": 0.5318, + "step": 1536 + }, + { + "epoch": 0.13, + "grad_norm": 4.586786253813165, + "learning_rate": 9.762430566613925e-06, + "loss": 0.9011, + "step": 1537 + }, + { + "epoch": 0.13, + "grad_norm": 2.8280442642483803, + "learning_rate": 9.762027200387974e-06, + "loss": 0.5989, + "step": 1538 + }, + { + "epoch": 0.13, + "grad_norm": 3.916761256288874, + "learning_rate": 9.761623500364643e-06, + "loss": 1.084, + "step": 1539 + }, + { + "epoch": 0.13, + "grad_norm": 3.1449798089696324, + "learning_rate": 9.761219466572227e-06, + "loss": 0.8712, + "step": 1540 + }, + { + "epoch": 0.13, + "grad_norm": 3.0647901636925643, + "learning_rate": 9.760815099039045e-06, + "loss": 0.6931, + "step": 1541 + }, + { + "epoch": 0.13, + "grad_norm": 4.645838963488648, + "learning_rate": 9.760410397793445e-06, + "loss": 1.1038, + "step": 1542 + }, + { + "epoch": 0.13, + "grad_norm": 4.045311760642244, + "learning_rate": 9.760005362863794e-06, + "loss": 0.8072, + "step": 1543 + }, + { + "epoch": 0.13, + "grad_norm": 3.1799765839075955, + "learning_rate": 9.759599994278481e-06, + "loss": 0.6287, + "step": 1544 + }, + { + "epoch": 0.13, + "grad_norm": 3.814868528193712, + "learning_rate": 9.759194292065925e-06, + "loss": 0.8318, + "step": 1545 + }, + { + "epoch": 0.13, + "grad_norm": 5.554545532458709, + "learning_rate": 9.758788256254559e-06, + "loss": 1.5856, + "step": 1546 + }, + { + "epoch": 0.13, + "grad_norm": 4.196075660470384, + "learning_rate": 9.758381886872848e-06, + "loss": 0.958, + "step": 1547 + }, + { + "epoch": 0.13, + "grad_norm": 1.0660407285250322, + "learning_rate": 9.757975183949275e-06, + "loss": 0.1906, + "step": 1548 + }, + { + "epoch": 0.13, + "grad_norm": 4.842620018989324, + "learning_rate": 9.757568147512347e-06, + "loss": 1.0968, + "step": 1549 + }, + { + "epoch": 0.13, + "grad_norm": 4.776522241854018, + "learning_rate": 9.757160777590597e-06, + "loss": 0.9168, + "step": 1550 + }, + { + "epoch": 0.13, + "grad_norm": 5.409198973311985, + "learning_rate": 9.756753074212581e-06, + "loss": 1.8172, + "step": 1551 + }, + { + "epoch": 0.13, + "grad_norm": 3.1874379937760207, + "learning_rate": 9.756345037406876e-06, + "loss": 0.5296, + "step": 1552 + }, + { + "epoch": 0.13, + "grad_norm": 4.191603881891276, + "learning_rate": 9.755936667202084e-06, + "loss": 1.0245, + "step": 1553 + }, + { + "epoch": 0.13, + "grad_norm": 3.9092492108199655, + "learning_rate": 9.755527963626828e-06, + "loss": 0.8931, + "step": 1554 + }, + { + "epoch": 0.13, + "grad_norm": 4.429272274630198, + "learning_rate": 9.755118926709757e-06, + "loss": 0.6388, + "step": 1555 + }, + { + "epoch": 0.13, + "grad_norm": 2.9498809549310594, + "learning_rate": 9.754709556479546e-06, + "loss": 0.6189, + "step": 1556 + }, + { + "epoch": 0.13, + "grad_norm": 4.36139121660757, + "learning_rate": 9.754299852964886e-06, + "loss": 0.8474, + "step": 1557 + }, + { + "epoch": 0.13, + "grad_norm": 5.489779862984102, + "learning_rate": 9.753889816194498e-06, + "loss": 0.7664, + "step": 1558 + }, + { + "epoch": 0.13, + "grad_norm": 4.718174282412993, + "learning_rate": 9.753479446197122e-06, + "loss": 0.9349, + "step": 1559 + }, + { + "epoch": 0.13, + "grad_norm": 4.191140162879363, + "learning_rate": 9.753068743001525e-06, + "loss": 0.5427, + "step": 1560 + }, + { + "epoch": 0.13, + "grad_norm": 3.3149836151747762, + "learning_rate": 9.752657706636494e-06, + "loss": 0.4822, + "step": 1561 + }, + { + "epoch": 0.13, + "grad_norm": 6.986035668649894, + "learning_rate": 9.752246337130841e-06, + "loss": 0.9761, + "step": 1562 + }, + { + "epoch": 0.13, + "grad_norm": 2.6889568491015656, + "learning_rate": 9.751834634513404e-06, + "loss": 0.6154, + "step": 1563 + }, + { + "epoch": 0.13, + "grad_norm": 3.1432527726484345, + "learning_rate": 9.751422598813037e-06, + "loss": 0.5122, + "step": 1564 + }, + { + "epoch": 0.13, + "grad_norm": 3.3942869359327195, + "learning_rate": 9.751010230058624e-06, + "loss": 0.7232, + "step": 1565 + }, + { + "epoch": 0.13, + "grad_norm": 2.3100380532903135, + "learning_rate": 9.750597528279072e-06, + "loss": 0.5781, + "step": 1566 + }, + { + "epoch": 0.13, + "grad_norm": 5.7308147712033355, + "learning_rate": 9.750184493503306e-06, + "loss": 1.463, + "step": 1567 + }, + { + "epoch": 0.13, + "grad_norm": 2.8363396138495314, + "learning_rate": 9.749771125760281e-06, + "loss": 0.623, + "step": 1568 + }, + { + "epoch": 0.13, + "grad_norm": 4.067053229913921, + "learning_rate": 9.74935742507897e-06, + "loss": 0.8514, + "step": 1569 + }, + { + "epoch": 0.13, + "grad_norm": 4.582862830286583, + "learning_rate": 9.748943391488374e-06, + "loss": 1.1104, + "step": 1570 + }, + { + "epoch": 0.13, + "grad_norm": 3.7410002095176718, + "learning_rate": 9.748529025017512e-06, + "loss": 0.649, + "step": 1571 + }, + { + "epoch": 0.13, + "grad_norm": 3.59880905028096, + "learning_rate": 9.748114325695433e-06, + "loss": 0.8195, + "step": 1572 + }, + { + "epoch": 0.13, + "grad_norm": 4.108543123341428, + "learning_rate": 9.747699293551203e-06, + "loss": 0.6387, + "step": 1573 + }, + { + "epoch": 0.13, + "grad_norm": 4.757953511646958, + "learning_rate": 9.747283928613915e-06, + "loss": 1.1136, + "step": 1574 + }, + { + "epoch": 0.13, + "grad_norm": 3.635770244508862, + "learning_rate": 9.746868230912683e-06, + "loss": 0.6198, + "step": 1575 + }, + { + "epoch": 0.13, + "grad_norm": 2.8130446795283235, + "learning_rate": 9.746452200476647e-06, + "loss": 0.6654, + "step": 1576 + }, + { + "epoch": 0.13, + "grad_norm": 4.229086650299244, + "learning_rate": 9.74603583733497e-06, + "loss": 0.8575, + "step": 1577 + }, + { + "epoch": 0.13, + "grad_norm": 4.877197051217789, + "learning_rate": 9.745619141516833e-06, + "loss": 0.7622, + "step": 1578 + }, + { + "epoch": 0.13, + "grad_norm": 4.832704704597798, + "learning_rate": 9.745202113051448e-06, + "loss": 1.442, + "step": 1579 + }, + { + "epoch": 0.13, + "grad_norm": 4.83291036043407, + "learning_rate": 9.744784751968046e-06, + "loss": 1.1734, + "step": 1580 + }, + { + "epoch": 0.13, + "grad_norm": 5.766668118698729, + "learning_rate": 9.744367058295881e-06, + "loss": 1.0921, + "step": 1581 + }, + { + "epoch": 0.13, + "grad_norm": 3.7452921009672466, + "learning_rate": 9.743949032064235e-06, + "loss": 0.8583, + "step": 1582 + }, + { + "epoch": 0.13, + "grad_norm": 3.5324161965107184, + "learning_rate": 9.743530673302407e-06, + "loss": 0.615, + "step": 1583 + }, + { + "epoch": 0.13, + "grad_norm": 4.96485593458066, + "learning_rate": 9.743111982039721e-06, + "loss": 1.2621, + "step": 1584 + }, + { + "epoch": 0.13, + "grad_norm": 3.404087488808251, + "learning_rate": 9.742692958305528e-06, + "loss": 0.6942, + "step": 1585 + }, + { + "epoch": 0.13, + "grad_norm": 5.4955877818953915, + "learning_rate": 9.742273602129201e-06, + "loss": 1.2858, + "step": 1586 + }, + { + "epoch": 0.13, + "grad_norm": 5.213928992509133, + "learning_rate": 9.741853913540132e-06, + "loss": 1.1965, + "step": 1587 + }, + { + "epoch": 0.13, + "grad_norm": 3.190641501195003, + "learning_rate": 9.74143389256774e-06, + "loss": 0.7693, + "step": 1588 + }, + { + "epoch": 0.13, + "grad_norm": 3.70693513925525, + "learning_rate": 9.741013539241467e-06, + "loss": 0.6004, + "step": 1589 + }, + { + "epoch": 0.13, + "grad_norm": 3.8013575804530837, + "learning_rate": 9.740592853590776e-06, + "loss": 0.7414, + "step": 1590 + }, + { + "epoch": 0.13, + "grad_norm": 3.125539338774779, + "learning_rate": 9.740171835645158e-06, + "loss": 0.7715, + "step": 1591 + }, + { + "epoch": 0.13, + "grad_norm": 1.8688054107462326, + "learning_rate": 9.739750485434126e-06, + "loss": 0.4439, + "step": 1592 + }, + { + "epoch": 0.13, + "grad_norm": 3.3397393731341016, + "learning_rate": 9.739328802987209e-06, + "loss": 0.6942, + "step": 1593 + }, + { + "epoch": 0.13, + "grad_norm": 1.3694727764828176, + "learning_rate": 9.738906788333971e-06, + "loss": 0.2148, + "step": 1594 + }, + { + "epoch": 0.13, + "grad_norm": 2.298421483587479, + "learning_rate": 9.738484441503989e-06, + "loss": 0.4418, + "step": 1595 + }, + { + "epoch": 0.13, + "grad_norm": 5.0267036380367305, + "learning_rate": 9.738061762526871e-06, + "loss": 0.6679, + "step": 1596 + }, + { + "epoch": 0.13, + "grad_norm": 4.346525534672905, + "learning_rate": 9.737638751432244e-06, + "loss": 0.7686, + "step": 1597 + }, + { + "epoch": 0.13, + "grad_norm": 5.250728585271418, + "learning_rate": 9.737215408249757e-06, + "loss": 1.2046, + "step": 1598 + }, + { + "epoch": 0.13, + "grad_norm": 4.722914124483091, + "learning_rate": 9.736791733009087e-06, + "loss": 0.9467, + "step": 1599 + }, + { + "epoch": 0.13, + "grad_norm": 5.902253643134758, + "learning_rate": 9.736367725739932e-06, + "loss": 1.284, + "step": 1600 + }, + { + "epoch": 0.13, + "grad_norm": 4.683735001274794, + "learning_rate": 9.735943386472012e-06, + "loss": 1.4378, + "step": 1601 + }, + { + "epoch": 0.13, + "grad_norm": 4.20889208970552, + "learning_rate": 9.73551871523507e-06, + "loss": 0.7832, + "step": 1602 + }, + { + "epoch": 0.13, + "grad_norm": 3.0226202867704597, + "learning_rate": 9.735093712058876e-06, + "loss": 0.5958, + "step": 1603 + }, + { + "epoch": 0.13, + "grad_norm": 2.3969939040986987, + "learning_rate": 9.73466837697322e-06, + "loss": 0.3861, + "step": 1604 + }, + { + "epoch": 0.13, + "grad_norm": 2.404473157535664, + "learning_rate": 9.734242710007918e-06, + "loss": 0.2924, + "step": 1605 + }, + { + "epoch": 0.13, + "grad_norm": 2.237250360859097, + "learning_rate": 9.733816711192803e-06, + "loss": 0.3559, + "step": 1606 + }, + { + "epoch": 0.13, + "grad_norm": 2.6456248141718715, + "learning_rate": 9.733390380557739e-06, + "loss": 0.4604, + "step": 1607 + }, + { + "epoch": 0.13, + "grad_norm": 4.761247734859934, + "learning_rate": 9.732963718132609e-06, + "loss": 1.1434, + "step": 1608 + }, + { + "epoch": 0.13, + "grad_norm": 3.707625528974655, + "learning_rate": 9.73253672394732e-06, + "loss": 0.8052, + "step": 1609 + }, + { + "epoch": 0.13, + "grad_norm": 3.0816386035698535, + "learning_rate": 9.732109398031804e-06, + "loss": 0.5118, + "step": 1610 + }, + { + "epoch": 0.13, + "grad_norm": 5.1755136938926745, + "learning_rate": 9.731681740416012e-06, + "loss": 1.3818, + "step": 1611 + }, + { + "epoch": 0.13, + "grad_norm": 2.469392922393817, + "learning_rate": 9.731253751129923e-06, + "loss": 0.3968, + "step": 1612 + }, + { + "epoch": 0.13, + "grad_norm": 3.3961943593224126, + "learning_rate": 9.730825430203536e-06, + "loss": 0.7839, + "step": 1613 + }, + { + "epoch": 0.13, + "grad_norm": 4.042124620832441, + "learning_rate": 9.730396777666875e-06, + "loss": 0.8608, + "step": 1614 + }, + { + "epoch": 0.13, + "grad_norm": 5.685312270195895, + "learning_rate": 9.729967793549987e-06, + "loss": 0.968, + "step": 1615 + }, + { + "epoch": 0.13, + "grad_norm": 4.494774242668652, + "learning_rate": 9.729538477882942e-06, + "loss": 0.896, + "step": 1616 + }, + { + "epoch": 0.13, + "grad_norm": 4.48705535782037, + "learning_rate": 9.729108830695833e-06, + "loss": 1.0858, + "step": 1617 + }, + { + "epoch": 0.13, + "grad_norm": 2.079053530713893, + "learning_rate": 9.728678852018775e-06, + "loss": 0.3735, + "step": 1618 + }, + { + "epoch": 0.13, + "grad_norm": 3.522198117801278, + "learning_rate": 9.728248541881909e-06, + "loss": 0.7978, + "step": 1619 + }, + { + "epoch": 0.13, + "grad_norm": 3.6673248515297145, + "learning_rate": 9.727817900315399e-06, + "loss": 0.7038, + "step": 1620 + }, + { + "epoch": 0.13, + "grad_norm": 2.3868342089022954, + "learning_rate": 9.727386927349427e-06, + "loss": 0.4164, + "step": 1621 + }, + { + "epoch": 0.13, + "grad_norm": 4.096149179181914, + "learning_rate": 9.726955623014207e-06, + "loss": 0.7268, + "step": 1622 + }, + { + "epoch": 0.13, + "grad_norm": 4.1892463351865405, + "learning_rate": 9.72652398733997e-06, + "loss": 0.999, + "step": 1623 + }, + { + "epoch": 0.13, + "grad_norm": 4.111134640592722, + "learning_rate": 9.72609202035697e-06, + "loss": 0.8955, + "step": 1624 + }, + { + "epoch": 0.13, + "grad_norm": 3.619508841593312, + "learning_rate": 9.725659722095488e-06, + "loss": 0.8145, + "step": 1625 + }, + { + "epoch": 0.13, + "grad_norm": 3.284117540605217, + "learning_rate": 9.725227092585824e-06, + "loss": 0.7883, + "step": 1626 + }, + { + "epoch": 0.13, + "grad_norm": 1.3053035878084933, + "learning_rate": 9.724794131858309e-06, + "loss": 0.2017, + "step": 1627 + }, + { + "epoch": 0.13, + "grad_norm": 4.787383239731883, + "learning_rate": 9.724360839943285e-06, + "loss": 0.7844, + "step": 1628 + }, + { + "epoch": 0.13, + "grad_norm": 2.566361724602944, + "learning_rate": 9.723927216871127e-06, + "loss": 0.623, + "step": 1629 + }, + { + "epoch": 0.13, + "grad_norm": 1.4600819399127305, + "learning_rate": 9.723493262672229e-06, + "loss": 0.2211, + "step": 1630 + }, + { + "epoch": 0.13, + "grad_norm": 5.008762197460526, + "learning_rate": 9.723058977377012e-06, + "loss": 1.1448, + "step": 1631 + }, + { + "epoch": 0.13, + "grad_norm": 6.186521654143597, + "learning_rate": 9.722624361015913e-06, + "loss": 1.0704, + "step": 1632 + }, + { + "epoch": 0.13, + "grad_norm": 5.255225640204374, + "learning_rate": 9.7221894136194e-06, + "loss": 1.6081, + "step": 1633 + }, + { + "epoch": 0.13, + "grad_norm": 6.173596004247248, + "learning_rate": 9.72175413521796e-06, + "loss": 1.2399, + "step": 1634 + }, + { + "epoch": 0.13, + "grad_norm": 2.385440698351537, + "learning_rate": 9.721318525842105e-06, + "loss": 0.467, + "step": 1635 + }, + { + "epoch": 0.13, + "grad_norm": 4.450899861913443, + "learning_rate": 9.720882585522368e-06, + "loss": 0.7838, + "step": 1636 + }, + { + "epoch": 0.13, + "grad_norm": 4.001750450563991, + "learning_rate": 9.720446314289309e-06, + "loss": 0.7508, + "step": 1637 + }, + { + "epoch": 0.13, + "grad_norm": 2.809705500312421, + "learning_rate": 9.720009712173504e-06, + "loss": 0.7776, + "step": 1638 + }, + { + "epoch": 0.13, + "grad_norm": 3.0593428034581707, + "learning_rate": 9.719572779205562e-06, + "loss": 0.4057, + "step": 1639 + }, + { + "epoch": 0.13, + "grad_norm": 6.409124956173562, + "learning_rate": 9.719135515416107e-06, + "loss": 1.4807, + "step": 1640 + }, + { + "epoch": 0.13, + "grad_norm": 2.9182037085275163, + "learning_rate": 9.71869792083579e-06, + "loss": 0.5006, + "step": 1641 + }, + { + "epoch": 0.13, + "grad_norm": 1.8982368084936982, + "learning_rate": 9.718259995495284e-06, + "loss": 0.3411, + "step": 1642 + }, + { + "epoch": 0.13, + "grad_norm": 3.6506744249878254, + "learning_rate": 9.717821739425286e-06, + "loss": 0.6956, + "step": 1643 + }, + { + "epoch": 0.13, + "grad_norm": 3.7552476156750574, + "learning_rate": 9.717383152656518e-06, + "loss": 1.046, + "step": 1644 + }, + { + "epoch": 0.13, + "grad_norm": 2.0608675911426966, + "learning_rate": 9.71694423521972e-06, + "loss": 0.4058, + "step": 1645 + }, + { + "epoch": 0.13, + "grad_norm": 5.030380844163063, + "learning_rate": 9.716504987145658e-06, + "loss": 1.2439, + "step": 1646 + }, + { + "epoch": 0.13, + "grad_norm": 3.742621411829504, + "learning_rate": 9.716065408465124e-06, + "loss": 0.8299, + "step": 1647 + }, + { + "epoch": 0.13, + "grad_norm": 4.194510187995256, + "learning_rate": 9.715625499208931e-06, + "loss": 1.0898, + "step": 1648 + }, + { + "epoch": 0.13, + "grad_norm": 3.2683273114298403, + "learning_rate": 9.715185259407911e-06, + "loss": 0.8357, + "step": 1649 + }, + { + "epoch": 0.13, + "grad_norm": 5.4839620674400145, + "learning_rate": 9.714744689092925e-06, + "loss": 1.0901, + "step": 1650 + }, + { + "epoch": 0.13, + "grad_norm": 3.131210682499025, + "learning_rate": 9.714303788294854e-06, + "loss": 0.8754, + "step": 1651 + }, + { + "epoch": 0.14, + "grad_norm": 2.6719171196306264, + "learning_rate": 9.713862557044607e-06, + "loss": 0.4085, + "step": 1652 + }, + { + "epoch": 0.14, + "grad_norm": 2.524196692690876, + "learning_rate": 9.713420995373108e-06, + "loss": 0.6716, + "step": 1653 + }, + { + "epoch": 0.14, + "grad_norm": 1.3654936297880624, + "learning_rate": 9.712979103311308e-06, + "loss": 0.2285, + "step": 1654 + }, + { + "epoch": 0.14, + "grad_norm": 4.852407882522216, + "learning_rate": 9.712536880890186e-06, + "loss": 1.2415, + "step": 1655 + }, + { + "epoch": 0.14, + "grad_norm": 5.363295234003041, + "learning_rate": 9.712094328140738e-06, + "loss": 1.155, + "step": 1656 + }, + { + "epoch": 0.14, + "grad_norm": 5.119060952663525, + "learning_rate": 9.711651445093984e-06, + "loss": 1.4858, + "step": 1657 + }, + { + "epoch": 0.14, + "grad_norm": 6.07263896623256, + "learning_rate": 9.711208231780969e-06, + "loss": 1.5487, + "step": 1658 + }, + { + "epoch": 0.14, + "grad_norm": 4.417118269313506, + "learning_rate": 9.71076468823276e-06, + "loss": 1.0961, + "step": 1659 + }, + { + "epoch": 0.14, + "grad_norm": 4.240636340765624, + "learning_rate": 9.710320814480448e-06, + "loss": 1.0508, + "step": 1660 + }, + { + "epoch": 0.14, + "grad_norm": 2.7871496737720873, + "learning_rate": 9.709876610555148e-06, + "loss": 0.7364, + "step": 1661 + }, + { + "epoch": 0.14, + "grad_norm": 2.7911961749043086, + "learning_rate": 9.709432076487991e-06, + "loss": 0.3766, + "step": 1662 + }, + { + "epoch": 0.14, + "grad_norm": 4.285382931590067, + "learning_rate": 9.708987212310144e-06, + "loss": 0.7827, + "step": 1663 + }, + { + "epoch": 0.14, + "grad_norm": 1.770461234555658, + "learning_rate": 9.708542018052786e-06, + "loss": 0.378, + "step": 1664 + }, + { + "epoch": 0.14, + "grad_norm": 2.6635570174102163, + "learning_rate": 9.708096493747123e-06, + "loss": 0.6873, + "step": 1665 + }, + { + "epoch": 0.14, + "grad_norm": 6.309510039579954, + "learning_rate": 9.70765063942439e-06, + "loss": 1.3979, + "step": 1666 + }, + { + "epoch": 0.14, + "grad_norm": 4.5408722162646455, + "learning_rate": 9.707204455115829e-06, + "loss": 0.9899, + "step": 1667 + }, + { + "epoch": 0.14, + "grad_norm": 2.983879844016031, + "learning_rate": 9.706757940852724e-06, + "loss": 0.6502, + "step": 1668 + }, + { + "epoch": 0.14, + "grad_norm": 3.611735659887384, + "learning_rate": 9.706311096666372e-06, + "loss": 0.8234, + "step": 1669 + }, + { + "epoch": 0.14, + "grad_norm": 2.342774436992476, + "learning_rate": 9.705863922588093e-06, + "loss": 0.5715, + "step": 1670 + }, + { + "epoch": 0.14, + "grad_norm": 4.347075365810961, + "learning_rate": 9.705416418649233e-06, + "loss": 0.685, + "step": 1671 + }, + { + "epoch": 0.14, + "grad_norm": 4.097322467099301, + "learning_rate": 9.704968584881163e-06, + "loss": 0.957, + "step": 1672 + }, + { + "epoch": 0.14, + "grad_norm": 2.284651690740073, + "learning_rate": 9.704520421315268e-06, + "loss": 0.412, + "step": 1673 + }, + { + "epoch": 0.14, + "grad_norm": 3.9964240169463348, + "learning_rate": 9.704071927982966e-06, + "loss": 0.9967, + "step": 1674 + }, + { + "epoch": 0.14, + "grad_norm": 0.8789215636177454, + "learning_rate": 9.703623104915696e-06, + "loss": 0.1869, + "step": 1675 + }, + { + "epoch": 0.14, + "grad_norm": 3.3620035126073153, + "learning_rate": 9.703173952144915e-06, + "loss": 0.3587, + "step": 1676 + }, + { + "epoch": 0.14, + "grad_norm": 3.0358459729420963, + "learning_rate": 9.702724469702107e-06, + "loss": 0.5392, + "step": 1677 + }, + { + "epoch": 0.14, + "grad_norm": 4.952662389243149, + "learning_rate": 9.702274657618781e-06, + "loss": 1.0653, + "step": 1678 + }, + { + "epoch": 0.14, + "grad_norm": 5.242149136790763, + "learning_rate": 9.701824515926469e-06, + "loss": 0.9788, + "step": 1679 + }, + { + "epoch": 0.14, + "grad_norm": 4.708365030480298, + "learning_rate": 9.701374044656716e-06, + "loss": 0.793, + "step": 1680 + }, + { + "epoch": 0.14, + "grad_norm": 3.764014596285324, + "learning_rate": 9.700923243841106e-06, + "loss": 1.0391, + "step": 1681 + }, + { + "epoch": 0.14, + "grad_norm": 3.482193777342618, + "learning_rate": 9.700472113511234e-06, + "loss": 0.6562, + "step": 1682 + }, + { + "epoch": 0.14, + "grad_norm": 5.047861768371731, + "learning_rate": 9.700020653698722e-06, + "loss": 1.6494, + "step": 1683 + }, + { + "epoch": 0.14, + "grad_norm": 5.170079677755622, + "learning_rate": 9.699568864435219e-06, + "loss": 1.4652, + "step": 1684 + }, + { + "epoch": 0.14, + "grad_norm": 4.076027692146769, + "learning_rate": 9.69911674575239e-06, + "loss": 0.9265, + "step": 1685 + }, + { + "epoch": 0.14, + "grad_norm": 2.85699929880583, + "learning_rate": 9.698664297681929e-06, + "loss": 0.4537, + "step": 1686 + }, + { + "epoch": 0.14, + "grad_norm": 2.0338154983394117, + "learning_rate": 9.698211520255549e-06, + "loss": 0.2813, + "step": 1687 + }, + { + "epoch": 0.14, + "grad_norm": 2.875842754777342, + "learning_rate": 9.697758413504987e-06, + "loss": 0.3438, + "step": 1688 + }, + { + "epoch": 0.14, + "grad_norm": 4.029530453781569, + "learning_rate": 9.697304977462005e-06, + "loss": 0.9498, + "step": 1689 + }, + { + "epoch": 0.14, + "grad_norm": 3.754246632963344, + "learning_rate": 9.696851212158388e-06, + "loss": 1.0082, + "step": 1690 + }, + { + "epoch": 0.14, + "grad_norm": 4.549144461758611, + "learning_rate": 9.696397117625942e-06, + "loss": 0.6994, + "step": 1691 + }, + { + "epoch": 0.14, + "grad_norm": 3.530806771898034, + "learning_rate": 9.695942693896495e-06, + "loss": 0.6279, + "step": 1692 + }, + { + "epoch": 0.14, + "grad_norm": 4.794353861007372, + "learning_rate": 9.695487941001905e-06, + "loss": 0.8341, + "step": 1693 + }, + { + "epoch": 0.14, + "grad_norm": 2.96207377085134, + "learning_rate": 9.695032858974042e-06, + "loss": 0.755, + "step": 1694 + }, + { + "epoch": 0.14, + "grad_norm": 3.6405072518293555, + "learning_rate": 9.694577447844809e-06, + "loss": 1.0264, + "step": 1695 + }, + { + "epoch": 0.14, + "grad_norm": 3.4635637170023017, + "learning_rate": 9.694121707646129e-06, + "loss": 0.8758, + "step": 1696 + }, + { + "epoch": 0.14, + "grad_norm": 5.624796096775306, + "learning_rate": 9.693665638409946e-06, + "loss": 1.5846, + "step": 1697 + }, + { + "epoch": 0.14, + "grad_norm": 3.3551310744837854, + "learning_rate": 9.693209240168227e-06, + "loss": 0.6217, + "step": 1698 + }, + { + "epoch": 0.14, + "grad_norm": 3.6274131018894096, + "learning_rate": 9.692752512952968e-06, + "loss": 0.9201, + "step": 1699 + }, + { + "epoch": 0.14, + "grad_norm": 4.944204078548009, + "learning_rate": 9.692295456796178e-06, + "loss": 1.2045, + "step": 1700 + }, + { + "epoch": 0.14, + "grad_norm": 4.46614524346582, + "learning_rate": 9.691838071729899e-06, + "loss": 1.0025, + "step": 1701 + }, + { + "epoch": 0.14, + "grad_norm": 3.707777143872066, + "learning_rate": 9.691380357786189e-06, + "loss": 0.8076, + "step": 1702 + }, + { + "epoch": 0.14, + "grad_norm": 4.450657216952367, + "learning_rate": 9.690922314997132e-06, + "loss": 0.9325, + "step": 1703 + }, + { + "epoch": 0.14, + "grad_norm": 3.1716327050722235, + "learning_rate": 9.69046394339484e-06, + "loss": 0.663, + "step": 1704 + }, + { + "epoch": 0.14, + "grad_norm": 2.7394523838000957, + "learning_rate": 9.690005243011436e-06, + "loss": 0.7823, + "step": 1705 + }, + { + "epoch": 0.14, + "grad_norm": 3.4636145269488483, + "learning_rate": 9.689546213879074e-06, + "loss": 0.6237, + "step": 1706 + }, + { + "epoch": 0.14, + "grad_norm": 3.690978397307599, + "learning_rate": 9.689086856029931e-06, + "loss": 0.7688, + "step": 1707 + }, + { + "epoch": 0.14, + "grad_norm": 4.86435667426149, + "learning_rate": 9.68862716949621e-06, + "loss": 0.8823, + "step": 1708 + }, + { + "epoch": 0.14, + "grad_norm": 6.120164203164415, + "learning_rate": 9.688167154310127e-06, + "loss": 1.3421, + "step": 1709 + }, + { + "epoch": 0.14, + "grad_norm": 3.418183466759219, + "learning_rate": 9.68770681050393e-06, + "loss": 0.8123, + "step": 1710 + }, + { + "epoch": 0.14, + "grad_norm": 2.6492709004623847, + "learning_rate": 9.687246138109888e-06, + "loss": 0.5541, + "step": 1711 + }, + { + "epoch": 0.14, + "grad_norm": 2.604298506319751, + "learning_rate": 9.686785137160287e-06, + "loss": 0.5288, + "step": 1712 + }, + { + "epoch": 0.14, + "grad_norm": 4.706652364518963, + "learning_rate": 9.686323807687447e-06, + "loss": 1.2949, + "step": 1713 + }, + { + "epoch": 0.14, + "grad_norm": 3.5539009429313473, + "learning_rate": 9.685862149723703e-06, + "loss": 0.8843, + "step": 1714 + }, + { + "epoch": 0.14, + "grad_norm": 3.9510422841886905, + "learning_rate": 9.685400163301415e-06, + "loss": 0.7165, + "step": 1715 + }, + { + "epoch": 0.14, + "grad_norm": 1.4029927629261745, + "learning_rate": 9.684937848452966e-06, + "loss": 0.2267, + "step": 1716 + }, + { + "epoch": 0.14, + "grad_norm": 2.8532270273503113, + "learning_rate": 9.684475205210764e-06, + "loss": 0.4159, + "step": 1717 + }, + { + "epoch": 0.14, + "grad_norm": 3.683097077822043, + "learning_rate": 9.684012233607237e-06, + "loss": 0.7487, + "step": 1718 + }, + { + "epoch": 0.14, + "grad_norm": 3.564688641828072, + "learning_rate": 9.683548933674837e-06, + "loss": 0.6828, + "step": 1719 + }, + { + "epoch": 0.14, + "grad_norm": 6.669827615735416, + "learning_rate": 9.683085305446038e-06, + "loss": 1.514, + "step": 1720 + }, + { + "epoch": 0.14, + "grad_norm": 5.390248451343604, + "learning_rate": 9.682621348953343e-06, + "loss": 0.8943, + "step": 1721 + }, + { + "epoch": 0.14, + "grad_norm": 3.465928623189289, + "learning_rate": 9.682157064229268e-06, + "loss": 0.777, + "step": 1722 + }, + { + "epoch": 0.14, + "grad_norm": 3.4567929831863875, + "learning_rate": 9.68169245130636e-06, + "loss": 0.9962, + "step": 1723 + }, + { + "epoch": 0.14, + "grad_norm": 5.5170790192773405, + "learning_rate": 9.681227510217186e-06, + "loss": 0.7667, + "step": 1724 + }, + { + "epoch": 0.14, + "grad_norm": 3.8671609039319743, + "learning_rate": 9.680762240994336e-06, + "loss": 0.8341, + "step": 1725 + }, + { + "epoch": 0.14, + "grad_norm": 3.090436071858452, + "learning_rate": 9.680296643670425e-06, + "loss": 0.4865, + "step": 1726 + }, + { + "epoch": 0.14, + "grad_norm": 3.830201007655817, + "learning_rate": 9.679830718278087e-06, + "loss": 0.884, + "step": 1727 + }, + { + "epoch": 0.14, + "grad_norm": 6.80538832131407, + "learning_rate": 9.679364464849983e-06, + "loss": 1.8187, + "step": 1728 + }, + { + "epoch": 0.14, + "grad_norm": 2.6906925076304757, + "learning_rate": 9.678897883418794e-06, + "loss": 0.4176, + "step": 1729 + }, + { + "epoch": 0.14, + "grad_norm": 5.0343769521331945, + "learning_rate": 9.678430974017226e-06, + "loss": 1.252, + "step": 1730 + }, + { + "epoch": 0.14, + "grad_norm": 2.8324493094817256, + "learning_rate": 9.677963736678007e-06, + "loss": 0.4308, + "step": 1731 + }, + { + "epoch": 0.14, + "grad_norm": 4.4653845451836105, + "learning_rate": 9.677496171433889e-06, + "loss": 0.9912, + "step": 1732 + }, + { + "epoch": 0.14, + "grad_norm": 3.9840595685560345, + "learning_rate": 9.677028278317646e-06, + "loss": 0.7753, + "step": 1733 + }, + { + "epoch": 0.14, + "grad_norm": 2.8594742130196154, + "learning_rate": 9.676560057362076e-06, + "loss": 0.7411, + "step": 1734 + }, + { + "epoch": 0.14, + "grad_norm": 3.5574909671573813, + "learning_rate": 9.676091508599995e-06, + "loss": 0.8224, + "step": 1735 + }, + { + "epoch": 0.14, + "grad_norm": 4.469295454039877, + "learning_rate": 9.675622632064255e-06, + "loss": 1.0785, + "step": 1736 + }, + { + "epoch": 0.14, + "grad_norm": 5.839232896804892, + "learning_rate": 9.675153427787713e-06, + "loss": 1.437, + "step": 1737 + }, + { + "epoch": 0.14, + "grad_norm": 3.4990413210459708, + "learning_rate": 9.674683895803262e-06, + "loss": 0.7086, + "step": 1738 + }, + { + "epoch": 0.14, + "grad_norm": 6.6088857858705525, + "learning_rate": 9.674214036143817e-06, + "loss": 1.5871, + "step": 1739 + }, + { + "epoch": 0.14, + "grad_norm": 2.3806875521861675, + "learning_rate": 9.673743848842309e-06, + "loss": 0.291, + "step": 1740 + }, + { + "epoch": 0.14, + "grad_norm": 4.384847468400944, + "learning_rate": 9.673273333931696e-06, + "loss": 1.3201, + "step": 1741 + }, + { + "epoch": 0.14, + "grad_norm": 2.7623502944249165, + "learning_rate": 9.672802491444962e-06, + "loss": 0.4727, + "step": 1742 + }, + { + "epoch": 0.14, + "grad_norm": 3.839881108249043, + "learning_rate": 9.672331321415109e-06, + "loss": 0.8002, + "step": 1743 + }, + { + "epoch": 0.14, + "grad_norm": 4.267489571195822, + "learning_rate": 9.671859823875166e-06, + "loss": 0.6568, + "step": 1744 + }, + { + "epoch": 0.14, + "grad_norm": 4.02003689875668, + "learning_rate": 9.671387998858178e-06, + "loss": 1.0033, + "step": 1745 + }, + { + "epoch": 0.14, + "grad_norm": 3.334547483328481, + "learning_rate": 9.670915846397224e-06, + "loss": 1.0469, + "step": 1746 + }, + { + "epoch": 0.14, + "grad_norm": 2.894229760592162, + "learning_rate": 9.670443366525396e-06, + "loss": 0.5112, + "step": 1747 + }, + { + "epoch": 0.14, + "grad_norm": 5.335584294099776, + "learning_rate": 9.669970559275814e-06, + "loss": 1.3517, + "step": 1748 + }, + { + "epoch": 0.14, + "grad_norm": 3.3717056903934015, + "learning_rate": 9.66949742468162e-06, + "loss": 0.7089, + "step": 1749 + }, + { + "epoch": 0.14, + "grad_norm": 3.627215878970121, + "learning_rate": 9.669023962775976e-06, + "loss": 0.7062, + "step": 1750 + }, + { + "epoch": 0.14, + "grad_norm": 4.289061741779872, + "learning_rate": 9.668550173592075e-06, + "loss": 0.8946, + "step": 1751 + }, + { + "epoch": 0.14, + "grad_norm": 4.224035246700839, + "learning_rate": 9.668076057163122e-06, + "loss": 0.8385, + "step": 1752 + }, + { + "epoch": 0.14, + "grad_norm": 3.429426490400277, + "learning_rate": 9.667601613522355e-06, + "loss": 0.8882, + "step": 1753 + }, + { + "epoch": 0.14, + "grad_norm": 1.511297438492241, + "learning_rate": 9.667126842703027e-06, + "loss": 0.2118, + "step": 1754 + }, + { + "epoch": 0.14, + "grad_norm": 4.094554570312316, + "learning_rate": 9.666651744738417e-06, + "loss": 0.7532, + "step": 1755 + }, + { + "epoch": 0.14, + "grad_norm": 5.468245134556565, + "learning_rate": 9.666176319661833e-06, + "loss": 0.8349, + "step": 1756 + }, + { + "epoch": 0.14, + "grad_norm": 3.9396583629732396, + "learning_rate": 9.665700567506594e-06, + "loss": 1.0694, + "step": 1757 + }, + { + "epoch": 0.14, + "grad_norm": 4.172716573001196, + "learning_rate": 9.66522448830605e-06, + "loss": 1.1011, + "step": 1758 + }, + { + "epoch": 0.14, + "grad_norm": 3.2948059164102803, + "learning_rate": 9.664748082093573e-06, + "loss": 0.84, + "step": 1759 + }, + { + "epoch": 0.14, + "grad_norm": 3.6774929634029596, + "learning_rate": 9.664271348902558e-06, + "loss": 0.3558, + "step": 1760 + }, + { + "epoch": 0.14, + "grad_norm": 4.293291501424655, + "learning_rate": 9.66379428876642e-06, + "loss": 1.0833, + "step": 1761 + }, + { + "epoch": 0.14, + "grad_norm": 4.8630268243168615, + "learning_rate": 9.663316901718599e-06, + "loss": 1.419, + "step": 1762 + }, + { + "epoch": 0.14, + "grad_norm": 3.144353130832779, + "learning_rate": 9.662839187792556e-06, + "loss": 0.5933, + "step": 1763 + }, + { + "epoch": 0.14, + "grad_norm": 3.4750146714606323, + "learning_rate": 9.66236114702178e-06, + "loss": 0.888, + "step": 1764 + }, + { + "epoch": 0.14, + "grad_norm": 3.391849057094597, + "learning_rate": 9.661882779439778e-06, + "loss": 0.5401, + "step": 1765 + }, + { + "epoch": 0.14, + "grad_norm": 2.330429068652904, + "learning_rate": 9.661404085080082e-06, + "loss": 0.47, + "step": 1766 + }, + { + "epoch": 0.14, + "grad_norm": 4.802510374591477, + "learning_rate": 9.660925063976247e-06, + "loss": 1.0848, + "step": 1767 + }, + { + "epoch": 0.14, + "grad_norm": 3.8303554655188647, + "learning_rate": 9.660445716161849e-06, + "loss": 1.1645, + "step": 1768 + }, + { + "epoch": 0.14, + "grad_norm": 5.332359385121991, + "learning_rate": 9.659966041670488e-06, + "loss": 1.3822, + "step": 1769 + }, + { + "epoch": 0.14, + "grad_norm": 4.49826770270501, + "learning_rate": 9.659486040535788e-06, + "loss": 0.75, + "step": 1770 + }, + { + "epoch": 0.14, + "grad_norm": 4.625414454564575, + "learning_rate": 9.659005712791394e-06, + "loss": 1.0039, + "step": 1771 + }, + { + "epoch": 0.14, + "grad_norm": 3.5687891069349362, + "learning_rate": 9.658525058470977e-06, + "loss": 0.7928, + "step": 1772 + }, + { + "epoch": 0.14, + "grad_norm": 3.0856531814552612, + "learning_rate": 9.658044077608227e-06, + "loss": 0.9806, + "step": 1773 + }, + { + "epoch": 0.14, + "grad_norm": 2.0481731703439325, + "learning_rate": 9.657562770236857e-06, + "loss": 0.3284, + "step": 1774 + }, + { + "epoch": 0.15, + "grad_norm": 3.477448597774813, + "learning_rate": 9.65708113639061e-06, + "loss": 0.684, + "step": 1775 + }, + { + "epoch": 0.15, + "grad_norm": 5.786223392707992, + "learning_rate": 9.656599176103241e-06, + "loss": 1.5706, + "step": 1776 + }, + { + "epoch": 0.15, + "grad_norm": 3.4299279077369706, + "learning_rate": 9.656116889408536e-06, + "loss": 0.9167, + "step": 1777 + }, + { + "epoch": 0.15, + "grad_norm": 3.845132154284454, + "learning_rate": 9.6556342763403e-06, + "loss": 0.6764, + "step": 1778 + }, + { + "epoch": 0.15, + "grad_norm": 5.990118489197662, + "learning_rate": 9.655151336932362e-06, + "loss": 1.2797, + "step": 1779 + }, + { + "epoch": 0.15, + "grad_norm": 2.883998603403723, + "learning_rate": 9.654668071218576e-06, + "loss": 0.5424, + "step": 1780 + }, + { + "epoch": 0.15, + "grad_norm": 4.976081723771014, + "learning_rate": 9.654184479232815e-06, + "loss": 1.2058, + "step": 1781 + }, + { + "epoch": 0.15, + "grad_norm": 4.612574873209907, + "learning_rate": 9.653700561008976e-06, + "loss": 1.0735, + "step": 1782 + }, + { + "epoch": 0.15, + "grad_norm": 3.153755087512503, + "learning_rate": 9.653216316580983e-06, + "loss": 0.9932, + "step": 1783 + }, + { + "epoch": 0.15, + "grad_norm": 4.250713760253444, + "learning_rate": 9.652731745982775e-06, + "loss": 1.0884, + "step": 1784 + }, + { + "epoch": 0.15, + "grad_norm": 3.5887055213448864, + "learning_rate": 9.652246849248321e-06, + "loss": 0.9022, + "step": 1785 + }, + { + "epoch": 0.15, + "grad_norm": 4.760788781957178, + "learning_rate": 9.651761626411608e-06, + "loss": 1.2023, + "step": 1786 + }, + { + "epoch": 0.15, + "grad_norm": 3.00386777303116, + "learning_rate": 9.651276077506653e-06, + "loss": 0.6427, + "step": 1787 + }, + { + "epoch": 0.15, + "grad_norm": 4.734213588146607, + "learning_rate": 9.650790202567484e-06, + "loss": 1.0691, + "step": 1788 + }, + { + "epoch": 0.15, + "grad_norm": 4.153949352710521, + "learning_rate": 9.650304001628164e-06, + "loss": 1.1805, + "step": 1789 + }, + { + "epoch": 0.15, + "grad_norm": 4.59076553094134, + "learning_rate": 9.649817474722772e-06, + "loss": 1.0221, + "step": 1790 + }, + { + "epoch": 0.15, + "grad_norm": 3.26331738370133, + "learning_rate": 9.64933062188541e-06, + "loss": 0.7572, + "step": 1791 + }, + { + "epoch": 0.15, + "grad_norm": 3.920409775213098, + "learning_rate": 9.648843443150203e-06, + "loss": 0.727, + "step": 1792 + }, + { + "epoch": 0.15, + "grad_norm": 4.005900979192892, + "learning_rate": 9.648355938551307e-06, + "loss": 0.5756, + "step": 1793 + }, + { + "epoch": 0.15, + "grad_norm": 3.8375256998421667, + "learning_rate": 9.647868108122887e-06, + "loss": 0.9936, + "step": 1794 + }, + { + "epoch": 0.15, + "grad_norm": 3.9040876808362346, + "learning_rate": 9.64737995189914e-06, + "loss": 0.7834, + "step": 1795 + }, + { + "epoch": 0.15, + "grad_norm": 4.041394694735733, + "learning_rate": 9.646891469914285e-06, + "loss": 0.8196, + "step": 1796 + }, + { + "epoch": 0.15, + "grad_norm": 4.082808833427863, + "learning_rate": 9.646402662202562e-06, + "loss": 0.8898, + "step": 1797 + }, + { + "epoch": 0.15, + "grad_norm": 1.8436244812636793, + "learning_rate": 9.645913528798231e-06, + "loss": 0.4136, + "step": 1798 + }, + { + "epoch": 0.15, + "grad_norm": 4.493558306583951, + "learning_rate": 9.645424069735582e-06, + "loss": 1.0796, + "step": 1799 + }, + { + "epoch": 0.15, + "grad_norm": 4.195389897526096, + "learning_rate": 9.644934285048924e-06, + "loss": 0.8139, + "step": 1800 + }, + { + "epoch": 0.15, + "grad_norm": 4.203540340494399, + "learning_rate": 9.644444174772586e-06, + "loss": 0.9193, + "step": 1801 + }, + { + "epoch": 0.15, + "grad_norm": 3.9164410427299736, + "learning_rate": 9.643953738940926e-06, + "loss": 0.6488, + "step": 1802 + }, + { + "epoch": 0.15, + "grad_norm": 3.565239558464744, + "learning_rate": 9.643462977588315e-06, + "loss": 0.9911, + "step": 1803 + }, + { + "epoch": 0.15, + "grad_norm": 3.6880077819346773, + "learning_rate": 9.642971890749163e-06, + "loss": 0.8051, + "step": 1804 + }, + { + "epoch": 0.15, + "grad_norm": 4.401719903746576, + "learning_rate": 9.642480478457883e-06, + "loss": 0.7522, + "step": 1805 + }, + { + "epoch": 0.15, + "grad_norm": 4.275589998252871, + "learning_rate": 9.64198874074893e-06, + "loss": 0.9793, + "step": 1806 + }, + { + "epoch": 0.15, + "grad_norm": 5.114652840279528, + "learning_rate": 9.641496677656766e-06, + "loss": 0.9217, + "step": 1807 + }, + { + "epoch": 0.15, + "grad_norm": 3.0759207295978834, + "learning_rate": 9.641004289215884e-06, + "loss": 0.4599, + "step": 1808 + }, + { + "epoch": 0.15, + "grad_norm": 4.237292245483329, + "learning_rate": 9.6405115754608e-06, + "loss": 0.6787, + "step": 1809 + }, + { + "epoch": 0.15, + "grad_norm": 3.9325714220187704, + "learning_rate": 9.64001853642605e-06, + "loss": 0.708, + "step": 1810 + }, + { + "epoch": 0.15, + "grad_norm": 3.4734493440378533, + "learning_rate": 9.639525172146194e-06, + "loss": 0.8007, + "step": 1811 + }, + { + "epoch": 0.15, + "grad_norm": 1.3238979248148512, + "learning_rate": 9.639031482655814e-06, + "loss": 0.1702, + "step": 1812 + }, + { + "epoch": 0.15, + "grad_norm": 4.147979100312514, + "learning_rate": 9.638537467989517e-06, + "loss": 0.832, + "step": 1813 + }, + { + "epoch": 0.15, + "grad_norm": 3.5306368535026618, + "learning_rate": 9.63804312818193e-06, + "loss": 0.7108, + "step": 1814 + }, + { + "epoch": 0.15, + "grad_norm": 3.354011197916151, + "learning_rate": 9.637548463267705e-06, + "loss": 0.6388, + "step": 1815 + }, + { + "epoch": 0.15, + "grad_norm": 3.3059057159340535, + "learning_rate": 9.637053473281517e-06, + "loss": 0.5239, + "step": 1816 + }, + { + "epoch": 0.15, + "grad_norm": 3.2267324783997355, + "learning_rate": 9.636558158258057e-06, + "loss": 0.6933, + "step": 1817 + }, + { + "epoch": 0.15, + "grad_norm": 1.7180296092373988, + "learning_rate": 9.636062518232052e-06, + "loss": 0.3191, + "step": 1818 + }, + { + "epoch": 0.15, + "grad_norm": 2.910215876952668, + "learning_rate": 9.63556655323824e-06, + "loss": 0.6652, + "step": 1819 + }, + { + "epoch": 0.15, + "grad_norm": 2.5030198200823057, + "learning_rate": 9.635070263311386e-06, + "loss": 0.5872, + "step": 1820 + }, + { + "epoch": 0.15, + "grad_norm": 4.245126438609335, + "learning_rate": 9.634573648486278e-06, + "loss": 0.8985, + "step": 1821 + }, + { + "epoch": 0.15, + "grad_norm": 4.960095535317417, + "learning_rate": 9.634076708797728e-06, + "loss": 0.6347, + "step": 1822 + }, + { + "epoch": 0.15, + "grad_norm": 4.189854565011895, + "learning_rate": 9.633579444280568e-06, + "loss": 0.7024, + "step": 1823 + }, + { + "epoch": 0.15, + "grad_norm": 4.554196019272498, + "learning_rate": 9.633081854969655e-06, + "loss": 0.7781, + "step": 1824 + }, + { + "epoch": 0.15, + "grad_norm": 4.304378751681649, + "learning_rate": 9.632583940899866e-06, + "loss": 1.4871, + "step": 1825 + }, + { + "epoch": 0.15, + "grad_norm": 3.481617498792958, + "learning_rate": 9.632085702106104e-06, + "loss": 0.6789, + "step": 1826 + }, + { + "epoch": 0.15, + "grad_norm": 2.4928801318769964, + "learning_rate": 9.631587138623295e-06, + "loss": 0.4831, + "step": 1827 + }, + { + "epoch": 0.15, + "grad_norm": 3.0021844643847864, + "learning_rate": 9.631088250486383e-06, + "loss": 0.4137, + "step": 1828 + }, + { + "epoch": 0.15, + "grad_norm": 3.4877779902389694, + "learning_rate": 9.630589037730338e-06, + "loss": 0.4901, + "step": 1829 + }, + { + "epoch": 0.15, + "grad_norm": 3.6749886358582238, + "learning_rate": 9.630089500390154e-06, + "loss": 0.9204, + "step": 1830 + }, + { + "epoch": 0.15, + "grad_norm": 4.7402582348689775, + "learning_rate": 9.629589638500849e-06, + "loss": 1.2322, + "step": 1831 + }, + { + "epoch": 0.15, + "grad_norm": 4.4426550739107515, + "learning_rate": 9.629089452097455e-06, + "loss": 0.7329, + "step": 1832 + }, + { + "epoch": 0.15, + "grad_norm": 2.6734999505376194, + "learning_rate": 9.628588941215037e-06, + "loss": 0.7417, + "step": 1833 + }, + { + "epoch": 0.15, + "grad_norm": 4.942397568879872, + "learning_rate": 9.62808810588868e-06, + "loss": 0.9746, + "step": 1834 + }, + { + "epoch": 0.15, + "grad_norm": 3.175304781579661, + "learning_rate": 9.627586946153487e-06, + "loss": 0.5455, + "step": 1835 + }, + { + "epoch": 0.15, + "grad_norm": 3.486209116943911, + "learning_rate": 9.62708546204459e-06, + "loss": 0.4457, + "step": 1836 + }, + { + "epoch": 0.15, + "grad_norm": 3.699947940635257, + "learning_rate": 9.626583653597136e-06, + "loss": 0.7648, + "step": 1837 + }, + { + "epoch": 0.15, + "grad_norm": 3.563861331354613, + "learning_rate": 9.626081520846304e-06, + "loss": 0.9206, + "step": 1838 + }, + { + "epoch": 0.15, + "grad_norm": 3.9746637208939366, + "learning_rate": 9.625579063827291e-06, + "loss": 0.9338, + "step": 1839 + }, + { + "epoch": 0.15, + "grad_norm": 2.993335491725338, + "learning_rate": 9.625076282575317e-06, + "loss": 0.7197, + "step": 1840 + }, + { + "epoch": 0.15, + "grad_norm": 3.7751650590973203, + "learning_rate": 9.624573177125623e-06, + "loss": 0.6854, + "step": 1841 + }, + { + "epoch": 0.15, + "grad_norm": 3.6287066239927293, + "learning_rate": 9.624069747513474e-06, + "loss": 0.8383, + "step": 1842 + }, + { + "epoch": 0.15, + "grad_norm": 5.466341283669408, + "learning_rate": 9.623565993774161e-06, + "loss": 1.379, + "step": 1843 + }, + { + "epoch": 0.15, + "grad_norm": 3.9565044600451404, + "learning_rate": 9.623061915942994e-06, + "loss": 0.8075, + "step": 1844 + }, + { + "epoch": 0.15, + "grad_norm": 4.353500384494615, + "learning_rate": 9.622557514055306e-06, + "loss": 0.8647, + "step": 1845 + }, + { + "epoch": 0.15, + "grad_norm": 2.7802807636965956, + "learning_rate": 9.622052788146454e-06, + "loss": 0.7, + "step": 1846 + }, + { + "epoch": 0.15, + "grad_norm": 3.7243261490814894, + "learning_rate": 9.621547738251816e-06, + "loss": 0.6569, + "step": 1847 + }, + { + "epoch": 0.15, + "grad_norm": 3.5979273143338655, + "learning_rate": 9.621042364406796e-06, + "loss": 0.6535, + "step": 1848 + }, + { + "epoch": 0.15, + "grad_norm": 3.4155454455842635, + "learning_rate": 9.620536666646816e-06, + "loss": 0.5346, + "step": 1849 + }, + { + "epoch": 0.15, + "grad_norm": 5.346796067656894, + "learning_rate": 9.620030645007324e-06, + "loss": 1.1118, + "step": 1850 + }, + { + "epoch": 0.15, + "grad_norm": 2.065844455634222, + "learning_rate": 9.61952429952379e-06, + "loss": 0.4093, + "step": 1851 + }, + { + "epoch": 0.15, + "grad_norm": 2.0422151702096683, + "learning_rate": 9.619017630231709e-06, + "loss": 0.3572, + "step": 1852 + }, + { + "epoch": 0.15, + "grad_norm": 3.384252750841941, + "learning_rate": 9.618510637166591e-06, + "loss": 0.6136, + "step": 1853 + }, + { + "epoch": 0.15, + "grad_norm": 5.710009165195783, + "learning_rate": 9.61800332036398e-06, + "loss": 0.9828, + "step": 1854 + }, + { + "epoch": 0.15, + "grad_norm": 2.274612438483947, + "learning_rate": 9.617495679859432e-06, + "loss": 0.3935, + "step": 1855 + }, + { + "epoch": 0.15, + "grad_norm": 4.685079377159412, + "learning_rate": 9.616987715688534e-06, + "loss": 1.2142, + "step": 1856 + }, + { + "epoch": 0.15, + "grad_norm": 3.7327329404216423, + "learning_rate": 9.616479427886889e-06, + "loss": 1.0473, + "step": 1857 + }, + { + "epoch": 0.15, + "grad_norm": 4.633754836253732, + "learning_rate": 9.615970816490127e-06, + "loss": 1.088, + "step": 1858 + }, + { + "epoch": 0.15, + "grad_norm": 2.6267774185656303, + "learning_rate": 9.6154618815339e-06, + "loss": 0.3738, + "step": 1859 + }, + { + "epoch": 0.15, + "grad_norm": 2.683276366996444, + "learning_rate": 9.61495262305388e-06, + "loss": 0.7404, + "step": 1860 + }, + { + "epoch": 0.15, + "grad_norm": 2.4397339031584555, + "learning_rate": 9.614443041085768e-06, + "loss": 0.5179, + "step": 1861 + }, + { + "epoch": 0.15, + "grad_norm": 4.080227084199175, + "learning_rate": 9.613933135665281e-06, + "loss": 0.8409, + "step": 1862 + }, + { + "epoch": 0.15, + "grad_norm": 2.720888044583893, + "learning_rate": 9.613422906828161e-06, + "loss": 0.5713, + "step": 1863 + }, + { + "epoch": 0.15, + "grad_norm": 4.605411201655714, + "learning_rate": 9.61291235461017e-06, + "loss": 1.1279, + "step": 1864 + }, + { + "epoch": 0.15, + "grad_norm": 3.6743922503335473, + "learning_rate": 9.612401479047102e-06, + "loss": 0.5532, + "step": 1865 + }, + { + "epoch": 0.15, + "grad_norm": 2.458010962051488, + "learning_rate": 9.611890280174761e-06, + "loss": 0.4904, + "step": 1866 + }, + { + "epoch": 0.15, + "grad_norm": 3.590096886998366, + "learning_rate": 9.611378758028984e-06, + "loss": 0.5157, + "step": 1867 + }, + { + "epoch": 0.15, + "grad_norm": 4.786726498871613, + "learning_rate": 9.610866912645624e-06, + "loss": 0.9906, + "step": 1868 + }, + { + "epoch": 0.15, + "grad_norm": 2.520165208333526, + "learning_rate": 9.61035474406056e-06, + "loss": 0.49, + "step": 1869 + }, + { + "epoch": 0.15, + "grad_norm": 3.2529485159125437, + "learning_rate": 9.609842252309694e-06, + "loss": 0.9277, + "step": 1870 + }, + { + "epoch": 0.15, + "grad_norm": 2.702654732411725, + "learning_rate": 9.609329437428946e-06, + "loss": 0.6127, + "step": 1871 + }, + { + "epoch": 0.15, + "grad_norm": 4.388100944422168, + "learning_rate": 9.608816299454267e-06, + "loss": 0.4555, + "step": 1872 + }, + { + "epoch": 0.15, + "grad_norm": 4.987626503763048, + "learning_rate": 9.608302838421622e-06, + "loss": 1.1882, + "step": 1873 + }, + { + "epoch": 0.15, + "grad_norm": 4.92680809173823, + "learning_rate": 9.607789054367e-06, + "loss": 1.6455, + "step": 1874 + }, + { + "epoch": 0.15, + "grad_norm": 2.933180856469748, + "learning_rate": 9.607274947326423e-06, + "loss": 0.2325, + "step": 1875 + }, + { + "epoch": 0.15, + "grad_norm": 3.7454435218935003, + "learning_rate": 9.606760517335923e-06, + "loss": 0.6886, + "step": 1876 + }, + { + "epoch": 0.15, + "grad_norm": 3.7601473733544006, + "learning_rate": 9.606245764431557e-06, + "loss": 1.0919, + "step": 1877 + }, + { + "epoch": 0.15, + "grad_norm": 1.963578919378046, + "learning_rate": 9.605730688649411e-06, + "loss": 0.4018, + "step": 1878 + }, + { + "epoch": 0.15, + "grad_norm": 4.094419034446812, + "learning_rate": 9.605215290025587e-06, + "loss": 0.8573, + "step": 1879 + }, + { + "epoch": 0.15, + "grad_norm": 4.232518157536914, + "learning_rate": 9.604699568596211e-06, + "loss": 1.1854, + "step": 1880 + }, + { + "epoch": 0.15, + "grad_norm": 5.330752275162666, + "learning_rate": 9.604183524397439e-06, + "loss": 1.1544, + "step": 1881 + }, + { + "epoch": 0.15, + "grad_norm": 5.738236543702836, + "learning_rate": 9.603667157465436e-06, + "loss": 1.2742, + "step": 1882 + }, + { + "epoch": 0.15, + "grad_norm": 4.500833585139346, + "learning_rate": 9.603150467836403e-06, + "loss": 1.0557, + "step": 1883 + }, + { + "epoch": 0.15, + "grad_norm": 4.348349354005987, + "learning_rate": 9.602633455546556e-06, + "loss": 0.8161, + "step": 1884 + }, + { + "epoch": 0.15, + "grad_norm": 3.345114650968165, + "learning_rate": 9.602116120632133e-06, + "loss": 0.9003, + "step": 1885 + }, + { + "epoch": 0.15, + "grad_norm": 4.535743444943094, + "learning_rate": 9.601598463129398e-06, + "loss": 1.003, + "step": 1886 + }, + { + "epoch": 0.15, + "grad_norm": 3.9347754032019444, + "learning_rate": 9.601080483074637e-06, + "loss": 0.7201, + "step": 1887 + }, + { + "epoch": 0.15, + "grad_norm": 4.512700028513524, + "learning_rate": 9.600562180504158e-06, + "loss": 0.6555, + "step": 1888 + }, + { + "epoch": 0.15, + "grad_norm": 4.831953765720359, + "learning_rate": 9.600043555454292e-06, + "loss": 1.2459, + "step": 1889 + }, + { + "epoch": 0.15, + "grad_norm": 3.908436927960402, + "learning_rate": 9.599524607961394e-06, + "loss": 1.0104, + "step": 1890 + }, + { + "epoch": 0.15, + "grad_norm": 5.103109745089075, + "learning_rate": 9.599005338061835e-06, + "loss": 1.1343, + "step": 1891 + }, + { + "epoch": 0.15, + "grad_norm": 4.718322584210091, + "learning_rate": 9.598485745792019e-06, + "loss": 0.8079, + "step": 1892 + }, + { + "epoch": 0.15, + "grad_norm": 4.160552069027906, + "learning_rate": 9.597965831188365e-06, + "loss": 0.8904, + "step": 1893 + }, + { + "epoch": 0.15, + "grad_norm": 4.893236221583511, + "learning_rate": 9.597445594287315e-06, + "loss": 0.8749, + "step": 1894 + }, + { + "epoch": 0.15, + "grad_norm": 3.8989511257441816, + "learning_rate": 9.596925035125338e-06, + "loss": 0.8513, + "step": 1895 + }, + { + "epoch": 0.15, + "grad_norm": 5.502391681117759, + "learning_rate": 9.596404153738922e-06, + "loss": 1.2835, + "step": 1896 + }, + { + "epoch": 0.16, + "grad_norm": 1.5096032919902738, + "learning_rate": 9.595882950164579e-06, + "loss": 0.2231, + "step": 1897 + }, + { + "epoch": 0.16, + "grad_norm": 3.3940828284046365, + "learning_rate": 9.595361424438841e-06, + "loss": 0.8311, + "step": 1898 + }, + { + "epoch": 0.16, + "grad_norm": 3.1565144371734566, + "learning_rate": 9.594839576598267e-06, + "loss": 0.7746, + "step": 1899 + }, + { + "epoch": 0.16, + "grad_norm": 4.861681899339718, + "learning_rate": 9.594317406679436e-06, + "loss": 1.4082, + "step": 1900 + }, + { + "epoch": 0.16, + "grad_norm": 4.3701934090314865, + "learning_rate": 9.593794914718948e-06, + "loss": 1.0056, + "step": 1901 + }, + { + "epoch": 0.16, + "grad_norm": 1.4361844931507608, + "learning_rate": 9.593272100753431e-06, + "loss": 0.183, + "step": 1902 + }, + { + "epoch": 0.16, + "grad_norm": 5.341261409056521, + "learning_rate": 9.592748964819528e-06, + "loss": 0.9966, + "step": 1903 + }, + { + "epoch": 0.16, + "grad_norm": 4.246802133865648, + "learning_rate": 9.592225506953911e-06, + "loss": 0.8687, + "step": 1904 + }, + { + "epoch": 0.16, + "grad_norm": 2.536665860148006, + "learning_rate": 9.591701727193272e-06, + "loss": 0.4399, + "step": 1905 + }, + { + "epoch": 0.16, + "grad_norm": 3.3123258067487007, + "learning_rate": 9.591177625574322e-06, + "loss": 0.4896, + "step": 1906 + }, + { + "epoch": 0.16, + "grad_norm": 4.273373552745985, + "learning_rate": 9.590653202133804e-06, + "loss": 0.9065, + "step": 1907 + }, + { + "epoch": 0.16, + "grad_norm": 5.014798967567046, + "learning_rate": 9.590128456908476e-06, + "loss": 0.96, + "step": 1908 + }, + { + "epoch": 0.16, + "grad_norm": 2.4186995200654637, + "learning_rate": 9.589603389935118e-06, + "loss": 0.5375, + "step": 1909 + }, + { + "epoch": 0.16, + "grad_norm": 3.0198594591627925, + "learning_rate": 9.589078001250537e-06, + "loss": 0.6707, + "step": 1910 + }, + { + "epoch": 0.16, + "grad_norm": 4.1364010070572155, + "learning_rate": 9.588552290891557e-06, + "loss": 1.1784, + "step": 1911 + }, + { + "epoch": 0.16, + "grad_norm": 5.158358601291325, + "learning_rate": 9.588026258895034e-06, + "loss": 0.9459, + "step": 1912 + }, + { + "epoch": 0.16, + "grad_norm": 4.444787446353252, + "learning_rate": 9.587499905297836e-06, + "loss": 1.0501, + "step": 1913 + }, + { + "epoch": 0.16, + "grad_norm": 3.357310424227119, + "learning_rate": 9.58697323013686e-06, + "loss": 0.906, + "step": 1914 + }, + { + "epoch": 0.16, + "grad_norm": 2.340254000033742, + "learning_rate": 9.586446233449024e-06, + "loss": 0.539, + "step": 1915 + }, + { + "epoch": 0.16, + "grad_norm": 1.3421768612808074, + "learning_rate": 9.585918915271267e-06, + "loss": 0.1963, + "step": 1916 + }, + { + "epoch": 0.16, + "grad_norm": 5.200976251872486, + "learning_rate": 9.58539127564055e-06, + "loss": 0.9611, + "step": 1917 + }, + { + "epoch": 0.16, + "grad_norm": 5.14363328588231, + "learning_rate": 9.584863314593862e-06, + "loss": 1.1128, + "step": 1918 + }, + { + "epoch": 0.16, + "grad_norm": 4.767959520821083, + "learning_rate": 9.584335032168209e-06, + "loss": 1.2102, + "step": 1919 + }, + { + "epoch": 0.16, + "grad_norm": 4.0603362286037, + "learning_rate": 9.58380642840062e-06, + "loss": 0.8753, + "step": 1920 + }, + { + "epoch": 0.16, + "grad_norm": 2.784097276200648, + "learning_rate": 9.583277503328152e-06, + "loss": 0.6181, + "step": 1921 + }, + { + "epoch": 0.16, + "grad_norm": 3.3628590333982915, + "learning_rate": 9.582748256987875e-06, + "loss": 0.3996, + "step": 1922 + }, + { + "epoch": 0.16, + "grad_norm": 3.1992436604635115, + "learning_rate": 9.58221868941689e-06, + "loss": 0.8356, + "step": 1923 + }, + { + "epoch": 0.16, + "grad_norm": 2.5690561422258256, + "learning_rate": 9.581688800652317e-06, + "loss": 0.5881, + "step": 1924 + }, + { + "epoch": 0.16, + "grad_norm": 3.9962006736267157, + "learning_rate": 9.581158590731298e-06, + "loss": 0.6964, + "step": 1925 + }, + { + "epoch": 0.16, + "grad_norm": 4.838643693846644, + "learning_rate": 9.580628059691e-06, + "loss": 1.1755, + "step": 1926 + }, + { + "epoch": 0.16, + "grad_norm": 5.645525539871668, + "learning_rate": 9.58009720756861e-06, + "loss": 1.3661, + "step": 1927 + }, + { + "epoch": 0.16, + "grad_norm": 5.369531010137947, + "learning_rate": 9.57956603440134e-06, + "loss": 1.1669, + "step": 1928 + }, + { + "epoch": 0.16, + "grad_norm": 2.0261167417220345, + "learning_rate": 9.579034540226417e-06, + "loss": 0.4221, + "step": 1929 + }, + { + "epoch": 0.16, + "grad_norm": 2.805961619998852, + "learning_rate": 9.578502725081105e-06, + "loss": 0.4036, + "step": 1930 + }, + { + "epoch": 0.16, + "grad_norm": 4.7343364076028704, + "learning_rate": 9.577970589002674e-06, + "loss": 0.8297, + "step": 1931 + }, + { + "epoch": 0.16, + "grad_norm": 3.115812274867747, + "learning_rate": 9.577438132028431e-06, + "loss": 0.5971, + "step": 1932 + }, + { + "epoch": 0.16, + "grad_norm": 4.013164357851665, + "learning_rate": 9.576905354195695e-06, + "loss": 0.7369, + "step": 1933 + }, + { + "epoch": 0.16, + "grad_norm": 5.044032284551135, + "learning_rate": 9.576372255541812e-06, + "loss": 1.0083, + "step": 1934 + }, + { + "epoch": 0.16, + "grad_norm": 4.314036059653204, + "learning_rate": 9.575838836104152e-06, + "loss": 1.1176, + "step": 1935 + }, + { + "epoch": 0.16, + "grad_norm": 3.9222150730303875, + "learning_rate": 9.575305095920101e-06, + "loss": 0.8157, + "step": 1936 + }, + { + "epoch": 0.16, + "grad_norm": 3.5652621046635025, + "learning_rate": 9.574771035027077e-06, + "loss": 0.826, + "step": 1937 + }, + { + "epoch": 0.16, + "grad_norm": 2.9630771554858146, + "learning_rate": 9.574236653462511e-06, + "loss": 0.4971, + "step": 1938 + }, + { + "epoch": 0.16, + "grad_norm": 5.412336714047085, + "learning_rate": 9.573701951263862e-06, + "loss": 1.0939, + "step": 1939 + }, + { + "epoch": 0.16, + "grad_norm": 4.6333530332442, + "learning_rate": 9.573166928468615e-06, + "loss": 1.4336, + "step": 1940 + }, + { + "epoch": 0.16, + "grad_norm": 4.62204995393872, + "learning_rate": 9.572631585114267e-06, + "loss": 1.1644, + "step": 1941 + }, + { + "epoch": 0.16, + "grad_norm": 3.1738069100273223, + "learning_rate": 9.572095921238343e-06, + "loss": 0.7047, + "step": 1942 + }, + { + "epoch": 0.16, + "grad_norm": 3.8525663295063906, + "learning_rate": 9.571559936878394e-06, + "loss": 0.9256, + "step": 1943 + }, + { + "epoch": 0.16, + "grad_norm": 1.4944576959377527, + "learning_rate": 9.571023632071989e-06, + "loss": 0.2304, + "step": 1944 + }, + { + "epoch": 0.16, + "grad_norm": 3.812644322997221, + "learning_rate": 9.570487006856722e-06, + "loss": 0.6744, + "step": 1945 + }, + { + "epoch": 0.16, + "grad_norm": 3.1507359526587972, + "learning_rate": 9.569950061270204e-06, + "loss": 0.7059, + "step": 1946 + }, + { + "epoch": 0.16, + "grad_norm": 2.624674249680327, + "learning_rate": 9.569412795350076e-06, + "loss": 0.6104, + "step": 1947 + }, + { + "epoch": 0.16, + "grad_norm": 4.31256529328677, + "learning_rate": 9.568875209133999e-06, + "loss": 1.0901, + "step": 1948 + }, + { + "epoch": 0.16, + "grad_norm": 3.861101663997485, + "learning_rate": 9.568337302659652e-06, + "loss": 0.7332, + "step": 1949 + }, + { + "epoch": 0.16, + "grad_norm": 3.252967963194855, + "learning_rate": 9.567799075964743e-06, + "loss": 0.473, + "step": 1950 + }, + { + "epoch": 0.16, + "grad_norm": 3.7954437726383152, + "learning_rate": 9.567260529086997e-06, + "loss": 0.8397, + "step": 1951 + }, + { + "epoch": 0.16, + "grad_norm": 2.0570487471770336, + "learning_rate": 9.566721662064164e-06, + "loss": 0.4776, + "step": 1952 + }, + { + "epoch": 0.16, + "grad_norm": 4.598324280894434, + "learning_rate": 9.566182474934017e-06, + "loss": 1.0364, + "step": 1953 + }, + { + "epoch": 0.16, + "grad_norm": 3.9893068037392205, + "learning_rate": 9.565642967734351e-06, + "loss": 0.8069, + "step": 1954 + }, + { + "epoch": 0.16, + "grad_norm": 2.4689637147814354, + "learning_rate": 9.565103140502982e-06, + "loss": 0.4132, + "step": 1955 + }, + { + "epoch": 0.16, + "grad_norm": 5.9962889936616355, + "learning_rate": 9.564562993277752e-06, + "loss": 1.2551, + "step": 1956 + }, + { + "epoch": 0.16, + "grad_norm": 4.037837016283461, + "learning_rate": 9.564022526096521e-06, + "loss": 0.6973, + "step": 1957 + }, + { + "epoch": 0.16, + "grad_norm": 4.623565841563198, + "learning_rate": 9.563481738997172e-06, + "loss": 1.2694, + "step": 1958 + }, + { + "epoch": 0.16, + "grad_norm": 3.7142809741906966, + "learning_rate": 9.562940632017614e-06, + "loss": 0.565, + "step": 1959 + }, + { + "epoch": 0.16, + "grad_norm": 3.5410216749125705, + "learning_rate": 9.562399205195775e-06, + "loss": 0.6941, + "step": 1960 + }, + { + "epoch": 0.16, + "grad_norm": 3.9151566880983317, + "learning_rate": 9.56185745856961e-06, + "loss": 0.9364, + "step": 1961 + }, + { + "epoch": 0.16, + "grad_norm": 5.682734437236148, + "learning_rate": 9.561315392177089e-06, + "loss": 0.767, + "step": 1962 + }, + { + "epoch": 0.16, + "grad_norm": 3.6613759123423164, + "learning_rate": 9.56077300605621e-06, + "loss": 1.0541, + "step": 1963 + }, + { + "epoch": 0.16, + "grad_norm": 4.110581249062556, + "learning_rate": 9.56023030024499e-06, + "loss": 0.8277, + "step": 1964 + }, + { + "epoch": 0.16, + "grad_norm": 4.288487454511914, + "learning_rate": 9.559687274781475e-06, + "loss": 0.768, + "step": 1965 + }, + { + "epoch": 0.16, + "grad_norm": 3.8223670341511733, + "learning_rate": 9.559143929703724e-06, + "loss": 0.9807, + "step": 1966 + }, + { + "epoch": 0.16, + "grad_norm": 4.108908693851089, + "learning_rate": 9.558600265049825e-06, + "loss": 0.8094, + "step": 1967 + }, + { + "epoch": 0.16, + "grad_norm": 3.630304005106419, + "learning_rate": 9.558056280857887e-06, + "loss": 0.7759, + "step": 1968 + }, + { + "epoch": 0.16, + "grad_norm": 3.8460453116763738, + "learning_rate": 9.55751197716604e-06, + "loss": 0.8335, + "step": 1969 + }, + { + "epoch": 0.16, + "grad_norm": 4.365489981874306, + "learning_rate": 9.556967354012438e-06, + "loss": 0.8814, + "step": 1970 + }, + { + "epoch": 0.16, + "grad_norm": 4.194935912677049, + "learning_rate": 9.556422411435257e-06, + "loss": 1.0257, + "step": 1971 + }, + { + "epoch": 0.16, + "grad_norm": 3.9574399155708813, + "learning_rate": 9.555877149472695e-06, + "loss": 0.8627, + "step": 1972 + }, + { + "epoch": 0.16, + "grad_norm": 4.040316351809699, + "learning_rate": 9.555331568162972e-06, + "loss": 0.8235, + "step": 1973 + }, + { + "epoch": 0.16, + "grad_norm": 2.273221383752521, + "learning_rate": 9.554785667544329e-06, + "loss": 0.6658, + "step": 1974 + }, + { + "epoch": 0.16, + "grad_norm": 4.044878071923383, + "learning_rate": 9.554239447655036e-06, + "loss": 0.9372, + "step": 1975 + }, + { + "epoch": 0.16, + "grad_norm": 7.246557282396511, + "learning_rate": 9.553692908533375e-06, + "loss": 1.4103, + "step": 1976 + }, + { + "epoch": 0.16, + "grad_norm": 4.274490525562516, + "learning_rate": 9.55314605021766e-06, + "loss": 1.3265, + "step": 1977 + }, + { + "epoch": 0.16, + "grad_norm": 3.784351877267063, + "learning_rate": 9.552598872746223e-06, + "loss": 0.7399, + "step": 1978 + }, + { + "epoch": 0.16, + "grad_norm": 5.510740649222768, + "learning_rate": 9.552051376157417e-06, + "loss": 1.1919, + "step": 1979 + }, + { + "epoch": 0.16, + "grad_norm": 4.682551788596266, + "learning_rate": 9.55150356048962e-06, + "loss": 1.2566, + "step": 1980 + }, + { + "epoch": 0.16, + "grad_norm": 3.308435854041583, + "learning_rate": 9.550955425781234e-06, + "loss": 0.5514, + "step": 1981 + }, + { + "epoch": 0.16, + "grad_norm": 3.5546470124450935, + "learning_rate": 9.550406972070676e-06, + "loss": 0.7517, + "step": 1982 + }, + { + "epoch": 0.16, + "grad_norm": 3.847516559239803, + "learning_rate": 9.549858199396394e-06, + "loss": 0.8813, + "step": 1983 + }, + { + "epoch": 0.16, + "grad_norm": 3.2468457030579745, + "learning_rate": 9.549309107796852e-06, + "loss": 0.9317, + "step": 1984 + }, + { + "epoch": 0.16, + "grad_norm": 2.7988168500030888, + "learning_rate": 9.54875969731054e-06, + "loss": 0.5256, + "step": 1985 + }, + { + "epoch": 0.16, + "grad_norm": 4.034857576281372, + "learning_rate": 9.54820996797597e-06, + "loss": 0.9549, + "step": 1986 + }, + { + "epoch": 0.16, + "grad_norm": 4.8790834699851295, + "learning_rate": 9.547659919831676e-06, + "loss": 1.6624, + "step": 1987 + }, + { + "epoch": 0.16, + "grad_norm": 4.05241404605833, + "learning_rate": 9.547109552916211e-06, + "loss": 1.2909, + "step": 1988 + }, + { + "epoch": 0.16, + "grad_norm": 2.9117588210126377, + "learning_rate": 9.546558867268159e-06, + "loss": 0.6867, + "step": 1989 + }, + { + "epoch": 0.16, + "grad_norm": 3.309118950701456, + "learning_rate": 9.546007862926115e-06, + "loss": 0.6384, + "step": 1990 + }, + { + "epoch": 0.16, + "grad_norm": 4.116069205074591, + "learning_rate": 9.545456539928704e-06, + "loss": 1.443, + "step": 1991 + }, + { + "epoch": 0.16, + "grad_norm": 4.9087098585219024, + "learning_rate": 9.544904898314572e-06, + "loss": 1.0374, + "step": 1992 + }, + { + "epoch": 0.16, + "grad_norm": 3.9309719792696702, + "learning_rate": 9.544352938122384e-06, + "loss": 0.8559, + "step": 1993 + }, + { + "epoch": 0.16, + "grad_norm": 4.469895891148484, + "learning_rate": 9.543800659390835e-06, + "loss": 0.8993, + "step": 1994 + }, + { + "epoch": 0.16, + "grad_norm": 4.278454789623266, + "learning_rate": 9.543248062158632e-06, + "loss": 1.0097, + "step": 1995 + }, + { + "epoch": 0.16, + "grad_norm": 4.529235977103577, + "learning_rate": 9.542695146464513e-06, + "loss": 1.0865, + "step": 1996 + }, + { + "epoch": 0.16, + "grad_norm": 4.169795889365141, + "learning_rate": 9.542141912347236e-06, + "loss": 1.018, + "step": 1997 + }, + { + "epoch": 0.16, + "grad_norm": 5.51563326069593, + "learning_rate": 9.541588359845575e-06, + "loss": 1.0636, + "step": 1998 + }, + { + "epoch": 0.16, + "grad_norm": 5.479455931358969, + "learning_rate": 9.541034488998338e-06, + "loss": 0.9901, + "step": 1999 + }, + { + "epoch": 0.16, + "grad_norm": 3.372716248240536, + "learning_rate": 9.540480299844345e-06, + "loss": 0.6926, + "step": 2000 + }, + { + "epoch": 0.16, + "grad_norm": 5.747085234992996, + "learning_rate": 9.539925792422443e-06, + "loss": 1.0609, + "step": 2001 + }, + { + "epoch": 0.16, + "grad_norm": 1.269631520809824, + "learning_rate": 9.5393709667715e-06, + "loss": 0.209, + "step": 2002 + }, + { + "epoch": 0.16, + "grad_norm": 4.8841818602939, + "learning_rate": 9.53881582293041e-06, + "loss": 0.8058, + "step": 2003 + }, + { + "epoch": 0.16, + "grad_norm": 4.405695660762923, + "learning_rate": 9.538260360938081e-06, + "loss": 0.8038, + "step": 2004 + }, + { + "epoch": 0.16, + "grad_norm": 2.771381211989904, + "learning_rate": 9.537704580833453e-06, + "loss": 0.4068, + "step": 2005 + }, + { + "epoch": 0.16, + "grad_norm": 3.26310212288867, + "learning_rate": 9.537148482655482e-06, + "loss": 0.9612, + "step": 2006 + }, + { + "epoch": 0.16, + "grad_norm": 3.976865740042055, + "learning_rate": 9.536592066443147e-06, + "loss": 1.0408, + "step": 2007 + }, + { + "epoch": 0.16, + "grad_norm": 4.71800834789916, + "learning_rate": 9.536035332235454e-06, + "loss": 0.9944, + "step": 2008 + }, + { + "epoch": 0.16, + "grad_norm": 5.622149280558416, + "learning_rate": 9.535478280071422e-06, + "loss": 0.9308, + "step": 2009 + }, + { + "epoch": 0.16, + "grad_norm": 6.105878039691027, + "learning_rate": 9.534920909990101e-06, + "loss": 1.3476, + "step": 2010 + }, + { + "epoch": 0.16, + "grad_norm": 4.166972114310374, + "learning_rate": 9.534363222030563e-06, + "loss": 0.8512, + "step": 2011 + }, + { + "epoch": 0.16, + "grad_norm": 4.628377013586853, + "learning_rate": 9.533805216231894e-06, + "loss": 0.9158, + "step": 2012 + }, + { + "epoch": 0.16, + "grad_norm": 5.980529070798636, + "learning_rate": 9.533246892633213e-06, + "loss": 0.7445, + "step": 2013 + }, + { + "epoch": 0.16, + "grad_norm": 3.4338879751047973, + "learning_rate": 9.532688251273654e-06, + "loss": 0.5876, + "step": 2014 + }, + { + "epoch": 0.16, + "grad_norm": 3.2946132643396164, + "learning_rate": 9.532129292192373e-06, + "loss": 0.7603, + "step": 2015 + }, + { + "epoch": 0.16, + "grad_norm": 3.900269383522834, + "learning_rate": 9.531570015428553e-06, + "loss": 0.7026, + "step": 2016 + }, + { + "epoch": 0.16, + "grad_norm": 4.469620357210813, + "learning_rate": 9.531010421021396e-06, + "loss": 1.2024, + "step": 2017 + }, + { + "epoch": 0.16, + "grad_norm": 5.3814539431713895, + "learning_rate": 9.530450509010128e-06, + "loss": 1.1572, + "step": 2018 + }, + { + "epoch": 0.17, + "grad_norm": 4.214817152693931, + "learning_rate": 9.529890279433995e-06, + "loss": 0.72, + "step": 2019 + }, + { + "epoch": 0.17, + "grad_norm": 2.520489716408855, + "learning_rate": 9.529329732332268e-06, + "loss": 0.3485, + "step": 2020 + }, + { + "epoch": 0.17, + "grad_norm": 3.803091814303413, + "learning_rate": 9.52876886774424e-06, + "loss": 1.1041, + "step": 2021 + }, + { + "epoch": 0.17, + "grad_norm": 4.6421407248221, + "learning_rate": 9.528207685709221e-06, + "loss": 1.2171, + "step": 2022 + }, + { + "epoch": 0.17, + "grad_norm": 4.23653445974646, + "learning_rate": 9.52764618626655e-06, + "loss": 1.1003, + "step": 2023 + }, + { + "epoch": 0.17, + "grad_norm": 4.885493299123106, + "learning_rate": 9.527084369455589e-06, + "loss": 1.0119, + "step": 2024 + }, + { + "epoch": 0.17, + "grad_norm": 4.731147655154812, + "learning_rate": 9.526522235315713e-06, + "loss": 1.1316, + "step": 2025 + }, + { + "epoch": 0.17, + "grad_norm": 2.8303744896202905, + "learning_rate": 9.525959783886329e-06, + "loss": 0.7168, + "step": 2026 + }, + { + "epoch": 0.17, + "grad_norm": 4.79528547028082, + "learning_rate": 9.525397015206861e-06, + "loss": 0.8626, + "step": 2027 + }, + { + "epoch": 0.17, + "grad_norm": 4.403137139314899, + "learning_rate": 9.524833929316758e-06, + "loss": 0.7673, + "step": 2028 + }, + { + "epoch": 0.17, + "grad_norm": 5.632791389070294, + "learning_rate": 9.524270526255486e-06, + "loss": 1.6007, + "step": 2029 + }, + { + "epoch": 0.17, + "grad_norm": 4.910686152455089, + "learning_rate": 9.523706806062541e-06, + "loss": 1.383, + "step": 2030 + }, + { + "epoch": 0.17, + "grad_norm": 5.2941777100821295, + "learning_rate": 9.523142768777435e-06, + "loss": 1.4508, + "step": 2031 + }, + { + "epoch": 0.17, + "grad_norm": 4.2257729295585085, + "learning_rate": 9.522578414439707e-06, + "loss": 0.7555, + "step": 2032 + }, + { + "epoch": 0.17, + "grad_norm": 5.177167550722004, + "learning_rate": 9.522013743088916e-06, + "loss": 0.9737, + "step": 2033 + }, + { + "epoch": 0.17, + "grad_norm": 4.878369880410306, + "learning_rate": 9.52144875476464e-06, + "loss": 1.3448, + "step": 2034 + }, + { + "epoch": 0.17, + "grad_norm": 5.307328908557558, + "learning_rate": 9.520883449506483e-06, + "loss": 1.0423, + "step": 2035 + }, + { + "epoch": 0.17, + "grad_norm": 3.7938378285887997, + "learning_rate": 9.52031782735407e-06, + "loss": 0.5762, + "step": 2036 + }, + { + "epoch": 0.17, + "grad_norm": 4.410318130606425, + "learning_rate": 9.519751888347053e-06, + "loss": 0.9626, + "step": 2037 + }, + { + "epoch": 0.17, + "grad_norm": 4.689113064551304, + "learning_rate": 9.519185632525097e-06, + "loss": 1.0112, + "step": 2038 + }, + { + "epoch": 0.17, + "grad_norm": 3.771765049703681, + "learning_rate": 9.518619059927895e-06, + "loss": 0.4313, + "step": 2039 + }, + { + "epoch": 0.17, + "grad_norm": 3.524515596196058, + "learning_rate": 9.518052170595165e-06, + "loss": 0.9909, + "step": 2040 + }, + { + "epoch": 0.17, + "grad_norm": 3.1098921131187756, + "learning_rate": 9.517484964566637e-06, + "loss": 0.4388, + "step": 2041 + }, + { + "epoch": 0.17, + "grad_norm": 3.840815725983443, + "learning_rate": 9.516917441882074e-06, + "loss": 0.8934, + "step": 2042 + }, + { + "epoch": 0.17, + "grad_norm": 4.737988001113275, + "learning_rate": 9.516349602581256e-06, + "loss": 0.9319, + "step": 2043 + }, + { + "epoch": 0.17, + "grad_norm": 5.233006946267547, + "learning_rate": 9.515781446703988e-06, + "loss": 0.9586, + "step": 2044 + }, + { + "epoch": 0.17, + "grad_norm": 5.338332469127597, + "learning_rate": 9.51521297429009e-06, + "loss": 1.1064, + "step": 2045 + }, + { + "epoch": 0.17, + "grad_norm": 2.181487637050818, + "learning_rate": 9.514644185379416e-06, + "loss": 0.3214, + "step": 2046 + }, + { + "epoch": 0.17, + "grad_norm": 4.820616144216788, + "learning_rate": 9.51407508001183e-06, + "loss": 1.0967, + "step": 2047 + }, + { + "epoch": 0.17, + "grad_norm": 2.3995605186361595, + "learning_rate": 9.51350565822723e-06, + "loss": 0.46, + "step": 2048 + }, + { + "epoch": 0.17, + "grad_norm": 4.189027154695834, + "learning_rate": 9.512935920065523e-06, + "loss": 1.0905, + "step": 2049 + }, + { + "epoch": 0.17, + "grad_norm": 4.208339630062005, + "learning_rate": 9.512365865566648e-06, + "loss": 0.7666, + "step": 2050 + }, + { + "epoch": 0.17, + "grad_norm": 4.44626051074097, + "learning_rate": 9.511795494770563e-06, + "loss": 0.9391, + "step": 2051 + }, + { + "epoch": 0.17, + "grad_norm": 4.014287763227923, + "learning_rate": 9.511224807717253e-06, + "loss": 0.4553, + "step": 2052 + }, + { + "epoch": 0.17, + "grad_norm": 3.5272840701887276, + "learning_rate": 9.510653804446714e-06, + "loss": 0.98, + "step": 2053 + }, + { + "epoch": 0.17, + "grad_norm": 2.5804379646472637, + "learning_rate": 9.510082484998975e-06, + "loss": 0.5724, + "step": 2054 + }, + { + "epoch": 0.17, + "grad_norm": 3.0664109603373664, + "learning_rate": 9.50951084941408e-06, + "loss": 0.777, + "step": 2055 + }, + { + "epoch": 0.17, + "grad_norm": 5.9548121149971385, + "learning_rate": 9.508938897732101e-06, + "loss": 1.1975, + "step": 2056 + }, + { + "epoch": 0.17, + "grad_norm": 3.704230560766375, + "learning_rate": 9.508366629993129e-06, + "loss": 0.8394, + "step": 2057 + }, + { + "epoch": 0.17, + "grad_norm": 3.0182372497951966, + "learning_rate": 9.507794046237275e-06, + "loss": 0.4947, + "step": 2058 + }, + { + "epoch": 0.17, + "grad_norm": 4.3287807453200715, + "learning_rate": 9.507221146504679e-06, + "loss": 0.5632, + "step": 2059 + }, + { + "epoch": 0.17, + "grad_norm": 4.986043890091539, + "learning_rate": 9.506647930835494e-06, + "loss": 1.1423, + "step": 2060 + }, + { + "epoch": 0.17, + "grad_norm": 4.958680811383781, + "learning_rate": 9.506074399269902e-06, + "loss": 1.0541, + "step": 2061 + }, + { + "epoch": 0.17, + "grad_norm": 4.1924867873842455, + "learning_rate": 9.505500551848105e-06, + "loss": 0.7383, + "step": 2062 + }, + { + "epoch": 0.17, + "grad_norm": 2.192658274030753, + "learning_rate": 9.50492638861033e-06, + "loss": 0.4288, + "step": 2063 + }, + { + "epoch": 0.17, + "grad_norm": 3.0884543370481534, + "learning_rate": 9.504351909596818e-06, + "loss": 0.6757, + "step": 2064 + }, + { + "epoch": 0.17, + "grad_norm": 2.299401261090491, + "learning_rate": 9.503777114847841e-06, + "loss": 0.5118, + "step": 2065 + }, + { + "epoch": 0.17, + "grad_norm": 1.9253161949191624, + "learning_rate": 9.503202004403688e-06, + "loss": 0.4369, + "step": 2066 + }, + { + "epoch": 0.17, + "grad_norm": 4.028626104070394, + "learning_rate": 9.502626578304673e-06, + "loss": 0.7785, + "step": 2067 + }, + { + "epoch": 0.17, + "grad_norm": 4.1122624328146475, + "learning_rate": 9.50205083659113e-06, + "loss": 0.7224, + "step": 2068 + }, + { + "epoch": 0.17, + "grad_norm": 3.2818337720691417, + "learning_rate": 9.501474779303416e-06, + "loss": 0.6204, + "step": 2069 + }, + { + "epoch": 0.17, + "grad_norm": 4.771811140932043, + "learning_rate": 9.500898406481911e-06, + "loss": 1.1285, + "step": 2070 + }, + { + "epoch": 0.17, + "grad_norm": 3.8057931128703726, + "learning_rate": 9.500321718167017e-06, + "loss": 0.8784, + "step": 2071 + }, + { + "epoch": 0.17, + "grad_norm": 3.5515639183361767, + "learning_rate": 9.499744714399155e-06, + "loss": 0.899, + "step": 2072 + }, + { + "epoch": 0.17, + "grad_norm": 5.122862374078744, + "learning_rate": 9.499167395218772e-06, + "loss": 0.9491, + "step": 2073 + }, + { + "epoch": 0.17, + "grad_norm": 3.7491156377139907, + "learning_rate": 9.498589760666333e-06, + "loss": 1.0864, + "step": 2074 + }, + { + "epoch": 0.17, + "grad_norm": 3.448237666496974, + "learning_rate": 9.498011810782332e-06, + "loss": 0.7363, + "step": 2075 + }, + { + "epoch": 0.17, + "grad_norm": 2.865420210192231, + "learning_rate": 9.497433545607278e-06, + "loss": 0.6435, + "step": 2076 + }, + { + "epoch": 0.17, + "grad_norm": 3.8746356846623153, + "learning_rate": 9.496854965181705e-06, + "loss": 1.1468, + "step": 2077 + }, + { + "epoch": 0.17, + "grad_norm": 4.349451863770051, + "learning_rate": 9.496276069546169e-06, + "loss": 0.998, + "step": 2078 + }, + { + "epoch": 0.17, + "grad_norm": 5.444977958151714, + "learning_rate": 9.495696858741249e-06, + "loss": 0.9305, + "step": 2079 + }, + { + "epoch": 0.17, + "grad_norm": 4.643233439483295, + "learning_rate": 9.495117332807542e-06, + "loss": 0.9184, + "step": 2080 + }, + { + "epoch": 0.17, + "grad_norm": 3.447022066674226, + "learning_rate": 9.494537491785676e-06, + "loss": 0.7931, + "step": 2081 + }, + { + "epoch": 0.17, + "grad_norm": 3.9125196189107183, + "learning_rate": 9.493957335716291e-06, + "loss": 0.9897, + "step": 2082 + }, + { + "epoch": 0.17, + "grad_norm": 6.285592320779023, + "learning_rate": 9.493376864640054e-06, + "loss": 1.3372, + "step": 2083 + }, + { + "epoch": 0.17, + "grad_norm": 3.0882900515413283, + "learning_rate": 9.492796078597655e-06, + "loss": 0.527, + "step": 2084 + }, + { + "epoch": 0.17, + "grad_norm": 4.573554407494425, + "learning_rate": 9.492214977629804e-06, + "loss": 0.9002, + "step": 2085 + }, + { + "epoch": 0.17, + "grad_norm": 5.834869538459952, + "learning_rate": 9.491633561777232e-06, + "loss": 1.1973, + "step": 2086 + }, + { + "epoch": 0.17, + "grad_norm": 4.591982689006695, + "learning_rate": 9.491051831080695e-06, + "loss": 1.2376, + "step": 2087 + }, + { + "epoch": 0.17, + "grad_norm": 3.410080452072863, + "learning_rate": 9.490469785580971e-06, + "loss": 0.77, + "step": 2088 + }, + { + "epoch": 0.17, + "grad_norm": 3.4696812104534214, + "learning_rate": 9.489887425318856e-06, + "loss": 0.8371, + "step": 2089 + }, + { + "epoch": 0.17, + "grad_norm": 3.6395549603734527, + "learning_rate": 9.489304750335173e-06, + "loss": 0.8124, + "step": 2090 + }, + { + "epoch": 0.17, + "grad_norm": 3.3541758421205494, + "learning_rate": 9.488721760670766e-06, + "loss": 0.7443, + "step": 2091 + }, + { + "epoch": 0.17, + "grad_norm": 2.157586521603289, + "learning_rate": 9.488138456366497e-06, + "loss": 0.681, + "step": 2092 + }, + { + "epoch": 0.17, + "grad_norm": 3.117454253879517, + "learning_rate": 9.487554837463255e-06, + "loss": 0.4913, + "step": 2093 + }, + { + "epoch": 0.17, + "grad_norm": 6.380817613738605, + "learning_rate": 9.48697090400195e-06, + "loss": 1.092, + "step": 2094 + }, + { + "epoch": 0.17, + "grad_norm": 2.4747229179837382, + "learning_rate": 9.486386656023509e-06, + "loss": 0.5649, + "step": 2095 + }, + { + "epoch": 0.17, + "grad_norm": 3.3807338815393715, + "learning_rate": 9.485802093568892e-06, + "loss": 0.7537, + "step": 2096 + }, + { + "epoch": 0.17, + "grad_norm": 3.1771176947496484, + "learning_rate": 9.485217216679068e-06, + "loss": 0.9096, + "step": 2097 + }, + { + "epoch": 0.17, + "grad_norm": 4.282633156917091, + "learning_rate": 9.484632025395037e-06, + "loss": 0.9372, + "step": 2098 + }, + { + "epoch": 0.17, + "grad_norm": 5.095661517054039, + "learning_rate": 9.48404651975782e-06, + "loss": 1.414, + "step": 2099 + }, + { + "epoch": 0.17, + "grad_norm": 5.529039630511247, + "learning_rate": 9.483460699808454e-06, + "loss": 1.2644, + "step": 2100 + }, + { + "epoch": 0.17, + "grad_norm": 5.867831198743964, + "learning_rate": 9.482874565588008e-06, + "loss": 1.2301, + "step": 2101 + }, + { + "epoch": 0.17, + "grad_norm": 3.908320279178014, + "learning_rate": 9.482288117137561e-06, + "loss": 0.5719, + "step": 2102 + }, + { + "epoch": 0.17, + "grad_norm": 4.22276681186549, + "learning_rate": 9.481701354498227e-06, + "loss": 0.7356, + "step": 2103 + }, + { + "epoch": 0.17, + "grad_norm": 3.8726519472165974, + "learning_rate": 9.48111427771113e-06, + "loss": 1.0238, + "step": 2104 + }, + { + "epoch": 0.17, + "grad_norm": 3.545740432284491, + "learning_rate": 9.480526886817425e-06, + "loss": 0.5745, + "step": 2105 + }, + { + "epoch": 0.17, + "grad_norm": 3.525664610056473, + "learning_rate": 9.479939181858286e-06, + "loss": 0.841, + "step": 2106 + }, + { + "epoch": 0.17, + "grad_norm": 5.119819181857357, + "learning_rate": 9.479351162874905e-06, + "loss": 1.0244, + "step": 2107 + }, + { + "epoch": 0.17, + "grad_norm": 4.480209901923949, + "learning_rate": 9.478762829908503e-06, + "loss": 1.0946, + "step": 2108 + }, + { + "epoch": 0.17, + "grad_norm": 4.06730290748614, + "learning_rate": 9.478174183000319e-06, + "loss": 0.9159, + "step": 2109 + }, + { + "epoch": 0.17, + "grad_norm": 4.50710093602591, + "learning_rate": 9.477585222191612e-06, + "loss": 1.0762, + "step": 2110 + }, + { + "epoch": 0.17, + "grad_norm": 5.821870746791981, + "learning_rate": 9.476995947523668e-06, + "loss": 1.1686, + "step": 2111 + }, + { + "epoch": 0.17, + "grad_norm": 3.4036271881923335, + "learning_rate": 9.476406359037792e-06, + "loss": 0.6, + "step": 2112 + }, + { + "epoch": 0.17, + "grad_norm": 4.135499123303508, + "learning_rate": 9.475816456775313e-06, + "loss": 0.8012, + "step": 2113 + }, + { + "epoch": 0.17, + "grad_norm": 4.674933703337597, + "learning_rate": 9.475226240777577e-06, + "loss": 1.1691, + "step": 2114 + }, + { + "epoch": 0.17, + "grad_norm": 6.075280978313644, + "learning_rate": 9.47463571108596e-06, + "loss": 1.0191, + "step": 2115 + }, + { + "epoch": 0.17, + "grad_norm": 3.4166952874689573, + "learning_rate": 9.474044867741852e-06, + "loss": 0.5438, + "step": 2116 + }, + { + "epoch": 0.17, + "grad_norm": 3.991601282845236, + "learning_rate": 9.47345371078667e-06, + "loss": 1.1868, + "step": 2117 + }, + { + "epoch": 0.17, + "grad_norm": 4.387621946592111, + "learning_rate": 9.47286224026185e-06, + "loss": 0.9359, + "step": 2118 + }, + { + "epoch": 0.17, + "grad_norm": 3.120682905250976, + "learning_rate": 9.472270456208856e-06, + "loss": 0.682, + "step": 2119 + }, + { + "epoch": 0.17, + "grad_norm": 2.9720525101079796, + "learning_rate": 9.471678358669164e-06, + "loss": 0.6188, + "step": 2120 + }, + { + "epoch": 0.17, + "grad_norm": 3.7381808096544877, + "learning_rate": 9.47108594768428e-06, + "loss": 0.8853, + "step": 2121 + }, + { + "epoch": 0.17, + "grad_norm": 3.8959848975983835, + "learning_rate": 9.47049322329573e-06, + "loss": 0.9168, + "step": 2122 + }, + { + "epoch": 0.17, + "grad_norm": 1.2368095730765394, + "learning_rate": 9.469900185545061e-06, + "loss": 0.1946, + "step": 2123 + }, + { + "epoch": 0.17, + "grad_norm": 3.286467425215324, + "learning_rate": 9.469306834473841e-06, + "loss": 0.7813, + "step": 2124 + }, + { + "epoch": 0.17, + "grad_norm": 2.914048061704354, + "learning_rate": 9.468713170123664e-06, + "loss": 0.4727, + "step": 2125 + }, + { + "epoch": 0.17, + "grad_norm": 3.035570216006248, + "learning_rate": 9.46811919253614e-06, + "loss": 0.4793, + "step": 2126 + }, + { + "epoch": 0.17, + "grad_norm": 4.383252038597668, + "learning_rate": 9.467524901752906e-06, + "loss": 1.0153, + "step": 2127 + }, + { + "epoch": 0.17, + "grad_norm": 3.9088761095690225, + "learning_rate": 9.466930297815622e-06, + "loss": 1.2265, + "step": 2128 + }, + { + "epoch": 0.17, + "grad_norm": 3.659464819329504, + "learning_rate": 9.46633538076596e-06, + "loss": 0.8317, + "step": 2129 + }, + { + "epoch": 0.17, + "grad_norm": 3.897382976866003, + "learning_rate": 9.465740150645629e-06, + "loss": 0.6749, + "step": 2130 + }, + { + "epoch": 0.17, + "grad_norm": 4.286261516991291, + "learning_rate": 9.465144607496347e-06, + "loss": 0.9856, + "step": 2131 + }, + { + "epoch": 0.17, + "grad_norm": 3.8718066416881376, + "learning_rate": 9.46454875135986e-06, + "loss": 0.9202, + "step": 2132 + }, + { + "epoch": 0.17, + "grad_norm": 3.9295301699594405, + "learning_rate": 9.463952582277936e-06, + "loss": 0.637, + "step": 2133 + }, + { + "epoch": 0.17, + "grad_norm": 2.7624787002724838, + "learning_rate": 9.463356100292363e-06, + "loss": 0.6039, + "step": 2134 + }, + { + "epoch": 0.17, + "grad_norm": 5.45137275432468, + "learning_rate": 9.462759305444951e-06, + "loss": 1.5769, + "step": 2135 + }, + { + "epoch": 0.17, + "grad_norm": 5.043486054594081, + "learning_rate": 9.462162197777533e-06, + "loss": 0.8245, + "step": 2136 + }, + { + "epoch": 0.17, + "grad_norm": 3.4508470611822823, + "learning_rate": 9.461564777331966e-06, + "loss": 0.6273, + "step": 2137 + }, + { + "epoch": 0.17, + "grad_norm": 3.550464396618127, + "learning_rate": 9.460967044150125e-06, + "loss": 0.4785, + "step": 2138 + }, + { + "epoch": 0.17, + "grad_norm": 4.220157500165512, + "learning_rate": 9.460368998273908e-06, + "loss": 0.8692, + "step": 2139 + }, + { + "epoch": 0.17, + "grad_norm": 4.451446644164049, + "learning_rate": 9.459770639745235e-06, + "loss": 0.8494, + "step": 2140 + }, + { + "epoch": 0.17, + "grad_norm": 4.649836832615487, + "learning_rate": 9.459171968606051e-06, + "loss": 1.0236, + "step": 2141 + }, + { + "epoch": 0.18, + "grad_norm": 2.986127517748761, + "learning_rate": 9.458572984898318e-06, + "loss": 0.5701, + "step": 2142 + }, + { + "epoch": 0.18, + "grad_norm": 3.979887171050888, + "learning_rate": 9.457973688664021e-06, + "loss": 0.865, + "step": 2143 + }, + { + "epoch": 0.18, + "grad_norm": 1.2761285963995945, + "learning_rate": 9.45737407994517e-06, + "loss": 0.2562, + "step": 2144 + }, + { + "epoch": 0.18, + "grad_norm": 5.330361544442108, + "learning_rate": 9.456774158783795e-06, + "loss": 1.2154, + "step": 2145 + }, + { + "epoch": 0.18, + "grad_norm": 4.612898726359513, + "learning_rate": 9.456173925221948e-06, + "loss": 0.7982, + "step": 2146 + }, + { + "epoch": 0.18, + "grad_norm": 5.966569355397138, + "learning_rate": 9.455573379301702e-06, + "loss": 1.5651, + "step": 2147 + }, + { + "epoch": 0.18, + "grad_norm": 2.4979505753209525, + "learning_rate": 9.45497252106515e-06, + "loss": 0.3068, + "step": 2148 + }, + { + "epoch": 0.18, + "grad_norm": 0.8643227063850487, + "learning_rate": 9.454371350554417e-06, + "loss": 0.1983, + "step": 2149 + }, + { + "epoch": 0.18, + "grad_norm": 2.8998501733971542, + "learning_rate": 9.453769867811636e-06, + "loss": 0.7906, + "step": 2150 + }, + { + "epoch": 0.18, + "grad_norm": 3.334813471461509, + "learning_rate": 9.45316807287897e-06, + "loss": 0.5067, + "step": 2151 + }, + { + "epoch": 0.18, + "grad_norm": 2.6296558987705527, + "learning_rate": 9.4525659657986e-06, + "loss": 0.4986, + "step": 2152 + }, + { + "epoch": 0.18, + "grad_norm": 3.9692009490803257, + "learning_rate": 9.451963546612737e-06, + "loss": 0.9403, + "step": 2153 + }, + { + "epoch": 0.18, + "grad_norm": 4.384935016423835, + "learning_rate": 9.451360815363601e-06, + "loss": 0.9806, + "step": 2154 + }, + { + "epoch": 0.18, + "grad_norm": 3.7487390901287174, + "learning_rate": 9.450757772093447e-06, + "loss": 0.6419, + "step": 2155 + }, + { + "epoch": 0.18, + "grad_norm": 3.7973741872239026, + "learning_rate": 9.450154416844543e-06, + "loss": 0.7477, + "step": 2156 + }, + { + "epoch": 0.18, + "grad_norm": 4.490270775900813, + "learning_rate": 9.44955074965918e-06, + "loss": 1.148, + "step": 2157 + }, + { + "epoch": 0.18, + "grad_norm": 4.100769635829848, + "learning_rate": 9.448946770579675e-06, + "loss": 0.9803, + "step": 2158 + }, + { + "epoch": 0.18, + "grad_norm": 4.324000798766789, + "learning_rate": 9.448342479648362e-06, + "loss": 0.6454, + "step": 2159 + }, + { + "epoch": 0.18, + "grad_norm": 4.757169125636011, + "learning_rate": 9.447737876907602e-06, + "loss": 1.2184, + "step": 2160 + }, + { + "epoch": 0.18, + "grad_norm": 3.7057613162293004, + "learning_rate": 9.447132962399772e-06, + "loss": 0.8668, + "step": 2161 + }, + { + "epoch": 0.18, + "grad_norm": 2.352280279037213, + "learning_rate": 9.446527736167277e-06, + "loss": 0.3339, + "step": 2162 + }, + { + "epoch": 0.18, + "grad_norm": 3.4131389889547674, + "learning_rate": 9.445922198252538e-06, + "loss": 0.8509, + "step": 2163 + }, + { + "epoch": 0.18, + "grad_norm": 3.1302691719793336, + "learning_rate": 9.445316348698002e-06, + "loss": 0.9048, + "step": 2164 + }, + { + "epoch": 0.18, + "grad_norm": 5.184514928247789, + "learning_rate": 9.444710187546136e-06, + "loss": 1.2745, + "step": 2165 + }, + { + "epoch": 0.18, + "grad_norm": 4.3023991330152445, + "learning_rate": 9.444103714839427e-06, + "loss": 0.8257, + "step": 2166 + }, + { + "epoch": 0.18, + "grad_norm": 2.4559356624211106, + "learning_rate": 9.443496930620392e-06, + "loss": 0.7127, + "step": 2167 + }, + { + "epoch": 0.18, + "grad_norm": 3.0009784342285326, + "learning_rate": 9.442889834931558e-06, + "loss": 0.5808, + "step": 2168 + }, + { + "epoch": 0.18, + "grad_norm": 4.271613359566975, + "learning_rate": 9.442282427815483e-06, + "loss": 0.8843, + "step": 2169 + }, + { + "epoch": 0.18, + "grad_norm": 3.768172855722143, + "learning_rate": 9.441674709314743e-06, + "loss": 1.0149, + "step": 2170 + }, + { + "epoch": 0.18, + "grad_norm": 2.393967273531713, + "learning_rate": 9.441066679471935e-06, + "loss": 0.5938, + "step": 2171 + }, + { + "epoch": 0.18, + "grad_norm": 3.7071192673095257, + "learning_rate": 9.440458338329681e-06, + "loss": 0.5606, + "step": 2172 + }, + { + "epoch": 0.18, + "grad_norm": 4.286003794630861, + "learning_rate": 9.439849685930623e-06, + "loss": 0.9005, + "step": 2173 + }, + { + "epoch": 0.18, + "grad_norm": 3.8373323976564766, + "learning_rate": 9.439240722317423e-06, + "loss": 0.4884, + "step": 2174 + }, + { + "epoch": 0.18, + "grad_norm": 2.8342604024352474, + "learning_rate": 9.43863144753277e-06, + "loss": 0.6117, + "step": 2175 + }, + { + "epoch": 0.18, + "grad_norm": 4.5346372794679, + "learning_rate": 9.438021861619367e-06, + "loss": 0.8834, + "step": 2176 + }, + { + "epoch": 0.18, + "grad_norm": 3.630996103152386, + "learning_rate": 9.437411964619947e-06, + "loss": 0.8089, + "step": 2177 + }, + { + "epoch": 0.18, + "grad_norm": 4.55704600360632, + "learning_rate": 9.43680175657726e-06, + "loss": 0.8158, + "step": 2178 + }, + { + "epoch": 0.18, + "grad_norm": 5.547727769767378, + "learning_rate": 9.43619123753408e-06, + "loss": 1.5137, + "step": 2179 + }, + { + "epoch": 0.18, + "grad_norm": 3.6197319803485417, + "learning_rate": 9.435580407533198e-06, + "loss": 0.4518, + "step": 2180 + }, + { + "epoch": 0.18, + "grad_norm": 5.264578431627168, + "learning_rate": 9.434969266617436e-06, + "loss": 1.4073, + "step": 2181 + }, + { + "epoch": 0.18, + "grad_norm": 4.085161116879681, + "learning_rate": 9.434357814829627e-06, + "loss": 0.7944, + "step": 2182 + }, + { + "epoch": 0.18, + "grad_norm": 3.8200278738459086, + "learning_rate": 9.433746052212636e-06, + "loss": 0.8745, + "step": 2183 + }, + { + "epoch": 0.18, + "grad_norm": 5.073425916795223, + "learning_rate": 9.43313397880934e-06, + "loss": 0.9598, + "step": 2184 + }, + { + "epoch": 0.18, + "grad_norm": 3.8512311184225396, + "learning_rate": 9.432521594662648e-06, + "loss": 0.7672, + "step": 2185 + }, + { + "epoch": 0.18, + "grad_norm": 3.9337318171462963, + "learning_rate": 9.431908899815479e-06, + "loss": 0.8608, + "step": 2186 + }, + { + "epoch": 0.18, + "grad_norm": 1.8885296919716454, + "learning_rate": 9.431295894310786e-06, + "loss": 0.4857, + "step": 2187 + }, + { + "epoch": 0.18, + "grad_norm": 2.903393735231428, + "learning_rate": 9.430682578191537e-06, + "loss": 0.6505, + "step": 2188 + }, + { + "epoch": 0.18, + "grad_norm": 1.6887859434219867, + "learning_rate": 9.43006895150072e-06, + "loss": 0.336, + "step": 2189 + }, + { + "epoch": 0.18, + "grad_norm": 5.153251818217103, + "learning_rate": 9.429455014281349e-06, + "loss": 1.4917, + "step": 2190 + }, + { + "epoch": 0.18, + "grad_norm": 3.706061847628713, + "learning_rate": 9.428840766576459e-06, + "loss": 1.0367, + "step": 2191 + }, + { + "epoch": 0.18, + "grad_norm": 5.457412294800602, + "learning_rate": 9.428226208429106e-06, + "loss": 1.1445, + "step": 2192 + }, + { + "epoch": 0.18, + "grad_norm": 4.555600596305112, + "learning_rate": 9.427611339882368e-06, + "loss": 1.197, + "step": 2193 + }, + { + "epoch": 0.18, + "grad_norm": 4.005253708947643, + "learning_rate": 9.426996160979342e-06, + "loss": 1.0339, + "step": 2194 + }, + { + "epoch": 0.18, + "grad_norm": 4.977210166365334, + "learning_rate": 9.426380671763154e-06, + "loss": 1.0448, + "step": 2195 + }, + { + "epoch": 0.18, + "grad_norm": 3.1991074719897274, + "learning_rate": 9.425764872276942e-06, + "loss": 0.5701, + "step": 2196 + }, + { + "epoch": 0.18, + "grad_norm": 3.1006524749382405, + "learning_rate": 9.425148762563876e-06, + "loss": 0.9353, + "step": 2197 + }, + { + "epoch": 0.18, + "grad_norm": 3.332035032248812, + "learning_rate": 9.424532342667138e-06, + "loss": 0.7351, + "step": 2198 + }, + { + "epoch": 0.18, + "grad_norm": 3.6185299233730506, + "learning_rate": 9.42391561262994e-06, + "loss": 0.7285, + "step": 2199 + }, + { + "epoch": 0.18, + "grad_norm": 2.5599192483980033, + "learning_rate": 9.42329857249551e-06, + "loss": 0.4709, + "step": 2200 + }, + { + "epoch": 0.18, + "grad_norm": 4.312042537898623, + "learning_rate": 9.422681222307099e-06, + "loss": 0.9185, + "step": 2201 + }, + { + "epoch": 0.18, + "grad_norm": 3.6335923426745067, + "learning_rate": 9.422063562107984e-06, + "loss": 0.8953, + "step": 2202 + }, + { + "epoch": 0.18, + "grad_norm": 6.034931329079666, + "learning_rate": 9.421445591941456e-06, + "loss": 1.3278, + "step": 2203 + }, + { + "epoch": 0.18, + "grad_norm": 2.7279775684316347, + "learning_rate": 9.420827311850836e-06, + "loss": 0.5268, + "step": 2204 + }, + { + "epoch": 0.18, + "grad_norm": 3.636644980441379, + "learning_rate": 9.42020872187946e-06, + "loss": 0.543, + "step": 2205 + }, + { + "epoch": 0.18, + "grad_norm": 3.8513882635748193, + "learning_rate": 9.41958982207069e-06, + "loss": 1.1868, + "step": 2206 + }, + { + "epoch": 0.18, + "grad_norm": 3.599020465113644, + "learning_rate": 9.418970612467908e-06, + "loss": 0.5247, + "step": 2207 + }, + { + "epoch": 0.18, + "grad_norm": 2.247518479537959, + "learning_rate": 9.418351093114517e-06, + "loss": 0.343, + "step": 2208 + }, + { + "epoch": 0.18, + "grad_norm": 5.2366428056676835, + "learning_rate": 9.417731264053942e-06, + "loss": 1.409, + "step": 2209 + }, + { + "epoch": 0.18, + "grad_norm": 3.424511385285495, + "learning_rate": 9.417111125329633e-06, + "loss": 0.891, + "step": 2210 + }, + { + "epoch": 0.18, + "grad_norm": 2.854324920321562, + "learning_rate": 9.416490676985057e-06, + "loss": 0.57, + "step": 2211 + }, + { + "epoch": 0.18, + "grad_norm": 4.814197414087323, + "learning_rate": 9.415869919063705e-06, + "loss": 0.957, + "step": 2212 + }, + { + "epoch": 0.18, + "grad_norm": 3.1892540112475123, + "learning_rate": 9.415248851609089e-06, + "loss": 0.6602, + "step": 2213 + }, + { + "epoch": 0.18, + "grad_norm": 6.012278285033315, + "learning_rate": 9.414627474664745e-06, + "loss": 1.3654, + "step": 2214 + }, + { + "epoch": 0.18, + "grad_norm": 3.5956857349837175, + "learning_rate": 9.414005788274226e-06, + "loss": 0.7299, + "step": 2215 + }, + { + "epoch": 0.18, + "grad_norm": 2.8920306479101443, + "learning_rate": 9.413383792481112e-06, + "loss": 0.5877, + "step": 2216 + }, + { + "epoch": 0.18, + "grad_norm": 2.961917764585591, + "learning_rate": 9.412761487329e-06, + "loss": 0.3996, + "step": 2217 + }, + { + "epoch": 0.18, + "grad_norm": 3.622972061370353, + "learning_rate": 9.412138872861514e-06, + "loss": 0.7335, + "step": 2218 + }, + { + "epoch": 0.18, + "grad_norm": 3.8898421548794957, + "learning_rate": 9.411515949122295e-06, + "loss": 0.7544, + "step": 2219 + }, + { + "epoch": 0.18, + "grad_norm": 5.542825331149399, + "learning_rate": 9.410892716155006e-06, + "loss": 1.1682, + "step": 2220 + }, + { + "epoch": 0.18, + "grad_norm": 5.240458429700037, + "learning_rate": 9.410269174003333e-06, + "loss": 1.1765, + "step": 2221 + }, + { + "epoch": 0.18, + "grad_norm": 4.717643764221059, + "learning_rate": 9.409645322710985e-06, + "loss": 1.0527, + "step": 2222 + }, + { + "epoch": 0.18, + "grad_norm": 5.61581799462886, + "learning_rate": 9.409021162321692e-06, + "loss": 1.2727, + "step": 2223 + }, + { + "epoch": 0.18, + "grad_norm": 3.902406104492035, + "learning_rate": 9.408396692879202e-06, + "loss": 1.1395, + "step": 2224 + }, + { + "epoch": 0.18, + "grad_norm": 4.042305305793964, + "learning_rate": 9.40777191442729e-06, + "loss": 0.859, + "step": 2225 + }, + { + "epoch": 0.18, + "grad_norm": 1.0774316749315587, + "learning_rate": 9.40714682700975e-06, + "loss": 0.165, + "step": 2226 + }, + { + "epoch": 0.18, + "grad_norm": 3.302788281417149, + "learning_rate": 9.406521430670397e-06, + "loss": 0.8008, + "step": 2227 + }, + { + "epoch": 0.18, + "grad_norm": 3.7846222730904553, + "learning_rate": 9.405895725453069e-06, + "loss": 0.7245, + "step": 2228 + }, + { + "epoch": 0.18, + "grad_norm": 4.0857113770735145, + "learning_rate": 9.405269711401625e-06, + "loss": 0.8109, + "step": 2229 + }, + { + "epoch": 0.18, + "grad_norm": 1.9188680752732636, + "learning_rate": 9.404643388559945e-06, + "loss": 0.3626, + "step": 2230 + }, + { + "epoch": 0.18, + "grad_norm": 3.1849555018977895, + "learning_rate": 9.404016756971934e-06, + "loss": 0.9315, + "step": 2231 + }, + { + "epoch": 0.18, + "grad_norm": 3.7608332507637434, + "learning_rate": 9.403389816681514e-06, + "loss": 0.7762, + "step": 2232 + }, + { + "epoch": 0.18, + "grad_norm": 3.9655397836011237, + "learning_rate": 9.402762567732632e-06, + "loss": 0.9902, + "step": 2233 + }, + { + "epoch": 0.18, + "grad_norm": 4.1218309956038155, + "learning_rate": 9.402135010169254e-06, + "loss": 0.5033, + "step": 2234 + }, + { + "epoch": 0.18, + "grad_norm": 1.366445790921517, + "learning_rate": 9.401507144035371e-06, + "loss": 0.222, + "step": 2235 + }, + { + "epoch": 0.18, + "grad_norm": 3.903752718477064, + "learning_rate": 9.400878969374991e-06, + "loss": 0.7701, + "step": 2236 + }, + { + "epoch": 0.18, + "grad_norm": 3.751144292776653, + "learning_rate": 9.40025048623215e-06, + "loss": 0.7907, + "step": 2237 + }, + { + "epoch": 0.18, + "grad_norm": 4.825728154383655, + "learning_rate": 9.399621694650898e-06, + "loss": 1.3445, + "step": 2238 + }, + { + "epoch": 0.18, + "grad_norm": 4.068955811651021, + "learning_rate": 9.398992594675314e-06, + "loss": 0.751, + "step": 2239 + }, + { + "epoch": 0.18, + "grad_norm": 3.658416537459373, + "learning_rate": 9.398363186349493e-06, + "loss": 1.0305, + "step": 2240 + }, + { + "epoch": 0.18, + "grad_norm": 4.329549125699289, + "learning_rate": 9.397733469717554e-06, + "loss": 1.2354, + "step": 2241 + }, + { + "epoch": 0.18, + "grad_norm": 3.069811292045692, + "learning_rate": 9.397103444823638e-06, + "loss": 0.5039, + "step": 2242 + }, + { + "epoch": 0.18, + "grad_norm": 4.574464473522762, + "learning_rate": 9.396473111711908e-06, + "loss": 1.2865, + "step": 2243 + }, + { + "epoch": 0.18, + "grad_norm": 3.1815009387994815, + "learning_rate": 9.395842470426545e-06, + "loss": 0.3662, + "step": 2244 + }, + { + "epoch": 0.18, + "grad_norm": 4.596516254714554, + "learning_rate": 9.395211521011756e-06, + "loss": 1.0371, + "step": 2245 + }, + { + "epoch": 0.18, + "grad_norm": 4.278370333633337, + "learning_rate": 9.394580263511765e-06, + "loss": 0.9215, + "step": 2246 + }, + { + "epoch": 0.18, + "grad_norm": 4.5781826380201975, + "learning_rate": 9.393948697970827e-06, + "loss": 0.6672, + "step": 2247 + }, + { + "epoch": 0.18, + "grad_norm": 3.692094398353298, + "learning_rate": 9.393316824433204e-06, + "loss": 0.6842, + "step": 2248 + }, + { + "epoch": 0.18, + "grad_norm": 3.611932153208114, + "learning_rate": 9.392684642943195e-06, + "loss": 0.9484, + "step": 2249 + }, + { + "epoch": 0.18, + "grad_norm": 3.1271003219389075, + "learning_rate": 9.392052153545108e-06, + "loss": 0.6765, + "step": 2250 + }, + { + "epoch": 0.18, + "grad_norm": 4.2203921452166915, + "learning_rate": 9.391419356283277e-06, + "loss": 1.0756, + "step": 2251 + }, + { + "epoch": 0.18, + "grad_norm": 4.809427884781176, + "learning_rate": 9.390786251202064e-06, + "loss": 1.1822, + "step": 2252 + }, + { + "epoch": 0.18, + "grad_norm": 2.7567251671685584, + "learning_rate": 9.390152838345842e-06, + "loss": 0.6625, + "step": 2253 + }, + { + "epoch": 0.18, + "grad_norm": 4.694270323963662, + "learning_rate": 9.389519117759012e-06, + "loss": 1.3287, + "step": 2254 + }, + { + "epoch": 0.18, + "grad_norm": 4.555233870715096, + "learning_rate": 9.388885089485995e-06, + "loss": 0.9989, + "step": 2255 + }, + { + "epoch": 0.18, + "grad_norm": 4.243426813783271, + "learning_rate": 9.388250753571235e-06, + "loss": 0.8727, + "step": 2256 + }, + { + "epoch": 0.18, + "grad_norm": 1.1269155707380953, + "learning_rate": 9.387616110059194e-06, + "loss": 0.1509, + "step": 2257 + }, + { + "epoch": 0.18, + "grad_norm": 3.1492173789558664, + "learning_rate": 9.386981158994359e-06, + "loss": 0.8127, + "step": 2258 + }, + { + "epoch": 0.18, + "grad_norm": 2.778123070227287, + "learning_rate": 9.386345900421236e-06, + "loss": 0.6753, + "step": 2259 + }, + { + "epoch": 0.18, + "grad_norm": 3.126349253955989, + "learning_rate": 9.385710334384357e-06, + "loss": 0.3218, + "step": 2260 + }, + { + "epoch": 0.18, + "grad_norm": 3.1294988384899938, + "learning_rate": 9.385074460928267e-06, + "loss": 0.4438, + "step": 2261 + }, + { + "epoch": 0.18, + "grad_norm": 3.9584109911097376, + "learning_rate": 9.384438280097543e-06, + "loss": 0.8727, + "step": 2262 + }, + { + "epoch": 0.18, + "grad_norm": 2.5563394754276163, + "learning_rate": 9.383801791936777e-06, + "loss": 0.6102, + "step": 2263 + }, + { + "epoch": 0.19, + "grad_norm": 3.360677635526274, + "learning_rate": 9.383164996490583e-06, + "loss": 0.4224, + "step": 2264 + }, + { + "epoch": 0.19, + "grad_norm": 4.006397861225834, + "learning_rate": 9.382527893803599e-06, + "loss": 1.1627, + "step": 2265 + }, + { + "epoch": 0.19, + "grad_norm": 3.465562048660857, + "learning_rate": 9.381890483920482e-06, + "loss": 0.5598, + "step": 2266 + }, + { + "epoch": 0.19, + "grad_norm": 4.389436228854544, + "learning_rate": 9.381252766885914e-06, + "loss": 1.3719, + "step": 2267 + }, + { + "epoch": 0.19, + "grad_norm": 4.650189768224478, + "learning_rate": 9.380614742744592e-06, + "loss": 1.073, + "step": 2268 + }, + { + "epoch": 0.19, + "grad_norm": 3.940624357871344, + "learning_rate": 9.379976411541241e-06, + "loss": 1.0487, + "step": 2269 + }, + { + "epoch": 0.19, + "grad_norm": 4.0691136895443245, + "learning_rate": 9.379337773320604e-06, + "loss": 0.9991, + "step": 2270 + }, + { + "epoch": 0.19, + "grad_norm": 3.671113323218031, + "learning_rate": 9.378698828127451e-06, + "loss": 0.7108, + "step": 2271 + }, + { + "epoch": 0.19, + "grad_norm": 3.094792417803656, + "learning_rate": 9.378059576006567e-06, + "loss": 0.6131, + "step": 2272 + }, + { + "epoch": 0.19, + "grad_norm": 4.097438813925161, + "learning_rate": 9.377420017002756e-06, + "loss": 1.0312, + "step": 2273 + }, + { + "epoch": 0.19, + "grad_norm": 4.892784583139424, + "learning_rate": 9.376780151160856e-06, + "loss": 1.0126, + "step": 2274 + }, + { + "epoch": 0.19, + "grad_norm": 3.8083537051345573, + "learning_rate": 9.376139978525713e-06, + "loss": 0.9419, + "step": 2275 + }, + { + "epoch": 0.19, + "grad_norm": 2.641312816729417, + "learning_rate": 9.375499499142204e-06, + "loss": 0.8278, + "step": 2276 + }, + { + "epoch": 0.19, + "grad_norm": 4.562235260630101, + "learning_rate": 9.374858713055221e-06, + "loss": 1.0192, + "step": 2277 + }, + { + "epoch": 0.19, + "grad_norm": 1.2153746964649463, + "learning_rate": 9.374217620309684e-06, + "loss": 0.2192, + "step": 2278 + }, + { + "epoch": 0.19, + "grad_norm": 2.6305760677433625, + "learning_rate": 9.373576220950527e-06, + "loss": 0.452, + "step": 2279 + }, + { + "epoch": 0.19, + "grad_norm": 4.230674908813215, + "learning_rate": 9.37293451502271e-06, + "loss": 1.0268, + "step": 2280 + }, + { + "epoch": 0.19, + "grad_norm": 5.652602389587261, + "learning_rate": 9.372292502571217e-06, + "loss": 1.3541, + "step": 2281 + }, + { + "epoch": 0.19, + "grad_norm": 1.5384697420557394, + "learning_rate": 9.371650183641046e-06, + "loss": 0.2499, + "step": 2282 + }, + { + "epoch": 0.19, + "grad_norm": 5.051187199744434, + "learning_rate": 9.371007558277221e-06, + "loss": 1.079, + "step": 2283 + }, + { + "epoch": 0.19, + "grad_norm": 2.6611226742779315, + "learning_rate": 9.370364626524791e-06, + "loss": 0.5498, + "step": 2284 + }, + { + "epoch": 0.19, + "grad_norm": 3.9110461646193837, + "learning_rate": 9.36972138842882e-06, + "loss": 0.9785, + "step": 2285 + }, + { + "epoch": 0.19, + "grad_norm": 5.815306567877642, + "learning_rate": 9.369077844034398e-06, + "loss": 1.5498, + "step": 2286 + }, + { + "epoch": 0.19, + "grad_norm": 3.8516878045698872, + "learning_rate": 9.368433993386632e-06, + "loss": 0.7308, + "step": 2287 + }, + { + "epoch": 0.19, + "grad_norm": 4.22649733569183, + "learning_rate": 9.367789836530655e-06, + "loss": 1.3369, + "step": 2288 + }, + { + "epoch": 0.19, + "grad_norm": 6.188081935922202, + "learning_rate": 9.36714537351162e-06, + "loss": 1.3739, + "step": 2289 + }, + { + "epoch": 0.19, + "grad_norm": 5.836611550160533, + "learning_rate": 9.366500604374699e-06, + "loss": 1.4048, + "step": 2290 + }, + { + "epoch": 0.19, + "grad_norm": 3.68458182019333, + "learning_rate": 9.365855529165089e-06, + "loss": 0.8023, + "step": 2291 + }, + { + "epoch": 0.19, + "grad_norm": 2.937118676656062, + "learning_rate": 9.365210147928006e-06, + "loss": 0.607, + "step": 2292 + }, + { + "epoch": 0.19, + "grad_norm": 3.0959864170466402, + "learning_rate": 9.364564460708689e-06, + "loss": 0.5608, + "step": 2293 + }, + { + "epoch": 0.19, + "grad_norm": 4.279504397544929, + "learning_rate": 9.3639184675524e-06, + "loss": 0.8709, + "step": 2294 + }, + { + "epoch": 0.19, + "grad_norm": 2.814079321161493, + "learning_rate": 9.363272168504417e-06, + "loss": 0.4674, + "step": 2295 + }, + { + "epoch": 0.19, + "grad_norm": 3.61620991804245, + "learning_rate": 9.362625563610044e-06, + "loss": 1.1514, + "step": 2296 + }, + { + "epoch": 0.19, + "grad_norm": 4.758909968343346, + "learning_rate": 9.361978652914605e-06, + "loss": 0.8233, + "step": 2297 + }, + { + "epoch": 0.19, + "grad_norm": 3.2385771274175483, + "learning_rate": 9.361331436463446e-06, + "loss": 0.9019, + "step": 2298 + }, + { + "epoch": 0.19, + "grad_norm": 4.896629749654269, + "learning_rate": 9.360683914301934e-06, + "loss": 1.5023, + "step": 2299 + }, + { + "epoch": 0.19, + "grad_norm": 5.312029356565346, + "learning_rate": 9.360036086475457e-06, + "loss": 1.4424, + "step": 2300 + }, + { + "epoch": 0.19, + "grad_norm": 5.150712940690037, + "learning_rate": 9.359387953029425e-06, + "loss": 0.8739, + "step": 2301 + }, + { + "epoch": 0.19, + "grad_norm": 3.363421170051736, + "learning_rate": 9.358739514009271e-06, + "loss": 0.9312, + "step": 2302 + }, + { + "epoch": 0.19, + "grad_norm": 2.79247579777736, + "learning_rate": 9.358090769460446e-06, + "loss": 0.598, + "step": 2303 + }, + { + "epoch": 0.19, + "grad_norm": 3.185708394371197, + "learning_rate": 9.357441719428423e-06, + "loss": 0.5437, + "step": 2304 + }, + { + "epoch": 0.19, + "grad_norm": 4.29604390462049, + "learning_rate": 9.3567923639587e-06, + "loss": 0.872, + "step": 2305 + }, + { + "epoch": 0.19, + "grad_norm": 4.621954214424412, + "learning_rate": 9.356142703096793e-06, + "loss": 1.0147, + "step": 2306 + }, + { + "epoch": 0.19, + "grad_norm": 3.776210916732992, + "learning_rate": 9.355492736888242e-06, + "loss": 0.8019, + "step": 2307 + }, + { + "epoch": 0.19, + "grad_norm": 3.737184950318045, + "learning_rate": 9.354842465378604e-06, + "loss": 1.0243, + "step": 2308 + }, + { + "epoch": 0.19, + "grad_norm": 3.7661736156524657, + "learning_rate": 9.354191888613462e-06, + "loss": 0.9402, + "step": 2309 + }, + { + "epoch": 0.19, + "grad_norm": 3.3941009941822626, + "learning_rate": 9.353541006638417e-06, + "loss": 0.7118, + "step": 2310 + }, + { + "epoch": 0.19, + "grad_norm": 5.008294593308233, + "learning_rate": 9.352889819499096e-06, + "loss": 1.2041, + "step": 2311 + }, + { + "epoch": 0.19, + "grad_norm": 4.497801080060148, + "learning_rate": 9.35223832724114e-06, + "loss": 1.1584, + "step": 2312 + }, + { + "epoch": 0.19, + "grad_norm": 3.5206679515786554, + "learning_rate": 9.35158652991022e-06, + "loss": 1.1447, + "step": 2313 + }, + { + "epoch": 0.19, + "grad_norm": 5.118739250240711, + "learning_rate": 9.350934427552023e-06, + "loss": 1.476, + "step": 2314 + }, + { + "epoch": 0.19, + "grad_norm": 3.5796514622825963, + "learning_rate": 9.350282020212256e-06, + "loss": 0.5229, + "step": 2315 + }, + { + "epoch": 0.19, + "grad_norm": 5.519285126458806, + "learning_rate": 9.349629307936653e-06, + "loss": 1.039, + "step": 2316 + }, + { + "epoch": 0.19, + "grad_norm": 4.409487761550421, + "learning_rate": 9.348976290770965e-06, + "loss": 1.0798, + "step": 2317 + }, + { + "epoch": 0.19, + "grad_norm": 4.566681383204464, + "learning_rate": 9.348322968760965e-06, + "loss": 0.8468, + "step": 2318 + }, + { + "epoch": 0.19, + "grad_norm": 3.167042571796452, + "learning_rate": 9.34766934195245e-06, + "loss": 0.7639, + "step": 2319 + }, + { + "epoch": 0.19, + "grad_norm": 3.0381343904833593, + "learning_rate": 9.347015410391235e-06, + "loss": 0.3901, + "step": 2320 + }, + { + "epoch": 0.19, + "grad_norm": 4.5957839378494025, + "learning_rate": 9.34636117412316e-06, + "loss": 1.4191, + "step": 2321 + }, + { + "epoch": 0.19, + "grad_norm": 2.3712381446725423, + "learning_rate": 9.345706633194078e-06, + "loss": 0.6917, + "step": 2322 + }, + { + "epoch": 0.19, + "grad_norm": 4.525764739475181, + "learning_rate": 9.345051787649877e-06, + "loss": 0.6715, + "step": 2323 + }, + { + "epoch": 0.19, + "grad_norm": 3.4365910876302572, + "learning_rate": 9.344396637536453e-06, + "loss": 0.8326, + "step": 2324 + }, + { + "epoch": 0.19, + "grad_norm": 4.244381507411723, + "learning_rate": 9.343741182899733e-06, + "loss": 1.1729, + "step": 2325 + }, + { + "epoch": 0.19, + "grad_norm": 3.6725681990302252, + "learning_rate": 9.34308542378566e-06, + "loss": 0.8226, + "step": 2326 + }, + { + "epoch": 0.19, + "grad_norm": 2.8662307678886147, + "learning_rate": 9.3424293602402e-06, + "loss": 0.6593, + "step": 2327 + }, + { + "epoch": 0.19, + "grad_norm": 4.578416090268908, + "learning_rate": 9.34177299230934e-06, + "loss": 0.7326, + "step": 2328 + }, + { + "epoch": 0.19, + "grad_norm": 4.1837032657349225, + "learning_rate": 9.341116320039088e-06, + "loss": 0.9397, + "step": 2329 + }, + { + "epoch": 0.19, + "grad_norm": 4.3667517386706205, + "learning_rate": 9.340459343475475e-06, + "loss": 1.2654, + "step": 2330 + }, + { + "epoch": 0.19, + "grad_norm": 5.3000469622278645, + "learning_rate": 9.339802062664553e-06, + "loss": 1.3045, + "step": 2331 + }, + { + "epoch": 0.19, + "grad_norm": 4.2957218695500545, + "learning_rate": 9.339144477652391e-06, + "loss": 1.0857, + "step": 2332 + }, + { + "epoch": 0.19, + "grad_norm": 4.68864267751406, + "learning_rate": 9.338486588485087e-06, + "loss": 1.3811, + "step": 2333 + }, + { + "epoch": 0.19, + "grad_norm": 3.372649789054265, + "learning_rate": 9.337828395208755e-06, + "loss": 0.5209, + "step": 2334 + }, + { + "epoch": 0.19, + "grad_norm": 3.8499680904005222, + "learning_rate": 9.337169897869528e-06, + "loss": 0.8476, + "step": 2335 + }, + { + "epoch": 0.19, + "grad_norm": 5.961242137201628, + "learning_rate": 9.336511096513568e-06, + "loss": 1.528, + "step": 2336 + }, + { + "epoch": 0.19, + "grad_norm": 5.693661806290446, + "learning_rate": 9.335851991187053e-06, + "loss": 0.873, + "step": 2337 + }, + { + "epoch": 0.19, + "grad_norm": 4.508153442359315, + "learning_rate": 9.335192581936183e-06, + "loss": 1.0223, + "step": 2338 + }, + { + "epoch": 0.19, + "grad_norm": 4.4103159948747335, + "learning_rate": 9.334532868807179e-06, + "loss": 1.2319, + "step": 2339 + }, + { + "epoch": 0.19, + "grad_norm": 5.19185231767955, + "learning_rate": 9.333872851846285e-06, + "loss": 1.0721, + "step": 2340 + }, + { + "epoch": 0.19, + "grad_norm": 4.111140255013692, + "learning_rate": 9.333212531099767e-06, + "loss": 0.9359, + "step": 2341 + }, + { + "epoch": 0.19, + "grad_norm": 3.5008772177759786, + "learning_rate": 9.332551906613908e-06, + "loss": 0.4058, + "step": 2342 + }, + { + "epoch": 0.19, + "grad_norm": 4.1415338749073785, + "learning_rate": 9.331890978435014e-06, + "loss": 1.0631, + "step": 2343 + }, + { + "epoch": 0.19, + "grad_norm": 3.80184772378031, + "learning_rate": 9.331229746609416e-06, + "loss": 0.6319, + "step": 2344 + }, + { + "epoch": 0.19, + "grad_norm": 2.3313134792011, + "learning_rate": 9.330568211183461e-06, + "loss": 0.4148, + "step": 2345 + }, + { + "epoch": 0.19, + "grad_norm": 3.914408637597205, + "learning_rate": 9.329906372203523e-06, + "loss": 0.4193, + "step": 2346 + }, + { + "epoch": 0.19, + "grad_norm": 1.765654467492976, + "learning_rate": 9.329244229715992e-06, + "loss": 0.4101, + "step": 2347 + }, + { + "epoch": 0.19, + "grad_norm": 3.550335647297338, + "learning_rate": 9.328581783767281e-06, + "loss": 0.6818, + "step": 2348 + }, + { + "epoch": 0.19, + "grad_norm": 2.9500036638624088, + "learning_rate": 9.327919034403825e-06, + "loss": 0.3719, + "step": 2349 + }, + { + "epoch": 0.19, + "grad_norm": 5.501275834383856, + "learning_rate": 9.327255981672082e-06, + "loss": 1.395, + "step": 2350 + }, + { + "epoch": 0.19, + "grad_norm": 4.279179026210534, + "learning_rate": 9.326592625618523e-06, + "loss": 1.2283, + "step": 2351 + }, + { + "epoch": 0.19, + "grad_norm": 5.262990927460758, + "learning_rate": 9.325928966289652e-06, + "loss": 1.0392, + "step": 2352 + }, + { + "epoch": 0.19, + "grad_norm": 3.8054019782508375, + "learning_rate": 9.325265003731988e-06, + "loss": 0.5625, + "step": 2353 + }, + { + "epoch": 0.19, + "grad_norm": 3.065618563471102, + "learning_rate": 9.324600737992069e-06, + "loss": 0.6746, + "step": 2354 + }, + { + "epoch": 0.19, + "grad_norm": 2.9175758949154034, + "learning_rate": 9.323936169116461e-06, + "loss": 0.8539, + "step": 2355 + }, + { + "epoch": 0.19, + "grad_norm": 4.403739334261655, + "learning_rate": 9.323271297151743e-06, + "loss": 1.1677, + "step": 2356 + }, + { + "epoch": 0.19, + "grad_norm": 4.28527399597012, + "learning_rate": 9.322606122144524e-06, + "loss": 0.8743, + "step": 2357 + }, + { + "epoch": 0.19, + "grad_norm": 3.158860499691112, + "learning_rate": 9.321940644141427e-06, + "loss": 0.3042, + "step": 2358 + }, + { + "epoch": 0.19, + "grad_norm": 4.485617366183805, + "learning_rate": 9.3212748631891e-06, + "loss": 1.0994, + "step": 2359 + }, + { + "epoch": 0.19, + "grad_norm": 3.1845245822430632, + "learning_rate": 9.320608779334212e-06, + "loss": 0.401, + "step": 2360 + }, + { + "epoch": 0.19, + "grad_norm": 2.408107084724933, + "learning_rate": 9.319942392623451e-06, + "loss": 0.5151, + "step": 2361 + }, + { + "epoch": 0.19, + "grad_norm": 3.550807236446376, + "learning_rate": 9.319275703103529e-06, + "loss": 0.5964, + "step": 2362 + }, + { + "epoch": 0.19, + "grad_norm": 3.59677790334423, + "learning_rate": 9.318608710821179e-06, + "loss": 1.0998, + "step": 2363 + }, + { + "epoch": 0.19, + "grad_norm": 3.2999653007697263, + "learning_rate": 9.317941415823151e-06, + "loss": 0.8447, + "step": 2364 + }, + { + "epoch": 0.19, + "grad_norm": 1.0328133919182665, + "learning_rate": 9.317273818156223e-06, + "loss": 0.1885, + "step": 2365 + }, + { + "epoch": 0.19, + "grad_norm": 5.439082118673476, + "learning_rate": 9.316605917867189e-06, + "loss": 1.244, + "step": 2366 + }, + { + "epoch": 0.19, + "grad_norm": 4.144453026630218, + "learning_rate": 9.315937715002865e-06, + "loss": 0.8939, + "step": 2367 + }, + { + "epoch": 0.19, + "grad_norm": 3.661134589171982, + "learning_rate": 9.315269209610092e-06, + "loss": 0.727, + "step": 2368 + }, + { + "epoch": 0.19, + "grad_norm": 3.0121522655064643, + "learning_rate": 9.314600401735727e-06, + "loss": 0.6767, + "step": 2369 + }, + { + "epoch": 0.19, + "grad_norm": 3.9143730505910006, + "learning_rate": 9.31393129142665e-06, + "loss": 0.7601, + "step": 2370 + }, + { + "epoch": 0.19, + "grad_norm": 3.5397444675924405, + "learning_rate": 9.313261878729765e-06, + "loss": 0.7476, + "step": 2371 + }, + { + "epoch": 0.19, + "grad_norm": 3.4541352720937573, + "learning_rate": 9.312592163691991e-06, + "loss": 1.0122, + "step": 2372 + }, + { + "epoch": 0.19, + "grad_norm": 5.368894813193451, + "learning_rate": 9.311922146360276e-06, + "loss": 1.2218, + "step": 2373 + }, + { + "epoch": 0.19, + "grad_norm": 4.196780787673087, + "learning_rate": 9.311251826781587e-06, + "loss": 0.7043, + "step": 2374 + }, + { + "epoch": 0.19, + "grad_norm": 3.937401888424667, + "learning_rate": 9.310581205002905e-06, + "loss": 0.7497, + "step": 2375 + }, + { + "epoch": 0.19, + "grad_norm": 3.7329755870495376, + "learning_rate": 9.30991028107124e-06, + "loss": 0.9845, + "step": 2376 + }, + { + "epoch": 0.19, + "grad_norm": 4.1875849682133115, + "learning_rate": 9.309239055033623e-06, + "loss": 0.7722, + "step": 2377 + }, + { + "epoch": 0.19, + "grad_norm": 4.936687169393056, + "learning_rate": 9.3085675269371e-06, + "loss": 1.018, + "step": 2378 + }, + { + "epoch": 0.19, + "grad_norm": 4.192478312596038, + "learning_rate": 9.307895696828746e-06, + "loss": 1.0972, + "step": 2379 + }, + { + "epoch": 0.19, + "grad_norm": 2.0847921517774552, + "learning_rate": 9.307223564755649e-06, + "loss": 0.3726, + "step": 2380 + }, + { + "epoch": 0.19, + "grad_norm": 2.861499554696102, + "learning_rate": 9.306551130764929e-06, + "loss": 0.6663, + "step": 2381 + }, + { + "epoch": 0.19, + "grad_norm": 4.132355188229489, + "learning_rate": 9.305878394903714e-06, + "loss": 0.8424, + "step": 2382 + }, + { + "epoch": 0.19, + "grad_norm": 3.9622667131259264, + "learning_rate": 9.305205357219165e-06, + "loss": 0.88, + "step": 2383 + }, + { + "epoch": 0.19, + "grad_norm": 4.162565828964038, + "learning_rate": 9.304532017758454e-06, + "loss": 0.8993, + "step": 2384 + }, + { + "epoch": 0.19, + "grad_norm": 3.720231045728556, + "learning_rate": 9.303858376568784e-06, + "loss": 0.8852, + "step": 2385 + }, + { + "epoch": 0.2, + "grad_norm": 3.6436473738437174, + "learning_rate": 9.303184433697371e-06, + "loss": 0.7395, + "step": 2386 + }, + { + "epoch": 0.2, + "grad_norm": 3.7860005278747586, + "learning_rate": 9.302510189191458e-06, + "loss": 0.974, + "step": 2387 + }, + { + "epoch": 0.2, + "grad_norm": 5.741299005535924, + "learning_rate": 9.301835643098305e-06, + "loss": 1.4295, + "step": 2388 + }, + { + "epoch": 0.2, + "grad_norm": 4.393585118933961, + "learning_rate": 9.301160795465196e-06, + "loss": 1.1355, + "step": 2389 + }, + { + "epoch": 0.2, + "grad_norm": 3.619503406329702, + "learning_rate": 9.300485646339431e-06, + "loss": 0.9534, + "step": 2390 + }, + { + "epoch": 0.2, + "grad_norm": 4.111439895035622, + "learning_rate": 9.299810195768341e-06, + "loss": 0.8473, + "step": 2391 + }, + { + "epoch": 0.2, + "grad_norm": 2.223696444269811, + "learning_rate": 9.299134443799267e-06, + "loss": 0.4557, + "step": 2392 + }, + { + "epoch": 0.2, + "grad_norm": 5.174862259989276, + "learning_rate": 9.298458390479579e-06, + "loss": 1.2313, + "step": 2393 + }, + { + "epoch": 0.2, + "grad_norm": 4.049120303711942, + "learning_rate": 9.297782035856667e-06, + "loss": 0.9009, + "step": 2394 + }, + { + "epoch": 0.2, + "grad_norm": 4.607973996729967, + "learning_rate": 9.297105379977935e-06, + "loss": 1.311, + "step": 2395 + }, + { + "epoch": 0.2, + "grad_norm": 1.804514523499376, + "learning_rate": 9.296428422890817e-06, + "loss": 0.5371, + "step": 2396 + }, + { + "epoch": 0.2, + "grad_norm": 2.5712502914725803, + "learning_rate": 9.295751164642767e-06, + "loss": 0.3081, + "step": 2397 + }, + { + "epoch": 0.2, + "grad_norm": 4.9385847868834265, + "learning_rate": 9.295073605281255e-06, + "loss": 1.3572, + "step": 2398 + }, + { + "epoch": 0.2, + "grad_norm": 3.953183154224133, + "learning_rate": 9.294395744853775e-06, + "loss": 0.7451, + "step": 2399 + }, + { + "epoch": 0.2, + "grad_norm": 4.006366705979045, + "learning_rate": 9.293717583407843e-06, + "loss": 1.0411, + "step": 2400 + }, + { + "epoch": 0.2, + "grad_norm": 2.9360113632346465, + "learning_rate": 9.293039120990995e-06, + "loss": 0.7905, + "step": 2401 + }, + { + "epoch": 0.2, + "grad_norm": 3.970447326489623, + "learning_rate": 9.292360357650785e-06, + "loss": 0.9431, + "step": 2402 + }, + { + "epoch": 0.2, + "grad_norm": 3.742640339977302, + "learning_rate": 9.291681293434797e-06, + "loss": 0.8987, + "step": 2403 + }, + { + "epoch": 0.2, + "grad_norm": 3.5528572102404032, + "learning_rate": 9.291001928390629e-06, + "loss": 0.8615, + "step": 2404 + }, + { + "epoch": 0.2, + "grad_norm": 5.186330963000171, + "learning_rate": 9.290322262565897e-06, + "loss": 1.3626, + "step": 2405 + }, + { + "epoch": 0.2, + "grad_norm": 2.9948835802395184, + "learning_rate": 9.289642296008248e-06, + "loss": 0.7315, + "step": 2406 + }, + { + "epoch": 0.2, + "grad_norm": 2.8638274177086402, + "learning_rate": 9.288962028765342e-06, + "loss": 0.5873, + "step": 2407 + }, + { + "epoch": 0.2, + "grad_norm": 5.20550751601823, + "learning_rate": 9.288281460884864e-06, + "loss": 1.2367, + "step": 2408 + }, + { + "epoch": 0.2, + "grad_norm": 3.6467454307293434, + "learning_rate": 9.287600592414517e-06, + "loss": 0.6496, + "step": 2409 + }, + { + "epoch": 0.2, + "grad_norm": 1.0927102373684074, + "learning_rate": 9.28691942340203e-06, + "loss": 0.2213, + "step": 2410 + }, + { + "epoch": 0.2, + "grad_norm": 2.9930871543106923, + "learning_rate": 9.286237953895148e-06, + "loss": 0.5662, + "step": 2411 + }, + { + "epoch": 0.2, + "grad_norm": 3.3265903378205772, + "learning_rate": 9.285556183941637e-06, + "loss": 0.9091, + "step": 2412 + }, + { + "epoch": 0.2, + "grad_norm": 2.955673187066376, + "learning_rate": 9.28487411358929e-06, + "loss": 0.6372, + "step": 2413 + }, + { + "epoch": 0.2, + "grad_norm": 4.006607505537565, + "learning_rate": 9.284191742885915e-06, + "loss": 1.1425, + "step": 2414 + }, + { + "epoch": 0.2, + "grad_norm": 5.078956744021499, + "learning_rate": 9.283509071879344e-06, + "loss": 1.2454, + "step": 2415 + }, + { + "epoch": 0.2, + "grad_norm": 2.392870823843581, + "learning_rate": 9.282826100617429e-06, + "loss": 0.5376, + "step": 2416 + }, + { + "epoch": 0.2, + "grad_norm": 4.467695605250697, + "learning_rate": 9.282142829148043e-06, + "loss": 1.0673, + "step": 2417 + }, + { + "epoch": 0.2, + "grad_norm": 1.1094007382106783, + "learning_rate": 9.28145925751908e-06, + "loss": 0.1823, + "step": 2418 + }, + { + "epoch": 0.2, + "grad_norm": 5.416330381903597, + "learning_rate": 9.280775385778458e-06, + "loss": 1.1083, + "step": 2419 + }, + { + "epoch": 0.2, + "grad_norm": 2.8890851270704814, + "learning_rate": 9.280091213974109e-06, + "loss": 0.974, + "step": 2420 + }, + { + "epoch": 0.2, + "grad_norm": 2.781892576166949, + "learning_rate": 9.279406742153996e-06, + "loss": 0.4089, + "step": 2421 + }, + { + "epoch": 0.2, + "grad_norm": 5.066866788110671, + "learning_rate": 9.278721970366092e-06, + "loss": 1.3843, + "step": 2422 + }, + { + "epoch": 0.2, + "grad_norm": 2.9425551802733314, + "learning_rate": 9.278036898658401e-06, + "loss": 0.7035, + "step": 2423 + }, + { + "epoch": 0.2, + "grad_norm": 4.68743343050816, + "learning_rate": 9.277351527078938e-06, + "loss": 1.1653, + "step": 2424 + }, + { + "epoch": 0.2, + "grad_norm": 4.845281486402816, + "learning_rate": 9.276665855675751e-06, + "loss": 1.1039, + "step": 2425 + }, + { + "epoch": 0.2, + "grad_norm": 4.201882004264333, + "learning_rate": 9.275979884496898e-06, + "loss": 1.107, + "step": 2426 + }, + { + "epoch": 0.2, + "grad_norm": 4.02109199001339, + "learning_rate": 9.275293613590465e-06, + "loss": 0.9791, + "step": 2427 + }, + { + "epoch": 0.2, + "grad_norm": 1.9454430982583224, + "learning_rate": 9.274607043004556e-06, + "loss": 0.4404, + "step": 2428 + }, + { + "epoch": 0.2, + "grad_norm": 4.627297027829158, + "learning_rate": 9.273920172787297e-06, + "loss": 0.7501, + "step": 2429 + }, + { + "epoch": 0.2, + "grad_norm": 3.4906400608114176, + "learning_rate": 9.273233002986833e-06, + "loss": 0.929, + "step": 2430 + }, + { + "epoch": 0.2, + "grad_norm": 3.8919170121454916, + "learning_rate": 9.27254553365133e-06, + "loss": 1.2215, + "step": 2431 + }, + { + "epoch": 0.2, + "grad_norm": 4.729171809737526, + "learning_rate": 9.271857764828985e-06, + "loss": 1.419, + "step": 2432 + }, + { + "epoch": 0.2, + "grad_norm": 3.387967287590401, + "learning_rate": 9.271169696567999e-06, + "loss": 0.7166, + "step": 2433 + }, + { + "epoch": 0.2, + "grad_norm": 2.7446962517953417, + "learning_rate": 9.270481328916605e-06, + "loss": 0.509, + "step": 2434 + }, + { + "epoch": 0.2, + "grad_norm": 4.3337688838086725, + "learning_rate": 9.269792661923055e-06, + "loss": 0.8205, + "step": 2435 + }, + { + "epoch": 0.2, + "grad_norm": 5.070855586850423, + "learning_rate": 9.269103695635622e-06, + "loss": 1.2684, + "step": 2436 + }, + { + "epoch": 0.2, + "grad_norm": 5.072157256597414, + "learning_rate": 9.2684144301026e-06, + "loss": 1.0799, + "step": 2437 + }, + { + "epoch": 0.2, + "grad_norm": 3.4439968585400815, + "learning_rate": 9.2677248653723e-06, + "loss": 0.7584, + "step": 2438 + }, + { + "epoch": 0.2, + "grad_norm": 3.0997069644786572, + "learning_rate": 9.267035001493064e-06, + "loss": 0.7358, + "step": 2439 + }, + { + "epoch": 0.2, + "grad_norm": 4.321923940828505, + "learning_rate": 9.266344838513241e-06, + "loss": 0.911, + "step": 2440 + }, + { + "epoch": 0.2, + "grad_norm": 3.797153143640582, + "learning_rate": 9.265654376481214e-06, + "loss": 0.7008, + "step": 2441 + }, + { + "epoch": 0.2, + "grad_norm": 3.137346063274294, + "learning_rate": 9.264963615445378e-06, + "loss": 0.5624, + "step": 2442 + }, + { + "epoch": 0.2, + "grad_norm": 4.329038814070353, + "learning_rate": 9.264272555454156e-06, + "loss": 1.1483, + "step": 2443 + }, + { + "epoch": 0.2, + "grad_norm": 3.846667627346543, + "learning_rate": 9.263581196555984e-06, + "loss": 1.1069, + "step": 2444 + }, + { + "epoch": 0.2, + "grad_norm": 3.0885308129338425, + "learning_rate": 9.262889538799327e-06, + "loss": 0.6528, + "step": 2445 + }, + { + "epoch": 0.2, + "grad_norm": 3.984212107188813, + "learning_rate": 9.262197582232665e-06, + "loss": 0.7519, + "step": 2446 + }, + { + "epoch": 0.2, + "grad_norm": 2.6441126736579625, + "learning_rate": 9.2615053269045e-06, + "loss": 0.5124, + "step": 2447 + }, + { + "epoch": 0.2, + "grad_norm": 2.801090724259183, + "learning_rate": 9.260812772863362e-06, + "loss": 0.5342, + "step": 2448 + }, + { + "epoch": 0.2, + "grad_norm": 5.0335952212132895, + "learning_rate": 9.260119920157786e-06, + "loss": 1.2109, + "step": 2449 + }, + { + "epoch": 0.2, + "grad_norm": 3.3807668935028627, + "learning_rate": 9.259426768836347e-06, + "loss": 0.7047, + "step": 2450 + }, + { + "epoch": 0.2, + "grad_norm": 5.588787739876182, + "learning_rate": 9.258733318947627e-06, + "loss": 1.5709, + "step": 2451 + }, + { + "epoch": 0.2, + "grad_norm": 3.1994010042190997, + "learning_rate": 9.258039570540238e-06, + "loss": 0.8593, + "step": 2452 + }, + { + "epoch": 0.2, + "grad_norm": 3.7504222258312465, + "learning_rate": 9.257345523662804e-06, + "loss": 0.9261, + "step": 2453 + }, + { + "epoch": 0.2, + "grad_norm": 4.411878854734679, + "learning_rate": 9.256651178363978e-06, + "loss": 1.0292, + "step": 2454 + }, + { + "epoch": 0.2, + "grad_norm": 5.609824834430926, + "learning_rate": 9.255956534692428e-06, + "loss": 1.5112, + "step": 2455 + }, + { + "epoch": 0.2, + "grad_norm": 4.657719639064696, + "learning_rate": 9.255261592696849e-06, + "loss": 1.0546, + "step": 2456 + }, + { + "epoch": 0.2, + "grad_norm": 3.9287243102542893, + "learning_rate": 9.254566352425949e-06, + "loss": 1.0827, + "step": 2457 + }, + { + "epoch": 0.2, + "grad_norm": 4.406667014838754, + "learning_rate": 9.253870813928465e-06, + "loss": 1.0112, + "step": 2458 + }, + { + "epoch": 0.2, + "grad_norm": 3.7527747181267754, + "learning_rate": 9.25317497725315e-06, + "loss": 0.9954, + "step": 2459 + }, + { + "epoch": 0.2, + "grad_norm": 1.9851742175340266, + "learning_rate": 9.252478842448778e-06, + "loss": 0.4253, + "step": 2460 + }, + { + "epoch": 0.2, + "grad_norm": 2.928820661023417, + "learning_rate": 9.251782409564146e-06, + "loss": 0.4773, + "step": 2461 + }, + { + "epoch": 0.2, + "grad_norm": 4.542990238697449, + "learning_rate": 9.251085678648072e-06, + "loss": 0.5688, + "step": 2462 + }, + { + "epoch": 0.2, + "grad_norm": 2.9259639052018427, + "learning_rate": 9.250388649749391e-06, + "loss": 0.6107, + "step": 2463 + }, + { + "epoch": 0.2, + "grad_norm": 5.511230033717203, + "learning_rate": 9.249691322916965e-06, + "loss": 1.7268, + "step": 2464 + }, + { + "epoch": 0.2, + "grad_norm": 4.015230401762193, + "learning_rate": 9.248993698199672e-06, + "loss": 0.9483, + "step": 2465 + }, + { + "epoch": 0.2, + "grad_norm": 5.568010511496238, + "learning_rate": 9.248295775646412e-06, + "loss": 0.9769, + "step": 2466 + }, + { + "epoch": 0.2, + "grad_norm": 3.767556720925712, + "learning_rate": 9.247597555306107e-06, + "loss": 0.7468, + "step": 2467 + }, + { + "epoch": 0.2, + "grad_norm": 0.6824470971410319, + "learning_rate": 9.246899037227698e-06, + "loss": 0.1253, + "step": 2468 + }, + { + "epoch": 0.2, + "grad_norm": 2.556352405481264, + "learning_rate": 9.246200221460148e-06, + "loss": 0.4267, + "step": 2469 + }, + { + "epoch": 0.2, + "grad_norm": 3.5598987471209234, + "learning_rate": 9.245501108052447e-06, + "loss": 0.654, + "step": 2470 + }, + { + "epoch": 0.2, + "grad_norm": 2.1694604413817036, + "learning_rate": 9.24480169705359e-06, + "loss": 0.3556, + "step": 2471 + }, + { + "epoch": 0.2, + "grad_norm": 3.614876497302162, + "learning_rate": 9.244101988512608e-06, + "loss": 0.8888, + "step": 2472 + }, + { + "epoch": 0.2, + "grad_norm": 4.11782195067912, + "learning_rate": 9.243401982478548e-06, + "loss": 1.0358, + "step": 2473 + }, + { + "epoch": 0.2, + "grad_norm": 3.0826329130323074, + "learning_rate": 9.242701679000477e-06, + "loss": 0.4509, + "step": 2474 + }, + { + "epoch": 0.2, + "grad_norm": 3.449403101466768, + "learning_rate": 9.242001078127483e-06, + "loss": 0.4125, + "step": 2475 + }, + { + "epoch": 0.2, + "grad_norm": 2.0237449018883034, + "learning_rate": 9.241300179908672e-06, + "loss": 0.4052, + "step": 2476 + }, + { + "epoch": 0.2, + "grad_norm": 5.948681216804102, + "learning_rate": 9.240598984393179e-06, + "loss": 0.3693, + "step": 2477 + }, + { + "epoch": 0.2, + "grad_norm": 3.3261854739380206, + "learning_rate": 9.239897491630152e-06, + "loss": 0.8982, + "step": 2478 + }, + { + "epoch": 0.2, + "grad_norm": 3.033930031952647, + "learning_rate": 9.239195701668762e-06, + "loss": 0.6192, + "step": 2479 + }, + { + "epoch": 0.2, + "grad_norm": 3.718121732200636, + "learning_rate": 9.238493614558203e-06, + "loss": 0.944, + "step": 2480 + }, + { + "epoch": 0.2, + "grad_norm": 3.9801540794617747, + "learning_rate": 9.237791230347688e-06, + "loss": 1.0094, + "step": 2481 + }, + { + "epoch": 0.2, + "grad_norm": 2.3348847550539866, + "learning_rate": 9.237088549086449e-06, + "loss": 0.7, + "step": 2482 + }, + { + "epoch": 0.2, + "grad_norm": 3.9710795346239567, + "learning_rate": 9.236385570823746e-06, + "loss": 0.5839, + "step": 2483 + }, + { + "epoch": 0.2, + "grad_norm": 5.174006026845121, + "learning_rate": 9.235682295608848e-06, + "loss": 1.3029, + "step": 2484 + }, + { + "epoch": 0.2, + "grad_norm": 4.230100928597258, + "learning_rate": 9.234978723491054e-06, + "loss": 1.027, + "step": 2485 + }, + { + "epoch": 0.2, + "grad_norm": 4.648003166348003, + "learning_rate": 9.234274854519685e-06, + "loss": 1.0663, + "step": 2486 + }, + { + "epoch": 0.2, + "grad_norm": 3.8057104068829544, + "learning_rate": 9.233570688744076e-06, + "loss": 0.4592, + "step": 2487 + }, + { + "epoch": 0.2, + "grad_norm": 2.906143424329132, + "learning_rate": 9.232866226213586e-06, + "loss": 0.5144, + "step": 2488 + }, + { + "epoch": 0.2, + "grad_norm": 3.8208585072714936, + "learning_rate": 9.232161466977595e-06, + "loss": 0.5398, + "step": 2489 + }, + { + "epoch": 0.2, + "grad_norm": 2.105264891709723, + "learning_rate": 9.231456411085502e-06, + "loss": 0.3397, + "step": 2490 + }, + { + "epoch": 0.2, + "grad_norm": 3.9134449975356302, + "learning_rate": 9.23075105858673e-06, + "loss": 0.9787, + "step": 2491 + }, + { + "epoch": 0.2, + "grad_norm": 4.549363434492431, + "learning_rate": 9.230045409530724e-06, + "loss": 1.1104, + "step": 2492 + }, + { + "epoch": 0.2, + "grad_norm": 2.741785394799542, + "learning_rate": 9.229339463966942e-06, + "loss": 0.4278, + "step": 2493 + }, + { + "epoch": 0.2, + "grad_norm": 3.3875035043855193, + "learning_rate": 9.228633221944869e-06, + "loss": 0.4966, + "step": 2494 + }, + { + "epoch": 0.2, + "grad_norm": 3.079424147406122, + "learning_rate": 9.227926683514012e-06, + "loss": 0.4968, + "step": 2495 + }, + { + "epoch": 0.2, + "grad_norm": 3.319924544614497, + "learning_rate": 9.227219848723893e-06, + "loss": 0.6642, + "step": 2496 + }, + { + "epoch": 0.2, + "grad_norm": 4.25907882646003, + "learning_rate": 9.226512717624062e-06, + "loss": 0.7904, + "step": 2497 + }, + { + "epoch": 0.2, + "grad_norm": 3.497068125726394, + "learning_rate": 9.22580529026408e-06, + "loss": 0.7181, + "step": 2498 + }, + { + "epoch": 0.2, + "grad_norm": 3.802893634393092, + "learning_rate": 9.225097566693539e-06, + "loss": 0.7675, + "step": 2499 + }, + { + "epoch": 0.2, + "grad_norm": 1.095308909812506, + "learning_rate": 9.224389546962047e-06, + "loss": 0.1666, + "step": 2500 + }, + { + "epoch": 0.2, + "grad_norm": 3.395290691931256, + "learning_rate": 9.223681231119232e-06, + "loss": 0.6342, + "step": 2501 + }, + { + "epoch": 0.2, + "grad_norm": 4.544834977506785, + "learning_rate": 9.222972619214745e-06, + "loss": 1.0556, + "step": 2502 + }, + { + "epoch": 0.2, + "grad_norm": 5.257132580663881, + "learning_rate": 9.222263711298256e-06, + "loss": 1.2329, + "step": 2503 + }, + { + "epoch": 0.2, + "grad_norm": 2.952517170894928, + "learning_rate": 9.221554507419455e-06, + "loss": 0.4574, + "step": 2504 + }, + { + "epoch": 0.2, + "grad_norm": 5.894079523365917, + "learning_rate": 9.220845007628055e-06, + "loss": 0.94, + "step": 2505 + }, + { + "epoch": 0.2, + "grad_norm": 3.166460250726755, + "learning_rate": 9.22013521197379e-06, + "loss": 0.8148, + "step": 2506 + }, + { + "epoch": 0.2, + "grad_norm": 2.795346323399248, + "learning_rate": 9.219425120506414e-06, + "loss": 0.6581, + "step": 2507 + }, + { + "epoch": 0.2, + "grad_norm": 3.856522992182047, + "learning_rate": 9.218714733275698e-06, + "loss": 0.8704, + "step": 2508 + }, + { + "epoch": 0.21, + "grad_norm": 5.514542570030826, + "learning_rate": 9.21800405033144e-06, + "loss": 1.3371, + "step": 2509 + }, + { + "epoch": 0.21, + "grad_norm": 4.5945254947430785, + "learning_rate": 9.217293071723455e-06, + "loss": 1.136, + "step": 2510 + }, + { + "epoch": 0.21, + "grad_norm": 3.7277842663523812, + "learning_rate": 9.216581797501578e-06, + "loss": 0.686, + "step": 2511 + }, + { + "epoch": 0.21, + "grad_norm": 4.369640029754952, + "learning_rate": 9.215870227715669e-06, + "loss": 1.2206, + "step": 2512 + }, + { + "epoch": 0.21, + "grad_norm": 4.127736755897348, + "learning_rate": 9.215158362415604e-06, + "loss": 0.8052, + "step": 2513 + }, + { + "epoch": 0.21, + "grad_norm": 4.604555276458949, + "learning_rate": 9.21444620165128e-06, + "loss": 1.0992, + "step": 2514 + }, + { + "epoch": 0.21, + "grad_norm": 4.653818385651074, + "learning_rate": 9.213733745472623e-06, + "loss": 0.9455, + "step": 2515 + }, + { + "epoch": 0.21, + "grad_norm": 2.9787448891704096, + "learning_rate": 9.213020993929566e-06, + "loss": 0.6354, + "step": 2516 + }, + { + "epoch": 0.21, + "grad_norm": 2.8866749246671595, + "learning_rate": 9.212307947072074e-06, + "loss": 0.541, + "step": 2517 + }, + { + "epoch": 0.21, + "grad_norm": 6.339381202051915, + "learning_rate": 9.211594604950127e-06, + "loss": 1.2898, + "step": 2518 + }, + { + "epoch": 0.21, + "grad_norm": 3.961553529245296, + "learning_rate": 9.210880967613724e-06, + "loss": 1.0144, + "step": 2519 + }, + { + "epoch": 0.21, + "grad_norm": 5.234675316545422, + "learning_rate": 9.210167035112894e-06, + "loss": 1.2231, + "step": 2520 + }, + { + "epoch": 0.21, + "grad_norm": 5.358216145490436, + "learning_rate": 9.209452807497677e-06, + "loss": 1.1559, + "step": 2521 + }, + { + "epoch": 0.21, + "grad_norm": 3.5686683887336494, + "learning_rate": 9.208738284818138e-06, + "loss": 0.7985, + "step": 2522 + }, + { + "epoch": 0.21, + "grad_norm": 1.9181533771460326, + "learning_rate": 9.20802346712436e-06, + "loss": 0.4145, + "step": 2523 + }, + { + "epoch": 0.21, + "grad_norm": 4.231466180884634, + "learning_rate": 9.20730835446645e-06, + "loss": 0.9182, + "step": 2524 + }, + { + "epoch": 0.21, + "grad_norm": 3.7500185297541964, + "learning_rate": 9.206592946894538e-06, + "loss": 0.6986, + "step": 2525 + }, + { + "epoch": 0.21, + "grad_norm": 4.414215285507461, + "learning_rate": 9.205877244458765e-06, + "loss": 0.6744, + "step": 2526 + }, + { + "epoch": 0.21, + "grad_norm": 2.395880282392656, + "learning_rate": 9.205161247209303e-06, + "loss": 0.317, + "step": 2527 + }, + { + "epoch": 0.21, + "grad_norm": 4.427669095406331, + "learning_rate": 9.204444955196337e-06, + "loss": 0.8707, + "step": 2528 + }, + { + "epoch": 0.21, + "grad_norm": 3.7214519772571037, + "learning_rate": 9.203728368470077e-06, + "loss": 0.8583, + "step": 2529 + }, + { + "epoch": 0.21, + "grad_norm": 4.423959878964401, + "learning_rate": 9.203011487080755e-06, + "loss": 0.5747, + "step": 2530 + }, + { + "epoch": 0.21, + "grad_norm": 2.98099678270369, + "learning_rate": 9.202294311078618e-06, + "loss": 0.7567, + "step": 2531 + }, + { + "epoch": 0.21, + "grad_norm": 3.4053424148101112, + "learning_rate": 9.201576840513939e-06, + "loss": 0.737, + "step": 2532 + }, + { + "epoch": 0.21, + "grad_norm": 2.3427163106192985, + "learning_rate": 9.200859075437008e-06, + "loss": 0.6799, + "step": 2533 + }, + { + "epoch": 0.21, + "grad_norm": 4.32925968832063, + "learning_rate": 9.200141015898138e-06, + "loss": 1.209, + "step": 2534 + }, + { + "epoch": 0.21, + "grad_norm": 4.265874001661272, + "learning_rate": 9.199422661947662e-06, + "loss": 0.8002, + "step": 2535 + }, + { + "epoch": 0.21, + "grad_norm": 3.08560714492616, + "learning_rate": 9.198704013635934e-06, + "loss": 0.7652, + "step": 2536 + }, + { + "epoch": 0.21, + "grad_norm": 2.8415414566991486, + "learning_rate": 9.197985071013326e-06, + "loss": 0.7683, + "step": 2537 + }, + { + "epoch": 0.21, + "grad_norm": 3.849674167020621, + "learning_rate": 9.197265834130235e-06, + "loss": 0.8824, + "step": 2538 + }, + { + "epoch": 0.21, + "grad_norm": 3.2292221455199654, + "learning_rate": 9.196546303037077e-06, + "loss": 0.9428, + "step": 2539 + }, + { + "epoch": 0.21, + "grad_norm": 3.3977499377383547, + "learning_rate": 9.195826477784286e-06, + "loss": 0.8224, + "step": 2540 + }, + { + "epoch": 0.21, + "grad_norm": 5.407183252962648, + "learning_rate": 9.19510635842232e-06, + "loss": 1.0697, + "step": 2541 + }, + { + "epoch": 0.21, + "grad_norm": 4.555713809852212, + "learning_rate": 9.194385945001652e-06, + "loss": 1.1947, + "step": 2542 + }, + { + "epoch": 0.21, + "grad_norm": 2.5305475089934677, + "learning_rate": 9.193665237572785e-06, + "loss": 0.6139, + "step": 2543 + }, + { + "epoch": 0.21, + "grad_norm": 2.9543541968597733, + "learning_rate": 9.192944236186237e-06, + "loss": 0.5635, + "step": 2544 + }, + { + "epoch": 0.21, + "grad_norm": 3.804833370830699, + "learning_rate": 9.192222940892543e-06, + "loss": 0.7743, + "step": 2545 + }, + { + "epoch": 0.21, + "grad_norm": 4.051494233217439, + "learning_rate": 9.191501351742269e-06, + "loss": 0.6582, + "step": 2546 + }, + { + "epoch": 0.21, + "grad_norm": 4.79343864325062, + "learning_rate": 9.19077946878599e-06, + "loss": 1.1509, + "step": 2547 + }, + { + "epoch": 0.21, + "grad_norm": 6.22126397598874, + "learning_rate": 9.190057292074308e-06, + "loss": 1.5538, + "step": 2548 + }, + { + "epoch": 0.21, + "grad_norm": 4.317098774130853, + "learning_rate": 9.189334821657846e-06, + "loss": 0.6657, + "step": 2549 + }, + { + "epoch": 0.21, + "grad_norm": 2.7015628351357672, + "learning_rate": 9.188612057587246e-06, + "loss": 0.6748, + "step": 2550 + }, + { + "epoch": 0.21, + "grad_norm": 2.6132826447826805, + "learning_rate": 9.187888999913166e-06, + "loss": 0.7137, + "step": 2551 + }, + { + "epoch": 0.21, + "grad_norm": 3.2849372692578647, + "learning_rate": 9.187165648686296e-06, + "loss": 0.8883, + "step": 2552 + }, + { + "epoch": 0.21, + "grad_norm": 1.7770415993893534, + "learning_rate": 9.186442003957337e-06, + "loss": 0.4326, + "step": 2553 + }, + { + "epoch": 0.21, + "grad_norm": 4.206890972843402, + "learning_rate": 9.185718065777011e-06, + "loss": 1.0782, + "step": 2554 + }, + { + "epoch": 0.21, + "grad_norm": 3.855430118818454, + "learning_rate": 9.184993834196065e-06, + "loss": 0.9108, + "step": 2555 + }, + { + "epoch": 0.21, + "grad_norm": 4.224854598296555, + "learning_rate": 9.184269309265266e-06, + "loss": 0.9526, + "step": 2556 + }, + { + "epoch": 0.21, + "grad_norm": 3.649110311640722, + "learning_rate": 9.183544491035396e-06, + "loss": 0.5603, + "step": 2557 + }, + { + "epoch": 0.21, + "grad_norm": 2.6275348993531513, + "learning_rate": 9.182819379557266e-06, + "loss": 0.634, + "step": 2558 + }, + { + "epoch": 0.21, + "grad_norm": 3.656912091187971, + "learning_rate": 9.1820939748817e-06, + "loss": 0.8573, + "step": 2559 + }, + { + "epoch": 0.21, + "grad_norm": 3.174479933286031, + "learning_rate": 9.181368277059548e-06, + "loss": 0.6829, + "step": 2560 + }, + { + "epoch": 0.21, + "grad_norm": 2.5872657569719784, + "learning_rate": 9.180642286141678e-06, + "loss": 0.3411, + "step": 2561 + }, + { + "epoch": 0.21, + "grad_norm": 3.789949702148944, + "learning_rate": 9.179916002178976e-06, + "loss": 0.6619, + "step": 2562 + }, + { + "epoch": 0.21, + "grad_norm": 4.534928294722872, + "learning_rate": 9.179189425222354e-06, + "loss": 0.8082, + "step": 2563 + }, + { + "epoch": 0.21, + "grad_norm": 4.851233261542717, + "learning_rate": 9.178462555322742e-06, + "loss": 1.2742, + "step": 2564 + }, + { + "epoch": 0.21, + "grad_norm": 3.1864289822875618, + "learning_rate": 9.177735392531088e-06, + "loss": 0.6699, + "step": 2565 + }, + { + "epoch": 0.21, + "grad_norm": 3.5049285283905367, + "learning_rate": 9.177007936898365e-06, + "loss": 0.804, + "step": 2566 + }, + { + "epoch": 0.21, + "grad_norm": 4.372758267623915, + "learning_rate": 9.176280188475565e-06, + "loss": 0.8771, + "step": 2567 + }, + { + "epoch": 0.21, + "grad_norm": 3.0264069966642824, + "learning_rate": 9.175552147313698e-06, + "loss": 0.458, + "step": 2568 + }, + { + "epoch": 0.21, + "grad_norm": 4.921229189159816, + "learning_rate": 9.174823813463799e-06, + "loss": 1.0942, + "step": 2569 + }, + { + "epoch": 0.21, + "grad_norm": 3.393971440135176, + "learning_rate": 9.174095186976916e-06, + "loss": 0.8405, + "step": 2570 + }, + { + "epoch": 0.21, + "grad_norm": 5.680965342339284, + "learning_rate": 9.17336626790413e-06, + "loss": 1.5422, + "step": 2571 + }, + { + "epoch": 0.21, + "grad_norm": 2.681309457351905, + "learning_rate": 9.172637056296529e-06, + "loss": 0.7138, + "step": 2572 + }, + { + "epoch": 0.21, + "grad_norm": 5.449514166310241, + "learning_rate": 9.17190755220523e-06, + "loss": 1.0039, + "step": 2573 + }, + { + "epoch": 0.21, + "grad_norm": 5.251873711827825, + "learning_rate": 9.17117775568137e-06, + "loss": 0.8907, + "step": 2574 + }, + { + "epoch": 0.21, + "grad_norm": 3.1650591046235372, + "learning_rate": 9.170447666776101e-06, + "loss": 0.5766, + "step": 2575 + }, + { + "epoch": 0.21, + "grad_norm": 3.4827679981366515, + "learning_rate": 9.1697172855406e-06, + "loss": 0.5803, + "step": 2576 + }, + { + "epoch": 0.21, + "grad_norm": 4.405827340101964, + "learning_rate": 9.168986612026063e-06, + "loss": 1.0401, + "step": 2577 + }, + { + "epoch": 0.21, + "grad_norm": 4.108557485440849, + "learning_rate": 9.16825564628371e-06, + "loss": 0.9864, + "step": 2578 + }, + { + "epoch": 0.21, + "grad_norm": 4.72691957372029, + "learning_rate": 9.167524388364775e-06, + "loss": 1.2002, + "step": 2579 + }, + { + "epoch": 0.21, + "grad_norm": 3.206603025302393, + "learning_rate": 9.166792838320517e-06, + "loss": 0.6778, + "step": 2580 + }, + { + "epoch": 0.21, + "grad_norm": 2.857778089354351, + "learning_rate": 9.166060996202218e-06, + "loss": 0.5272, + "step": 2581 + }, + { + "epoch": 0.21, + "grad_norm": 3.0259298682451554, + "learning_rate": 9.165328862061172e-06, + "loss": 0.5766, + "step": 2582 + }, + { + "epoch": 0.21, + "grad_norm": 3.414197217782002, + "learning_rate": 9.164596435948699e-06, + "loss": 1.0155, + "step": 2583 + }, + { + "epoch": 0.21, + "grad_norm": 4.908278810961928, + "learning_rate": 9.163863717916142e-06, + "loss": 1.0146, + "step": 2584 + }, + { + "epoch": 0.21, + "grad_norm": 3.712167202066841, + "learning_rate": 9.163130708014858e-06, + "loss": 0.5501, + "step": 2585 + }, + { + "epoch": 0.21, + "grad_norm": 3.0649722491996014, + "learning_rate": 9.16239740629623e-06, + "loss": 0.5382, + "step": 2586 + }, + { + "epoch": 0.21, + "grad_norm": 2.3574318547925315, + "learning_rate": 9.16166381281166e-06, + "loss": 0.2507, + "step": 2587 + }, + { + "epoch": 0.21, + "grad_norm": 4.015733310555574, + "learning_rate": 9.160929927612567e-06, + "loss": 1.1375, + "step": 2588 + }, + { + "epoch": 0.21, + "grad_norm": 2.1669045648508427, + "learning_rate": 9.160195750750396e-06, + "loss": 0.3214, + "step": 2589 + }, + { + "epoch": 0.21, + "grad_norm": 5.127864108182433, + "learning_rate": 9.159461282276605e-06, + "loss": 1.0092, + "step": 2590 + }, + { + "epoch": 0.21, + "grad_norm": 2.848476885058125, + "learning_rate": 9.158726522242684e-06, + "loss": 0.6853, + "step": 2591 + }, + { + "epoch": 0.21, + "grad_norm": 2.3804941378319917, + "learning_rate": 9.15799147070013e-06, + "loss": 0.4897, + "step": 2592 + }, + { + "epoch": 0.21, + "grad_norm": 4.053977480378836, + "learning_rate": 9.157256127700472e-06, + "loss": 0.8833, + "step": 2593 + }, + { + "epoch": 0.21, + "grad_norm": 3.75654008244524, + "learning_rate": 9.156520493295249e-06, + "loss": 0.7025, + "step": 2594 + }, + { + "epoch": 0.21, + "grad_norm": 3.2543139923423734, + "learning_rate": 9.15578456753603e-06, + "loss": 0.8477, + "step": 2595 + }, + { + "epoch": 0.21, + "grad_norm": 2.4459278163166998, + "learning_rate": 9.155048350474398e-06, + "loss": 0.4061, + "step": 2596 + }, + { + "epoch": 0.21, + "grad_norm": 3.4250556753826427, + "learning_rate": 9.15431184216196e-06, + "loss": 0.559, + "step": 2597 + }, + { + "epoch": 0.21, + "grad_norm": 2.7293760100346875, + "learning_rate": 9.153575042650342e-06, + "loss": 0.4459, + "step": 2598 + }, + { + "epoch": 0.21, + "grad_norm": 3.88752240281919, + "learning_rate": 9.15283795199119e-06, + "loss": 0.702, + "step": 2599 + }, + { + "epoch": 0.21, + "grad_norm": 4.850180234109402, + "learning_rate": 9.152100570236172e-06, + "loss": 1.0422, + "step": 2600 + }, + { + "epoch": 0.21, + "grad_norm": 3.1384254357693853, + "learning_rate": 9.15136289743697e-06, + "loss": 1.0971, + "step": 2601 + }, + { + "epoch": 0.21, + "grad_norm": 3.1876046760275427, + "learning_rate": 9.150624933645297e-06, + "loss": 0.9115, + "step": 2602 + }, + { + "epoch": 0.21, + "grad_norm": 3.909756335673938, + "learning_rate": 9.149886678912883e-06, + "loss": 0.7705, + "step": 2603 + }, + { + "epoch": 0.21, + "grad_norm": 4.100639887160335, + "learning_rate": 9.14914813329147e-06, + "loss": 0.8299, + "step": 2604 + }, + { + "epoch": 0.21, + "grad_norm": 3.270330154313572, + "learning_rate": 9.148409296832832e-06, + "loss": 0.9597, + "step": 2605 + }, + { + "epoch": 0.21, + "grad_norm": 4.054076412924293, + "learning_rate": 9.147670169588754e-06, + "loss": 1.1523, + "step": 2606 + }, + { + "epoch": 0.21, + "grad_norm": 3.716632979901013, + "learning_rate": 9.14693075161105e-06, + "loss": 1.0254, + "step": 2607 + }, + { + "epoch": 0.21, + "grad_norm": 3.6393434225559393, + "learning_rate": 9.146191042951546e-06, + "loss": 0.7487, + "step": 2608 + }, + { + "epoch": 0.21, + "grad_norm": 3.467533295509262, + "learning_rate": 9.145451043662095e-06, + "loss": 0.7654, + "step": 2609 + }, + { + "epoch": 0.21, + "grad_norm": 2.3948877468965932, + "learning_rate": 9.144710753794567e-06, + "loss": 0.3971, + "step": 2610 + }, + { + "epoch": 0.21, + "grad_norm": 2.4801815731732444, + "learning_rate": 9.143970173400853e-06, + "loss": 0.3545, + "step": 2611 + }, + { + "epoch": 0.21, + "grad_norm": 4.6644744590683365, + "learning_rate": 9.143229302532866e-06, + "loss": 1.3814, + "step": 2612 + }, + { + "epoch": 0.21, + "grad_norm": 4.417698106094416, + "learning_rate": 9.142488141242534e-06, + "loss": 1.0687, + "step": 2613 + }, + { + "epoch": 0.21, + "grad_norm": 3.8191416836094914, + "learning_rate": 9.141746689581811e-06, + "loss": 0.8562, + "step": 2614 + }, + { + "epoch": 0.21, + "grad_norm": 2.273085690695707, + "learning_rate": 9.141004947602672e-06, + "loss": 0.5464, + "step": 2615 + }, + { + "epoch": 0.21, + "grad_norm": 3.001040918183898, + "learning_rate": 9.140262915357107e-06, + "loss": 0.5869, + "step": 2616 + }, + { + "epoch": 0.21, + "grad_norm": 4.510086130850804, + "learning_rate": 9.139520592897131e-06, + "loss": 1.0375, + "step": 2617 + }, + { + "epoch": 0.21, + "grad_norm": 3.0965008392075584, + "learning_rate": 9.138777980274776e-06, + "loss": 0.564, + "step": 2618 + }, + { + "epoch": 0.21, + "grad_norm": 2.2354925236562706, + "learning_rate": 9.138035077542096e-06, + "loss": 0.5104, + "step": 2619 + }, + { + "epoch": 0.21, + "grad_norm": 5.003837214181366, + "learning_rate": 9.137291884751165e-06, + "loss": 1.6036, + "step": 2620 + }, + { + "epoch": 0.21, + "grad_norm": 4.132618628152957, + "learning_rate": 9.13654840195408e-06, + "loss": 0.9808, + "step": 2621 + }, + { + "epoch": 0.21, + "grad_norm": 4.189334795046342, + "learning_rate": 9.135804629202955e-06, + "loss": 0.7809, + "step": 2622 + }, + { + "epoch": 0.21, + "grad_norm": 5.956989408432872, + "learning_rate": 9.135060566549924e-06, + "loss": 1.608, + "step": 2623 + }, + { + "epoch": 0.21, + "grad_norm": 3.4074585256132326, + "learning_rate": 9.134316214047144e-06, + "loss": 0.3921, + "step": 2624 + }, + { + "epoch": 0.21, + "grad_norm": 4.742860665175202, + "learning_rate": 9.133571571746786e-06, + "loss": 1.0199, + "step": 2625 + }, + { + "epoch": 0.21, + "grad_norm": 4.452758980350587, + "learning_rate": 9.132826639701055e-06, + "loss": 0.8877, + "step": 2626 + }, + { + "epoch": 0.21, + "grad_norm": 3.949992256628996, + "learning_rate": 9.13208141796216e-06, + "loss": 0.7818, + "step": 2627 + }, + { + "epoch": 0.21, + "grad_norm": 2.4071015775853146, + "learning_rate": 9.13133590658234e-06, + "loss": 0.4309, + "step": 2628 + }, + { + "epoch": 0.21, + "grad_norm": 3.843805103301418, + "learning_rate": 9.130590105613854e-06, + "loss": 0.7782, + "step": 2629 + }, + { + "epoch": 0.21, + "grad_norm": 4.859908067407454, + "learning_rate": 9.129844015108978e-06, + "loss": 1.06, + "step": 2630 + }, + { + "epoch": 0.22, + "grad_norm": 5.143641660915089, + "learning_rate": 9.12909763512001e-06, + "loss": 1.1174, + "step": 2631 + }, + { + "epoch": 0.22, + "grad_norm": 4.0003721523478255, + "learning_rate": 9.128350965699267e-06, + "loss": 1.1409, + "step": 2632 + }, + { + "epoch": 0.22, + "grad_norm": 2.3123734082883107, + "learning_rate": 9.127604006899088e-06, + "loss": 0.6201, + "step": 2633 + }, + { + "epoch": 0.22, + "grad_norm": 3.8886274054005017, + "learning_rate": 9.126856758771832e-06, + "loss": 0.9564, + "step": 2634 + }, + { + "epoch": 0.22, + "grad_norm": 3.363089925833701, + "learning_rate": 9.126109221369877e-06, + "loss": 0.8048, + "step": 2635 + }, + { + "epoch": 0.22, + "grad_norm": 2.597459332169226, + "learning_rate": 9.125361394745621e-06, + "loss": 0.5441, + "step": 2636 + }, + { + "epoch": 0.22, + "grad_norm": 3.121212674843806, + "learning_rate": 9.124613278951486e-06, + "loss": 0.6559, + "step": 2637 + }, + { + "epoch": 0.22, + "grad_norm": 4.34419329799521, + "learning_rate": 9.12386487403991e-06, + "loss": 0.8502, + "step": 2638 + }, + { + "epoch": 0.22, + "grad_norm": 2.4451716397751406, + "learning_rate": 9.123116180063356e-06, + "loss": 0.5108, + "step": 2639 + }, + { + "epoch": 0.22, + "grad_norm": 4.473863821303688, + "learning_rate": 9.1223671970743e-06, + "loss": 0.9417, + "step": 2640 + }, + { + "epoch": 0.22, + "grad_norm": 3.6621722801899974, + "learning_rate": 9.121617925125244e-06, + "loss": 0.8106, + "step": 2641 + }, + { + "epoch": 0.22, + "grad_norm": 3.6666378806199424, + "learning_rate": 9.12086836426871e-06, + "loss": 0.932, + "step": 2642 + }, + { + "epoch": 0.22, + "grad_norm": 1.4977537563590089, + "learning_rate": 9.120118514557235e-06, + "loss": 0.2099, + "step": 2643 + }, + { + "epoch": 0.22, + "grad_norm": 4.113942022339024, + "learning_rate": 9.119368376043384e-06, + "loss": 0.774, + "step": 2644 + }, + { + "epoch": 0.22, + "grad_norm": 4.816855066586145, + "learning_rate": 9.118617948779738e-06, + "loss": 1.1958, + "step": 2645 + }, + { + "epoch": 0.22, + "grad_norm": 4.335000089257285, + "learning_rate": 9.117867232818897e-06, + "loss": 1.0147, + "step": 2646 + }, + { + "epoch": 0.22, + "grad_norm": 4.171377852877847, + "learning_rate": 9.117116228213485e-06, + "loss": 0.9544, + "step": 2647 + }, + { + "epoch": 0.22, + "grad_norm": 5.662025483828039, + "learning_rate": 9.116364935016144e-06, + "loss": 1.0558, + "step": 2648 + }, + { + "epoch": 0.22, + "grad_norm": 3.5688211631492845, + "learning_rate": 9.115613353279533e-06, + "loss": 0.6812, + "step": 2649 + }, + { + "epoch": 0.22, + "grad_norm": 4.267943032312493, + "learning_rate": 9.11486148305634e-06, + "loss": 1.248, + "step": 2650 + }, + { + "epoch": 0.22, + "grad_norm": 4.542385422783675, + "learning_rate": 9.114109324399263e-06, + "loss": 1.3974, + "step": 2651 + }, + { + "epoch": 0.22, + "grad_norm": 3.4433326699791102, + "learning_rate": 9.113356877361027e-06, + "loss": 0.4743, + "step": 2652 + }, + { + "epoch": 0.22, + "grad_norm": 3.6224171332006985, + "learning_rate": 9.112604141994376e-06, + "loss": 0.7523, + "step": 2653 + }, + { + "epoch": 0.22, + "grad_norm": 3.3394163890362543, + "learning_rate": 9.111851118352074e-06, + "loss": 0.6131, + "step": 2654 + }, + { + "epoch": 0.22, + "grad_norm": 3.7817423764246874, + "learning_rate": 9.111097806486901e-06, + "loss": 1.0332, + "step": 2655 + }, + { + "epoch": 0.22, + "grad_norm": 2.691615566474534, + "learning_rate": 9.110344206451665e-06, + "loss": 0.697, + "step": 2656 + }, + { + "epoch": 0.22, + "grad_norm": 3.4666547510013435, + "learning_rate": 9.109590318299189e-06, + "loss": 0.9713, + "step": 2657 + }, + { + "epoch": 0.22, + "grad_norm": 4.537850282404029, + "learning_rate": 9.108836142082316e-06, + "loss": 1.2781, + "step": 2658 + }, + { + "epoch": 0.22, + "grad_norm": 4.533021480613041, + "learning_rate": 9.108081677853911e-06, + "loss": 1.2023, + "step": 2659 + }, + { + "epoch": 0.22, + "grad_norm": 0.7972539953327008, + "learning_rate": 9.107326925666857e-06, + "loss": 0.1528, + "step": 2660 + }, + { + "epoch": 0.22, + "grad_norm": 3.7799263906140883, + "learning_rate": 9.106571885574062e-06, + "loss": 0.9498, + "step": 2661 + }, + { + "epoch": 0.22, + "grad_norm": 4.73423685102641, + "learning_rate": 9.105816557628451e-06, + "loss": 0.9331, + "step": 2662 + }, + { + "epoch": 0.22, + "grad_norm": 4.762462442085408, + "learning_rate": 9.105060941882966e-06, + "loss": 1.0003, + "step": 2663 + }, + { + "epoch": 0.22, + "grad_norm": 4.012753402102101, + "learning_rate": 9.104305038390575e-06, + "loss": 0.6783, + "step": 2664 + }, + { + "epoch": 0.22, + "grad_norm": 4.68841305451672, + "learning_rate": 9.103548847204263e-06, + "loss": 1.2537, + "step": 2665 + }, + { + "epoch": 0.22, + "grad_norm": 1.846154686958786, + "learning_rate": 9.102792368377036e-06, + "loss": 0.3033, + "step": 2666 + }, + { + "epoch": 0.22, + "grad_norm": 2.785629135154621, + "learning_rate": 9.102035601961919e-06, + "loss": 0.5163, + "step": 2667 + }, + { + "epoch": 0.22, + "grad_norm": 3.528580565267492, + "learning_rate": 9.101278548011959e-06, + "loss": 0.8711, + "step": 2668 + }, + { + "epoch": 0.22, + "grad_norm": 3.3211863555450694, + "learning_rate": 9.100521206580219e-06, + "loss": 0.6148, + "step": 2669 + }, + { + "epoch": 0.22, + "grad_norm": 5.636657691789844, + "learning_rate": 9.099763577719788e-06, + "loss": 1.4335, + "step": 2670 + }, + { + "epoch": 0.22, + "grad_norm": 5.589107135519192, + "learning_rate": 9.099005661483776e-06, + "loss": 1.2736, + "step": 2671 + }, + { + "epoch": 0.22, + "grad_norm": 4.473990332404505, + "learning_rate": 9.098247457925304e-06, + "loss": 0.9545, + "step": 2672 + }, + { + "epoch": 0.22, + "grad_norm": 3.984610233768719, + "learning_rate": 9.09748896709752e-06, + "loss": 0.5687, + "step": 2673 + }, + { + "epoch": 0.22, + "grad_norm": 6.398528093033693, + "learning_rate": 9.096730189053594e-06, + "loss": 1.6897, + "step": 2674 + }, + { + "epoch": 0.22, + "grad_norm": 3.561569204175784, + "learning_rate": 9.09597112384671e-06, + "loss": 0.7358, + "step": 2675 + }, + { + "epoch": 0.22, + "grad_norm": 6.1036865093347785, + "learning_rate": 9.095211771530074e-06, + "loss": 1.1493, + "step": 2676 + }, + { + "epoch": 0.22, + "grad_norm": 1.3760146537329225, + "learning_rate": 9.094452132156917e-06, + "loss": 0.2076, + "step": 2677 + }, + { + "epoch": 0.22, + "grad_norm": 4.098394907191026, + "learning_rate": 9.093692205780485e-06, + "loss": 0.8469, + "step": 2678 + }, + { + "epoch": 0.22, + "grad_norm": 5.467224924945489, + "learning_rate": 9.092931992454044e-06, + "loss": 1.1177, + "step": 2679 + }, + { + "epoch": 0.22, + "grad_norm": 4.06386919862057, + "learning_rate": 9.092171492230883e-06, + "loss": 0.9505, + "step": 2680 + }, + { + "epoch": 0.22, + "grad_norm": 2.5510675849780027, + "learning_rate": 9.091410705164312e-06, + "loss": 0.4873, + "step": 2681 + }, + { + "epoch": 0.22, + "grad_norm": 5.255174006776468, + "learning_rate": 9.090649631307653e-06, + "loss": 1.1771, + "step": 2682 + }, + { + "epoch": 0.22, + "grad_norm": 4.599542010746827, + "learning_rate": 9.08988827071426e-06, + "loss": 0.8635, + "step": 2683 + }, + { + "epoch": 0.22, + "grad_norm": 5.673085770773519, + "learning_rate": 9.089126623437496e-06, + "loss": 1.5034, + "step": 2684 + }, + { + "epoch": 0.22, + "grad_norm": 4.117986430311266, + "learning_rate": 9.088364689530753e-06, + "loss": 1.0058, + "step": 2685 + }, + { + "epoch": 0.22, + "grad_norm": 2.5266863984789554, + "learning_rate": 9.087602469047438e-06, + "loss": 0.4884, + "step": 2686 + }, + { + "epoch": 0.22, + "grad_norm": 3.577416855872609, + "learning_rate": 9.08683996204098e-06, + "loss": 0.9433, + "step": 2687 + }, + { + "epoch": 0.22, + "grad_norm": 4.629953813099042, + "learning_rate": 9.086077168564825e-06, + "loss": 1.1885, + "step": 2688 + }, + { + "epoch": 0.22, + "grad_norm": 2.596494599376898, + "learning_rate": 9.085314088672443e-06, + "loss": 0.797, + "step": 2689 + }, + { + "epoch": 0.22, + "grad_norm": 5.020907218922748, + "learning_rate": 9.084550722417324e-06, + "loss": 1.3782, + "step": 2690 + }, + { + "epoch": 0.22, + "grad_norm": 4.125555556634683, + "learning_rate": 9.083787069852976e-06, + "loss": 0.8511, + "step": 2691 + }, + { + "epoch": 0.22, + "grad_norm": 3.513382843467977, + "learning_rate": 9.083023131032926e-06, + "loss": 0.6633, + "step": 2692 + }, + { + "epoch": 0.22, + "grad_norm": 2.869475350461378, + "learning_rate": 9.082258906010724e-06, + "loss": 0.7274, + "step": 2693 + }, + { + "epoch": 0.22, + "grad_norm": 5.066640667462265, + "learning_rate": 9.081494394839937e-06, + "loss": 0.8265, + "step": 2694 + }, + { + "epoch": 0.22, + "grad_norm": 2.9031623988993083, + "learning_rate": 9.080729597574159e-06, + "loss": 0.7738, + "step": 2695 + }, + { + "epoch": 0.22, + "grad_norm": 5.210771030915046, + "learning_rate": 9.079964514266993e-06, + "loss": 1.0799, + "step": 2696 + }, + { + "epoch": 0.22, + "grad_norm": 3.746004952544171, + "learning_rate": 9.079199144972072e-06, + "loss": 0.8076, + "step": 2697 + }, + { + "epoch": 0.22, + "grad_norm": 5.211666453404126, + "learning_rate": 9.078433489743044e-06, + "loss": 0.8715, + "step": 2698 + }, + { + "epoch": 0.22, + "grad_norm": 3.725053966461781, + "learning_rate": 9.077667548633576e-06, + "loss": 0.7972, + "step": 2699 + }, + { + "epoch": 0.22, + "grad_norm": 2.9799110669409408, + "learning_rate": 9.07690132169736e-06, + "loss": 0.6411, + "step": 2700 + }, + { + "epoch": 0.22, + "grad_norm": 4.3679873723537055, + "learning_rate": 9.076134808988104e-06, + "loss": 1.417, + "step": 2701 + }, + { + "epoch": 0.22, + "grad_norm": 2.1280222277258822, + "learning_rate": 9.075368010559538e-06, + "loss": 0.2649, + "step": 2702 + }, + { + "epoch": 0.22, + "grad_norm": 3.6689600488903578, + "learning_rate": 9.07460092646541e-06, + "loss": 0.9392, + "step": 2703 + }, + { + "epoch": 0.22, + "grad_norm": 4.193899358666189, + "learning_rate": 9.073833556759489e-06, + "loss": 0.801, + "step": 2704 + }, + { + "epoch": 0.22, + "grad_norm": 5.847355714154215, + "learning_rate": 9.073065901495565e-06, + "loss": 1.1343, + "step": 2705 + }, + { + "epoch": 0.22, + "grad_norm": 3.2529862008053017, + "learning_rate": 9.072297960727449e-06, + "loss": 0.7906, + "step": 2706 + }, + { + "epoch": 0.22, + "grad_norm": 2.6042894685662863, + "learning_rate": 9.071529734508968e-06, + "loss": 0.704, + "step": 2707 + }, + { + "epoch": 0.22, + "grad_norm": 2.9368839985787356, + "learning_rate": 9.070761222893972e-06, + "loss": 0.5706, + "step": 2708 + }, + { + "epoch": 0.22, + "grad_norm": 3.7028095547180655, + "learning_rate": 9.06999242593633e-06, + "loss": 0.8101, + "step": 2709 + }, + { + "epoch": 0.22, + "grad_norm": 3.8895209253771186, + "learning_rate": 9.06922334368993e-06, + "loss": 0.9076, + "step": 2710 + }, + { + "epoch": 0.22, + "grad_norm": 5.513508298678548, + "learning_rate": 9.068453976208685e-06, + "loss": 1.2225, + "step": 2711 + }, + { + "epoch": 0.22, + "grad_norm": 2.7300236102942743, + "learning_rate": 9.067684323546522e-06, + "loss": 0.5809, + "step": 2712 + }, + { + "epoch": 0.22, + "grad_norm": 2.737520002700896, + "learning_rate": 9.066914385757391e-06, + "loss": 0.5947, + "step": 2713 + }, + { + "epoch": 0.22, + "grad_norm": 3.500085553474255, + "learning_rate": 9.066144162895259e-06, + "loss": 0.5553, + "step": 2714 + }, + { + "epoch": 0.22, + "grad_norm": 5.173765965653176, + "learning_rate": 9.065373655014118e-06, + "loss": 0.9426, + "step": 2715 + }, + { + "epoch": 0.22, + "grad_norm": 3.0139284654348866, + "learning_rate": 9.064602862167978e-06, + "loss": 0.9988, + "step": 2716 + }, + { + "epoch": 0.22, + "grad_norm": 1.0631356101532043, + "learning_rate": 9.063831784410864e-06, + "loss": 0.233, + "step": 2717 + }, + { + "epoch": 0.22, + "grad_norm": 3.3993921934621922, + "learning_rate": 9.06306042179683e-06, + "loss": 0.6685, + "step": 2718 + }, + { + "epoch": 0.22, + "grad_norm": 3.032180623334634, + "learning_rate": 9.06228877437994e-06, + "loss": 0.5603, + "step": 2719 + }, + { + "epoch": 0.22, + "grad_norm": 5.139258586810135, + "learning_rate": 9.061516842214289e-06, + "loss": 1.2063, + "step": 2720 + }, + { + "epoch": 0.22, + "grad_norm": 4.178060190654128, + "learning_rate": 9.060744625353981e-06, + "loss": 1.0683, + "step": 2721 + }, + { + "epoch": 0.22, + "grad_norm": 5.040238592557097, + "learning_rate": 9.059972123853147e-06, + "loss": 1.5402, + "step": 2722 + }, + { + "epoch": 0.22, + "grad_norm": 4.555367406515826, + "learning_rate": 9.059199337765938e-06, + "loss": 1.0049, + "step": 2723 + }, + { + "epoch": 0.22, + "grad_norm": 3.609256108481837, + "learning_rate": 9.05842626714652e-06, + "loss": 1.0136, + "step": 2724 + }, + { + "epoch": 0.22, + "grad_norm": 2.6825562606320705, + "learning_rate": 9.057652912049084e-06, + "loss": 0.443, + "step": 2725 + }, + { + "epoch": 0.22, + "grad_norm": 6.359237350151824, + "learning_rate": 9.056879272527837e-06, + "loss": 1.1107, + "step": 2726 + }, + { + "epoch": 0.22, + "grad_norm": 3.4778726856530744, + "learning_rate": 9.05610534863701e-06, + "loss": 0.9013, + "step": 2727 + }, + { + "epoch": 0.22, + "grad_norm": 4.147177221249099, + "learning_rate": 9.05533114043085e-06, + "loss": 1.0685, + "step": 2728 + }, + { + "epoch": 0.22, + "grad_norm": 3.9314829273281657, + "learning_rate": 9.054556647963624e-06, + "loss": 1.0849, + "step": 2729 + }, + { + "epoch": 0.22, + "grad_norm": 3.4745032042371378, + "learning_rate": 9.053781871289624e-06, + "loss": 0.7362, + "step": 2730 + }, + { + "epoch": 0.22, + "grad_norm": 5.777530136657484, + "learning_rate": 9.053006810463156e-06, + "loss": 1.4634, + "step": 2731 + }, + { + "epoch": 0.22, + "grad_norm": 4.414922717015416, + "learning_rate": 9.05223146553855e-06, + "loss": 1.0374, + "step": 2732 + }, + { + "epoch": 0.22, + "grad_norm": 4.492283894983976, + "learning_rate": 9.051455836570154e-06, + "loss": 1.0562, + "step": 2733 + }, + { + "epoch": 0.22, + "grad_norm": 1.8660740691181457, + "learning_rate": 9.050679923612334e-06, + "loss": 0.3383, + "step": 2734 + }, + { + "epoch": 0.22, + "grad_norm": 4.992589365919628, + "learning_rate": 9.049903726719482e-06, + "loss": 1.4697, + "step": 2735 + }, + { + "epoch": 0.22, + "grad_norm": 3.2639832225852814, + "learning_rate": 9.049127245946003e-06, + "loss": 0.5715, + "step": 2736 + }, + { + "epoch": 0.22, + "grad_norm": 4.503803510304516, + "learning_rate": 9.048350481346326e-06, + "loss": 0.793, + "step": 2737 + }, + { + "epoch": 0.22, + "grad_norm": 3.2856682173269554, + "learning_rate": 9.047573432974898e-06, + "loss": 0.9469, + "step": 2738 + }, + { + "epoch": 0.22, + "grad_norm": 2.985965444532201, + "learning_rate": 9.04679610088619e-06, + "loss": 0.3888, + "step": 2739 + }, + { + "epoch": 0.22, + "grad_norm": 4.433476556382269, + "learning_rate": 9.046018485134684e-06, + "loss": 1.0396, + "step": 2740 + }, + { + "epoch": 0.22, + "grad_norm": 2.1370696305822245, + "learning_rate": 9.045240585774893e-06, + "loss": 0.5518, + "step": 2741 + }, + { + "epoch": 0.22, + "grad_norm": 4.337055499474134, + "learning_rate": 9.04446240286134e-06, + "loss": 0.7675, + "step": 2742 + }, + { + "epoch": 0.22, + "grad_norm": 2.000323836971652, + "learning_rate": 9.043683936448576e-06, + "loss": 0.4838, + "step": 2743 + }, + { + "epoch": 0.22, + "grad_norm": 5.88456891213328, + "learning_rate": 9.042905186591165e-06, + "loss": 1.1856, + "step": 2744 + }, + { + "epoch": 0.22, + "grad_norm": 3.4318484148804034, + "learning_rate": 9.042126153343696e-06, + "loss": 0.6447, + "step": 2745 + }, + { + "epoch": 0.22, + "grad_norm": 3.4363596168029864, + "learning_rate": 9.041346836760774e-06, + "loss": 0.6539, + "step": 2746 + }, + { + "epoch": 0.22, + "grad_norm": 2.8489378324494146, + "learning_rate": 9.040567236897027e-06, + "loss": 0.6987, + "step": 2747 + }, + { + "epoch": 0.22, + "grad_norm": 3.333163031387533, + "learning_rate": 9.039787353807101e-06, + "loss": 0.8948, + "step": 2748 + }, + { + "epoch": 0.22, + "grad_norm": 4.691612764942628, + "learning_rate": 9.039007187545663e-06, + "loss": 1.2254, + "step": 2749 + }, + { + "epoch": 0.22, + "grad_norm": 4.395036206555651, + "learning_rate": 9.0382267381674e-06, + "loss": 1.1411, + "step": 2750 + }, + { + "epoch": 0.22, + "grad_norm": 4.264749249109578, + "learning_rate": 9.037446005727015e-06, + "loss": 0.8549, + "step": 2751 + }, + { + "epoch": 0.22, + "grad_norm": 2.3635051911212206, + "learning_rate": 9.036664990279238e-06, + "loss": 0.6115, + "step": 2752 + }, + { + "epoch": 0.23, + "grad_norm": 2.539616537868484, + "learning_rate": 9.035883691878811e-06, + "loss": 0.627, + "step": 2753 + }, + { + "epoch": 0.23, + "grad_norm": 3.6299450090948415, + "learning_rate": 9.035102110580503e-06, + "loss": 0.5744, + "step": 2754 + }, + { + "epoch": 0.23, + "grad_norm": 2.660952668616124, + "learning_rate": 9.034320246439099e-06, + "loss": 0.5867, + "step": 2755 + }, + { + "epoch": 0.23, + "grad_norm": 3.7375965940589637, + "learning_rate": 9.0335380995094e-06, + "loss": 0.7727, + "step": 2756 + }, + { + "epoch": 0.23, + "grad_norm": 5.536191405126242, + "learning_rate": 9.032755669846236e-06, + "loss": 1.3356, + "step": 2757 + }, + { + "epoch": 0.23, + "grad_norm": 5.17031370829871, + "learning_rate": 9.03197295750445e-06, + "loss": 1.2032, + "step": 2758 + }, + { + "epoch": 0.23, + "grad_norm": 3.026485263691201, + "learning_rate": 9.031189962538906e-06, + "loss": 0.837, + "step": 2759 + }, + { + "epoch": 0.23, + "grad_norm": 4.423354422955292, + "learning_rate": 9.030406685004491e-06, + "loss": 0.9258, + "step": 2760 + }, + { + "epoch": 0.23, + "grad_norm": 3.055087678512803, + "learning_rate": 9.029623124956107e-06, + "loss": 0.6416, + "step": 2761 + }, + { + "epoch": 0.23, + "grad_norm": 4.190891435470464, + "learning_rate": 9.028839282448678e-06, + "loss": 0.5773, + "step": 2762 + }, + { + "epoch": 0.23, + "grad_norm": 2.819205835772208, + "learning_rate": 9.02805515753715e-06, + "loss": 0.6063, + "step": 2763 + }, + { + "epoch": 0.23, + "grad_norm": 4.024035874945257, + "learning_rate": 9.027270750276486e-06, + "loss": 0.5309, + "step": 2764 + }, + { + "epoch": 0.23, + "grad_norm": 2.9569835244011022, + "learning_rate": 9.026486060721668e-06, + "loss": 0.521, + "step": 2765 + }, + { + "epoch": 0.23, + "grad_norm": 5.359550607675502, + "learning_rate": 9.0257010889277e-06, + "loss": 1.5101, + "step": 2766 + }, + { + "epoch": 0.23, + "grad_norm": 3.8025381643603797, + "learning_rate": 9.024915834949607e-06, + "loss": 1.0569, + "step": 2767 + }, + { + "epoch": 0.23, + "grad_norm": 4.5427223532062015, + "learning_rate": 9.02413029884243e-06, + "loss": 1.044, + "step": 2768 + }, + { + "epoch": 0.23, + "grad_norm": 3.606930909658853, + "learning_rate": 9.023344480661234e-06, + "loss": 0.8584, + "step": 2769 + }, + { + "epoch": 0.23, + "grad_norm": 5.4269758171149345, + "learning_rate": 9.022558380461097e-06, + "loss": 1.2617, + "step": 2770 + }, + { + "epoch": 0.23, + "grad_norm": 3.947679373528407, + "learning_rate": 9.021771998297124e-06, + "loss": 0.7401, + "step": 2771 + }, + { + "epoch": 0.23, + "grad_norm": 3.6036071767627855, + "learning_rate": 9.020985334224437e-06, + "loss": 0.987, + "step": 2772 + }, + { + "epoch": 0.23, + "grad_norm": 4.958509943303449, + "learning_rate": 9.020198388298179e-06, + "loss": 1.4065, + "step": 2773 + }, + { + "epoch": 0.23, + "grad_norm": 3.673660488009326, + "learning_rate": 9.019411160573508e-06, + "loss": 0.7963, + "step": 2774 + }, + { + "epoch": 0.23, + "grad_norm": 3.650622270596607, + "learning_rate": 9.018623651105607e-06, + "loss": 0.5745, + "step": 2775 + }, + { + "epoch": 0.23, + "grad_norm": 4.216886938088694, + "learning_rate": 9.017835859949677e-06, + "loss": 0.8929, + "step": 2776 + }, + { + "epoch": 0.23, + "grad_norm": 1.8007191532303202, + "learning_rate": 9.01704778716094e-06, + "loss": 0.3384, + "step": 2777 + }, + { + "epoch": 0.23, + "grad_norm": 3.657644897341523, + "learning_rate": 9.016259432794637e-06, + "loss": 0.811, + "step": 2778 + }, + { + "epoch": 0.23, + "grad_norm": 3.512963732012924, + "learning_rate": 9.015470796906024e-06, + "loss": 1.1201, + "step": 2779 + }, + { + "epoch": 0.23, + "grad_norm": 5.010832411307142, + "learning_rate": 9.014681879550385e-06, + "loss": 0.7525, + "step": 2780 + }, + { + "epoch": 0.23, + "grad_norm": 5.486459322409384, + "learning_rate": 9.013892680783016e-06, + "loss": 1.0253, + "step": 2781 + }, + { + "epoch": 0.23, + "grad_norm": 3.2106168850862566, + "learning_rate": 9.01310320065924e-06, + "loss": 0.8443, + "step": 2782 + }, + { + "epoch": 0.23, + "grad_norm": 2.4301667912614193, + "learning_rate": 9.012313439234395e-06, + "loss": 0.637, + "step": 2783 + }, + { + "epoch": 0.23, + "grad_norm": 4.212204675584937, + "learning_rate": 9.011523396563839e-06, + "loss": 0.8935, + "step": 2784 + }, + { + "epoch": 0.23, + "grad_norm": 5.044338879716831, + "learning_rate": 9.010733072702952e-06, + "loss": 1.2117, + "step": 2785 + }, + { + "epoch": 0.23, + "grad_norm": 2.859085867712923, + "learning_rate": 9.009942467707131e-06, + "loss": 0.5685, + "step": 2786 + }, + { + "epoch": 0.23, + "grad_norm": 3.7258987707406455, + "learning_rate": 9.009151581631795e-06, + "loss": 0.8138, + "step": 2787 + }, + { + "epoch": 0.23, + "grad_norm": 3.3186899853149128, + "learning_rate": 9.00836041453238e-06, + "loss": 0.5666, + "step": 2788 + }, + { + "epoch": 0.23, + "grad_norm": 2.6209517043383737, + "learning_rate": 9.007568966464345e-06, + "loss": 0.6105, + "step": 2789 + }, + { + "epoch": 0.23, + "grad_norm": 3.3931208874085, + "learning_rate": 9.006777237483165e-06, + "loss": 0.4942, + "step": 2790 + }, + { + "epoch": 0.23, + "grad_norm": 4.544807705087439, + "learning_rate": 9.00598522764434e-06, + "loss": 0.7926, + "step": 2791 + }, + { + "epoch": 0.23, + "grad_norm": 4.005000496520486, + "learning_rate": 9.005192937003384e-06, + "loss": 0.7464, + "step": 2792 + }, + { + "epoch": 0.23, + "grad_norm": 4.872935081002712, + "learning_rate": 9.004400365615834e-06, + "loss": 1.3192, + "step": 2793 + }, + { + "epoch": 0.23, + "grad_norm": 3.7811303859401684, + "learning_rate": 9.003607513537245e-06, + "loss": 0.7219, + "step": 2794 + }, + { + "epoch": 0.23, + "grad_norm": 4.354983640897277, + "learning_rate": 9.002814380823192e-06, + "loss": 0.757, + "step": 2795 + }, + { + "epoch": 0.23, + "grad_norm": 4.254025619919019, + "learning_rate": 9.002020967529272e-06, + "loss": 0.7183, + "step": 2796 + }, + { + "epoch": 0.23, + "grad_norm": 4.057649153469898, + "learning_rate": 9.0012272737111e-06, + "loss": 1.0508, + "step": 2797 + }, + { + "epoch": 0.23, + "grad_norm": 3.561540618546092, + "learning_rate": 9.000433299424308e-06, + "loss": 0.5899, + "step": 2798 + }, + { + "epoch": 0.23, + "grad_norm": 3.940995656792812, + "learning_rate": 8.999639044724555e-06, + "loss": 0.9293, + "step": 2799 + }, + { + "epoch": 0.23, + "grad_norm": 2.6829296806974505, + "learning_rate": 8.998844509667508e-06, + "loss": 0.6437, + "step": 2800 + }, + { + "epoch": 0.23, + "grad_norm": 4.433040218782608, + "learning_rate": 8.998049694308866e-06, + "loss": 0.6815, + "step": 2801 + }, + { + "epoch": 0.23, + "grad_norm": 3.4827796949727086, + "learning_rate": 8.99725459870434e-06, + "loss": 0.8859, + "step": 2802 + }, + { + "epoch": 0.23, + "grad_norm": 4.9475617906007505, + "learning_rate": 8.996459222909662e-06, + "loss": 1.2561, + "step": 2803 + }, + { + "epoch": 0.23, + "grad_norm": 2.340861492907865, + "learning_rate": 8.995663566980583e-06, + "loss": 0.4739, + "step": 2804 + }, + { + "epoch": 0.23, + "grad_norm": 4.983847979765691, + "learning_rate": 8.994867630972878e-06, + "loss": 0.9867, + "step": 2805 + }, + { + "epoch": 0.23, + "grad_norm": 3.4039614230101574, + "learning_rate": 8.99407141494234e-06, + "loss": 0.7996, + "step": 2806 + }, + { + "epoch": 0.23, + "grad_norm": 5.665702582646373, + "learning_rate": 8.993274918944777e-06, + "loss": 1.0746, + "step": 2807 + }, + { + "epoch": 0.23, + "grad_norm": 1.010574700798193, + "learning_rate": 8.992478143036022e-06, + "loss": 0.1999, + "step": 2808 + }, + { + "epoch": 0.23, + "grad_norm": 4.811328637907539, + "learning_rate": 8.991681087271922e-06, + "loss": 0.9865, + "step": 2809 + }, + { + "epoch": 0.23, + "grad_norm": 3.8568309546794937, + "learning_rate": 8.990883751708353e-06, + "loss": 0.812, + "step": 2810 + }, + { + "epoch": 0.23, + "grad_norm": 4.1161826007469795, + "learning_rate": 8.990086136401199e-06, + "loss": 0.6223, + "step": 2811 + }, + { + "epoch": 0.23, + "grad_norm": 4.456885999014682, + "learning_rate": 8.989288241406371e-06, + "loss": 0.8013, + "step": 2812 + }, + { + "epoch": 0.23, + "grad_norm": 2.374121412700305, + "learning_rate": 8.9884900667798e-06, + "loss": 0.4702, + "step": 2813 + }, + { + "epoch": 0.23, + "grad_norm": 3.5488691936882972, + "learning_rate": 8.987691612577433e-06, + "loss": 0.9326, + "step": 2814 + }, + { + "epoch": 0.23, + "grad_norm": 4.828785058948604, + "learning_rate": 8.986892878855238e-06, + "loss": 1.2953, + "step": 2815 + }, + { + "epoch": 0.23, + "grad_norm": 5.465452757305021, + "learning_rate": 8.986093865669205e-06, + "loss": 0.8868, + "step": 2816 + }, + { + "epoch": 0.23, + "grad_norm": 4.891282308557172, + "learning_rate": 8.985294573075338e-06, + "loss": 1.0112, + "step": 2817 + }, + { + "epoch": 0.23, + "grad_norm": 3.890800750650286, + "learning_rate": 8.984495001129667e-06, + "loss": 0.9078, + "step": 2818 + }, + { + "epoch": 0.23, + "grad_norm": 6.05677774070146, + "learning_rate": 8.983695149888234e-06, + "loss": 1.4547, + "step": 2819 + }, + { + "epoch": 0.23, + "grad_norm": 5.250995322790718, + "learning_rate": 8.982895019407112e-06, + "loss": 1.038, + "step": 2820 + }, + { + "epoch": 0.23, + "grad_norm": 3.0724656980668823, + "learning_rate": 8.98209460974238e-06, + "loss": 0.7388, + "step": 2821 + }, + { + "epoch": 0.23, + "grad_norm": 3.340758261958266, + "learning_rate": 8.981293920950147e-06, + "loss": 1.0972, + "step": 2822 + }, + { + "epoch": 0.23, + "grad_norm": 2.462736754135804, + "learning_rate": 8.980492953086535e-06, + "loss": 0.5185, + "step": 2823 + }, + { + "epoch": 0.23, + "grad_norm": 3.2096517402684297, + "learning_rate": 8.979691706207692e-06, + "loss": 0.6595, + "step": 2824 + }, + { + "epoch": 0.23, + "grad_norm": 4.346508184568805, + "learning_rate": 8.97889018036978e-06, + "loss": 0.9841, + "step": 2825 + }, + { + "epoch": 0.23, + "grad_norm": 3.848452650091313, + "learning_rate": 8.978088375628983e-06, + "loss": 0.7972, + "step": 2826 + }, + { + "epoch": 0.23, + "grad_norm": 4.526629528918161, + "learning_rate": 8.977286292041503e-06, + "loss": 0.9327, + "step": 2827 + }, + { + "epoch": 0.23, + "grad_norm": 2.9604609578722085, + "learning_rate": 8.976483929663562e-06, + "loss": 0.5696, + "step": 2828 + }, + { + "epoch": 0.23, + "grad_norm": 4.259698795083026, + "learning_rate": 8.975681288551405e-06, + "loss": 0.8955, + "step": 2829 + }, + { + "epoch": 0.23, + "grad_norm": 1.713873183280796, + "learning_rate": 8.97487836876129e-06, + "loss": 0.3443, + "step": 2830 + }, + { + "epoch": 0.23, + "grad_norm": 2.466880037688497, + "learning_rate": 8.974075170349502e-06, + "loss": 0.6895, + "step": 2831 + }, + { + "epoch": 0.23, + "grad_norm": 2.5967277234419393, + "learning_rate": 8.973271693372338e-06, + "loss": 0.411, + "step": 2832 + }, + { + "epoch": 0.23, + "grad_norm": 4.029576229429126, + "learning_rate": 8.972467937886122e-06, + "loss": 0.733, + "step": 2833 + }, + { + "epoch": 0.23, + "grad_norm": 1.9498422274837677, + "learning_rate": 8.971663903947191e-06, + "loss": 0.3991, + "step": 2834 + }, + { + "epoch": 0.23, + "grad_norm": 2.3772068633274634, + "learning_rate": 8.970859591611904e-06, + "loss": 0.5426, + "step": 2835 + }, + { + "epoch": 0.23, + "grad_norm": 3.5590228128457717, + "learning_rate": 8.970055000936643e-06, + "loss": 0.9203, + "step": 2836 + }, + { + "epoch": 0.23, + "grad_norm": 3.6475066147276056, + "learning_rate": 8.969250131977803e-06, + "loss": 0.9788, + "step": 2837 + }, + { + "epoch": 0.23, + "grad_norm": 4.114364170867909, + "learning_rate": 8.968444984791801e-06, + "loss": 0.7102, + "step": 2838 + }, + { + "epoch": 0.23, + "grad_norm": 3.201567434821648, + "learning_rate": 8.967639559435079e-06, + "loss": 0.7126, + "step": 2839 + }, + { + "epoch": 0.23, + "grad_norm": 3.8625761671129175, + "learning_rate": 8.96683385596409e-06, + "loss": 1.3315, + "step": 2840 + }, + { + "epoch": 0.23, + "grad_norm": 5.57336965684408, + "learning_rate": 8.966027874435313e-06, + "loss": 1.3707, + "step": 2841 + }, + { + "epoch": 0.23, + "grad_norm": 2.4775308239244853, + "learning_rate": 8.965221614905241e-06, + "loss": 0.6265, + "step": 2842 + }, + { + "epoch": 0.23, + "grad_norm": 3.850705463406801, + "learning_rate": 8.964415077430391e-06, + "loss": 0.6261, + "step": 2843 + }, + { + "epoch": 0.23, + "grad_norm": 3.021140145526666, + "learning_rate": 8.963608262067296e-06, + "loss": 0.4525, + "step": 2844 + }, + { + "epoch": 0.23, + "grad_norm": 4.147160395871126, + "learning_rate": 8.962801168872513e-06, + "loss": 0.9202, + "step": 2845 + }, + { + "epoch": 0.23, + "grad_norm": 1.6818190008786265, + "learning_rate": 8.961993797902613e-06, + "loss": 0.2499, + "step": 2846 + }, + { + "epoch": 0.23, + "grad_norm": 3.281047026662951, + "learning_rate": 8.961186149214191e-06, + "loss": 0.8977, + "step": 2847 + }, + { + "epoch": 0.23, + "grad_norm": 3.101860188112433, + "learning_rate": 8.96037822286386e-06, + "loss": 0.6484, + "step": 2848 + }, + { + "epoch": 0.23, + "grad_norm": 2.623832576872076, + "learning_rate": 8.959570018908248e-06, + "loss": 0.6869, + "step": 2849 + }, + { + "epoch": 0.23, + "grad_norm": 4.0541378484556665, + "learning_rate": 8.958761537404012e-06, + "loss": 0.7, + "step": 2850 + }, + { + "epoch": 0.23, + "grad_norm": 2.779380648621428, + "learning_rate": 8.957952778407822e-06, + "loss": 0.6621, + "step": 2851 + }, + { + "epoch": 0.23, + "grad_norm": 4.634388934331967, + "learning_rate": 8.957143741976366e-06, + "loss": 1.0359, + "step": 2852 + }, + { + "epoch": 0.23, + "grad_norm": 3.745980430150055, + "learning_rate": 8.956334428166355e-06, + "loss": 1.0663, + "step": 2853 + }, + { + "epoch": 0.23, + "grad_norm": 1.0527854010053523, + "learning_rate": 8.95552483703452e-06, + "loss": 0.1956, + "step": 2854 + }, + { + "epoch": 0.23, + "grad_norm": 3.2624856816156536, + "learning_rate": 8.954714968637606e-06, + "loss": 0.8258, + "step": 2855 + }, + { + "epoch": 0.23, + "grad_norm": 3.8966623314578803, + "learning_rate": 8.953904823032384e-06, + "loss": 0.9205, + "step": 2856 + }, + { + "epoch": 0.23, + "grad_norm": 4.640423997502576, + "learning_rate": 8.953094400275644e-06, + "loss": 1.499, + "step": 2857 + }, + { + "epoch": 0.23, + "grad_norm": 3.986840018375436, + "learning_rate": 8.952283700424188e-06, + "loss": 1.0383, + "step": 2858 + }, + { + "epoch": 0.23, + "grad_norm": 4.606151699801707, + "learning_rate": 8.951472723534846e-06, + "loss": 1.1475, + "step": 2859 + }, + { + "epoch": 0.23, + "grad_norm": 3.788270601765476, + "learning_rate": 8.950661469664462e-06, + "loss": 1.0722, + "step": 2860 + }, + { + "epoch": 0.23, + "grad_norm": 4.836919142856838, + "learning_rate": 8.949849938869904e-06, + "loss": 0.9889, + "step": 2861 + }, + { + "epoch": 0.23, + "grad_norm": 4.563519527237157, + "learning_rate": 8.949038131208054e-06, + "loss": 0.8877, + "step": 2862 + }, + { + "epoch": 0.23, + "grad_norm": 4.721883252612204, + "learning_rate": 8.948226046735817e-06, + "loss": 0.5332, + "step": 2863 + }, + { + "epoch": 0.23, + "grad_norm": 3.448798099577989, + "learning_rate": 8.947413685510118e-06, + "loss": 1.032, + "step": 2864 + }, + { + "epoch": 0.23, + "grad_norm": 3.160199095042135, + "learning_rate": 8.946601047587898e-06, + "loss": 0.5859, + "step": 2865 + }, + { + "epoch": 0.23, + "grad_norm": 5.415316077485991, + "learning_rate": 8.94578813302612e-06, + "loss": 1.6669, + "step": 2866 + }, + { + "epoch": 0.23, + "grad_norm": 4.28215722241667, + "learning_rate": 8.944974941881766e-06, + "loss": 0.883, + "step": 2867 + }, + { + "epoch": 0.23, + "grad_norm": 4.2851793502192725, + "learning_rate": 8.94416147421184e-06, + "loss": 0.9386, + "step": 2868 + }, + { + "epoch": 0.23, + "grad_norm": 1.7798034305693526, + "learning_rate": 8.943347730073355e-06, + "loss": 0.3629, + "step": 2869 + }, + { + "epoch": 0.23, + "grad_norm": 4.029526509580939, + "learning_rate": 8.942533709523358e-06, + "loss": 1.2032, + "step": 2870 + }, + { + "epoch": 0.23, + "grad_norm": 1.715867193032858, + "learning_rate": 8.941719412618905e-06, + "loss": 0.2987, + "step": 2871 + }, + { + "epoch": 0.23, + "grad_norm": 4.280487822475332, + "learning_rate": 8.940904839417078e-06, + "loss": 0.7824, + "step": 2872 + }, + { + "epoch": 0.23, + "grad_norm": 2.987478844312189, + "learning_rate": 8.94008998997497e-06, + "loss": 0.7334, + "step": 2873 + }, + { + "epoch": 0.23, + "grad_norm": 3.791659519165454, + "learning_rate": 8.939274864349702e-06, + "loss": 0.9929, + "step": 2874 + }, + { + "epoch": 0.23, + "grad_norm": 7.081963403891954, + "learning_rate": 8.93845946259841e-06, + "loss": 1.318, + "step": 2875 + }, + { + "epoch": 0.24, + "grad_norm": 3.7244126800865307, + "learning_rate": 8.93764378477825e-06, + "loss": 0.9154, + "step": 2876 + }, + { + "epoch": 0.24, + "grad_norm": 3.4485265914698013, + "learning_rate": 8.936827830946397e-06, + "loss": 0.7187, + "step": 2877 + }, + { + "epoch": 0.24, + "grad_norm": 3.302352810307197, + "learning_rate": 8.936011601160046e-06, + "loss": 0.7165, + "step": 2878 + }, + { + "epoch": 0.24, + "grad_norm": 4.303497388254325, + "learning_rate": 8.935195095476412e-06, + "loss": 0.9351, + "step": 2879 + }, + { + "epoch": 0.24, + "grad_norm": 3.9582810507637536, + "learning_rate": 8.934378313952727e-06, + "loss": 0.8128, + "step": 2880 + }, + { + "epoch": 0.24, + "grad_norm": 2.63455467974814, + "learning_rate": 8.933561256646247e-06, + "loss": 0.599, + "step": 2881 + }, + { + "epoch": 0.24, + "grad_norm": 3.817216225830816, + "learning_rate": 8.932743923614237e-06, + "loss": 0.8345, + "step": 2882 + }, + { + "epoch": 0.24, + "grad_norm": 2.2788123295098903, + "learning_rate": 8.931926314913998e-06, + "loss": 0.495, + "step": 2883 + }, + { + "epoch": 0.24, + "grad_norm": 1.9451263961915763, + "learning_rate": 8.931108430602834e-06, + "loss": 0.3557, + "step": 2884 + }, + { + "epoch": 0.24, + "grad_norm": 3.674895514486894, + "learning_rate": 8.930290270738079e-06, + "loss": 0.8228, + "step": 2885 + }, + { + "epoch": 0.24, + "grad_norm": 2.7542288766120713, + "learning_rate": 8.929471835377078e-06, + "loss": 0.4755, + "step": 2886 + }, + { + "epoch": 0.24, + "grad_norm": 6.057165314865615, + "learning_rate": 8.928653124577204e-06, + "loss": 1.569, + "step": 2887 + }, + { + "epoch": 0.24, + "grad_norm": 2.9347074343546335, + "learning_rate": 8.927834138395843e-06, + "loss": 0.6106, + "step": 2888 + }, + { + "epoch": 0.24, + "grad_norm": 4.248265115238616, + "learning_rate": 8.927014876890402e-06, + "loss": 1.0259, + "step": 2889 + }, + { + "epoch": 0.24, + "grad_norm": 2.6666573587433353, + "learning_rate": 8.926195340118312e-06, + "loss": 0.4648, + "step": 2890 + }, + { + "epoch": 0.24, + "grad_norm": 4.867031223985313, + "learning_rate": 8.925375528137012e-06, + "loss": 1.0118, + "step": 2891 + }, + { + "epoch": 0.24, + "grad_norm": 4.6026790307902745, + "learning_rate": 8.924555441003973e-06, + "loss": 0.8052, + "step": 2892 + }, + { + "epoch": 0.24, + "grad_norm": 4.857068725782363, + "learning_rate": 8.923735078776676e-06, + "loss": 1.1488, + "step": 2893 + }, + { + "epoch": 0.24, + "grad_norm": 2.9381131716176174, + "learning_rate": 8.922914441512626e-06, + "loss": 0.3417, + "step": 2894 + }, + { + "epoch": 0.24, + "grad_norm": 1.4132180157242291, + "learning_rate": 8.922093529269347e-06, + "loss": 0.1973, + "step": 2895 + }, + { + "epoch": 0.24, + "grad_norm": 3.9565676205158287, + "learning_rate": 8.921272342104382e-06, + "loss": 1.0563, + "step": 2896 + }, + { + "epoch": 0.24, + "grad_norm": 2.9575448163406683, + "learning_rate": 8.92045088007529e-06, + "loss": 1.0864, + "step": 2897 + }, + { + "epoch": 0.24, + "grad_norm": 3.0103830571098324, + "learning_rate": 8.919629143239652e-06, + "loss": 0.8211, + "step": 2898 + }, + { + "epoch": 0.24, + "grad_norm": 3.6310097637805145, + "learning_rate": 8.918807131655069e-06, + "loss": 0.8162, + "step": 2899 + }, + { + "epoch": 0.24, + "grad_norm": 3.6579277723192414, + "learning_rate": 8.917984845379162e-06, + "loss": 0.765, + "step": 2900 + }, + { + "epoch": 0.24, + "grad_norm": 4.447466899662024, + "learning_rate": 8.917162284469569e-06, + "loss": 0.8084, + "step": 2901 + }, + { + "epoch": 0.24, + "grad_norm": 4.345413652998398, + "learning_rate": 8.916339448983945e-06, + "loss": 1.2272, + "step": 2902 + }, + { + "epoch": 0.24, + "grad_norm": 3.388461703716796, + "learning_rate": 8.915516338979973e-06, + "loss": 0.6305, + "step": 2903 + }, + { + "epoch": 0.24, + "grad_norm": 2.687060236470035, + "learning_rate": 8.914692954515344e-06, + "loss": 0.4097, + "step": 2904 + }, + { + "epoch": 0.24, + "grad_norm": 3.6100909371539154, + "learning_rate": 8.913869295647777e-06, + "loss": 0.7575, + "step": 2905 + }, + { + "epoch": 0.24, + "grad_norm": 3.97719742428522, + "learning_rate": 8.913045362435004e-06, + "loss": 0.938, + "step": 2906 + }, + { + "epoch": 0.24, + "grad_norm": 3.1183558522117183, + "learning_rate": 8.91222115493478e-06, + "loss": 0.7863, + "step": 2907 + }, + { + "epoch": 0.24, + "grad_norm": 2.9833914186492243, + "learning_rate": 8.911396673204881e-06, + "loss": 0.5393, + "step": 2908 + }, + { + "epoch": 0.24, + "grad_norm": 3.16799800442453, + "learning_rate": 8.910571917303098e-06, + "loss": 0.7233, + "step": 2909 + }, + { + "epoch": 0.24, + "grad_norm": 3.8362192782466056, + "learning_rate": 8.90974688728724e-06, + "loss": 0.9734, + "step": 2910 + }, + { + "epoch": 0.24, + "grad_norm": 4.6294280620231945, + "learning_rate": 8.908921583215143e-06, + "loss": 1.0333, + "step": 2911 + }, + { + "epoch": 0.24, + "grad_norm": 3.8164965009412533, + "learning_rate": 8.908096005144654e-06, + "loss": 0.7351, + "step": 2912 + }, + { + "epoch": 0.24, + "grad_norm": 4.405646643100329, + "learning_rate": 8.907270153133643e-06, + "loss": 1.1334, + "step": 2913 + }, + { + "epoch": 0.24, + "grad_norm": 5.044455880720274, + "learning_rate": 8.906444027239999e-06, + "loss": 1.2822, + "step": 2914 + }, + { + "epoch": 0.24, + "grad_norm": 3.3033739123756076, + "learning_rate": 8.90561762752163e-06, + "loss": 0.5727, + "step": 2915 + }, + { + "epoch": 0.24, + "grad_norm": 4.408990250089634, + "learning_rate": 8.90479095403646e-06, + "loss": 0.7527, + "step": 2916 + }, + { + "epoch": 0.24, + "grad_norm": 4.810162697001605, + "learning_rate": 8.90396400684244e-06, + "loss": 1.1846, + "step": 2917 + }, + { + "epoch": 0.24, + "grad_norm": 5.771676233576919, + "learning_rate": 8.903136785997533e-06, + "loss": 1.3764, + "step": 2918 + }, + { + "epoch": 0.24, + "grad_norm": 4.133198302514898, + "learning_rate": 8.902309291559724e-06, + "loss": 1.001, + "step": 2919 + }, + { + "epoch": 0.24, + "grad_norm": 4.1451373989928895, + "learning_rate": 8.901481523587017e-06, + "loss": 0.9517, + "step": 2920 + }, + { + "epoch": 0.24, + "grad_norm": 3.9171169131457897, + "learning_rate": 8.900653482137434e-06, + "loss": 0.7557, + "step": 2921 + }, + { + "epoch": 0.24, + "grad_norm": 4.120715791842282, + "learning_rate": 8.899825167269016e-06, + "loss": 0.5538, + "step": 2922 + }, + { + "epoch": 0.24, + "grad_norm": 3.0165744064583526, + "learning_rate": 8.898996579039829e-06, + "loss": 0.783, + "step": 2923 + }, + { + "epoch": 0.24, + "grad_norm": 2.9787819731271474, + "learning_rate": 8.898167717507949e-06, + "loss": 0.3642, + "step": 2924 + }, + { + "epoch": 0.24, + "grad_norm": 3.9979649908032515, + "learning_rate": 8.897338582731476e-06, + "loss": 0.694, + "step": 2925 + }, + { + "epoch": 0.24, + "grad_norm": 5.465345306276673, + "learning_rate": 8.896509174768528e-06, + "loss": 1.2069, + "step": 2926 + }, + { + "epoch": 0.24, + "grad_norm": 3.001986170825165, + "learning_rate": 8.895679493677247e-06, + "loss": 0.5428, + "step": 2927 + }, + { + "epoch": 0.24, + "grad_norm": 2.970549933403323, + "learning_rate": 8.894849539515788e-06, + "loss": 0.5073, + "step": 2928 + }, + { + "epoch": 0.24, + "grad_norm": 2.581485075523803, + "learning_rate": 8.894019312342325e-06, + "loss": 0.5156, + "step": 2929 + }, + { + "epoch": 0.24, + "grad_norm": 6.758865803490577, + "learning_rate": 8.893188812215057e-06, + "loss": 1.1096, + "step": 2930 + }, + { + "epoch": 0.24, + "grad_norm": 4.242292637061147, + "learning_rate": 8.892358039192196e-06, + "loss": 0.9125, + "step": 2931 + }, + { + "epoch": 0.24, + "grad_norm": 2.851855379448961, + "learning_rate": 8.891526993331974e-06, + "loss": 0.658, + "step": 2932 + }, + { + "epoch": 0.24, + "grad_norm": 2.7730340149059307, + "learning_rate": 8.890695674692648e-06, + "loss": 0.839, + "step": 2933 + }, + { + "epoch": 0.24, + "grad_norm": 4.593971328906433, + "learning_rate": 8.889864083332486e-06, + "loss": 0.6572, + "step": 2934 + }, + { + "epoch": 0.24, + "grad_norm": 1.509712803796424, + "learning_rate": 8.889032219309781e-06, + "loss": 0.2217, + "step": 2935 + }, + { + "epoch": 0.24, + "grad_norm": 4.189866905519617, + "learning_rate": 8.888200082682842e-06, + "loss": 1.0506, + "step": 2936 + }, + { + "epoch": 0.24, + "grad_norm": 5.721479794619482, + "learning_rate": 8.887367673510002e-06, + "loss": 1.4691, + "step": 2937 + }, + { + "epoch": 0.24, + "grad_norm": 4.901042227359208, + "learning_rate": 8.886534991849603e-06, + "loss": 1.2325, + "step": 2938 + }, + { + "epoch": 0.24, + "grad_norm": 1.6920310456009662, + "learning_rate": 8.885702037760016e-06, + "loss": 0.2909, + "step": 2939 + }, + { + "epoch": 0.24, + "grad_norm": 3.1842414345062067, + "learning_rate": 8.884868811299627e-06, + "loss": 0.858, + "step": 2940 + }, + { + "epoch": 0.24, + "grad_norm": 3.217988813255178, + "learning_rate": 8.88403531252684e-06, + "loss": 0.7523, + "step": 2941 + }, + { + "epoch": 0.24, + "grad_norm": 4.585574617111233, + "learning_rate": 8.883201541500082e-06, + "loss": 1.0042, + "step": 2942 + }, + { + "epoch": 0.24, + "grad_norm": 5.334108757662184, + "learning_rate": 8.882367498277795e-06, + "loss": 1.034, + "step": 2943 + }, + { + "epoch": 0.24, + "grad_norm": 3.8751675188151795, + "learning_rate": 8.881533182918444e-06, + "loss": 0.7488, + "step": 2944 + }, + { + "epoch": 0.24, + "grad_norm": 2.5142236100060154, + "learning_rate": 8.880698595480509e-06, + "loss": 0.6338, + "step": 2945 + }, + { + "epoch": 0.24, + "grad_norm": 5.212057409312506, + "learning_rate": 8.87986373602249e-06, + "loss": 1.2324, + "step": 2946 + }, + { + "epoch": 0.24, + "grad_norm": 4.402148710685035, + "learning_rate": 8.879028604602908e-06, + "loss": 1.2733, + "step": 2947 + }, + { + "epoch": 0.24, + "grad_norm": 3.195936383889245, + "learning_rate": 8.878193201280305e-06, + "loss": 0.8655, + "step": 2948 + }, + { + "epoch": 0.24, + "grad_norm": 2.733486040762455, + "learning_rate": 8.877357526113234e-06, + "loss": 0.5531, + "step": 2949 + }, + { + "epoch": 0.24, + "grad_norm": 2.142029788504823, + "learning_rate": 8.876521579160275e-06, + "loss": 0.4354, + "step": 2950 + }, + { + "epoch": 0.24, + "grad_norm": 4.48630056663649, + "learning_rate": 8.875685360480027e-06, + "loss": 1.3183, + "step": 2951 + }, + { + "epoch": 0.24, + "grad_norm": 5.994076738007516, + "learning_rate": 8.874848870131098e-06, + "loss": 1.7964, + "step": 2952 + }, + { + "epoch": 0.24, + "grad_norm": 3.7150255903944416, + "learning_rate": 8.87401210817213e-06, + "loss": 0.5186, + "step": 2953 + }, + { + "epoch": 0.24, + "grad_norm": 2.9136979849392772, + "learning_rate": 8.87317507466177e-06, + "loss": 0.4953, + "step": 2954 + }, + { + "epoch": 0.24, + "grad_norm": 3.3771356342933405, + "learning_rate": 8.872337769658696e-06, + "loss": 0.991, + "step": 2955 + }, + { + "epoch": 0.24, + "grad_norm": 1.8083950829827165, + "learning_rate": 8.871500193221596e-06, + "loss": 0.3512, + "step": 2956 + }, + { + "epoch": 0.24, + "grad_norm": 3.5684383495880447, + "learning_rate": 8.87066234540918e-06, + "loss": 0.7557, + "step": 2957 + }, + { + "epoch": 0.24, + "grad_norm": 3.452996042800122, + "learning_rate": 8.86982422628018e-06, + "loss": 0.9597, + "step": 2958 + }, + { + "epoch": 0.24, + "grad_norm": 2.9979832047022694, + "learning_rate": 8.868985835893344e-06, + "loss": 0.6404, + "step": 2959 + }, + { + "epoch": 0.24, + "grad_norm": 4.537443193679681, + "learning_rate": 8.868147174307438e-06, + "loss": 1.2665, + "step": 2960 + }, + { + "epoch": 0.24, + "grad_norm": 3.3265675796338345, + "learning_rate": 8.86730824158125e-06, + "loss": 0.648, + "step": 2961 + }, + { + "epoch": 0.24, + "grad_norm": 5.830979842557535, + "learning_rate": 8.866469037773582e-06, + "loss": 1.4799, + "step": 2962 + }, + { + "epoch": 0.24, + "grad_norm": 4.010278861649628, + "learning_rate": 8.865629562943265e-06, + "loss": 0.3216, + "step": 2963 + }, + { + "epoch": 0.24, + "grad_norm": 3.616138757300956, + "learning_rate": 8.864789817149137e-06, + "loss": 0.7531, + "step": 2964 + }, + { + "epoch": 0.24, + "grad_norm": 5.790458959821874, + "learning_rate": 8.863949800450063e-06, + "loss": 1.4917, + "step": 2965 + }, + { + "epoch": 0.24, + "grad_norm": 2.9789882643523837, + "learning_rate": 8.863109512904924e-06, + "loss": 0.7569, + "step": 2966 + }, + { + "epoch": 0.24, + "grad_norm": 3.5722174289876714, + "learning_rate": 8.862268954572618e-06, + "loss": 0.6274, + "step": 2967 + }, + { + "epoch": 0.24, + "grad_norm": 3.0627508713733618, + "learning_rate": 8.861428125512071e-06, + "loss": 0.6932, + "step": 2968 + }, + { + "epoch": 0.24, + "grad_norm": 2.5744792073861236, + "learning_rate": 8.860587025782215e-06, + "loss": 0.4992, + "step": 2969 + }, + { + "epoch": 0.24, + "grad_norm": 2.43717337203305, + "learning_rate": 8.859745655442012e-06, + "loss": 0.503, + "step": 2970 + }, + { + "epoch": 0.24, + "grad_norm": 5.287648098546942, + "learning_rate": 8.858904014550434e-06, + "loss": 1.156, + "step": 2971 + }, + { + "epoch": 0.24, + "grad_norm": 3.6511168741579314, + "learning_rate": 8.858062103166479e-06, + "loss": 0.817, + "step": 2972 + }, + { + "epoch": 0.24, + "grad_norm": 3.5830778351070243, + "learning_rate": 8.85721992134916e-06, + "loss": 1.1603, + "step": 2973 + }, + { + "epoch": 0.24, + "grad_norm": 2.135773229473065, + "learning_rate": 8.856377469157513e-06, + "loss": 0.3204, + "step": 2974 + }, + { + "epoch": 0.24, + "grad_norm": 2.242353578298476, + "learning_rate": 8.855534746650586e-06, + "loss": 0.4531, + "step": 2975 + }, + { + "epoch": 0.24, + "grad_norm": 4.21442194749558, + "learning_rate": 8.854691753887455e-06, + "loss": 0.9405, + "step": 2976 + }, + { + "epoch": 0.24, + "grad_norm": 3.836501269058079, + "learning_rate": 8.853848490927207e-06, + "loss": 0.8754, + "step": 2977 + }, + { + "epoch": 0.24, + "grad_norm": 3.5125849385426475, + "learning_rate": 8.853004957828952e-06, + "loss": 0.5242, + "step": 2978 + }, + { + "epoch": 0.24, + "grad_norm": 2.7242216451581576, + "learning_rate": 8.852161154651817e-06, + "loss": 0.4244, + "step": 2979 + }, + { + "epoch": 0.24, + "grad_norm": 3.802380953813256, + "learning_rate": 8.851317081454951e-06, + "loss": 0.9277, + "step": 2980 + }, + { + "epoch": 0.24, + "grad_norm": 3.049767752733464, + "learning_rate": 8.850472738297518e-06, + "loss": 0.6299, + "step": 2981 + }, + { + "epoch": 0.24, + "grad_norm": 3.7215201474495654, + "learning_rate": 8.849628125238703e-06, + "loss": 0.7794, + "step": 2982 + }, + { + "epoch": 0.24, + "grad_norm": 4.735496235244078, + "learning_rate": 8.848783242337711e-06, + "loss": 1.0313, + "step": 2983 + }, + { + "epoch": 0.24, + "grad_norm": 3.8805762816472638, + "learning_rate": 8.847938089653763e-06, + "loss": 0.5554, + "step": 2984 + }, + { + "epoch": 0.24, + "grad_norm": 1.2182610153442475, + "learning_rate": 8.847092667246101e-06, + "loss": 0.1831, + "step": 2985 + }, + { + "epoch": 0.24, + "grad_norm": 5.886218186327842, + "learning_rate": 8.846246975173985e-06, + "loss": 1.7733, + "step": 2986 + }, + { + "epoch": 0.24, + "grad_norm": 3.01633070745938, + "learning_rate": 8.845401013496697e-06, + "loss": 0.5916, + "step": 2987 + }, + { + "epoch": 0.24, + "grad_norm": 4.820860752067593, + "learning_rate": 8.84455478227353e-06, + "loss": 1.1315, + "step": 2988 + }, + { + "epoch": 0.24, + "grad_norm": 5.0352922168496095, + "learning_rate": 8.843708281563808e-06, + "loss": 1.539, + "step": 2989 + }, + { + "epoch": 0.24, + "grad_norm": 4.562737207595621, + "learning_rate": 8.842861511426862e-06, + "loss": 0.8672, + "step": 2990 + }, + { + "epoch": 0.24, + "grad_norm": 4.0376963816397025, + "learning_rate": 8.842014471922046e-06, + "loss": 1.1109, + "step": 2991 + }, + { + "epoch": 0.24, + "grad_norm": 5.132946137148141, + "learning_rate": 8.84116716310874e-06, + "loss": 1.2181, + "step": 2992 + }, + { + "epoch": 0.24, + "grad_norm": 1.607972489288798, + "learning_rate": 8.84031958504633e-06, + "loss": 0.3004, + "step": 2993 + }, + { + "epoch": 0.24, + "grad_norm": 2.1621884343551834, + "learning_rate": 8.839471737794232e-06, + "loss": 0.5174, + "step": 2994 + }, + { + "epoch": 0.24, + "grad_norm": 2.320221171032136, + "learning_rate": 8.838623621411871e-06, + "loss": 0.5463, + "step": 2995 + }, + { + "epoch": 0.24, + "grad_norm": 2.7088125465197876, + "learning_rate": 8.837775235958704e-06, + "loss": 0.5495, + "step": 2996 + }, + { + "epoch": 0.24, + "grad_norm": 3.8556123789911116, + "learning_rate": 8.836926581494191e-06, + "loss": 0.8726, + "step": 2997 + }, + { + "epoch": 0.25, + "grad_norm": 5.258349986279272, + "learning_rate": 8.836077658077825e-06, + "loss": 1.0929, + "step": 2998 + }, + { + "epoch": 0.25, + "grad_norm": 4.886977236391661, + "learning_rate": 8.835228465769113e-06, + "loss": 1.1653, + "step": 2999 + }, + { + "epoch": 0.25, + "grad_norm": 3.1490792321452905, + "learning_rate": 8.834379004627572e-06, + "loss": 0.5612, + "step": 3000 + }, + { + "epoch": 0.25, + "grad_norm": 3.537597675669336, + "learning_rate": 8.833529274712751e-06, + "loss": 0.8386, + "step": 3001 + }, + { + "epoch": 0.25, + "grad_norm": 3.7338484716835243, + "learning_rate": 8.832679276084213e-06, + "loss": 0.5664, + "step": 3002 + }, + { + "epoch": 0.25, + "grad_norm": 3.834281332933508, + "learning_rate": 8.831829008801536e-06, + "loss": 0.8881, + "step": 3003 + }, + { + "epoch": 0.25, + "grad_norm": 3.8698804980984405, + "learning_rate": 8.830978472924323e-06, + "loss": 0.8899, + "step": 3004 + }, + { + "epoch": 0.25, + "grad_norm": 3.6275615582211356, + "learning_rate": 8.830127668512191e-06, + "loss": 0.8097, + "step": 3005 + }, + { + "epoch": 0.25, + "grad_norm": 3.176712118230684, + "learning_rate": 8.829276595624778e-06, + "loss": 0.7396, + "step": 3006 + }, + { + "epoch": 0.25, + "grad_norm": 4.450354431030108, + "learning_rate": 8.828425254321742e-06, + "loss": 0.8398, + "step": 3007 + }, + { + "epoch": 0.25, + "grad_norm": 3.842755616061428, + "learning_rate": 8.827573644662756e-06, + "loss": 0.8975, + "step": 3008 + }, + { + "epoch": 0.25, + "grad_norm": 5.422657524771481, + "learning_rate": 8.826721766707514e-06, + "loss": 1.2179, + "step": 3009 + }, + { + "epoch": 0.25, + "grad_norm": 2.9833365920739645, + "learning_rate": 8.825869620515732e-06, + "loss": 0.5358, + "step": 3010 + }, + { + "epoch": 0.25, + "grad_norm": 3.879167488092526, + "learning_rate": 8.825017206147139e-06, + "loss": 0.7018, + "step": 3011 + }, + { + "epoch": 0.25, + "grad_norm": 3.355405824595915, + "learning_rate": 8.824164523661485e-06, + "loss": 0.4947, + "step": 3012 + }, + { + "epoch": 0.25, + "grad_norm": 4.352312149931795, + "learning_rate": 8.823311573118543e-06, + "loss": 1.0323, + "step": 3013 + }, + { + "epoch": 0.25, + "grad_norm": 2.7924492631796953, + "learning_rate": 8.822458354578098e-06, + "loss": 0.8462, + "step": 3014 + }, + { + "epoch": 0.25, + "grad_norm": 4.319453911353385, + "learning_rate": 8.821604868099957e-06, + "loss": 1.1634, + "step": 3015 + }, + { + "epoch": 0.25, + "grad_norm": 2.4177468723900803, + "learning_rate": 8.820751113743948e-06, + "loss": 0.4041, + "step": 3016 + }, + { + "epoch": 0.25, + "grad_norm": 3.6565659236706436, + "learning_rate": 8.819897091569911e-06, + "loss": 0.8552, + "step": 3017 + }, + { + "epoch": 0.25, + "grad_norm": 2.6235116887935432, + "learning_rate": 8.819042801637715e-06, + "loss": 0.5085, + "step": 3018 + }, + { + "epoch": 0.25, + "grad_norm": 3.061247292842075, + "learning_rate": 8.81818824400724e-06, + "loss": 0.8747, + "step": 3019 + }, + { + "epoch": 0.25, + "grad_norm": 5.807535007879584, + "learning_rate": 8.817333418738382e-06, + "loss": 1.2014, + "step": 3020 + }, + { + "epoch": 0.25, + "grad_norm": 4.0249120389592665, + "learning_rate": 8.816478325891067e-06, + "loss": 0.7394, + "step": 3021 + }, + { + "epoch": 0.25, + "grad_norm": 3.8172430367905124, + "learning_rate": 8.815622965525231e-06, + "loss": 0.6459, + "step": 3022 + }, + { + "epoch": 0.25, + "grad_norm": 4.6977222583715434, + "learning_rate": 8.814767337700829e-06, + "loss": 1.14, + "step": 3023 + }, + { + "epoch": 0.25, + "grad_norm": 2.945164214725052, + "learning_rate": 8.81391144247784e-06, + "loss": 0.6226, + "step": 3024 + }, + { + "epoch": 0.25, + "grad_norm": 3.2322993706947263, + "learning_rate": 8.813055279916258e-06, + "loss": 0.5395, + "step": 3025 + }, + { + "epoch": 0.25, + "grad_norm": 4.352276850776771, + "learning_rate": 8.812198850076092e-06, + "loss": 1.2339, + "step": 3026 + }, + { + "epoch": 0.25, + "grad_norm": 2.269106853685705, + "learning_rate": 8.81134215301738e-06, + "loss": 0.3062, + "step": 3027 + }, + { + "epoch": 0.25, + "grad_norm": 4.073530103864496, + "learning_rate": 8.81048518880017e-06, + "loss": 1.0182, + "step": 3028 + }, + { + "epoch": 0.25, + "grad_norm": 4.73778326088764, + "learning_rate": 8.80962795748453e-06, + "loss": 0.8068, + "step": 3029 + }, + { + "epoch": 0.25, + "grad_norm": 3.8308118516350893, + "learning_rate": 8.80877045913055e-06, + "loss": 0.8938, + "step": 3030 + }, + { + "epoch": 0.25, + "grad_norm": 2.9154835262683707, + "learning_rate": 8.80791269379834e-06, + "loss": 0.5688, + "step": 3031 + }, + { + "epoch": 0.25, + "grad_norm": 4.591077187198804, + "learning_rate": 8.807054661548019e-06, + "loss": 1.3252, + "step": 3032 + }, + { + "epoch": 0.25, + "grad_norm": 3.6776801143872526, + "learning_rate": 8.806196362439734e-06, + "loss": 1.0447, + "step": 3033 + }, + { + "epoch": 0.25, + "grad_norm": 4.2212908361446635, + "learning_rate": 8.80533779653365e-06, + "loss": 0.7886, + "step": 3034 + }, + { + "epoch": 0.25, + "grad_norm": 3.1280264324817724, + "learning_rate": 8.804478963889948e-06, + "loss": 0.7596, + "step": 3035 + }, + { + "epoch": 0.25, + "grad_norm": 3.078134161585271, + "learning_rate": 8.803619864568827e-06, + "loss": 0.5037, + "step": 3036 + }, + { + "epoch": 0.25, + "grad_norm": 3.325914291845112, + "learning_rate": 8.802760498630507e-06, + "loss": 0.921, + "step": 3037 + }, + { + "epoch": 0.25, + "grad_norm": 3.079137764634465, + "learning_rate": 8.801900866135225e-06, + "loss": 0.8713, + "step": 3038 + }, + { + "epoch": 0.25, + "grad_norm": 3.2472378005541467, + "learning_rate": 8.80104096714324e-06, + "loss": 0.7648, + "step": 3039 + }, + { + "epoch": 0.25, + "grad_norm": 4.168245278168702, + "learning_rate": 8.800180801714824e-06, + "loss": 1.0513, + "step": 3040 + }, + { + "epoch": 0.25, + "grad_norm": 5.625803081651678, + "learning_rate": 8.799320369910273e-06, + "loss": 1.5099, + "step": 3041 + }, + { + "epoch": 0.25, + "grad_norm": 5.16058040833108, + "learning_rate": 8.798459671789898e-06, + "loss": 0.9635, + "step": 3042 + }, + { + "epoch": 0.25, + "grad_norm": 4.308757760875945, + "learning_rate": 8.79759870741403e-06, + "loss": 0.9304, + "step": 3043 + }, + { + "epoch": 0.25, + "grad_norm": 3.187234819766285, + "learning_rate": 8.796737476843023e-06, + "loss": 0.4738, + "step": 3044 + }, + { + "epoch": 0.25, + "grad_norm": 5.226740566813347, + "learning_rate": 8.795875980137238e-06, + "loss": 0.8863, + "step": 3045 + }, + { + "epoch": 0.25, + "grad_norm": 5.51213011831428, + "learning_rate": 8.79501421735707e-06, + "loss": 1.4908, + "step": 3046 + }, + { + "epoch": 0.25, + "grad_norm": 2.458799182603262, + "learning_rate": 8.79415218856292e-06, + "loss": 0.4516, + "step": 3047 + }, + { + "epoch": 0.25, + "grad_norm": 2.033543389356406, + "learning_rate": 8.793289893815213e-06, + "loss": 0.2532, + "step": 3048 + }, + { + "epoch": 0.25, + "grad_norm": 3.1917699404856252, + "learning_rate": 8.792427333174395e-06, + "loss": 0.7104, + "step": 3049 + }, + { + "epoch": 0.25, + "grad_norm": 2.856959628270824, + "learning_rate": 8.791564506700925e-06, + "loss": 0.6704, + "step": 3050 + }, + { + "epoch": 0.25, + "grad_norm": 5.470122110245426, + "learning_rate": 8.790701414455283e-06, + "loss": 0.9739, + "step": 3051 + }, + { + "epoch": 0.25, + "grad_norm": 4.3464298401882315, + "learning_rate": 8.789838056497969e-06, + "loss": 0.8312, + "step": 3052 + }, + { + "epoch": 0.25, + "grad_norm": 5.007427352617898, + "learning_rate": 8.7889744328895e-06, + "loss": 1.0857, + "step": 3053 + }, + { + "epoch": 0.25, + "grad_norm": 5.173821069060673, + "learning_rate": 8.788110543690415e-06, + "loss": 0.8516, + "step": 3054 + }, + { + "epoch": 0.25, + "grad_norm": 3.9007425602595327, + "learning_rate": 8.787246388961265e-06, + "loss": 1.0306, + "step": 3055 + }, + { + "epoch": 0.25, + "grad_norm": 2.8175766762413605, + "learning_rate": 8.786381968762628e-06, + "loss": 0.344, + "step": 3056 + }, + { + "epoch": 0.25, + "grad_norm": 4.714141728356981, + "learning_rate": 8.785517283155092e-06, + "loss": 1.0833, + "step": 3057 + }, + { + "epoch": 0.25, + "grad_norm": 3.4920510483411547, + "learning_rate": 8.784652332199269e-06, + "loss": 0.9534, + "step": 3058 + }, + { + "epoch": 0.25, + "grad_norm": 3.201057136856587, + "learning_rate": 8.783787115955787e-06, + "loss": 0.6107, + "step": 3059 + }, + { + "epoch": 0.25, + "grad_norm": 2.96564825037283, + "learning_rate": 8.782921634485297e-06, + "loss": 0.5547, + "step": 3060 + }, + { + "epoch": 0.25, + "grad_norm": 3.1921970486894624, + "learning_rate": 8.782055887848462e-06, + "loss": 0.6019, + "step": 3061 + }, + { + "epoch": 0.25, + "grad_norm": 3.4492343149468145, + "learning_rate": 8.78118987610597e-06, + "loss": 0.8935, + "step": 3062 + }, + { + "epoch": 0.25, + "grad_norm": 3.7783810161720743, + "learning_rate": 8.780323599318524e-06, + "loss": 0.8023, + "step": 3063 + }, + { + "epoch": 0.25, + "grad_norm": 3.9544689623425415, + "learning_rate": 8.779457057546844e-06, + "loss": 0.7754, + "step": 3064 + }, + { + "epoch": 0.25, + "grad_norm": 5.225298130462849, + "learning_rate": 8.778590250851674e-06, + "loss": 1.0747, + "step": 3065 + }, + { + "epoch": 0.25, + "grad_norm": 1.2969341365306017, + "learning_rate": 8.777723179293772e-06, + "loss": 0.2045, + "step": 3066 + }, + { + "epoch": 0.25, + "grad_norm": 5.418704482740044, + "learning_rate": 8.776855842933915e-06, + "loss": 0.6294, + "step": 3067 + }, + { + "epoch": 0.25, + "grad_norm": 2.8206797529615666, + "learning_rate": 8.7759882418329e-06, + "loss": 0.6849, + "step": 3068 + }, + { + "epoch": 0.25, + "grad_norm": 2.607499970750639, + "learning_rate": 8.775120376051544e-06, + "loss": 0.5084, + "step": 3069 + }, + { + "epoch": 0.25, + "grad_norm": 6.946479721945572, + "learning_rate": 8.774252245650678e-06, + "loss": 1.2328, + "step": 3070 + }, + { + "epoch": 0.25, + "grad_norm": 5.305464030242199, + "learning_rate": 8.773383850691155e-06, + "loss": 1.336, + "step": 3071 + }, + { + "epoch": 0.25, + "grad_norm": 2.521782900215057, + "learning_rate": 8.772515191233846e-06, + "loss": 0.3972, + "step": 3072 + }, + { + "epoch": 0.25, + "grad_norm": 4.130677228668136, + "learning_rate": 8.771646267339641e-06, + "loss": 0.7908, + "step": 3073 + }, + { + "epoch": 0.25, + "grad_norm": 3.3666015604730046, + "learning_rate": 8.770777079069446e-06, + "loss": 0.9013, + "step": 3074 + }, + { + "epoch": 0.25, + "grad_norm": 3.1417450167135086, + "learning_rate": 8.769907626484189e-06, + "loss": 0.8646, + "step": 3075 + }, + { + "epoch": 0.25, + "grad_norm": 1.1039939450121492, + "learning_rate": 8.769037909644813e-06, + "loss": 0.1571, + "step": 3076 + }, + { + "epoch": 0.25, + "grad_norm": 4.381687631869932, + "learning_rate": 8.768167928612283e-06, + "loss": 0.714, + "step": 3077 + }, + { + "epoch": 0.25, + "grad_norm": 4.028985577124985, + "learning_rate": 8.76729768344758e-06, + "loss": 1.0472, + "step": 3078 + }, + { + "epoch": 0.25, + "grad_norm": 2.4696662022938765, + "learning_rate": 8.766427174211704e-06, + "loss": 0.4696, + "step": 3079 + }, + { + "epoch": 0.25, + "grad_norm": 4.270044585126113, + "learning_rate": 8.765556400965677e-06, + "loss": 0.8139, + "step": 3080 + }, + { + "epoch": 0.25, + "grad_norm": 3.621035925749316, + "learning_rate": 8.764685363770534e-06, + "loss": 1.0925, + "step": 3081 + }, + { + "epoch": 0.25, + "grad_norm": 2.025670248824383, + "learning_rate": 8.763814062687329e-06, + "loss": 0.4951, + "step": 3082 + }, + { + "epoch": 0.25, + "grad_norm": 3.154694676655994, + "learning_rate": 8.762942497777138e-06, + "loss": 0.6662, + "step": 3083 + }, + { + "epoch": 0.25, + "grad_norm": 4.17545478000675, + "learning_rate": 8.762070669101054e-06, + "loss": 0.9638, + "step": 3084 + }, + { + "epoch": 0.25, + "grad_norm": 2.0777362421774552, + "learning_rate": 8.761198576720191e-06, + "loss": 0.3896, + "step": 3085 + }, + { + "epoch": 0.25, + "grad_norm": 2.8485534506189727, + "learning_rate": 8.760326220695677e-06, + "loss": 0.5518, + "step": 3086 + }, + { + "epoch": 0.25, + "grad_norm": 5.4570503971498185, + "learning_rate": 8.759453601088658e-06, + "loss": 0.9435, + "step": 3087 + }, + { + "epoch": 0.25, + "grad_norm": 2.762459736007317, + "learning_rate": 8.758580717960303e-06, + "loss": 0.6803, + "step": 3088 + }, + { + "epoch": 0.25, + "grad_norm": 4.072988444518039, + "learning_rate": 8.757707571371795e-06, + "loss": 1.2577, + "step": 3089 + }, + { + "epoch": 0.25, + "grad_norm": 3.0319532056540286, + "learning_rate": 8.756834161384344e-06, + "loss": 0.6772, + "step": 3090 + }, + { + "epoch": 0.25, + "grad_norm": 2.0531323444970426, + "learning_rate": 8.755960488059167e-06, + "loss": 0.3901, + "step": 3091 + }, + { + "epoch": 0.25, + "grad_norm": 3.8435794741136617, + "learning_rate": 8.755086551457504e-06, + "loss": 1.0897, + "step": 3092 + }, + { + "epoch": 0.25, + "grad_norm": 6.1029982378400565, + "learning_rate": 8.754212351640618e-06, + "loss": 0.9717, + "step": 3093 + }, + { + "epoch": 0.25, + "grad_norm": 4.467093227327127, + "learning_rate": 8.753337888669783e-06, + "loss": 0.7102, + "step": 3094 + }, + { + "epoch": 0.25, + "grad_norm": 4.026197241645423, + "learning_rate": 8.752463162606297e-06, + "loss": 0.6473, + "step": 3095 + }, + { + "epoch": 0.25, + "grad_norm": 4.915327712366391, + "learning_rate": 8.751588173511474e-06, + "loss": 1.1337, + "step": 3096 + }, + { + "epoch": 0.25, + "grad_norm": 4.9382618749318565, + "learning_rate": 8.750712921446647e-06, + "loss": 1.0953, + "step": 3097 + }, + { + "epoch": 0.25, + "grad_norm": 3.2741886506684112, + "learning_rate": 8.749837406473168e-06, + "loss": 0.7295, + "step": 3098 + }, + { + "epoch": 0.25, + "grad_norm": 3.701676756960232, + "learning_rate": 8.748961628652406e-06, + "loss": 0.7643, + "step": 3099 + }, + { + "epoch": 0.25, + "grad_norm": 4.241600773286563, + "learning_rate": 8.74808558804575e-06, + "loss": 1.0252, + "step": 3100 + }, + { + "epoch": 0.25, + "grad_norm": 3.367775399977547, + "learning_rate": 8.747209284714604e-06, + "loss": 0.6707, + "step": 3101 + }, + { + "epoch": 0.25, + "grad_norm": 4.5932625088710335, + "learning_rate": 8.746332718720395e-06, + "loss": 1.2799, + "step": 3102 + }, + { + "epoch": 0.25, + "grad_norm": 3.2022569176912743, + "learning_rate": 8.745455890124567e-06, + "loss": 0.803, + "step": 3103 + }, + { + "epoch": 0.25, + "grad_norm": 3.9448983739541603, + "learning_rate": 8.744578798988584e-06, + "loss": 0.7613, + "step": 3104 + }, + { + "epoch": 0.25, + "grad_norm": 4.885468151504446, + "learning_rate": 8.743701445373922e-06, + "loss": 1.1849, + "step": 3105 + }, + { + "epoch": 0.25, + "grad_norm": 5.584814906714775, + "learning_rate": 8.74282382934208e-06, + "loss": 1.3323, + "step": 3106 + }, + { + "epoch": 0.25, + "grad_norm": 2.7150379669348883, + "learning_rate": 8.741945950954577e-06, + "loss": 0.6995, + "step": 3107 + }, + { + "epoch": 0.25, + "grad_norm": 3.3623560606552503, + "learning_rate": 8.741067810272949e-06, + "loss": 0.4057, + "step": 3108 + }, + { + "epoch": 0.25, + "grad_norm": 3.7438001086287556, + "learning_rate": 8.740189407358747e-06, + "loss": 1.0674, + "step": 3109 + }, + { + "epoch": 0.25, + "grad_norm": 2.386810913733393, + "learning_rate": 8.739310742273546e-06, + "loss": 0.4845, + "step": 3110 + }, + { + "epoch": 0.25, + "grad_norm": 3.908178175809527, + "learning_rate": 8.738431815078937e-06, + "loss": 1.0802, + "step": 3111 + }, + { + "epoch": 0.25, + "grad_norm": 2.133124078726872, + "learning_rate": 8.737552625836525e-06, + "loss": 0.3782, + "step": 3112 + }, + { + "epoch": 0.25, + "grad_norm": 3.1146653144289034, + "learning_rate": 8.73667317460794e-06, + "loss": 0.7028, + "step": 3113 + }, + { + "epoch": 0.25, + "grad_norm": 4.8189764064404965, + "learning_rate": 8.735793461454828e-06, + "loss": 1.0162, + "step": 3114 + }, + { + "epoch": 0.25, + "grad_norm": 6.21556316598591, + "learning_rate": 8.734913486438854e-06, + "loss": 1.6151, + "step": 3115 + }, + { + "epoch": 0.25, + "grad_norm": 4.610434382709641, + "learning_rate": 8.734033249621695e-06, + "loss": 0.6186, + "step": 3116 + }, + { + "epoch": 0.25, + "grad_norm": 3.1136830203208943, + "learning_rate": 8.73315275106506e-06, + "loss": 0.4801, + "step": 3117 + }, + { + "epoch": 0.25, + "grad_norm": 2.8810962077461704, + "learning_rate": 8.732271990830663e-06, + "loss": 0.6271, + "step": 3118 + }, + { + "epoch": 0.25, + "grad_norm": 2.8009657466197377, + "learning_rate": 8.731390968980242e-06, + "loss": 0.5574, + "step": 3119 + }, + { + "epoch": 0.26, + "grad_norm": 3.5106276997584525, + "learning_rate": 8.730509685575552e-06, + "loss": 0.8657, + "step": 3120 + }, + { + "epoch": 0.26, + "grad_norm": 4.70797696118993, + "learning_rate": 8.72962814067837e-06, + "loss": 0.9346, + "step": 3121 + }, + { + "epoch": 0.26, + "grad_norm": 4.1023948752405675, + "learning_rate": 8.728746334350483e-06, + "loss": 1.2921, + "step": 3122 + }, + { + "epoch": 0.26, + "grad_norm": 2.1629856076308487, + "learning_rate": 8.72786426665371e-06, + "loss": 0.2938, + "step": 3123 + }, + { + "epoch": 0.26, + "grad_norm": 4.098831316671046, + "learning_rate": 8.726981937649875e-06, + "loss": 1.134, + "step": 3124 + }, + { + "epoch": 0.26, + "grad_norm": 3.927581384352747, + "learning_rate": 8.726099347400824e-06, + "loss": 0.6837, + "step": 3125 + }, + { + "epoch": 0.26, + "grad_norm": 4.745472576702596, + "learning_rate": 8.725216495968426e-06, + "loss": 1.5312, + "step": 3126 + }, + { + "epoch": 0.26, + "grad_norm": 3.4741765877770887, + "learning_rate": 8.724333383414563e-06, + "loss": 0.7448, + "step": 3127 + }, + { + "epoch": 0.26, + "grad_norm": 3.3200229506989993, + "learning_rate": 8.723450009801139e-06, + "loss": 0.5416, + "step": 3128 + }, + { + "epoch": 0.26, + "grad_norm": 4.1467770194927835, + "learning_rate": 8.722566375190073e-06, + "loss": 0.9865, + "step": 3129 + }, + { + "epoch": 0.26, + "grad_norm": 3.1764658181726544, + "learning_rate": 8.721682479643307e-06, + "loss": 0.679, + "step": 3130 + }, + { + "epoch": 0.26, + "grad_norm": 5.045637342760714, + "learning_rate": 8.720798323222795e-06, + "loss": 1.0718, + "step": 3131 + }, + { + "epoch": 0.26, + "grad_norm": 6.108791828817261, + "learning_rate": 8.719913905990511e-06, + "loss": 1.3495, + "step": 3132 + }, + { + "epoch": 0.26, + "grad_norm": 4.581562699895055, + "learning_rate": 8.719029228008454e-06, + "loss": 0.9609, + "step": 3133 + }, + { + "epoch": 0.26, + "grad_norm": 4.905464031884023, + "learning_rate": 8.718144289338632e-06, + "loss": 1.1061, + "step": 3134 + }, + { + "epoch": 0.26, + "grad_norm": 5.038760696296255, + "learning_rate": 8.717259090043078e-06, + "loss": 1.6582, + "step": 3135 + }, + { + "epoch": 0.26, + "grad_norm": 2.8090502903294605, + "learning_rate": 8.716373630183839e-06, + "loss": 0.5046, + "step": 3136 + }, + { + "epoch": 0.26, + "grad_norm": 3.019665012589466, + "learning_rate": 8.715487909822982e-06, + "loss": 0.7241, + "step": 3137 + }, + { + "epoch": 0.26, + "grad_norm": 4.342658470523938, + "learning_rate": 8.714601929022591e-06, + "loss": 0.7319, + "step": 3138 + }, + { + "epoch": 0.26, + "grad_norm": 3.2930299554632527, + "learning_rate": 8.713715687844772e-06, + "loss": 0.6454, + "step": 3139 + }, + { + "epoch": 0.26, + "grad_norm": 3.7485307418029983, + "learning_rate": 8.712829186351646e-06, + "loss": 0.8893, + "step": 3140 + }, + { + "epoch": 0.26, + "grad_norm": 3.609508442458764, + "learning_rate": 8.711942424605352e-06, + "loss": 0.903, + "step": 3141 + }, + { + "epoch": 0.26, + "grad_norm": 3.4121316309959697, + "learning_rate": 8.711055402668049e-06, + "loss": 0.8174, + "step": 3142 + }, + { + "epoch": 0.26, + "grad_norm": 3.6279918762098293, + "learning_rate": 8.710168120601912e-06, + "loss": 0.6543, + "step": 3143 + }, + { + "epoch": 0.26, + "grad_norm": 4.403460609793017, + "learning_rate": 8.709280578469135e-06, + "loss": 1.2444, + "step": 3144 + }, + { + "epoch": 0.26, + "grad_norm": 2.7833886357786386, + "learning_rate": 8.708392776331935e-06, + "loss": 0.5886, + "step": 3145 + }, + { + "epoch": 0.26, + "grad_norm": 3.3529510963178852, + "learning_rate": 8.707504714252539e-06, + "loss": 0.8862, + "step": 3146 + }, + { + "epoch": 0.26, + "grad_norm": 1.0977778285785627, + "learning_rate": 8.706616392293199e-06, + "loss": 0.1751, + "step": 3147 + }, + { + "epoch": 0.26, + "grad_norm": 6.1552747267107994, + "learning_rate": 8.705727810516179e-06, + "loss": 1.0112, + "step": 3148 + }, + { + "epoch": 0.26, + "grad_norm": 5.988284675168184, + "learning_rate": 8.70483896898377e-06, + "loss": 1.4558, + "step": 3149 + }, + { + "epoch": 0.26, + "grad_norm": 1.146370997195073, + "learning_rate": 8.703949867758269e-06, + "loss": 0.1939, + "step": 3150 + }, + { + "epoch": 0.26, + "grad_norm": 4.518287166379679, + "learning_rate": 8.703060506902004e-06, + "loss": 0.7558, + "step": 3151 + }, + { + "epoch": 0.26, + "grad_norm": 4.253671816177628, + "learning_rate": 8.702170886477312e-06, + "loss": 1.1437, + "step": 3152 + }, + { + "epoch": 0.26, + "grad_norm": 3.8170806354964486, + "learning_rate": 8.701281006546554e-06, + "loss": 0.9219, + "step": 3153 + }, + { + "epoch": 0.26, + "grad_norm": 5.031382707502849, + "learning_rate": 8.700390867172104e-06, + "loss": 1.4201, + "step": 3154 + }, + { + "epoch": 0.26, + "grad_norm": 4.142443824282733, + "learning_rate": 8.699500468416359e-06, + "loss": 0.5597, + "step": 3155 + }, + { + "epoch": 0.26, + "grad_norm": 5.268972095381594, + "learning_rate": 8.698609810341733e-06, + "loss": 1.1795, + "step": 3156 + }, + { + "epoch": 0.26, + "grad_norm": 4.245915832669328, + "learning_rate": 8.697718893010654e-06, + "loss": 1.0289, + "step": 3157 + }, + { + "epoch": 0.26, + "grad_norm": 3.3672476490029912, + "learning_rate": 8.696827716485575e-06, + "loss": 0.6437, + "step": 3158 + }, + { + "epoch": 0.26, + "grad_norm": 3.32873091167265, + "learning_rate": 8.69593628082896e-06, + "loss": 0.7401, + "step": 3159 + }, + { + "epoch": 0.26, + "grad_norm": 4.05422206738384, + "learning_rate": 8.695044586103297e-06, + "loss": 0.7442, + "step": 3160 + }, + { + "epoch": 0.26, + "grad_norm": 3.8301976158364446, + "learning_rate": 8.69415263237109e-06, + "loss": 0.8611, + "step": 3161 + }, + { + "epoch": 0.26, + "grad_norm": 2.4771740486546956, + "learning_rate": 8.693260419694858e-06, + "loss": 0.4881, + "step": 3162 + }, + { + "epoch": 0.26, + "grad_norm": 4.354569593164627, + "learning_rate": 8.692367948137146e-06, + "loss": 0.8609, + "step": 3163 + }, + { + "epoch": 0.26, + "grad_norm": 3.3948040475678503, + "learning_rate": 8.69147521776051e-06, + "loss": 0.7847, + "step": 3164 + }, + { + "epoch": 0.26, + "grad_norm": 3.952371981072322, + "learning_rate": 8.690582228627525e-06, + "loss": 1.2305, + "step": 3165 + }, + { + "epoch": 0.26, + "grad_norm": 2.9741205651636946, + "learning_rate": 8.68968898080079e-06, + "loss": 0.4751, + "step": 3166 + }, + { + "epoch": 0.26, + "grad_norm": 2.558786018301694, + "learning_rate": 8.688795474342913e-06, + "loss": 0.3973, + "step": 3167 + }, + { + "epoch": 0.26, + "grad_norm": 5.3154057654178395, + "learning_rate": 8.687901709316526e-06, + "loss": 1.5187, + "step": 3168 + }, + { + "epoch": 0.26, + "grad_norm": 1.86788167062856, + "learning_rate": 8.68700768578428e-06, + "loss": 0.3617, + "step": 3169 + }, + { + "epoch": 0.26, + "grad_norm": 7.8911866823110985, + "learning_rate": 8.686113403808843e-06, + "loss": 0.997, + "step": 3170 + }, + { + "epoch": 0.26, + "grad_norm": 4.869882483356167, + "learning_rate": 8.685218863452896e-06, + "loss": 1.2136, + "step": 3171 + }, + { + "epoch": 0.26, + "grad_norm": 2.496630214403798, + "learning_rate": 8.684324064779147e-06, + "loss": 0.5059, + "step": 3172 + }, + { + "epoch": 0.26, + "grad_norm": 3.0616387089416395, + "learning_rate": 8.683429007850313e-06, + "loss": 0.6251, + "step": 3173 + }, + { + "epoch": 0.26, + "grad_norm": 3.2089421498861, + "learning_rate": 8.682533692729137e-06, + "loss": 0.7262, + "step": 3174 + }, + { + "epoch": 0.26, + "grad_norm": 3.707763156712534, + "learning_rate": 8.681638119478375e-06, + "loss": 0.8375, + "step": 3175 + }, + { + "epoch": 0.26, + "grad_norm": 2.622146077316868, + "learning_rate": 8.680742288160803e-06, + "loss": 0.5689, + "step": 3176 + }, + { + "epoch": 0.26, + "grad_norm": 4.846788125256505, + "learning_rate": 8.679846198839216e-06, + "loss": 1.0026, + "step": 3177 + }, + { + "epoch": 0.26, + "grad_norm": 3.110639668818378, + "learning_rate": 8.678949851576425e-06, + "loss": 0.5267, + "step": 3178 + }, + { + "epoch": 0.26, + "grad_norm": 1.5945107075834242, + "learning_rate": 8.678053246435261e-06, + "loss": 0.3912, + "step": 3179 + }, + { + "epoch": 0.26, + "grad_norm": 2.0778832980751147, + "learning_rate": 8.677156383478571e-06, + "loss": 0.5534, + "step": 3180 + }, + { + "epoch": 0.26, + "grad_norm": 4.170325352138206, + "learning_rate": 8.676259262769222e-06, + "loss": 1.2933, + "step": 3181 + }, + { + "epoch": 0.26, + "grad_norm": 2.6725061220038397, + "learning_rate": 8.675361884370097e-06, + "loss": 0.5551, + "step": 3182 + }, + { + "epoch": 0.26, + "grad_norm": 3.1924954050152907, + "learning_rate": 8.6744642483441e-06, + "loss": 0.7139, + "step": 3183 + }, + { + "epoch": 0.26, + "grad_norm": 2.9889298140945484, + "learning_rate": 8.67356635475415e-06, + "loss": 0.4657, + "step": 3184 + }, + { + "epoch": 0.26, + "grad_norm": 3.434549748705281, + "learning_rate": 8.672668203663185e-06, + "loss": 0.6748, + "step": 3185 + }, + { + "epoch": 0.26, + "grad_norm": 4.549624402623971, + "learning_rate": 8.671769795134164e-06, + "loss": 1.0978, + "step": 3186 + }, + { + "epoch": 0.26, + "grad_norm": 2.962116235820115, + "learning_rate": 8.67087112923006e-06, + "loss": 0.6544, + "step": 3187 + }, + { + "epoch": 0.26, + "grad_norm": 4.522077343959333, + "learning_rate": 8.669972206013864e-06, + "loss": 1.3273, + "step": 3188 + }, + { + "epoch": 0.26, + "grad_norm": 3.2457425570795433, + "learning_rate": 8.669073025548588e-06, + "loss": 0.7193, + "step": 3189 + }, + { + "epoch": 0.26, + "grad_norm": 3.7196750818422535, + "learning_rate": 8.668173587897261e-06, + "loss": 0.7571, + "step": 3190 + }, + { + "epoch": 0.26, + "grad_norm": 4.3977177249844335, + "learning_rate": 8.667273893122932e-06, + "loss": 0.9859, + "step": 3191 + }, + { + "epoch": 0.26, + "grad_norm": 3.8157013770108463, + "learning_rate": 8.666373941288661e-06, + "loss": 1.2364, + "step": 3192 + }, + { + "epoch": 0.26, + "grad_norm": 3.493289730627912, + "learning_rate": 8.665473732457533e-06, + "loss": 0.9903, + "step": 3193 + }, + { + "epoch": 0.26, + "grad_norm": 3.98117903788595, + "learning_rate": 8.664573266692648e-06, + "loss": 1.0363, + "step": 3194 + }, + { + "epoch": 0.26, + "grad_norm": 4.7274376036047885, + "learning_rate": 8.663672544057126e-06, + "loss": 0.9828, + "step": 3195 + }, + { + "epoch": 0.26, + "grad_norm": 3.84076112404563, + "learning_rate": 8.662771564614102e-06, + "loss": 0.6248, + "step": 3196 + }, + { + "epoch": 0.26, + "grad_norm": 2.890432405279825, + "learning_rate": 8.661870328426734e-06, + "loss": 0.7854, + "step": 3197 + }, + { + "epoch": 0.26, + "grad_norm": 3.554546599450026, + "learning_rate": 8.66096883555819e-06, + "loss": 0.5484, + "step": 3198 + }, + { + "epoch": 0.26, + "grad_norm": 4.383581001219526, + "learning_rate": 8.660067086071665e-06, + "loss": 1.0188, + "step": 3199 + }, + { + "epoch": 0.26, + "grad_norm": 3.5078927578736643, + "learning_rate": 8.659165080030366e-06, + "loss": 0.6404, + "step": 3200 + }, + { + "epoch": 0.26, + "grad_norm": 2.906240123006569, + "learning_rate": 8.658262817497517e-06, + "loss": 0.6517, + "step": 3201 + }, + { + "epoch": 0.26, + "grad_norm": 3.2438586396263625, + "learning_rate": 8.657360298536368e-06, + "loss": 0.3485, + "step": 3202 + }, + { + "epoch": 0.26, + "grad_norm": 3.6901225925717074, + "learning_rate": 8.656457523210178e-06, + "loss": 0.7426, + "step": 3203 + }, + { + "epoch": 0.26, + "grad_norm": 5.135785463653785, + "learning_rate": 8.65555449158223e-06, + "loss": 1.2519, + "step": 3204 + }, + { + "epoch": 0.26, + "grad_norm": 4.197218607509568, + "learning_rate": 8.65465120371582e-06, + "loss": 0.9433, + "step": 3205 + }, + { + "epoch": 0.26, + "grad_norm": 4.414861530515555, + "learning_rate": 8.653747659674265e-06, + "loss": 1.0128, + "step": 3206 + }, + { + "epoch": 0.26, + "grad_norm": 4.7227671150350234, + "learning_rate": 8.6528438595209e-06, + "loss": 0.9727, + "step": 3207 + }, + { + "epoch": 0.26, + "grad_norm": 4.859963917459991, + "learning_rate": 8.651939803319079e-06, + "loss": 0.8443, + "step": 3208 + }, + { + "epoch": 0.26, + "grad_norm": 3.026595312703798, + "learning_rate": 8.651035491132171e-06, + "loss": 0.6055, + "step": 3209 + }, + { + "epoch": 0.26, + "grad_norm": 4.250457816338681, + "learning_rate": 8.650130923023564e-06, + "loss": 0.8802, + "step": 3210 + }, + { + "epoch": 0.26, + "grad_norm": 3.4885457304896335, + "learning_rate": 8.649226099056664e-06, + "loss": 0.8172, + "step": 3211 + }, + { + "epoch": 0.26, + "grad_norm": 2.726001747173857, + "learning_rate": 8.648321019294896e-06, + "loss": 0.5996, + "step": 3212 + }, + { + "epoch": 0.26, + "grad_norm": 4.501357604415449, + "learning_rate": 8.647415683801703e-06, + "loss": 0.8746, + "step": 3213 + }, + { + "epoch": 0.26, + "grad_norm": 3.668083160728214, + "learning_rate": 8.646510092640546e-06, + "loss": 0.7768, + "step": 3214 + }, + { + "epoch": 0.26, + "grad_norm": 5.0515484249074465, + "learning_rate": 8.645604245874898e-06, + "loss": 1.1065, + "step": 3215 + }, + { + "epoch": 0.26, + "grad_norm": 4.044574041841505, + "learning_rate": 8.64469814356826e-06, + "loss": 1.0122, + "step": 3216 + }, + { + "epoch": 0.26, + "grad_norm": 5.206242620906617, + "learning_rate": 8.643791785784143e-06, + "loss": 1.2983, + "step": 3217 + }, + { + "epoch": 0.26, + "grad_norm": 3.788662775672696, + "learning_rate": 8.642885172586079e-06, + "loss": 0.6405, + "step": 3218 + }, + { + "epoch": 0.26, + "grad_norm": 2.771380323773392, + "learning_rate": 8.641978304037619e-06, + "loss": 0.502, + "step": 3219 + }, + { + "epoch": 0.26, + "grad_norm": 3.196793597191887, + "learning_rate": 8.64107118020233e-06, + "loss": 0.4887, + "step": 3220 + }, + { + "epoch": 0.26, + "grad_norm": 4.197875163689073, + "learning_rate": 8.640163801143797e-06, + "loss": 0.8519, + "step": 3221 + }, + { + "epoch": 0.26, + "grad_norm": 2.267636049624269, + "learning_rate": 8.639256166925623e-06, + "loss": 0.4885, + "step": 3222 + }, + { + "epoch": 0.26, + "grad_norm": 4.219769417655235, + "learning_rate": 8.638348277611431e-06, + "loss": 1.0352, + "step": 3223 + }, + { + "epoch": 0.26, + "grad_norm": 3.300249351364667, + "learning_rate": 8.637440133264858e-06, + "loss": 0.8545, + "step": 3224 + }, + { + "epoch": 0.26, + "grad_norm": 4.122445723263637, + "learning_rate": 8.636531733949562e-06, + "loss": 0.9217, + "step": 3225 + }, + { + "epoch": 0.26, + "grad_norm": 3.6354846730557955, + "learning_rate": 8.635623079729218e-06, + "loss": 0.53, + "step": 3226 + }, + { + "epoch": 0.26, + "grad_norm": 3.037083101704012, + "learning_rate": 8.63471417066752e-06, + "loss": 0.5501, + "step": 3227 + }, + { + "epoch": 0.26, + "grad_norm": 4.815458380227235, + "learning_rate": 8.633805006828175e-06, + "loss": 0.7468, + "step": 3228 + }, + { + "epoch": 0.26, + "grad_norm": 3.5727780826689126, + "learning_rate": 8.632895588274913e-06, + "loss": 0.7856, + "step": 3229 + }, + { + "epoch": 0.26, + "grad_norm": 3.2251162220840035, + "learning_rate": 8.63198591507148e-06, + "loss": 0.6999, + "step": 3230 + }, + { + "epoch": 0.26, + "grad_norm": 3.1361439942367513, + "learning_rate": 8.631075987281645e-06, + "loss": 0.7396, + "step": 3231 + }, + { + "epoch": 0.26, + "grad_norm": 3.4658354764042856, + "learning_rate": 8.630165804969181e-06, + "loss": 1.0458, + "step": 3232 + }, + { + "epoch": 0.26, + "grad_norm": 3.45617644112384, + "learning_rate": 8.629255368197894e-06, + "loss": 0.8709, + "step": 3233 + }, + { + "epoch": 0.26, + "grad_norm": 4.986012130449948, + "learning_rate": 8.628344677031602e-06, + "loss": 1.3019, + "step": 3234 + }, + { + "epoch": 0.26, + "grad_norm": 4.327627012636433, + "learning_rate": 8.627433731534138e-06, + "loss": 1.1655, + "step": 3235 + }, + { + "epoch": 0.26, + "grad_norm": 2.5669462906154594, + "learning_rate": 8.626522531769356e-06, + "loss": 0.571, + "step": 3236 + }, + { + "epoch": 0.26, + "grad_norm": 3.7131241591546362, + "learning_rate": 8.625611077801127e-06, + "loss": 0.7743, + "step": 3237 + }, + { + "epoch": 0.26, + "grad_norm": 3.0749190601802012, + "learning_rate": 8.624699369693338e-06, + "loss": 1.0538, + "step": 3238 + }, + { + "epoch": 0.26, + "grad_norm": 3.836550605964662, + "learning_rate": 8.623787407509902e-06, + "loss": 0.7836, + "step": 3239 + }, + { + "epoch": 0.26, + "grad_norm": 4.759929928706973, + "learning_rate": 8.622875191314737e-06, + "loss": 0.6143, + "step": 3240 + }, + { + "epoch": 0.26, + "grad_norm": 2.6827245495757204, + "learning_rate": 8.621962721171789e-06, + "loss": 0.6333, + "step": 3241 + }, + { + "epoch": 0.26, + "grad_norm": 2.017636572954006, + "learning_rate": 8.621049997145016e-06, + "loss": 0.4061, + "step": 3242 + }, + { + "epoch": 0.27, + "grad_norm": 4.046091184963527, + "learning_rate": 8.620137019298397e-06, + "loss": 1.274, + "step": 3243 + }, + { + "epoch": 0.27, + "grad_norm": 3.630981230839866, + "learning_rate": 8.61922378769593e-06, + "loss": 0.8632, + "step": 3244 + }, + { + "epoch": 0.27, + "grad_norm": 3.857283660257242, + "learning_rate": 8.618310302401624e-06, + "loss": 0.8118, + "step": 3245 + }, + { + "epoch": 0.27, + "grad_norm": 3.8626741829376483, + "learning_rate": 8.617396563479512e-06, + "loss": 0.7905, + "step": 3246 + }, + { + "epoch": 0.27, + "grad_norm": 4.421926641303325, + "learning_rate": 8.616482570993648e-06, + "loss": 1.2504, + "step": 3247 + }, + { + "epoch": 0.27, + "grad_norm": 4.546897594665816, + "learning_rate": 8.61556832500809e-06, + "loss": 1.514, + "step": 3248 + }, + { + "epoch": 0.27, + "grad_norm": 5.060131968138134, + "learning_rate": 8.614653825586929e-06, + "loss": 1.1402, + "step": 3249 + }, + { + "epoch": 0.27, + "grad_norm": 3.1715649425333967, + "learning_rate": 8.613739072794268e-06, + "loss": 0.6246, + "step": 3250 + }, + { + "epoch": 0.27, + "grad_norm": 5.404068495336512, + "learning_rate": 8.612824066694223e-06, + "loss": 1.0936, + "step": 3251 + }, + { + "epoch": 0.27, + "grad_norm": 4.484444207292898, + "learning_rate": 8.611908807350934e-06, + "loss": 0.9757, + "step": 3252 + }, + { + "epoch": 0.27, + "grad_norm": 3.423023227701489, + "learning_rate": 8.610993294828557e-06, + "loss": 0.4086, + "step": 3253 + }, + { + "epoch": 0.27, + "grad_norm": 3.349545026797728, + "learning_rate": 8.610077529191265e-06, + "loss": 0.9609, + "step": 3254 + }, + { + "epoch": 0.27, + "grad_norm": 4.389377079746561, + "learning_rate": 8.60916151050325e-06, + "loss": 1.0769, + "step": 3255 + }, + { + "epoch": 0.27, + "grad_norm": 3.202524400456061, + "learning_rate": 8.60824523882872e-06, + "loss": 0.7872, + "step": 3256 + }, + { + "epoch": 0.27, + "grad_norm": 3.505244005208422, + "learning_rate": 8.607328714231901e-06, + "loss": 0.7738, + "step": 3257 + }, + { + "epoch": 0.27, + "grad_norm": 1.492210496875677, + "learning_rate": 8.60641193677704e-06, + "loss": 0.1749, + "step": 3258 + }, + { + "epoch": 0.27, + "grad_norm": 3.211838371020348, + "learning_rate": 8.605494906528395e-06, + "loss": 0.9311, + "step": 3259 + }, + { + "epoch": 0.27, + "grad_norm": 4.423420650706527, + "learning_rate": 8.604577623550249e-06, + "loss": 0.9224, + "step": 3260 + }, + { + "epoch": 0.27, + "grad_norm": 4.158714351825901, + "learning_rate": 8.603660087906901e-06, + "loss": 1.0484, + "step": 3261 + }, + { + "epoch": 0.27, + "grad_norm": 5.102169056933126, + "learning_rate": 8.602742299662662e-06, + "loss": 1.0258, + "step": 3262 + }, + { + "epoch": 0.27, + "grad_norm": 4.499002455252406, + "learning_rate": 8.601824258881868e-06, + "loss": 1.3883, + "step": 3263 + }, + { + "epoch": 0.27, + "grad_norm": 2.4907076006441042, + "learning_rate": 8.600905965628867e-06, + "loss": 0.5417, + "step": 3264 + }, + { + "epoch": 0.27, + "grad_norm": 2.8531022936766153, + "learning_rate": 8.59998741996803e-06, + "loss": 0.877, + "step": 3265 + }, + { + "epoch": 0.27, + "grad_norm": 4.370751097548014, + "learning_rate": 8.599068621963741e-06, + "loss": 1.0476, + "step": 3266 + }, + { + "epoch": 0.27, + "grad_norm": 4.508339914520264, + "learning_rate": 8.598149571680407e-06, + "loss": 1.4139, + "step": 3267 + }, + { + "epoch": 0.27, + "grad_norm": 3.3080389351523425, + "learning_rate": 8.597230269182446e-06, + "loss": 0.7074, + "step": 3268 + }, + { + "epoch": 0.27, + "grad_norm": 3.562432608615904, + "learning_rate": 8.596310714534299e-06, + "loss": 0.4951, + "step": 3269 + }, + { + "epoch": 0.27, + "grad_norm": 4.437605479178292, + "learning_rate": 8.59539090780042e-06, + "loss": 1.112, + "step": 3270 + }, + { + "epoch": 0.27, + "grad_norm": 2.508607042973389, + "learning_rate": 8.59447084904529e-06, + "loss": 0.4823, + "step": 3271 + }, + { + "epoch": 0.27, + "grad_norm": 4.228500440504984, + "learning_rate": 8.593550538333392e-06, + "loss": 1.4294, + "step": 3272 + }, + { + "epoch": 0.27, + "grad_norm": 4.767059649852298, + "learning_rate": 8.592629975729242e-06, + "loss": 0.9167, + "step": 3273 + }, + { + "epoch": 0.27, + "grad_norm": 3.269612391619527, + "learning_rate": 8.591709161297366e-06, + "loss": 0.6488, + "step": 3274 + }, + { + "epoch": 0.27, + "grad_norm": 4.846307992233512, + "learning_rate": 8.59078809510231e-06, + "loss": 0.9216, + "step": 3275 + }, + { + "epoch": 0.27, + "grad_norm": 5.826302142300191, + "learning_rate": 8.589866777208632e-06, + "loss": 1.4859, + "step": 3276 + }, + { + "epoch": 0.27, + "grad_norm": 2.9845686316737927, + "learning_rate": 8.588945207680922e-06, + "loss": 0.5955, + "step": 3277 + }, + { + "epoch": 0.27, + "grad_norm": 6.516434459764189, + "learning_rate": 8.588023386583766e-06, + "loss": 1.227, + "step": 3278 + }, + { + "epoch": 0.27, + "grad_norm": 4.000628683865046, + "learning_rate": 8.587101313981789e-06, + "loss": 0.429, + "step": 3279 + }, + { + "epoch": 0.27, + "grad_norm": 1.149387629155255, + "learning_rate": 8.586178989939622e-06, + "loss": 0.2166, + "step": 3280 + }, + { + "epoch": 0.27, + "grad_norm": 5.674756287794339, + "learning_rate": 8.585256414521912e-06, + "loss": 1.4346, + "step": 3281 + }, + { + "epoch": 0.27, + "grad_norm": 3.814894563094977, + "learning_rate": 8.584333587793334e-06, + "loss": 1.093, + "step": 3282 + }, + { + "epoch": 0.27, + "grad_norm": 3.2099645602466165, + "learning_rate": 8.583410509818567e-06, + "loss": 0.8124, + "step": 3283 + }, + { + "epoch": 0.27, + "grad_norm": 3.7785344671897114, + "learning_rate": 8.58248718066232e-06, + "loss": 0.9876, + "step": 3284 + }, + { + "epoch": 0.27, + "grad_norm": 4.827799117265657, + "learning_rate": 8.581563600389313e-06, + "loss": 1.1371, + "step": 3285 + }, + { + "epoch": 0.27, + "grad_norm": 5.027555913927014, + "learning_rate": 8.580639769064283e-06, + "loss": 0.9466, + "step": 3286 + }, + { + "epoch": 0.27, + "grad_norm": 3.4193502984889474, + "learning_rate": 8.57971568675199e-06, + "loss": 0.5155, + "step": 3287 + }, + { + "epoch": 0.27, + "grad_norm": 4.811872675998102, + "learning_rate": 8.578791353517205e-06, + "loss": 0.6685, + "step": 3288 + }, + { + "epoch": 0.27, + "grad_norm": 4.0447413505751895, + "learning_rate": 8.577866769424722e-06, + "loss": 0.7002, + "step": 3289 + }, + { + "epoch": 0.27, + "grad_norm": 4.67709329214413, + "learning_rate": 8.57694193453935e-06, + "loss": 1.059, + "step": 3290 + }, + { + "epoch": 0.27, + "grad_norm": 3.9639687943958597, + "learning_rate": 8.576016848925914e-06, + "loss": 0.9254, + "step": 3291 + }, + { + "epoch": 0.27, + "grad_norm": 3.756176769055125, + "learning_rate": 8.57509151264926e-06, + "loss": 0.689, + "step": 3292 + }, + { + "epoch": 0.27, + "grad_norm": 3.9754738372845124, + "learning_rate": 8.574165925774249e-06, + "loss": 0.8303, + "step": 3293 + }, + { + "epoch": 0.27, + "grad_norm": 2.7067266196734256, + "learning_rate": 8.573240088365764e-06, + "loss": 0.5972, + "step": 3294 + }, + { + "epoch": 0.27, + "grad_norm": 4.318813164893715, + "learning_rate": 8.572314000488697e-06, + "loss": 0.9459, + "step": 3295 + }, + { + "epoch": 0.27, + "grad_norm": 4.176265666423131, + "learning_rate": 8.571387662207966e-06, + "loss": 1.0976, + "step": 3296 + }, + { + "epoch": 0.27, + "grad_norm": 5.027574899271077, + "learning_rate": 8.570461073588503e-06, + "loss": 1.4062, + "step": 3297 + }, + { + "epoch": 0.27, + "grad_norm": 3.5759335168554047, + "learning_rate": 8.569534234695258e-06, + "loss": 0.7524, + "step": 3298 + }, + { + "epoch": 0.27, + "grad_norm": 2.98753427562043, + "learning_rate": 8.568607145593197e-06, + "loss": 0.5578, + "step": 3299 + }, + { + "epoch": 0.27, + "grad_norm": 4.824454307674025, + "learning_rate": 8.567679806347307e-06, + "loss": 1.1906, + "step": 3300 + }, + { + "epoch": 0.27, + "grad_norm": 3.8811060287963155, + "learning_rate": 8.566752217022587e-06, + "loss": 0.9038, + "step": 3301 + }, + { + "epoch": 0.27, + "grad_norm": 3.1295118533835895, + "learning_rate": 8.56582437768406e-06, + "loss": 0.5656, + "step": 3302 + }, + { + "epoch": 0.27, + "grad_norm": 4.189873115925194, + "learning_rate": 8.564896288396762e-06, + "loss": 0.7928, + "step": 3303 + }, + { + "epoch": 0.27, + "grad_norm": 3.9550707861543692, + "learning_rate": 8.56396794922575e-06, + "loss": 0.9126, + "step": 3304 + }, + { + "epoch": 0.27, + "grad_norm": 4.734076565039832, + "learning_rate": 8.563039360236097e-06, + "loss": 1.3105, + "step": 3305 + }, + { + "epoch": 0.27, + "grad_norm": 3.7954476971871696, + "learning_rate": 8.562110521492888e-06, + "loss": 0.5605, + "step": 3306 + }, + { + "epoch": 0.27, + "grad_norm": 4.913998182189051, + "learning_rate": 8.561181433061234e-06, + "loss": 1.4244, + "step": 3307 + }, + { + "epoch": 0.27, + "grad_norm": 4.009470227880041, + "learning_rate": 8.56025209500626e-06, + "loss": 0.7917, + "step": 3308 + }, + { + "epoch": 0.27, + "grad_norm": 2.665922735380563, + "learning_rate": 8.55932250739311e-06, + "loss": 0.3222, + "step": 3309 + }, + { + "epoch": 0.27, + "grad_norm": 4.7198380743455095, + "learning_rate": 8.558392670286942e-06, + "loss": 1.1366, + "step": 3310 + }, + { + "epoch": 0.27, + "grad_norm": 3.178748702987763, + "learning_rate": 8.557462583752934e-06, + "loss": 0.7711, + "step": 3311 + }, + { + "epoch": 0.27, + "grad_norm": 2.438568163220989, + "learning_rate": 8.55653224785628e-06, + "loss": 0.4922, + "step": 3312 + }, + { + "epoch": 0.27, + "grad_norm": 6.330476916402365, + "learning_rate": 8.555601662662194e-06, + "loss": 1.2982, + "step": 3313 + }, + { + "epoch": 0.27, + "grad_norm": 3.6678253271213106, + "learning_rate": 8.554670828235905e-06, + "loss": 0.5943, + "step": 3314 + }, + { + "epoch": 0.27, + "grad_norm": 2.1007843639528807, + "learning_rate": 8.553739744642662e-06, + "loss": 0.3446, + "step": 3315 + }, + { + "epoch": 0.27, + "grad_norm": 0.5255229994819723, + "learning_rate": 8.552808411947727e-06, + "loss": 0.0892, + "step": 3316 + }, + { + "epoch": 0.27, + "grad_norm": 1.245052826510659, + "learning_rate": 8.551876830216385e-06, + "loss": 0.2156, + "step": 3317 + }, + { + "epoch": 0.27, + "grad_norm": 3.4013826162449154, + "learning_rate": 8.550944999513936e-06, + "loss": 0.8355, + "step": 3318 + }, + { + "epoch": 0.27, + "grad_norm": 1.9915229079069694, + "learning_rate": 8.550012919905696e-06, + "loss": 0.3716, + "step": 3319 + }, + { + "epoch": 0.27, + "grad_norm": 4.177043562454798, + "learning_rate": 8.549080591457e-06, + "loss": 0.8958, + "step": 3320 + }, + { + "epoch": 0.27, + "grad_norm": 2.4423048906076823, + "learning_rate": 8.5481480142332e-06, + "loss": 0.4812, + "step": 3321 + }, + { + "epoch": 0.27, + "grad_norm": 3.207681663736577, + "learning_rate": 8.547215188299664e-06, + "loss": 0.5496, + "step": 3322 + }, + { + "epoch": 0.27, + "grad_norm": 1.3400518150108705, + "learning_rate": 8.546282113721785e-06, + "loss": 0.2233, + "step": 3323 + }, + { + "epoch": 0.27, + "grad_norm": 3.521282854097466, + "learning_rate": 8.54534879056496e-06, + "loss": 0.7035, + "step": 3324 + }, + { + "epoch": 0.27, + "grad_norm": 3.027767335477696, + "learning_rate": 8.544415218894615e-06, + "loss": 0.6919, + "step": 3325 + }, + { + "epoch": 0.27, + "grad_norm": 3.016276645524086, + "learning_rate": 8.543481398776188e-06, + "loss": 0.6924, + "step": 3326 + }, + { + "epoch": 0.27, + "grad_norm": 4.264797794444411, + "learning_rate": 8.542547330275138e-06, + "loss": 0.9471, + "step": 3327 + }, + { + "epoch": 0.27, + "grad_norm": 3.2930852371271047, + "learning_rate": 8.541613013456935e-06, + "loss": 0.8721, + "step": 3328 + }, + { + "epoch": 0.27, + "grad_norm": 3.3653326087802973, + "learning_rate": 8.540678448387075e-06, + "loss": 0.7226, + "step": 3329 + }, + { + "epoch": 0.27, + "grad_norm": 5.672402910371198, + "learning_rate": 8.539743635131064e-06, + "loss": 1.0184, + "step": 3330 + }, + { + "epoch": 0.27, + "grad_norm": 5.302459581678297, + "learning_rate": 8.538808573754428e-06, + "loss": 1.2515, + "step": 3331 + }, + { + "epoch": 0.27, + "grad_norm": 3.9777333277068743, + "learning_rate": 8.537873264322714e-06, + "loss": 0.8424, + "step": 3332 + }, + { + "epoch": 0.27, + "grad_norm": 3.2118263151992004, + "learning_rate": 8.53693770690148e-06, + "loss": 0.736, + "step": 3333 + }, + { + "epoch": 0.27, + "grad_norm": 3.505969676427309, + "learning_rate": 8.536001901556306e-06, + "loss": 0.6689, + "step": 3334 + }, + { + "epoch": 0.27, + "grad_norm": 3.2270756392293136, + "learning_rate": 8.535065848352785e-06, + "loss": 0.6879, + "step": 3335 + }, + { + "epoch": 0.27, + "grad_norm": 3.698836210287015, + "learning_rate": 8.534129547356536e-06, + "loss": 0.6287, + "step": 3336 + }, + { + "epoch": 0.27, + "grad_norm": 4.961390143155534, + "learning_rate": 8.533192998633184e-06, + "loss": 0.9411, + "step": 3337 + }, + { + "epoch": 0.27, + "grad_norm": 4.4226150856526845, + "learning_rate": 8.532256202248382e-06, + "loss": 1.0278, + "step": 3338 + }, + { + "epoch": 0.27, + "grad_norm": 1.1314777414437935, + "learning_rate": 8.53131915826779e-06, + "loss": 0.3731, + "step": 3339 + }, + { + "epoch": 0.27, + "grad_norm": 3.7013183433744246, + "learning_rate": 8.530381866757096e-06, + "loss": 0.6113, + "step": 3340 + }, + { + "epoch": 0.27, + "grad_norm": 4.1012435037945, + "learning_rate": 8.529444327781995e-06, + "loss": 1.0921, + "step": 3341 + }, + { + "epoch": 0.27, + "grad_norm": 2.828225586633404, + "learning_rate": 8.528506541408208e-06, + "loss": 0.471, + "step": 3342 + }, + { + "epoch": 0.27, + "grad_norm": 4.270768950776447, + "learning_rate": 8.527568507701467e-06, + "loss": 0.6118, + "step": 3343 + }, + { + "epoch": 0.27, + "grad_norm": 4.133735752099045, + "learning_rate": 8.526630226727528e-06, + "loss": 0.8534, + "step": 3344 + }, + { + "epoch": 0.27, + "grad_norm": 6.2286250088537605, + "learning_rate": 8.525691698552157e-06, + "loss": 1.2391, + "step": 3345 + }, + { + "epoch": 0.27, + "grad_norm": 3.674507236090259, + "learning_rate": 8.52475292324114e-06, + "loss": 0.9262, + "step": 3346 + }, + { + "epoch": 0.27, + "grad_norm": 3.251410586765895, + "learning_rate": 8.523813900860285e-06, + "loss": 0.7037, + "step": 3347 + }, + { + "epoch": 0.27, + "grad_norm": 4.489785459479057, + "learning_rate": 8.52287463147541e-06, + "loss": 0.8188, + "step": 3348 + }, + { + "epoch": 0.27, + "grad_norm": 4.275572233721359, + "learning_rate": 8.521935115152357e-06, + "loss": 0.8396, + "step": 3349 + }, + { + "epoch": 0.27, + "grad_norm": 2.0843978814099873, + "learning_rate": 8.520995351956977e-06, + "loss": 0.416, + "step": 3350 + }, + { + "epoch": 0.27, + "grad_norm": 5.3651970008548, + "learning_rate": 8.52005534195515e-06, + "loss": 1.806, + "step": 3351 + }, + { + "epoch": 0.27, + "grad_norm": 2.6903373871188485, + "learning_rate": 8.51911508521276e-06, + "loss": 0.562, + "step": 3352 + }, + { + "epoch": 0.27, + "grad_norm": 3.201406982156682, + "learning_rate": 8.518174581795718e-06, + "loss": 0.6777, + "step": 3353 + }, + { + "epoch": 0.27, + "grad_norm": 2.8101727232527187, + "learning_rate": 8.517233831769951e-06, + "loss": 0.641, + "step": 3354 + }, + { + "epoch": 0.27, + "grad_norm": 3.201730798911782, + "learning_rate": 8.516292835201396e-06, + "loss": 0.7662, + "step": 3355 + }, + { + "epoch": 0.27, + "grad_norm": 4.016548229239503, + "learning_rate": 8.51535159215602e-06, + "loss": 0.9928, + "step": 3356 + }, + { + "epoch": 0.27, + "grad_norm": 2.594045982399574, + "learning_rate": 8.514410102699794e-06, + "loss": 0.3828, + "step": 3357 + }, + { + "epoch": 0.27, + "grad_norm": 5.063304614421123, + "learning_rate": 8.513468366898714e-06, + "loss": 0.965, + "step": 3358 + }, + { + "epoch": 0.27, + "grad_norm": 5.613244946794997, + "learning_rate": 8.512526384818794e-06, + "loss": 0.9638, + "step": 3359 + }, + { + "epoch": 0.27, + "grad_norm": 4.133040387545648, + "learning_rate": 8.511584156526059e-06, + "loss": 0.8531, + "step": 3360 + }, + { + "epoch": 0.27, + "grad_norm": 3.5220936512990617, + "learning_rate": 8.510641682086557e-06, + "loss": 0.782, + "step": 3361 + }, + { + "epoch": 0.27, + "grad_norm": 3.2675450795257666, + "learning_rate": 8.509698961566356e-06, + "loss": 0.6078, + "step": 3362 + }, + { + "epoch": 0.27, + "grad_norm": 5.7808428333412145, + "learning_rate": 8.508755995031527e-06, + "loss": 0.9535, + "step": 3363 + }, + { + "epoch": 0.27, + "grad_norm": 5.202309335801023, + "learning_rate": 8.507812782548174e-06, + "loss": 1.0453, + "step": 3364 + }, + { + "epoch": 0.28, + "grad_norm": 4.494824079311042, + "learning_rate": 8.506869324182411e-06, + "loss": 1.143, + "step": 3365 + }, + { + "epoch": 0.28, + "grad_norm": 3.022960989901094, + "learning_rate": 8.505925620000373e-06, + "loss": 0.625, + "step": 3366 + }, + { + "epoch": 0.28, + "grad_norm": 5.417571085131394, + "learning_rate": 8.504981670068204e-06, + "loss": 1.2577, + "step": 3367 + }, + { + "epoch": 0.28, + "grad_norm": 5.54414024670515, + "learning_rate": 8.504037474452073e-06, + "loss": 1.224, + "step": 3368 + }, + { + "epoch": 0.28, + "grad_norm": 3.328681112964479, + "learning_rate": 8.503093033218168e-06, + "loss": 0.7479, + "step": 3369 + }, + { + "epoch": 0.28, + "grad_norm": 2.581582710005775, + "learning_rate": 8.502148346432683e-06, + "loss": 0.6942, + "step": 3370 + }, + { + "epoch": 0.28, + "grad_norm": 3.630141365785073, + "learning_rate": 8.501203414161844e-06, + "loss": 0.8597, + "step": 3371 + }, + { + "epoch": 0.28, + "grad_norm": 2.4991407245673773, + "learning_rate": 8.50025823647188e-06, + "loss": 0.5979, + "step": 3372 + }, + { + "epoch": 0.28, + "grad_norm": 3.313086390452513, + "learning_rate": 8.499312813429047e-06, + "loss": 0.8321, + "step": 3373 + }, + { + "epoch": 0.28, + "grad_norm": 2.098169768904149, + "learning_rate": 8.498367145099618e-06, + "loss": 0.3456, + "step": 3374 + }, + { + "epoch": 0.28, + "grad_norm": 4.49992191921518, + "learning_rate": 8.497421231549873e-06, + "loss": 1.3791, + "step": 3375 + }, + { + "epoch": 0.28, + "grad_norm": 3.2400728584587615, + "learning_rate": 8.496475072846125e-06, + "loss": 0.5226, + "step": 3376 + }, + { + "epoch": 0.28, + "grad_norm": 3.5645603139882116, + "learning_rate": 8.495528669054688e-06, + "loss": 0.579, + "step": 3377 + }, + { + "epoch": 0.28, + "grad_norm": 4.016893323985373, + "learning_rate": 8.494582020241905e-06, + "loss": 1.2116, + "step": 3378 + }, + { + "epoch": 0.28, + "grad_norm": 3.0975535418531996, + "learning_rate": 8.493635126474128e-06, + "loss": 0.8675, + "step": 3379 + }, + { + "epoch": 0.28, + "grad_norm": 4.967808603430733, + "learning_rate": 8.492687987817736e-06, + "loss": 1.4063, + "step": 3380 + }, + { + "epoch": 0.28, + "grad_norm": 3.069151077502174, + "learning_rate": 8.491740604339115e-06, + "loss": 0.5528, + "step": 3381 + }, + { + "epoch": 0.28, + "grad_norm": 4.29931499894164, + "learning_rate": 8.490792976104676e-06, + "loss": 0.8807, + "step": 3382 + }, + { + "epoch": 0.28, + "grad_norm": 4.600143238803147, + "learning_rate": 8.48984510318084e-06, + "loss": 0.886, + "step": 3383 + }, + { + "epoch": 0.28, + "grad_norm": 3.7937467755769947, + "learning_rate": 8.48889698563405e-06, + "loss": 1.1959, + "step": 3384 + }, + { + "epoch": 0.28, + "grad_norm": 2.2159369615732913, + "learning_rate": 8.487948623530765e-06, + "loss": 0.3218, + "step": 3385 + }, + { + "epoch": 0.28, + "grad_norm": 3.567853307581026, + "learning_rate": 8.48700001693746e-06, + "loss": 0.6045, + "step": 3386 + }, + { + "epoch": 0.28, + "grad_norm": 5.204283266026797, + "learning_rate": 8.48605116592063e-06, + "loss": 0.8285, + "step": 3387 + }, + { + "epoch": 0.28, + "grad_norm": 4.522280887836711, + "learning_rate": 8.485102070546786e-06, + "loss": 0.9568, + "step": 3388 + }, + { + "epoch": 0.28, + "grad_norm": 3.688467153997024, + "learning_rate": 8.484152730882453e-06, + "loss": 0.5945, + "step": 3389 + }, + { + "epoch": 0.28, + "grad_norm": 3.046191178978568, + "learning_rate": 8.483203146994174e-06, + "loss": 0.4337, + "step": 3390 + }, + { + "epoch": 0.28, + "grad_norm": 4.301048854694765, + "learning_rate": 8.482253318948516e-06, + "loss": 1.2326, + "step": 3391 + }, + { + "epoch": 0.28, + "grad_norm": 3.0677386148310584, + "learning_rate": 8.481303246812056e-06, + "loss": 0.6228, + "step": 3392 + }, + { + "epoch": 0.28, + "grad_norm": 3.9716547402830846, + "learning_rate": 8.480352930651387e-06, + "loss": 0.9784, + "step": 3393 + }, + { + "epoch": 0.28, + "grad_norm": 4.63926742491477, + "learning_rate": 8.479402370533127e-06, + "loss": 0.9952, + "step": 3394 + }, + { + "epoch": 0.28, + "grad_norm": 3.2588996439244484, + "learning_rate": 8.478451566523902e-06, + "loss": 0.7426, + "step": 3395 + }, + { + "epoch": 0.28, + "grad_norm": 2.91189519136294, + "learning_rate": 8.47750051869036e-06, + "loss": 0.6852, + "step": 3396 + }, + { + "epoch": 0.28, + "grad_norm": 4.392249566403095, + "learning_rate": 8.476549227099164e-06, + "loss": 0.7342, + "step": 3397 + }, + { + "epoch": 0.28, + "grad_norm": 2.808611635862418, + "learning_rate": 8.475597691817e-06, + "loss": 0.7754, + "step": 3398 + }, + { + "epoch": 0.28, + "grad_norm": 3.1065486891336858, + "learning_rate": 8.474645912910562e-06, + "loss": 0.7521, + "step": 3399 + }, + { + "epoch": 0.28, + "grad_norm": 4.844818704294859, + "learning_rate": 8.473693890446568e-06, + "loss": 1.1064, + "step": 3400 + }, + { + "epoch": 0.28, + "grad_norm": 3.4964248348112372, + "learning_rate": 8.472741624491749e-06, + "loss": 0.829, + "step": 3401 + }, + { + "epoch": 0.28, + "grad_norm": 5.813746484461821, + "learning_rate": 8.471789115112857e-06, + "loss": 1.6135, + "step": 3402 + }, + { + "epoch": 0.28, + "grad_norm": 6.04943564553509, + "learning_rate": 8.470836362376657e-06, + "loss": 1.5515, + "step": 3403 + }, + { + "epoch": 0.28, + "grad_norm": 2.974791909097457, + "learning_rate": 8.469883366349932e-06, + "loss": 0.6906, + "step": 3404 + }, + { + "epoch": 0.28, + "grad_norm": 4.369584392055278, + "learning_rate": 8.468930127099486e-06, + "loss": 1.2062, + "step": 3405 + }, + { + "epoch": 0.28, + "grad_norm": 3.0886044700598396, + "learning_rate": 8.467976644692131e-06, + "loss": 0.7033, + "step": 3406 + }, + { + "epoch": 0.28, + "grad_norm": 4.00103892834991, + "learning_rate": 8.46702291919471e-06, + "loss": 0.9827, + "step": 3407 + }, + { + "epoch": 0.28, + "grad_norm": 5.134494477289156, + "learning_rate": 8.466068950674068e-06, + "loss": 1.0653, + "step": 3408 + }, + { + "epoch": 0.28, + "grad_norm": 4.323040388604154, + "learning_rate": 8.465114739197079e-06, + "loss": 1.0076, + "step": 3409 + }, + { + "epoch": 0.28, + "grad_norm": 4.5968184604234805, + "learning_rate": 8.464160284830627e-06, + "loss": 1.2016, + "step": 3410 + }, + { + "epoch": 0.28, + "grad_norm": 2.7621565259260055, + "learning_rate": 8.463205587641614e-06, + "loss": 0.2877, + "step": 3411 + }, + { + "epoch": 0.28, + "grad_norm": 4.938771720970346, + "learning_rate": 8.462250647696962e-06, + "loss": 1.5023, + "step": 3412 + }, + { + "epoch": 0.28, + "grad_norm": 2.9458559677127023, + "learning_rate": 8.461295465063605e-06, + "loss": 0.6964, + "step": 3413 + }, + { + "epoch": 0.28, + "grad_norm": 3.4891976793370416, + "learning_rate": 8.460340039808504e-06, + "loss": 1.0164, + "step": 3414 + }, + { + "epoch": 0.28, + "grad_norm": 3.8104414866522105, + "learning_rate": 8.459384371998622e-06, + "loss": 0.7668, + "step": 3415 + }, + { + "epoch": 0.28, + "grad_norm": 3.732488335406169, + "learning_rate": 8.458428461700951e-06, + "loss": 0.841, + "step": 3416 + }, + { + "epoch": 0.28, + "grad_norm": 2.7949169717721754, + "learning_rate": 8.457472308982498e-06, + "loss": 0.6091, + "step": 3417 + }, + { + "epoch": 0.28, + "grad_norm": 2.3442232976274724, + "learning_rate": 8.456515913910282e-06, + "loss": 0.4215, + "step": 3418 + }, + { + "epoch": 0.28, + "grad_norm": 2.660297546667241, + "learning_rate": 8.455559276551343e-06, + "loss": 0.3241, + "step": 3419 + }, + { + "epoch": 0.28, + "grad_norm": 3.1989636832644086, + "learning_rate": 8.454602396972737e-06, + "loss": 0.6127, + "step": 3420 + }, + { + "epoch": 0.28, + "grad_norm": 2.3692484321829754, + "learning_rate": 8.453645275241538e-06, + "loss": 0.4041, + "step": 3421 + }, + { + "epoch": 0.28, + "grad_norm": 3.5031828824437254, + "learning_rate": 8.452687911424836e-06, + "loss": 0.8936, + "step": 3422 + }, + { + "epoch": 0.28, + "grad_norm": 3.276633330790473, + "learning_rate": 8.451730305589737e-06, + "loss": 0.5023, + "step": 3423 + }, + { + "epoch": 0.28, + "grad_norm": 4.379893635875519, + "learning_rate": 8.450772457803365e-06, + "loss": 0.5751, + "step": 3424 + }, + { + "epoch": 0.28, + "grad_norm": 5.221177909343238, + "learning_rate": 8.449814368132862e-06, + "loss": 1.0798, + "step": 3425 + }, + { + "epoch": 0.28, + "grad_norm": 4.1564530399167605, + "learning_rate": 8.448856036645386e-06, + "loss": 1.1126, + "step": 3426 + }, + { + "epoch": 0.28, + "grad_norm": 5.136210138891469, + "learning_rate": 8.447897463408113e-06, + "loss": 1.3091, + "step": 3427 + }, + { + "epoch": 0.28, + "grad_norm": 4.802072054536583, + "learning_rate": 8.44693864848823e-06, + "loss": 0.9722, + "step": 3428 + }, + { + "epoch": 0.28, + "grad_norm": 5.438403174790669, + "learning_rate": 8.44597959195295e-06, + "loss": 1.4541, + "step": 3429 + }, + { + "epoch": 0.28, + "grad_norm": 3.858668859920182, + "learning_rate": 8.445020293869497e-06, + "loss": 0.7488, + "step": 3430 + }, + { + "epoch": 0.28, + "grad_norm": 3.384226705250362, + "learning_rate": 8.444060754305115e-06, + "loss": 0.6044, + "step": 3431 + }, + { + "epoch": 0.28, + "grad_norm": 4.210435131363074, + "learning_rate": 8.443100973327063e-06, + "loss": 0.8752, + "step": 3432 + }, + { + "epoch": 0.28, + "grad_norm": 4.611685456780522, + "learning_rate": 8.442140951002616e-06, + "loss": 0.7336, + "step": 3433 + }, + { + "epoch": 0.28, + "grad_norm": 1.968448930951749, + "learning_rate": 8.441180687399068e-06, + "loss": 0.3585, + "step": 3434 + }, + { + "epoch": 0.28, + "grad_norm": 2.1026752437935223, + "learning_rate": 8.440220182583731e-06, + "loss": 0.4757, + "step": 3435 + }, + { + "epoch": 0.28, + "grad_norm": 3.6772016795236184, + "learning_rate": 8.439259436623933e-06, + "loss": 0.8092, + "step": 3436 + }, + { + "epoch": 0.28, + "grad_norm": 2.929545885760317, + "learning_rate": 8.438298449587014e-06, + "loss": 0.4462, + "step": 3437 + }, + { + "epoch": 0.28, + "grad_norm": 2.8387804784920716, + "learning_rate": 8.437337221540337e-06, + "loss": 0.638, + "step": 3438 + }, + { + "epoch": 0.28, + "grad_norm": 6.15289257004559, + "learning_rate": 8.436375752551282e-06, + "loss": 1.1346, + "step": 3439 + }, + { + "epoch": 0.28, + "grad_norm": 3.3656847914305503, + "learning_rate": 8.43541404268724e-06, + "loss": 0.7933, + "step": 3440 + }, + { + "epoch": 0.28, + "grad_norm": 2.8082542811817923, + "learning_rate": 8.434452092015624e-06, + "loss": 0.6105, + "step": 3441 + }, + { + "epoch": 0.28, + "grad_norm": 4.434561121019493, + "learning_rate": 8.433489900603866e-06, + "loss": 1.2958, + "step": 3442 + }, + { + "epoch": 0.28, + "grad_norm": 5.036759914134969, + "learning_rate": 8.432527468519405e-06, + "loss": 1.3316, + "step": 3443 + }, + { + "epoch": 0.28, + "grad_norm": 3.488323094554122, + "learning_rate": 8.43156479582971e-06, + "loss": 0.746, + "step": 3444 + }, + { + "epoch": 0.28, + "grad_norm": 3.2517439549549003, + "learning_rate": 8.430601882602256e-06, + "loss": 0.6266, + "step": 3445 + }, + { + "epoch": 0.28, + "grad_norm": 4.502813425517545, + "learning_rate": 8.429638728904538e-06, + "loss": 1.0791, + "step": 3446 + }, + { + "epoch": 0.28, + "grad_norm": 5.22014969866628, + "learning_rate": 8.428675334804073e-06, + "loss": 1.3465, + "step": 3447 + }, + { + "epoch": 0.28, + "grad_norm": 3.552856686401315, + "learning_rate": 8.42771170036839e-06, + "loss": 0.4467, + "step": 3448 + }, + { + "epoch": 0.28, + "grad_norm": 3.423996933534739, + "learning_rate": 8.426747825665032e-06, + "loss": 0.6019, + "step": 3449 + }, + { + "epoch": 0.28, + "grad_norm": 3.81325929155305, + "learning_rate": 8.425783710761565e-06, + "loss": 0.8259, + "step": 3450 + }, + { + "epoch": 0.28, + "grad_norm": 5.4597466616695405, + "learning_rate": 8.42481935572557e-06, + "loss": 1.0784, + "step": 3451 + }, + { + "epoch": 0.28, + "grad_norm": 5.190375688125212, + "learning_rate": 8.423854760624641e-06, + "loss": 1.4251, + "step": 3452 + }, + { + "epoch": 0.28, + "grad_norm": 4.350206830956461, + "learning_rate": 8.422889925526396e-06, + "loss": 1.0581, + "step": 3453 + }, + { + "epoch": 0.28, + "grad_norm": 2.675596277735707, + "learning_rate": 8.421924850498464e-06, + "loss": 0.318, + "step": 3454 + }, + { + "epoch": 0.28, + "grad_norm": 4.567263639604266, + "learning_rate": 8.420959535608491e-06, + "loss": 0.8752, + "step": 3455 + }, + { + "epoch": 0.28, + "grad_norm": 4.188249554694329, + "learning_rate": 8.419993980924141e-06, + "loss": 1.2736, + "step": 3456 + }, + { + "epoch": 0.28, + "grad_norm": 4.804489030767843, + "learning_rate": 8.4190281865131e-06, + "loss": 1.0392, + "step": 3457 + }, + { + "epoch": 0.28, + "grad_norm": 3.910357957576581, + "learning_rate": 8.41806215244306e-06, + "loss": 0.9776, + "step": 3458 + }, + { + "epoch": 0.28, + "grad_norm": 4.0388462868255, + "learning_rate": 8.417095878781742e-06, + "loss": 1.0865, + "step": 3459 + }, + { + "epoch": 0.28, + "grad_norm": 3.290045246104023, + "learning_rate": 8.41612936559687e-06, + "loss": 0.4962, + "step": 3460 + }, + { + "epoch": 0.28, + "grad_norm": 4.627539575503532, + "learning_rate": 8.4151626129562e-06, + "loss": 0.6425, + "step": 3461 + }, + { + "epoch": 0.28, + "grad_norm": 4.649946228294108, + "learning_rate": 8.414195620927491e-06, + "loss": 1.0886, + "step": 3462 + }, + { + "epoch": 0.28, + "grad_norm": 4.51517747858038, + "learning_rate": 8.41322838957853e-06, + "loss": 0.8784, + "step": 3463 + }, + { + "epoch": 0.28, + "grad_norm": 4.330803877844067, + "learning_rate": 8.412260918977112e-06, + "loss": 0.8254, + "step": 3464 + }, + { + "epoch": 0.28, + "grad_norm": 1.3595172213056332, + "learning_rate": 8.411293209191054e-06, + "loss": 0.1939, + "step": 3465 + }, + { + "epoch": 0.28, + "grad_norm": 3.892873810491101, + "learning_rate": 8.410325260288188e-06, + "loss": 1.0629, + "step": 3466 + }, + { + "epoch": 0.28, + "grad_norm": 3.569474704149108, + "learning_rate": 8.409357072336363e-06, + "loss": 0.5773, + "step": 3467 + }, + { + "epoch": 0.28, + "grad_norm": 3.545847930059436, + "learning_rate": 8.408388645403445e-06, + "loss": 0.6537, + "step": 3468 + }, + { + "epoch": 0.28, + "grad_norm": 5.18436426463238, + "learning_rate": 8.407419979557317e-06, + "loss": 1.4713, + "step": 3469 + }, + { + "epoch": 0.28, + "grad_norm": 4.442562603742148, + "learning_rate": 8.406451074865875e-06, + "loss": 1.0713, + "step": 3470 + }, + { + "epoch": 0.28, + "grad_norm": 3.6746294802722166, + "learning_rate": 8.405481931397042e-06, + "loss": 0.8966, + "step": 3471 + }, + { + "epoch": 0.28, + "grad_norm": 3.414364511955644, + "learning_rate": 8.404512549218741e-06, + "loss": 0.6344, + "step": 3472 + }, + { + "epoch": 0.28, + "grad_norm": 2.5411981015933045, + "learning_rate": 8.40354292839893e-06, + "loss": 0.5917, + "step": 3473 + }, + { + "epoch": 0.28, + "grad_norm": 2.4104669592059573, + "learning_rate": 8.402573069005573e-06, + "loss": 0.3866, + "step": 3474 + }, + { + "epoch": 0.28, + "grad_norm": 2.8931161075128085, + "learning_rate": 8.40160297110665e-06, + "loss": 0.559, + "step": 3475 + }, + { + "epoch": 0.28, + "grad_norm": 2.6508457374642984, + "learning_rate": 8.400632634770163e-06, + "loss": 0.5797, + "step": 3476 + }, + { + "epoch": 0.28, + "grad_norm": 2.405419805992831, + "learning_rate": 8.399662060064126e-06, + "loss": 0.4704, + "step": 3477 + }, + { + "epoch": 0.28, + "grad_norm": 3.5565941443329865, + "learning_rate": 8.398691247056577e-06, + "loss": 0.8638, + "step": 3478 + }, + { + "epoch": 0.28, + "grad_norm": 4.3152786905128035, + "learning_rate": 8.397720195815561e-06, + "loss": 0.9719, + "step": 3479 + }, + { + "epoch": 0.28, + "grad_norm": 4.155128845995398, + "learning_rate": 8.396748906409147e-06, + "loss": 1.1606, + "step": 3480 + }, + { + "epoch": 0.28, + "grad_norm": 2.602097103846587, + "learning_rate": 8.395777378905417e-06, + "loss": 0.6643, + "step": 3481 + }, + { + "epoch": 0.28, + "grad_norm": 3.734829924575268, + "learning_rate": 8.394805613372471e-06, + "loss": 0.6499, + "step": 3482 + }, + { + "epoch": 0.28, + "grad_norm": 3.0622166922148346, + "learning_rate": 8.393833609878426e-06, + "loss": 0.5855, + "step": 3483 + }, + { + "epoch": 0.28, + "grad_norm": 3.236309198802574, + "learning_rate": 8.392861368491415e-06, + "loss": 0.7262, + "step": 3484 + }, + { + "epoch": 0.28, + "grad_norm": 3.706892030666024, + "learning_rate": 8.391888889279589e-06, + "loss": 0.8424, + "step": 3485 + }, + { + "epoch": 0.28, + "grad_norm": 4.003666146621652, + "learning_rate": 8.390916172311113e-06, + "loss": 0.8174, + "step": 3486 + }, + { + "epoch": 0.29, + "grad_norm": 2.7056523946650533, + "learning_rate": 8.389943217654169e-06, + "loss": 0.687, + "step": 3487 + }, + { + "epoch": 0.29, + "grad_norm": 4.099632712182386, + "learning_rate": 8.38897002537696e-06, + "loss": 1.0527, + "step": 3488 + }, + { + "epoch": 0.29, + "grad_norm": 3.0084346188837983, + "learning_rate": 8.387996595547699e-06, + "loss": 0.7832, + "step": 3489 + }, + { + "epoch": 0.29, + "grad_norm": 3.1426806013722053, + "learning_rate": 8.387022928234623e-06, + "loss": 0.7562, + "step": 3490 + }, + { + "epoch": 0.29, + "grad_norm": 3.9478691188997646, + "learning_rate": 8.38604902350598e-06, + "loss": 0.7023, + "step": 3491 + }, + { + "epoch": 0.29, + "grad_norm": 3.9094567058010594, + "learning_rate": 8.385074881430036e-06, + "loss": 0.8844, + "step": 3492 + }, + { + "epoch": 0.29, + "grad_norm": 4.2910983396938285, + "learning_rate": 8.384100502075076e-06, + "loss": 1.015, + "step": 3493 + }, + { + "epoch": 0.29, + "grad_norm": 4.628523203688381, + "learning_rate": 8.383125885509398e-06, + "loss": 1.1924, + "step": 3494 + }, + { + "epoch": 0.29, + "grad_norm": 3.288493469772743, + "learning_rate": 8.382151031801318e-06, + "loss": 0.5543, + "step": 3495 + }, + { + "epoch": 0.29, + "grad_norm": 5.178578709877618, + "learning_rate": 8.381175941019171e-06, + "loss": 1.2796, + "step": 3496 + }, + { + "epoch": 0.29, + "grad_norm": 3.1525444590197744, + "learning_rate": 8.380200613231305e-06, + "loss": 0.6884, + "step": 3497 + }, + { + "epoch": 0.29, + "grad_norm": 2.9848281841139404, + "learning_rate": 8.379225048506085e-06, + "loss": 0.6297, + "step": 3498 + }, + { + "epoch": 0.29, + "grad_norm": 2.5167439822359685, + "learning_rate": 8.378249246911898e-06, + "loss": 0.7095, + "step": 3499 + }, + { + "epoch": 0.29, + "grad_norm": 3.7118260838690333, + "learning_rate": 8.377273208517138e-06, + "loss": 0.9257, + "step": 3500 + }, + { + "epoch": 0.29, + "grad_norm": 3.303460637133216, + "learning_rate": 8.376296933390227e-06, + "loss": 0.9383, + "step": 3501 + }, + { + "epoch": 0.29, + "grad_norm": 3.786574910586223, + "learning_rate": 8.375320421599595e-06, + "loss": 0.3783, + "step": 3502 + }, + { + "epoch": 0.29, + "grad_norm": 3.643732629168323, + "learning_rate": 8.374343673213689e-06, + "loss": 0.8174, + "step": 3503 + }, + { + "epoch": 0.29, + "grad_norm": 2.9863507062348122, + "learning_rate": 8.373366688300978e-06, + "loss": 0.7188, + "step": 3504 + }, + { + "epoch": 0.29, + "grad_norm": 3.426477659359341, + "learning_rate": 8.37238946692994e-06, + "loss": 0.8369, + "step": 3505 + }, + { + "epoch": 0.29, + "grad_norm": 2.820979127414353, + "learning_rate": 8.37141200916908e-06, + "loss": 0.7249, + "step": 3506 + }, + { + "epoch": 0.29, + "grad_norm": 4.560419011175925, + "learning_rate": 8.37043431508691e-06, + "loss": 0.9008, + "step": 3507 + }, + { + "epoch": 0.29, + "grad_norm": 2.813495080021344, + "learning_rate": 8.369456384751963e-06, + "loss": 0.5069, + "step": 3508 + }, + { + "epoch": 0.29, + "grad_norm": 1.9553754956034446, + "learning_rate": 8.368478218232787e-06, + "loss": 0.3191, + "step": 3509 + }, + { + "epoch": 0.29, + "grad_norm": 3.3542405637871937, + "learning_rate": 8.367499815597947e-06, + "loss": 0.6272, + "step": 3510 + }, + { + "epoch": 0.29, + "grad_norm": 4.501836087508141, + "learning_rate": 8.366521176916027e-06, + "loss": 1.3518, + "step": 3511 + }, + { + "epoch": 0.29, + "grad_norm": 3.610952413356651, + "learning_rate": 8.365542302255623e-06, + "loss": 0.5242, + "step": 3512 + }, + { + "epoch": 0.29, + "grad_norm": 3.8167889077572505, + "learning_rate": 8.364563191685348e-06, + "loss": 0.9545, + "step": 3513 + }, + { + "epoch": 0.29, + "grad_norm": 5.0670028406576355, + "learning_rate": 8.363583845273839e-06, + "loss": 1.4594, + "step": 3514 + }, + { + "epoch": 0.29, + "grad_norm": 3.519355454572228, + "learning_rate": 8.362604263089739e-06, + "loss": 0.5279, + "step": 3515 + }, + { + "epoch": 0.29, + "grad_norm": 2.5810225460305403, + "learning_rate": 8.361624445201715e-06, + "loss": 0.5254, + "step": 3516 + }, + { + "epoch": 0.29, + "grad_norm": 1.9515789420431564, + "learning_rate": 8.360644391678448e-06, + "loss": 0.4145, + "step": 3517 + }, + { + "epoch": 0.29, + "grad_norm": 4.796650732563646, + "learning_rate": 8.359664102588633e-06, + "loss": 1.2567, + "step": 3518 + }, + { + "epoch": 0.29, + "grad_norm": 2.5323364328106757, + "learning_rate": 8.358683578000987e-06, + "loss": 0.7193, + "step": 3519 + }, + { + "epoch": 0.29, + "grad_norm": 3.897656459189634, + "learning_rate": 8.357702817984239e-06, + "loss": 0.6481, + "step": 3520 + }, + { + "epoch": 0.29, + "grad_norm": 3.38233537413345, + "learning_rate": 8.356721822607136e-06, + "loss": 0.8116, + "step": 3521 + }, + { + "epoch": 0.29, + "grad_norm": 4.32681111980777, + "learning_rate": 8.355740591938445e-06, + "loss": 0.8616, + "step": 3522 + }, + { + "epoch": 0.29, + "grad_norm": 2.2162808086280132, + "learning_rate": 8.354759126046937e-06, + "loss": 0.5248, + "step": 3523 + }, + { + "epoch": 0.29, + "grad_norm": 4.434389364827244, + "learning_rate": 8.353777425001417e-06, + "loss": 1.2069, + "step": 3524 + }, + { + "epoch": 0.29, + "grad_norm": 2.824519616989892, + "learning_rate": 8.352795488870695e-06, + "loss": 0.6385, + "step": 3525 + }, + { + "epoch": 0.29, + "grad_norm": 2.8981735074257484, + "learning_rate": 8.351813317723601e-06, + "loss": 0.6773, + "step": 3526 + }, + { + "epoch": 0.29, + "grad_norm": 5.326576867153255, + "learning_rate": 8.35083091162898e-06, + "loss": 1.1598, + "step": 3527 + }, + { + "epoch": 0.29, + "grad_norm": 4.421449357052687, + "learning_rate": 8.349848270655696e-06, + "loss": 0.7413, + "step": 3528 + }, + { + "epoch": 0.29, + "grad_norm": 2.4048118640068776, + "learning_rate": 8.348865394872625e-06, + "loss": 0.6517, + "step": 3529 + }, + { + "epoch": 0.29, + "grad_norm": 3.8012794215788777, + "learning_rate": 8.347882284348665e-06, + "loss": 1.1257, + "step": 3530 + }, + { + "epoch": 0.29, + "grad_norm": 4.094074924068292, + "learning_rate": 8.346898939152728e-06, + "loss": 0.7916, + "step": 3531 + }, + { + "epoch": 0.29, + "grad_norm": 2.981656102798751, + "learning_rate": 8.34591535935374e-06, + "loss": 0.8597, + "step": 3532 + }, + { + "epoch": 0.29, + "grad_norm": 5.362189708212367, + "learning_rate": 8.344931545020646e-06, + "loss": 1.3468, + "step": 3533 + }, + { + "epoch": 0.29, + "grad_norm": 3.011845342498781, + "learning_rate": 8.343947496222409e-06, + "loss": 0.6193, + "step": 3534 + }, + { + "epoch": 0.29, + "grad_norm": 4.339480083175476, + "learning_rate": 8.342963213028005e-06, + "loss": 1.1736, + "step": 3535 + }, + { + "epoch": 0.29, + "grad_norm": 3.9558061142349534, + "learning_rate": 8.34197869550643e-06, + "loss": 0.7738, + "step": 3536 + }, + { + "epoch": 0.29, + "grad_norm": 5.154641996932149, + "learning_rate": 8.340993943726692e-06, + "loss": 1.3008, + "step": 3537 + }, + { + "epoch": 0.29, + "grad_norm": 4.497783457109824, + "learning_rate": 8.340008957757817e-06, + "loss": 0.9407, + "step": 3538 + }, + { + "epoch": 0.29, + "grad_norm": 4.138359581887209, + "learning_rate": 8.339023737668851e-06, + "loss": 1.0942, + "step": 3539 + }, + { + "epoch": 0.29, + "grad_norm": 3.806807753455482, + "learning_rate": 8.338038283528852e-06, + "loss": 0.9245, + "step": 3540 + }, + { + "epoch": 0.29, + "grad_norm": 4.201461202068064, + "learning_rate": 8.337052595406896e-06, + "loss": 0.8242, + "step": 3541 + }, + { + "epoch": 0.29, + "grad_norm": 3.7793788421250945, + "learning_rate": 8.336066673372079e-06, + "loss": 0.8631, + "step": 3542 + }, + { + "epoch": 0.29, + "grad_norm": 4.354344052829089, + "learning_rate": 8.335080517493503e-06, + "loss": 1.5768, + "step": 3543 + }, + { + "epoch": 0.29, + "grad_norm": 3.216675886871792, + "learning_rate": 8.3340941278403e-06, + "loss": 0.7338, + "step": 3544 + }, + { + "epoch": 0.29, + "grad_norm": 3.86311833127007, + "learning_rate": 8.333107504481606e-06, + "loss": 1.0233, + "step": 3545 + }, + { + "epoch": 0.29, + "grad_norm": 1.9121852510210247, + "learning_rate": 8.332120647486583e-06, + "loss": 0.402, + "step": 3546 + }, + { + "epoch": 0.29, + "grad_norm": 1.7083309623026617, + "learning_rate": 8.331133556924404e-06, + "loss": 0.466, + "step": 3547 + }, + { + "epoch": 0.29, + "grad_norm": 4.756683675731017, + "learning_rate": 8.33014623286426e-06, + "loss": 1.0057, + "step": 3548 + }, + { + "epoch": 0.29, + "grad_norm": 4.173372658028368, + "learning_rate": 8.32915867537536e-06, + "loss": 0.7373, + "step": 3549 + }, + { + "epoch": 0.29, + "grad_norm": 5.611139422628014, + "learning_rate": 8.32817088452692e-06, + "loss": 1.2505, + "step": 3550 + }, + { + "epoch": 0.29, + "grad_norm": 2.8996910690245694, + "learning_rate": 8.32718286038819e-06, + "loss": 0.8201, + "step": 3551 + }, + { + "epoch": 0.29, + "grad_norm": 4.909677954824557, + "learning_rate": 8.326194603028419e-06, + "loss": 0.8776, + "step": 3552 + }, + { + "epoch": 0.29, + "grad_norm": 5.2155694907097425, + "learning_rate": 8.325206112516883e-06, + "loss": 1.5689, + "step": 3553 + }, + { + "epoch": 0.29, + "grad_norm": 3.877852476122359, + "learning_rate": 8.324217388922871e-06, + "loss": 0.8479, + "step": 3554 + }, + { + "epoch": 0.29, + "grad_norm": 3.7553956611936457, + "learning_rate": 8.323228432315684e-06, + "loss": 0.8556, + "step": 3555 + }, + { + "epoch": 0.29, + "grad_norm": 3.6617576423421254, + "learning_rate": 8.32223924276465e-06, + "loss": 1.0142, + "step": 3556 + }, + { + "epoch": 0.29, + "grad_norm": 2.0526872916953827, + "learning_rate": 8.321249820339102e-06, + "loss": 0.3802, + "step": 3557 + }, + { + "epoch": 0.29, + "grad_norm": 2.872143189106044, + "learning_rate": 8.320260165108395e-06, + "loss": 0.6965, + "step": 3558 + }, + { + "epoch": 0.29, + "grad_norm": 1.806996264415691, + "learning_rate": 8.3192702771419e-06, + "loss": 0.287, + "step": 3559 + }, + { + "epoch": 0.29, + "grad_norm": 2.699748900635862, + "learning_rate": 8.318280156509007e-06, + "loss": 0.5149, + "step": 3560 + }, + { + "epoch": 0.29, + "grad_norm": 3.74006251254508, + "learning_rate": 8.317289803279112e-06, + "loss": 0.615, + "step": 3561 + }, + { + "epoch": 0.29, + "grad_norm": 4.9784451434125705, + "learning_rate": 8.316299217521641e-06, + "loss": 1.4028, + "step": 3562 + }, + { + "epoch": 0.29, + "grad_norm": 4.017270239393142, + "learning_rate": 8.315308399306027e-06, + "loss": 0.9775, + "step": 3563 + }, + { + "epoch": 0.29, + "grad_norm": 3.42080669867183, + "learning_rate": 8.314317348701724e-06, + "loss": 0.7133, + "step": 3564 + }, + { + "epoch": 0.29, + "grad_norm": 4.893028050718947, + "learning_rate": 8.313326065778198e-06, + "loss": 1.2777, + "step": 3565 + }, + { + "epoch": 0.29, + "grad_norm": 4.55452023460235, + "learning_rate": 8.312334550604934e-06, + "loss": 1.2704, + "step": 3566 + }, + { + "epoch": 0.29, + "grad_norm": 4.1051621769345426, + "learning_rate": 8.311342803251431e-06, + "loss": 1.1103, + "step": 3567 + }, + { + "epoch": 0.29, + "grad_norm": 4.133309248809495, + "learning_rate": 8.310350823787213e-06, + "loss": 0.9868, + "step": 3568 + }, + { + "epoch": 0.29, + "grad_norm": 3.6100634341728, + "learning_rate": 8.309358612281805e-06, + "loss": 0.905, + "step": 3569 + }, + { + "epoch": 0.29, + "grad_norm": 2.6992294908098087, + "learning_rate": 8.308366168804764e-06, + "loss": 0.5854, + "step": 3570 + }, + { + "epoch": 0.29, + "grad_norm": 2.193632411771907, + "learning_rate": 8.30737349342565e-06, + "loss": 0.6192, + "step": 3571 + }, + { + "epoch": 0.29, + "grad_norm": 2.956331830855571, + "learning_rate": 8.30638058621405e-06, + "loss": 0.8116, + "step": 3572 + }, + { + "epoch": 0.29, + "grad_norm": 3.4776215746659096, + "learning_rate": 8.305387447239559e-06, + "loss": 0.5024, + "step": 3573 + }, + { + "epoch": 0.29, + "grad_norm": 3.9348804554264616, + "learning_rate": 8.304394076571794e-06, + "loss": 0.8389, + "step": 3574 + }, + { + "epoch": 0.29, + "grad_norm": 2.9240920418295615, + "learning_rate": 8.303400474280384e-06, + "loss": 0.8246, + "step": 3575 + }, + { + "epoch": 0.29, + "grad_norm": 5.604727604154122, + "learning_rate": 8.302406640434978e-06, + "loss": 1.1539, + "step": 3576 + }, + { + "epoch": 0.29, + "grad_norm": 4.3571536012994, + "learning_rate": 8.301412575105238e-06, + "loss": 1.0719, + "step": 3577 + }, + { + "epoch": 0.29, + "grad_norm": 3.614080600263977, + "learning_rate": 8.300418278360844e-06, + "loss": 0.4737, + "step": 3578 + }, + { + "epoch": 0.29, + "grad_norm": 3.5897488641657365, + "learning_rate": 8.299423750271493e-06, + "loss": 0.9158, + "step": 3579 + }, + { + "epoch": 0.29, + "grad_norm": 3.6018999721838405, + "learning_rate": 8.298428990906896e-06, + "loss": 0.7084, + "step": 3580 + }, + { + "epoch": 0.29, + "grad_norm": 0.9521900164481667, + "learning_rate": 8.297434000336781e-06, + "loss": 0.1663, + "step": 3581 + }, + { + "epoch": 0.29, + "grad_norm": 3.416897765599965, + "learning_rate": 8.296438778630893e-06, + "loss": 0.7562, + "step": 3582 + }, + { + "epoch": 0.29, + "grad_norm": 3.0830365100554555, + "learning_rate": 8.295443325858994e-06, + "loss": 0.9708, + "step": 3583 + }, + { + "epoch": 0.29, + "grad_norm": 1.5648254742585337, + "learning_rate": 8.294447642090857e-06, + "loss": 0.2603, + "step": 3584 + }, + { + "epoch": 0.29, + "grad_norm": 3.5890801278182125, + "learning_rate": 8.293451727396283e-06, + "loss": 0.84, + "step": 3585 + }, + { + "epoch": 0.29, + "grad_norm": 4.596402155220595, + "learning_rate": 8.29245558184507e-06, + "loss": 0.5958, + "step": 3586 + }, + { + "epoch": 0.29, + "grad_norm": 4.645888567946884, + "learning_rate": 8.291459205507054e-06, + "loss": 1.1768, + "step": 3587 + }, + { + "epoch": 0.29, + "grad_norm": 2.7688250555200846, + "learning_rate": 8.29046259845207e-06, + "loss": 0.6303, + "step": 3588 + }, + { + "epoch": 0.29, + "grad_norm": 5.681310621502814, + "learning_rate": 8.289465760749977e-06, + "loss": 1.7021, + "step": 3589 + }, + { + "epoch": 0.29, + "grad_norm": 4.613193942592307, + "learning_rate": 8.288468692470652e-06, + "loss": 1.0909, + "step": 3590 + }, + { + "epoch": 0.29, + "grad_norm": 3.8875252487754874, + "learning_rate": 8.287471393683984e-06, + "loss": 1.1162, + "step": 3591 + }, + { + "epoch": 0.29, + "grad_norm": 2.9841346177304655, + "learning_rate": 8.286473864459876e-06, + "loss": 0.7506, + "step": 3592 + }, + { + "epoch": 0.29, + "grad_norm": 5.508869631913947, + "learning_rate": 8.285476104868252e-06, + "loss": 1.1238, + "step": 3593 + }, + { + "epoch": 0.29, + "grad_norm": 5.837820743640624, + "learning_rate": 8.284478114979056e-06, + "loss": 0.7888, + "step": 3594 + }, + { + "epoch": 0.29, + "grad_norm": 2.836434081449681, + "learning_rate": 8.283479894862233e-06, + "loss": 0.5704, + "step": 3595 + }, + { + "epoch": 0.29, + "grad_norm": 3.2250188976590355, + "learning_rate": 8.282481444587764e-06, + "loss": 0.6061, + "step": 3596 + }, + { + "epoch": 0.29, + "grad_norm": 3.100264445911385, + "learning_rate": 8.281482764225628e-06, + "loss": 0.5391, + "step": 3597 + }, + { + "epoch": 0.29, + "grad_norm": 4.284825992082469, + "learning_rate": 8.280483853845831e-06, + "loss": 1.4993, + "step": 3598 + }, + { + "epoch": 0.29, + "grad_norm": 3.683705715157677, + "learning_rate": 8.279484713518395e-06, + "loss": 1.1575, + "step": 3599 + }, + { + "epoch": 0.29, + "grad_norm": 3.422930359517672, + "learning_rate": 8.27848534331335e-06, + "loss": 0.5458, + "step": 3600 + }, + { + "epoch": 0.29, + "grad_norm": 4.337143182155289, + "learning_rate": 8.277485743300753e-06, + "loss": 0.6768, + "step": 3601 + }, + { + "epoch": 0.29, + "grad_norm": 5.951202647514589, + "learning_rate": 8.27648591355067e-06, + "loss": 1.0886, + "step": 3602 + }, + { + "epoch": 0.29, + "grad_norm": 3.1163177763316767, + "learning_rate": 8.27548585413318e-06, + "loss": 0.4608, + "step": 3603 + }, + { + "epoch": 0.29, + "grad_norm": 4.473268162565548, + "learning_rate": 8.274485565118389e-06, + "loss": 0.9527, + "step": 3604 + }, + { + "epoch": 0.29, + "grad_norm": 4.2134953665323875, + "learning_rate": 8.27348504657641e-06, + "loss": 1.02, + "step": 3605 + }, + { + "epoch": 0.29, + "grad_norm": 4.865807389021216, + "learning_rate": 8.272484298577375e-06, + "loss": 1.2196, + "step": 3606 + }, + { + "epoch": 0.29, + "grad_norm": 4.4482018269506804, + "learning_rate": 8.271483321191433e-06, + "loss": 1.0805, + "step": 3607 + }, + { + "epoch": 0.29, + "grad_norm": 3.789927696800828, + "learning_rate": 8.270482114488748e-06, + "loss": 0.7778, + "step": 3608 + }, + { + "epoch": 0.29, + "grad_norm": 4.3632231589887605, + "learning_rate": 8.269480678539498e-06, + "loss": 0.8847, + "step": 3609 + }, + { + "epoch": 0.3, + "grad_norm": 3.1027736871674576, + "learning_rate": 8.268479013413883e-06, + "loss": 0.679, + "step": 3610 + }, + { + "epoch": 0.3, + "grad_norm": 1.9596146365430251, + "learning_rate": 8.267477119182115e-06, + "loss": 0.3198, + "step": 3611 + }, + { + "epoch": 0.3, + "grad_norm": 1.3201503867779574, + "learning_rate": 8.266474995914419e-06, + "loss": 0.1946, + "step": 3612 + }, + { + "epoch": 0.3, + "grad_norm": 2.4227795740721945, + "learning_rate": 8.265472643681042e-06, + "loss": 0.5886, + "step": 3613 + }, + { + "epoch": 0.3, + "grad_norm": 1.9999825595731449, + "learning_rate": 8.264470062552246e-06, + "loss": 0.501, + "step": 3614 + }, + { + "epoch": 0.3, + "grad_norm": 2.466292481798756, + "learning_rate": 8.263467252598303e-06, + "loss": 0.3048, + "step": 3615 + }, + { + "epoch": 0.3, + "grad_norm": 3.3983750894590083, + "learning_rate": 8.26246421388951e-06, + "loss": 0.8234, + "step": 3616 + }, + { + "epoch": 0.3, + "grad_norm": 2.9845882181511003, + "learning_rate": 8.261460946496172e-06, + "loss": 0.7159, + "step": 3617 + }, + { + "epoch": 0.3, + "grad_norm": 2.963902341128361, + "learning_rate": 8.260457450488617e-06, + "loss": 0.5597, + "step": 3618 + }, + { + "epoch": 0.3, + "grad_norm": 4.58947166693942, + "learning_rate": 8.259453725937184e-06, + "loss": 0.9475, + "step": 3619 + }, + { + "epoch": 0.3, + "grad_norm": 1.8608228824499164, + "learning_rate": 8.25844977291223e-06, + "loss": 0.4321, + "step": 3620 + }, + { + "epoch": 0.3, + "grad_norm": 3.1629799771517217, + "learning_rate": 8.257445591484128e-06, + "loss": 0.7059, + "step": 3621 + }, + { + "epoch": 0.3, + "grad_norm": 3.4020188663153315, + "learning_rate": 8.256441181723265e-06, + "loss": 0.5953, + "step": 3622 + }, + { + "epoch": 0.3, + "grad_norm": 4.750190553423603, + "learning_rate": 8.255436543700048e-06, + "loss": 0.9565, + "step": 3623 + }, + { + "epoch": 0.3, + "grad_norm": 3.420792324678391, + "learning_rate": 8.254431677484898e-06, + "loss": 0.9565, + "step": 3624 + }, + { + "epoch": 0.3, + "grad_norm": 3.610989213133846, + "learning_rate": 8.25342658314825e-06, + "loss": 1.0059, + "step": 3625 + }, + { + "epoch": 0.3, + "grad_norm": 3.5987006785906437, + "learning_rate": 8.252421260760558e-06, + "loss": 0.9466, + "step": 3626 + }, + { + "epoch": 0.3, + "grad_norm": 4.658086668796395, + "learning_rate": 8.25141571039229e-06, + "loss": 1.2601, + "step": 3627 + }, + { + "epoch": 0.3, + "grad_norm": 4.424797321212776, + "learning_rate": 8.25040993211393e-06, + "loss": 0.8074, + "step": 3628 + }, + { + "epoch": 0.3, + "grad_norm": 2.945071389194256, + "learning_rate": 8.24940392599598e-06, + "loss": 0.6433, + "step": 3629 + }, + { + "epoch": 0.3, + "grad_norm": 4.658612357174698, + "learning_rate": 8.248397692108957e-06, + "loss": 1.1627, + "step": 3630 + }, + { + "epoch": 0.3, + "grad_norm": 5.488384567872834, + "learning_rate": 8.247391230523393e-06, + "loss": 1.3089, + "step": 3631 + }, + { + "epoch": 0.3, + "grad_norm": 2.272377230796433, + "learning_rate": 8.246384541309835e-06, + "loss": 0.4514, + "step": 3632 + }, + { + "epoch": 0.3, + "grad_norm": 4.105369688695962, + "learning_rate": 8.24537762453885e-06, + "loss": 0.8004, + "step": 3633 + }, + { + "epoch": 0.3, + "grad_norm": 4.537957023672104, + "learning_rate": 8.244370480281018e-06, + "loss": 0.6889, + "step": 3634 + }, + { + "epoch": 0.3, + "grad_norm": 4.014372334228167, + "learning_rate": 8.243363108606934e-06, + "loss": 1.0708, + "step": 3635 + }, + { + "epoch": 0.3, + "grad_norm": 3.644010528781357, + "learning_rate": 8.24235550958721e-06, + "loss": 0.6836, + "step": 3636 + }, + { + "epoch": 0.3, + "grad_norm": 3.8196684083357106, + "learning_rate": 8.241347683292478e-06, + "loss": 0.9738, + "step": 3637 + }, + { + "epoch": 0.3, + "grad_norm": 5.07292046837355, + "learning_rate": 8.240339629793379e-06, + "loss": 1.3078, + "step": 3638 + }, + { + "epoch": 0.3, + "grad_norm": 4.013052169089353, + "learning_rate": 8.239331349160573e-06, + "loss": 0.5815, + "step": 3639 + }, + { + "epoch": 0.3, + "grad_norm": 1.653944549068703, + "learning_rate": 8.238322841464738e-06, + "loss": 0.3109, + "step": 3640 + }, + { + "epoch": 0.3, + "grad_norm": 3.115651595817726, + "learning_rate": 8.237314106776563e-06, + "loss": 0.8189, + "step": 3641 + }, + { + "epoch": 0.3, + "grad_norm": 3.3410805462475524, + "learning_rate": 8.236305145166761e-06, + "loss": 0.808, + "step": 3642 + }, + { + "epoch": 0.3, + "grad_norm": 2.9670019774770364, + "learning_rate": 8.23529595670605e-06, + "loss": 0.6854, + "step": 3643 + }, + { + "epoch": 0.3, + "grad_norm": 2.958379765225836, + "learning_rate": 8.234286541465175e-06, + "loss": 0.534, + "step": 3644 + }, + { + "epoch": 0.3, + "grad_norm": 3.855724854233526, + "learning_rate": 8.233276899514887e-06, + "loss": 1.0768, + "step": 3645 + }, + { + "epoch": 0.3, + "grad_norm": 4.519939370491934, + "learning_rate": 8.232267030925963e-06, + "loss": 1.2004, + "step": 3646 + }, + { + "epoch": 0.3, + "grad_norm": 4.796719534626249, + "learning_rate": 8.231256935769183e-06, + "loss": 1.1672, + "step": 3647 + }, + { + "epoch": 0.3, + "grad_norm": 4.58716913978193, + "learning_rate": 8.230246614115357e-06, + "loss": 1.191, + "step": 3648 + }, + { + "epoch": 0.3, + "grad_norm": 2.5302816001180433, + "learning_rate": 8.2292360660353e-06, + "loss": 0.461, + "step": 3649 + }, + { + "epoch": 0.3, + "grad_norm": 2.9954865922410283, + "learning_rate": 8.228225291599849e-06, + "loss": 0.8287, + "step": 3650 + }, + { + "epoch": 0.3, + "grad_norm": 4.078826919973614, + "learning_rate": 8.227214290879855e-06, + "loss": 0.6676, + "step": 3651 + }, + { + "epoch": 0.3, + "grad_norm": 3.6359386984785216, + "learning_rate": 8.226203063946184e-06, + "loss": 0.5838, + "step": 3652 + }, + { + "epoch": 0.3, + "grad_norm": 4.600655242064704, + "learning_rate": 8.225191610869717e-06, + "loss": 0.8791, + "step": 3653 + }, + { + "epoch": 0.3, + "grad_norm": 3.1916381293937146, + "learning_rate": 8.224179931721354e-06, + "loss": 0.5714, + "step": 3654 + }, + { + "epoch": 0.3, + "grad_norm": 3.793635684996372, + "learning_rate": 8.22316802657201e-06, + "loss": 0.542, + "step": 3655 + }, + { + "epoch": 0.3, + "grad_norm": 3.6517913737323298, + "learning_rate": 8.222155895492616e-06, + "loss": 0.8904, + "step": 3656 + }, + { + "epoch": 0.3, + "grad_norm": 4.309634828087789, + "learning_rate": 8.221143538554116e-06, + "loss": 0.9436, + "step": 3657 + }, + { + "epoch": 0.3, + "grad_norm": 5.59453233275118, + "learning_rate": 8.220130955827472e-06, + "loss": 1.6515, + "step": 3658 + }, + { + "epoch": 0.3, + "grad_norm": 4.0694475479979255, + "learning_rate": 8.219118147383663e-06, + "loss": 1.2232, + "step": 3659 + }, + { + "epoch": 0.3, + "grad_norm": 3.7378378994927486, + "learning_rate": 8.218105113293681e-06, + "loss": 1.0414, + "step": 3660 + }, + { + "epoch": 0.3, + "grad_norm": 2.278096243544778, + "learning_rate": 8.217091853628535e-06, + "loss": 0.4086, + "step": 3661 + }, + { + "epoch": 0.3, + "grad_norm": 4.204213682458238, + "learning_rate": 8.216078368459253e-06, + "loss": 0.7492, + "step": 3662 + }, + { + "epoch": 0.3, + "grad_norm": 5.626511416048154, + "learning_rate": 8.21506465785687e-06, + "loss": 1.181, + "step": 3663 + }, + { + "epoch": 0.3, + "grad_norm": 4.263531538490547, + "learning_rate": 8.21405072189245e-06, + "loss": 0.8094, + "step": 3664 + }, + { + "epoch": 0.3, + "grad_norm": 3.842607134945213, + "learning_rate": 8.213036560637062e-06, + "loss": 0.9559, + "step": 3665 + }, + { + "epoch": 0.3, + "grad_norm": 2.6295866100852026, + "learning_rate": 8.21202217416179e-06, + "loss": 0.6185, + "step": 3666 + }, + { + "epoch": 0.3, + "grad_norm": 2.613191946253642, + "learning_rate": 8.211007562537747e-06, + "loss": 0.4489, + "step": 3667 + }, + { + "epoch": 0.3, + "grad_norm": 1.9761898696424298, + "learning_rate": 8.209992725836047e-06, + "loss": 0.4938, + "step": 3668 + }, + { + "epoch": 0.3, + "grad_norm": 3.3974288514895035, + "learning_rate": 8.208977664127827e-06, + "loss": 1.1194, + "step": 3669 + }, + { + "epoch": 0.3, + "grad_norm": 2.378116796132323, + "learning_rate": 8.207962377484237e-06, + "loss": 0.4233, + "step": 3670 + }, + { + "epoch": 0.3, + "grad_norm": 4.093985355862017, + "learning_rate": 8.206946865976446e-06, + "loss": 1.0518, + "step": 3671 + }, + { + "epoch": 0.3, + "grad_norm": 3.9070968664524504, + "learning_rate": 8.205931129675637e-06, + "loss": 0.7833, + "step": 3672 + }, + { + "epoch": 0.3, + "grad_norm": 3.719407576076978, + "learning_rate": 8.204915168653007e-06, + "loss": 0.6622, + "step": 3673 + }, + { + "epoch": 0.3, + "grad_norm": 2.7822855101196815, + "learning_rate": 8.203898982979773e-06, + "loss": 0.6128, + "step": 3674 + }, + { + "epoch": 0.3, + "grad_norm": 3.4931324684841822, + "learning_rate": 8.202882572727161e-06, + "loss": 0.5935, + "step": 3675 + }, + { + "epoch": 0.3, + "grad_norm": 4.591226514045569, + "learning_rate": 8.201865937966423e-06, + "loss": 1.1802, + "step": 3676 + }, + { + "epoch": 0.3, + "grad_norm": 4.043022550682876, + "learning_rate": 8.200849078768816e-06, + "loss": 0.6867, + "step": 3677 + }, + { + "epoch": 0.3, + "grad_norm": 3.6300983589675506, + "learning_rate": 8.199831995205619e-06, + "loss": 0.9301, + "step": 3678 + }, + { + "epoch": 0.3, + "grad_norm": 3.276523837771925, + "learning_rate": 8.198814687348123e-06, + "loss": 0.5072, + "step": 3679 + }, + { + "epoch": 0.3, + "grad_norm": 3.306168790115253, + "learning_rate": 8.19779715526764e-06, + "loss": 1.0553, + "step": 3680 + }, + { + "epoch": 0.3, + "grad_norm": 3.0588477007410275, + "learning_rate": 8.196779399035492e-06, + "loss": 0.5845, + "step": 3681 + }, + { + "epoch": 0.3, + "grad_norm": 2.9399757382701606, + "learning_rate": 8.195761418723023e-06, + "loss": 0.6021, + "step": 3682 + }, + { + "epoch": 0.3, + "grad_norm": 2.9964090262739225, + "learning_rate": 8.194743214401587e-06, + "loss": 0.7194, + "step": 3683 + }, + { + "epoch": 0.3, + "grad_norm": 3.500776219734207, + "learning_rate": 8.193724786142552e-06, + "loss": 0.9013, + "step": 3684 + }, + { + "epoch": 0.3, + "grad_norm": 3.1845474908110916, + "learning_rate": 8.192706134017312e-06, + "loss": 1.0675, + "step": 3685 + }, + { + "epoch": 0.3, + "grad_norm": 4.336093403098695, + "learning_rate": 8.191687258097264e-06, + "loss": 1.2922, + "step": 3686 + }, + { + "epoch": 0.3, + "grad_norm": 5.091688178982156, + "learning_rate": 8.19066815845383e-06, + "loss": 1.1675, + "step": 3687 + }, + { + "epoch": 0.3, + "grad_norm": 4.095007486460994, + "learning_rate": 8.189648835158445e-06, + "loss": 0.9352, + "step": 3688 + }, + { + "epoch": 0.3, + "grad_norm": 4.983663848468049, + "learning_rate": 8.188629288282557e-06, + "loss": 0.9149, + "step": 3689 + }, + { + "epoch": 0.3, + "grad_norm": 4.755782459984909, + "learning_rate": 8.187609517897634e-06, + "loss": 1.4777, + "step": 3690 + }, + { + "epoch": 0.3, + "grad_norm": 3.4191866976219076, + "learning_rate": 8.186589524075155e-06, + "loss": 1.014, + "step": 3691 + }, + { + "epoch": 0.3, + "grad_norm": 3.8368571027938874, + "learning_rate": 8.18556930688662e-06, + "loss": 1.0577, + "step": 3692 + }, + { + "epoch": 0.3, + "grad_norm": 3.6395547430488384, + "learning_rate": 8.18454886640354e-06, + "loss": 0.9151, + "step": 3693 + }, + { + "epoch": 0.3, + "grad_norm": 5.425943955850182, + "learning_rate": 8.183528202697441e-06, + "loss": 1.344, + "step": 3694 + }, + { + "epoch": 0.3, + "grad_norm": 2.573148618469356, + "learning_rate": 8.18250731583987e-06, + "loss": 0.2741, + "step": 3695 + }, + { + "epoch": 0.3, + "grad_norm": 4.766066715900613, + "learning_rate": 8.181486205902391e-06, + "loss": 0.8259, + "step": 3696 + }, + { + "epoch": 0.3, + "grad_norm": 1.4938051351655979, + "learning_rate": 8.180464872956572e-06, + "loss": 0.2153, + "step": 3697 + }, + { + "epoch": 0.3, + "grad_norm": 4.04645686085355, + "learning_rate": 8.179443317074008e-06, + "loss": 0.897, + "step": 3698 + }, + { + "epoch": 0.3, + "grad_norm": 4.281302380409822, + "learning_rate": 8.178421538326303e-06, + "loss": 0.8353, + "step": 3699 + }, + { + "epoch": 0.3, + "grad_norm": 4.739685727324987, + "learning_rate": 8.17739953678508e-06, + "loss": 1.5031, + "step": 3700 + }, + { + "epoch": 0.3, + "grad_norm": 4.517623643422315, + "learning_rate": 8.17637731252198e-06, + "loss": 1.1821, + "step": 3701 + }, + { + "epoch": 0.3, + "grad_norm": 3.058673056478612, + "learning_rate": 8.175354865608651e-06, + "loss": 0.5239, + "step": 3702 + }, + { + "epoch": 0.3, + "grad_norm": 4.307110932872007, + "learning_rate": 8.174332196116767e-06, + "loss": 1.2173, + "step": 3703 + }, + { + "epoch": 0.3, + "grad_norm": 4.667206926341169, + "learning_rate": 8.173309304118011e-06, + "loss": 0.812, + "step": 3704 + }, + { + "epoch": 0.3, + "grad_norm": 2.5122682057515293, + "learning_rate": 8.172286189684082e-06, + "loss": 0.5349, + "step": 3705 + }, + { + "epoch": 0.3, + "grad_norm": 3.206192713863934, + "learning_rate": 8.171262852886697e-06, + "loss": 0.8859, + "step": 3706 + }, + { + "epoch": 0.3, + "grad_norm": 3.745896340372273, + "learning_rate": 8.170239293797588e-06, + "loss": 0.6659, + "step": 3707 + }, + { + "epoch": 0.3, + "grad_norm": 3.472030233684138, + "learning_rate": 8.169215512488502e-06, + "loss": 0.937, + "step": 3708 + }, + { + "epoch": 0.3, + "grad_norm": 2.4137168251087555, + "learning_rate": 8.1681915090312e-06, + "loss": 0.5604, + "step": 3709 + }, + { + "epoch": 0.3, + "grad_norm": 3.9059515725854372, + "learning_rate": 8.167167283497462e-06, + "loss": 1.3055, + "step": 3710 + }, + { + "epoch": 0.3, + "grad_norm": 3.6243245126837236, + "learning_rate": 8.16614283595908e-06, + "loss": 1.3093, + "step": 3711 + }, + { + "epoch": 0.3, + "grad_norm": 2.7052226321091295, + "learning_rate": 8.165118166487866e-06, + "loss": 0.563, + "step": 3712 + }, + { + "epoch": 0.3, + "grad_norm": 2.6276544526695704, + "learning_rate": 8.164093275155642e-06, + "loss": 0.4162, + "step": 3713 + }, + { + "epoch": 0.3, + "grad_norm": 3.414009897639559, + "learning_rate": 8.16306816203425e-06, + "loss": 0.5708, + "step": 3714 + }, + { + "epoch": 0.3, + "grad_norm": 4.191779412183785, + "learning_rate": 8.162042827195545e-06, + "loss": 0.7869, + "step": 3715 + }, + { + "epoch": 0.3, + "grad_norm": 3.966228928181594, + "learning_rate": 8.1610172707114e-06, + "loss": 1.0857, + "step": 3716 + }, + { + "epoch": 0.3, + "grad_norm": 2.416053641244488, + "learning_rate": 8.1599914926537e-06, + "loss": 0.3816, + "step": 3717 + }, + { + "epoch": 0.3, + "grad_norm": 3.4520616146089678, + "learning_rate": 8.158965493094349e-06, + "loss": 0.8468, + "step": 3718 + }, + { + "epoch": 0.3, + "grad_norm": 2.866070693077448, + "learning_rate": 8.157939272105265e-06, + "loss": 0.3885, + "step": 3719 + }, + { + "epoch": 0.3, + "grad_norm": 2.3887081665511363, + "learning_rate": 8.15691282975838e-06, + "loss": 0.5784, + "step": 3720 + }, + { + "epoch": 0.3, + "grad_norm": 2.593655781114713, + "learning_rate": 8.155886166125647e-06, + "loss": 0.381, + "step": 3721 + }, + { + "epoch": 0.3, + "grad_norm": 1.1882794822789533, + "learning_rate": 8.154859281279028e-06, + "loss": 0.2174, + "step": 3722 + }, + { + "epoch": 0.3, + "grad_norm": 3.8400830066215126, + "learning_rate": 8.153832175290503e-06, + "loss": 0.8367, + "step": 3723 + }, + { + "epoch": 0.3, + "grad_norm": 3.357996923956879, + "learning_rate": 8.152804848232067e-06, + "loss": 0.7642, + "step": 3724 + }, + { + "epoch": 0.3, + "grad_norm": 2.0239486314492354, + "learning_rate": 8.151777300175733e-06, + "loss": 0.3608, + "step": 3725 + }, + { + "epoch": 0.3, + "grad_norm": 3.4587234054487888, + "learning_rate": 8.150749531193527e-06, + "loss": 0.8073, + "step": 3726 + }, + { + "epoch": 0.3, + "grad_norm": 3.8268776728496308, + "learning_rate": 8.14972154135749e-06, + "loss": 0.8313, + "step": 3727 + }, + { + "epoch": 0.3, + "grad_norm": 2.592494371196147, + "learning_rate": 8.14869333073968e-06, + "loss": 0.527, + "step": 3728 + }, + { + "epoch": 0.3, + "grad_norm": 4.116864773442245, + "learning_rate": 8.147664899412174e-06, + "loss": 0.7039, + "step": 3729 + }, + { + "epoch": 0.3, + "grad_norm": 3.763498367287853, + "learning_rate": 8.146636247447053e-06, + "loss": 0.766, + "step": 3730 + }, + { + "epoch": 0.3, + "grad_norm": 3.474528023608184, + "learning_rate": 8.145607374916428e-06, + "loss": 1.0169, + "step": 3731 + }, + { + "epoch": 0.31, + "grad_norm": 4.734071201766395, + "learning_rate": 8.144578281892414e-06, + "loss": 0.9553, + "step": 3732 + }, + { + "epoch": 0.31, + "grad_norm": 4.092806982413093, + "learning_rate": 8.14354896844715e-06, + "loss": 1.055, + "step": 3733 + }, + { + "epoch": 0.31, + "grad_norm": 5.42314577570228, + "learning_rate": 8.142519434652782e-06, + "loss": 0.8633, + "step": 3734 + }, + { + "epoch": 0.31, + "grad_norm": 4.467308023579122, + "learning_rate": 8.141489680581481e-06, + "loss": 1.3204, + "step": 3735 + }, + { + "epoch": 0.31, + "grad_norm": 2.970938266610763, + "learning_rate": 8.140459706305422e-06, + "loss": 0.6799, + "step": 3736 + }, + { + "epoch": 0.31, + "grad_norm": 4.45068394944959, + "learning_rate": 8.139429511896806e-06, + "loss": 1.2216, + "step": 3737 + }, + { + "epoch": 0.31, + "grad_norm": 3.082640105344123, + "learning_rate": 8.138399097427843e-06, + "loss": 0.5311, + "step": 3738 + }, + { + "epoch": 0.31, + "grad_norm": 4.065166150068355, + "learning_rate": 8.137368462970762e-06, + "loss": 0.995, + "step": 3739 + }, + { + "epoch": 0.31, + "grad_norm": 1.3988745842381092, + "learning_rate": 8.136337608597807e-06, + "loss": 0.1561, + "step": 3740 + }, + { + "epoch": 0.31, + "grad_norm": 3.16548411325666, + "learning_rate": 8.135306534381233e-06, + "loss": 0.4723, + "step": 3741 + }, + { + "epoch": 0.31, + "grad_norm": 4.435847542644207, + "learning_rate": 8.134275240393318e-06, + "loss": 1.2246, + "step": 3742 + }, + { + "epoch": 0.31, + "grad_norm": 4.052208610377437, + "learning_rate": 8.133243726706348e-06, + "loss": 1.0259, + "step": 3743 + }, + { + "epoch": 0.31, + "grad_norm": 3.9003020589030672, + "learning_rate": 8.132211993392629e-06, + "loss": 1.0583, + "step": 3744 + }, + { + "epoch": 0.31, + "grad_norm": 5.149256679099094, + "learning_rate": 8.131180040524482e-06, + "loss": 1.1742, + "step": 3745 + }, + { + "epoch": 0.31, + "grad_norm": 1.8161975702048256, + "learning_rate": 8.130147868174238e-06, + "loss": 0.354, + "step": 3746 + }, + { + "epoch": 0.31, + "grad_norm": 3.830405910264299, + "learning_rate": 8.129115476414253e-06, + "loss": 0.9095, + "step": 3747 + }, + { + "epoch": 0.31, + "grad_norm": 3.274315406313386, + "learning_rate": 8.12808286531689e-06, + "loss": 1.2045, + "step": 3748 + }, + { + "epoch": 0.31, + "grad_norm": 5.008066219732132, + "learning_rate": 8.127050034954533e-06, + "loss": 1.0741, + "step": 3749 + }, + { + "epoch": 0.31, + "grad_norm": 4.177893875464701, + "learning_rate": 8.126016985399576e-06, + "loss": 0.7071, + "step": 3750 + }, + { + "epoch": 0.31, + "grad_norm": 4.260607866286649, + "learning_rate": 8.124983716724434e-06, + "loss": 1.4082, + "step": 3751 + }, + { + "epoch": 0.31, + "grad_norm": 5.137610931060111, + "learning_rate": 8.123950229001533e-06, + "loss": 1.4752, + "step": 3752 + }, + { + "epoch": 0.31, + "grad_norm": 1.9728338746060647, + "learning_rate": 8.122916522303314e-06, + "loss": 0.3583, + "step": 3753 + }, + { + "epoch": 0.31, + "grad_norm": 3.642077452337778, + "learning_rate": 8.121882596702241e-06, + "loss": 0.7528, + "step": 3754 + }, + { + "epoch": 0.31, + "grad_norm": 4.488596401108742, + "learning_rate": 8.120848452270784e-06, + "loss": 1.0414, + "step": 3755 + }, + { + "epoch": 0.31, + "grad_norm": 4.429872932359214, + "learning_rate": 8.119814089081433e-06, + "loss": 0.9902, + "step": 3756 + }, + { + "epoch": 0.31, + "grad_norm": 4.132381624984531, + "learning_rate": 8.118779507206692e-06, + "loss": 1.2691, + "step": 3757 + }, + { + "epoch": 0.31, + "grad_norm": 4.051652764278989, + "learning_rate": 8.117744706719077e-06, + "loss": 0.7505, + "step": 3758 + }, + { + "epoch": 0.31, + "grad_norm": 3.7541486157549886, + "learning_rate": 8.11670968769113e-06, + "loss": 0.8725, + "step": 3759 + }, + { + "epoch": 0.31, + "grad_norm": 2.2188128012707553, + "learning_rate": 8.115674450195397e-06, + "loss": 0.4125, + "step": 3760 + }, + { + "epoch": 0.31, + "grad_norm": 3.8775421294704433, + "learning_rate": 8.114638994304442e-06, + "loss": 0.9549, + "step": 3761 + }, + { + "epoch": 0.31, + "grad_norm": 2.24804227327714, + "learning_rate": 8.113603320090852e-06, + "loss": 0.5652, + "step": 3762 + }, + { + "epoch": 0.31, + "grad_norm": 2.378678797966362, + "learning_rate": 8.112567427627218e-06, + "loss": 0.5553, + "step": 3763 + }, + { + "epoch": 0.31, + "grad_norm": 1.9313481307698672, + "learning_rate": 8.111531316986155e-06, + "loss": 0.3617, + "step": 3764 + }, + { + "epoch": 0.31, + "grad_norm": 3.841068469149828, + "learning_rate": 8.110494988240287e-06, + "loss": 0.8996, + "step": 3765 + }, + { + "epoch": 0.31, + "grad_norm": 2.5605387433865445, + "learning_rate": 8.109458441462257e-06, + "loss": 0.6492, + "step": 3766 + }, + { + "epoch": 0.31, + "grad_norm": 4.577317967150967, + "learning_rate": 8.108421676724721e-06, + "loss": 0.9732, + "step": 3767 + }, + { + "epoch": 0.31, + "grad_norm": 4.808841322327526, + "learning_rate": 8.107384694100355e-06, + "loss": 0.9966, + "step": 3768 + }, + { + "epoch": 0.31, + "grad_norm": 4.424400232186057, + "learning_rate": 8.106347493661846e-06, + "loss": 0.9183, + "step": 3769 + }, + { + "epoch": 0.31, + "grad_norm": 3.6727410221654617, + "learning_rate": 8.105310075481895e-06, + "loss": 0.7357, + "step": 3770 + }, + { + "epoch": 0.31, + "grad_norm": 4.092483526607276, + "learning_rate": 8.104272439633225e-06, + "loss": 1.1123, + "step": 3771 + }, + { + "epoch": 0.31, + "grad_norm": 4.640259166857718, + "learning_rate": 8.103234586188564e-06, + "loss": 1.1403, + "step": 3772 + }, + { + "epoch": 0.31, + "grad_norm": 3.7035333161123165, + "learning_rate": 8.102196515220664e-06, + "loss": 0.6131, + "step": 3773 + }, + { + "epoch": 0.31, + "grad_norm": 3.32058535500473, + "learning_rate": 8.10115822680229e-06, + "loss": 0.6775, + "step": 3774 + }, + { + "epoch": 0.31, + "grad_norm": 3.5768608057991385, + "learning_rate": 8.100119721006221e-06, + "loss": 0.7513, + "step": 3775 + }, + { + "epoch": 0.31, + "grad_norm": 4.461663425433065, + "learning_rate": 8.09908099790525e-06, + "loss": 1.1056, + "step": 3776 + }, + { + "epoch": 0.31, + "grad_norm": 4.728957538433035, + "learning_rate": 8.098042057572188e-06, + "loss": 1.4042, + "step": 3777 + }, + { + "epoch": 0.31, + "grad_norm": 5.687046450091034, + "learning_rate": 8.097002900079862e-06, + "loss": 1.29, + "step": 3778 + }, + { + "epoch": 0.31, + "grad_norm": 4.511627277637122, + "learning_rate": 8.095963525501111e-06, + "loss": 1.322, + "step": 3779 + }, + { + "epoch": 0.31, + "grad_norm": 2.8622060976057337, + "learning_rate": 8.094923933908789e-06, + "loss": 0.5612, + "step": 3780 + }, + { + "epoch": 0.31, + "grad_norm": 3.995257314579237, + "learning_rate": 8.093884125375769e-06, + "loss": 0.9435, + "step": 3781 + }, + { + "epoch": 0.31, + "grad_norm": 3.2454494244850616, + "learning_rate": 8.092844099974936e-06, + "loss": 0.3857, + "step": 3782 + }, + { + "epoch": 0.31, + "grad_norm": 3.781446530884619, + "learning_rate": 8.091803857779194e-06, + "loss": 0.8944, + "step": 3783 + }, + { + "epoch": 0.31, + "grad_norm": 2.7753792270276065, + "learning_rate": 8.090763398861455e-06, + "loss": 0.5534, + "step": 3784 + }, + { + "epoch": 0.31, + "grad_norm": 2.6585854485006717, + "learning_rate": 8.089722723294654e-06, + "loss": 0.5455, + "step": 3785 + }, + { + "epoch": 0.31, + "grad_norm": 3.2715132195590995, + "learning_rate": 8.088681831151737e-06, + "loss": 0.9291, + "step": 3786 + }, + { + "epoch": 0.31, + "grad_norm": 3.6559595986554685, + "learning_rate": 8.087640722505663e-06, + "loss": 0.7922, + "step": 3787 + }, + { + "epoch": 0.31, + "grad_norm": 2.9979353328556613, + "learning_rate": 8.086599397429413e-06, + "loss": 0.6418, + "step": 3788 + }, + { + "epoch": 0.31, + "grad_norm": 2.749128142979223, + "learning_rate": 8.085557855995979e-06, + "loss": 0.5899, + "step": 3789 + }, + { + "epoch": 0.31, + "grad_norm": 4.23753924224674, + "learning_rate": 8.084516098278367e-06, + "loss": 1.0294, + "step": 3790 + }, + { + "epoch": 0.31, + "grad_norm": 5.2067365626434885, + "learning_rate": 8.083474124349602e-06, + "loss": 1.2521, + "step": 3791 + }, + { + "epoch": 0.31, + "grad_norm": 3.053171316466954, + "learning_rate": 8.082431934282717e-06, + "loss": 0.6479, + "step": 3792 + }, + { + "epoch": 0.31, + "grad_norm": 5.178116731766731, + "learning_rate": 8.081389528150772e-06, + "loss": 1.078, + "step": 3793 + }, + { + "epoch": 0.31, + "grad_norm": 3.8930725769871146, + "learning_rate": 8.080346906026827e-06, + "loss": 0.9376, + "step": 3794 + }, + { + "epoch": 0.31, + "grad_norm": 3.874489840565244, + "learning_rate": 8.079304067983972e-06, + "loss": 0.5448, + "step": 3795 + }, + { + "epoch": 0.31, + "grad_norm": 3.0668608875262424, + "learning_rate": 8.078261014095303e-06, + "loss": 0.8477, + "step": 3796 + }, + { + "epoch": 0.31, + "grad_norm": 4.441895416564226, + "learning_rate": 8.077217744433934e-06, + "loss": 1.211, + "step": 3797 + }, + { + "epoch": 0.31, + "grad_norm": 2.2771698264465536, + "learning_rate": 8.076174259072994e-06, + "loss": 0.5761, + "step": 3798 + }, + { + "epoch": 0.31, + "grad_norm": 3.614567455901114, + "learning_rate": 8.075130558085625e-06, + "loss": 1.0329, + "step": 3799 + }, + { + "epoch": 0.31, + "grad_norm": 3.913152393460195, + "learning_rate": 8.074086641544985e-06, + "loss": 0.7162, + "step": 3800 + }, + { + "epoch": 0.31, + "grad_norm": 2.877290036991181, + "learning_rate": 8.073042509524252e-06, + "loss": 0.6069, + "step": 3801 + }, + { + "epoch": 0.31, + "grad_norm": 3.6871844033861536, + "learning_rate": 8.071998162096613e-06, + "loss": 0.6831, + "step": 3802 + }, + { + "epoch": 0.31, + "grad_norm": 3.592996585961057, + "learning_rate": 8.070953599335271e-06, + "loss": 1.1962, + "step": 3803 + }, + { + "epoch": 0.31, + "grad_norm": 3.986885576237931, + "learning_rate": 8.069908821313446e-06, + "loss": 0.8866, + "step": 3804 + }, + { + "epoch": 0.31, + "grad_norm": 2.7512065263487377, + "learning_rate": 8.068863828104377e-06, + "loss": 0.6925, + "step": 3805 + }, + { + "epoch": 0.31, + "grad_norm": 3.0276423770928598, + "learning_rate": 8.067818619781304e-06, + "loss": 0.6365, + "step": 3806 + }, + { + "epoch": 0.31, + "grad_norm": 2.167495623338151, + "learning_rate": 8.0667731964175e-06, + "loss": 0.6901, + "step": 3807 + }, + { + "epoch": 0.31, + "grad_norm": 2.766886335698619, + "learning_rate": 8.065727558086241e-06, + "loss": 0.4542, + "step": 3808 + }, + { + "epoch": 0.31, + "grad_norm": 4.171617751108701, + "learning_rate": 8.064681704860824e-06, + "loss": 0.8883, + "step": 3809 + }, + { + "epoch": 0.31, + "grad_norm": 2.7588444811396613, + "learning_rate": 8.063635636814555e-06, + "loss": 0.7518, + "step": 3810 + }, + { + "epoch": 0.31, + "grad_norm": 4.447026050799633, + "learning_rate": 8.06258935402076e-06, + "loss": 1.1046, + "step": 3811 + }, + { + "epoch": 0.31, + "grad_norm": 3.0596379802357623, + "learning_rate": 8.061542856552782e-06, + "loss": 0.6165, + "step": 3812 + }, + { + "epoch": 0.31, + "grad_norm": 4.767255884519906, + "learning_rate": 8.06049614448397e-06, + "loss": 1.2185, + "step": 3813 + }, + { + "epoch": 0.31, + "grad_norm": 2.873121207979868, + "learning_rate": 8.059449217887703e-06, + "loss": 0.7159, + "step": 3814 + }, + { + "epoch": 0.31, + "grad_norm": 2.283891610711034, + "learning_rate": 8.058402076837357e-06, + "loss": 0.5596, + "step": 3815 + }, + { + "epoch": 0.31, + "grad_norm": 4.062392687744952, + "learning_rate": 8.057354721406336e-06, + "loss": 0.8439, + "step": 3816 + }, + { + "epoch": 0.31, + "grad_norm": 4.557955428306261, + "learning_rate": 8.056307151668056e-06, + "loss": 1.1224, + "step": 3817 + }, + { + "epoch": 0.31, + "grad_norm": 4.138074034895307, + "learning_rate": 8.055259367695944e-06, + "loss": 1.0132, + "step": 3818 + }, + { + "epoch": 0.31, + "grad_norm": 2.846901873436069, + "learning_rate": 8.054211369563448e-06, + "loss": 0.7708, + "step": 3819 + }, + { + "epoch": 0.31, + "grad_norm": 4.566826926447175, + "learning_rate": 8.053163157344025e-06, + "loss": 1.1192, + "step": 3820 + }, + { + "epoch": 0.31, + "grad_norm": 5.438548471422388, + "learning_rate": 8.052114731111154e-06, + "loss": 1.3369, + "step": 3821 + }, + { + "epoch": 0.31, + "grad_norm": 3.2034914657844618, + "learning_rate": 8.051066090938325e-06, + "loss": 0.6045, + "step": 3822 + }, + { + "epoch": 0.31, + "grad_norm": 4.354183372517673, + "learning_rate": 8.050017236899038e-06, + "loss": 0.6668, + "step": 3823 + }, + { + "epoch": 0.31, + "grad_norm": 5.856461759994505, + "learning_rate": 8.048968169066817e-06, + "loss": 1.0358, + "step": 3824 + }, + { + "epoch": 0.31, + "grad_norm": 3.1394448375714346, + "learning_rate": 8.047918887515199e-06, + "loss": 0.6115, + "step": 3825 + }, + { + "epoch": 0.31, + "grad_norm": 3.307289435541749, + "learning_rate": 8.046869392317729e-06, + "loss": 0.4689, + "step": 3826 + }, + { + "epoch": 0.31, + "grad_norm": 3.4991103377960795, + "learning_rate": 8.045819683547976e-06, + "loss": 0.6016, + "step": 3827 + }, + { + "epoch": 0.31, + "grad_norm": 2.902513823352388, + "learning_rate": 8.044769761279516e-06, + "loss": 0.8296, + "step": 3828 + }, + { + "epoch": 0.31, + "grad_norm": 1.7915524409926955, + "learning_rate": 8.04371962558595e-06, + "loss": 0.4232, + "step": 3829 + }, + { + "epoch": 0.31, + "grad_norm": 3.357581688423257, + "learning_rate": 8.042669276540882e-06, + "loss": 0.7239, + "step": 3830 + }, + { + "epoch": 0.31, + "grad_norm": 5.034115483169221, + "learning_rate": 8.041618714217941e-06, + "loss": 1.0947, + "step": 3831 + }, + { + "epoch": 0.31, + "grad_norm": 3.4102344394295265, + "learning_rate": 8.040567938690764e-06, + "loss": 0.7779, + "step": 3832 + }, + { + "epoch": 0.31, + "grad_norm": 2.9394698900528162, + "learning_rate": 8.039516950033008e-06, + "loss": 0.7769, + "step": 3833 + }, + { + "epoch": 0.31, + "grad_norm": 3.7683839440198894, + "learning_rate": 8.038465748318342e-06, + "loss": 0.7318, + "step": 3834 + }, + { + "epoch": 0.31, + "grad_norm": 3.805228173708322, + "learning_rate": 8.03741433362045e-06, + "loss": 0.9185, + "step": 3835 + }, + { + "epoch": 0.31, + "grad_norm": 4.114508005145592, + "learning_rate": 8.036362706013033e-06, + "loss": 1.2705, + "step": 3836 + }, + { + "epoch": 0.31, + "grad_norm": 2.760045632096391, + "learning_rate": 8.035310865569806e-06, + "loss": 0.7342, + "step": 3837 + }, + { + "epoch": 0.31, + "grad_norm": 4.133031716360296, + "learning_rate": 8.034258812364492e-06, + "loss": 0.6147, + "step": 3838 + }, + { + "epoch": 0.31, + "grad_norm": 2.8580255170415314, + "learning_rate": 8.033206546470846e-06, + "loss": 0.6132, + "step": 3839 + }, + { + "epoch": 0.31, + "grad_norm": 3.789984994703072, + "learning_rate": 8.032154067962617e-06, + "loss": 0.9953, + "step": 3840 + }, + { + "epoch": 0.31, + "grad_norm": 4.252557561048338, + "learning_rate": 8.031101376913586e-06, + "loss": 0.693, + "step": 3841 + }, + { + "epoch": 0.31, + "grad_norm": 3.403266683476786, + "learning_rate": 8.03004847339754e-06, + "loss": 1.0276, + "step": 3842 + }, + { + "epoch": 0.31, + "grad_norm": 1.288651909984002, + "learning_rate": 8.028995357488284e-06, + "loss": 0.1872, + "step": 3843 + }, + { + "epoch": 0.31, + "grad_norm": 3.199625220667626, + "learning_rate": 8.027942029259633e-06, + "loss": 0.5979, + "step": 3844 + }, + { + "epoch": 0.31, + "grad_norm": 5.0408324902372055, + "learning_rate": 8.026888488785426e-06, + "loss": 0.9398, + "step": 3845 + }, + { + "epoch": 0.31, + "grad_norm": 4.251594099668823, + "learning_rate": 8.025834736139506e-06, + "loss": 0.8767, + "step": 3846 + }, + { + "epoch": 0.31, + "grad_norm": 2.90119502716278, + "learning_rate": 8.024780771395741e-06, + "loss": 0.7032, + "step": 3847 + }, + { + "epoch": 0.31, + "grad_norm": 5.3691113441484335, + "learning_rate": 8.023726594628008e-06, + "loss": 1.0865, + "step": 3848 + }, + { + "epoch": 0.31, + "grad_norm": 3.272557560423606, + "learning_rate": 8.0226722059102e-06, + "loss": 0.9431, + "step": 3849 + }, + { + "epoch": 0.31, + "grad_norm": 5.1018996779982, + "learning_rate": 8.021617605316225e-06, + "loss": 1.1198, + "step": 3850 + }, + { + "epoch": 0.31, + "grad_norm": 4.34270314985889, + "learning_rate": 8.020562792920007e-06, + "loss": 1.4252, + "step": 3851 + }, + { + "epoch": 0.31, + "grad_norm": 3.6576903851374674, + "learning_rate": 8.019507768795481e-06, + "loss": 0.5995, + "step": 3852 + }, + { + "epoch": 0.31, + "grad_norm": 2.676769230408065, + "learning_rate": 8.018452533016604e-06, + "loss": 0.6054, + "step": 3853 + }, + { + "epoch": 0.32, + "grad_norm": 5.304160513568824, + "learning_rate": 8.017397085657337e-06, + "loss": 1.1721, + "step": 3854 + }, + { + "epoch": 0.32, + "grad_norm": 3.834167523884977, + "learning_rate": 8.016341426791669e-06, + "loss": 1.001, + "step": 3855 + }, + { + "epoch": 0.32, + "grad_norm": 3.332024439659001, + "learning_rate": 8.015285556493592e-06, + "loss": 0.4388, + "step": 3856 + }, + { + "epoch": 0.32, + "grad_norm": 4.011369652265117, + "learning_rate": 8.014229474837122e-06, + "loss": 1.1744, + "step": 3857 + }, + { + "epoch": 0.32, + "grad_norm": 3.196051480762647, + "learning_rate": 8.013173181896283e-06, + "loss": 0.4594, + "step": 3858 + }, + { + "epoch": 0.32, + "grad_norm": 3.8879096058266005, + "learning_rate": 8.012116677745116e-06, + "loss": 1.116, + "step": 3859 + }, + { + "epoch": 0.32, + "grad_norm": 3.1754368357717064, + "learning_rate": 8.011059962457681e-06, + "loss": 0.605, + "step": 3860 + }, + { + "epoch": 0.32, + "grad_norm": 2.411006122417595, + "learning_rate": 8.010003036108045e-06, + "loss": 0.402, + "step": 3861 + }, + { + "epoch": 0.32, + "grad_norm": 3.7769095221665934, + "learning_rate": 8.008945898770298e-06, + "loss": 0.5581, + "step": 3862 + }, + { + "epoch": 0.32, + "grad_norm": 3.4745966802453316, + "learning_rate": 8.007888550518536e-06, + "loss": 0.5202, + "step": 3863 + }, + { + "epoch": 0.32, + "grad_norm": 3.0437840671956957, + "learning_rate": 8.006830991426879e-06, + "loss": 0.8083, + "step": 3864 + }, + { + "epoch": 0.32, + "grad_norm": 3.4753333783047267, + "learning_rate": 8.005773221569453e-06, + "loss": 0.8619, + "step": 3865 + }, + { + "epoch": 0.32, + "grad_norm": 4.218536691525492, + "learning_rate": 8.004715241020407e-06, + "loss": 1.2444, + "step": 3866 + }, + { + "epoch": 0.32, + "grad_norm": 4.707391376778872, + "learning_rate": 8.0036570498539e-06, + "loss": 1.2269, + "step": 3867 + }, + { + "epoch": 0.32, + "grad_norm": 3.269846290227814, + "learning_rate": 8.002598648144104e-06, + "loss": 0.7239, + "step": 3868 + }, + { + "epoch": 0.32, + "grad_norm": 4.416247878295309, + "learning_rate": 8.00154003596521e-06, + "loss": 0.8834, + "step": 3869 + }, + { + "epoch": 0.32, + "grad_norm": 2.61950119442241, + "learning_rate": 8.000481213391422e-06, + "loss": 0.5118, + "step": 3870 + }, + { + "epoch": 0.32, + "grad_norm": 4.452753986663066, + "learning_rate": 7.999422180496961e-06, + "loss": 1.2589, + "step": 3871 + }, + { + "epoch": 0.32, + "grad_norm": 3.1217098336646676, + "learning_rate": 7.998362937356057e-06, + "loss": 0.7483, + "step": 3872 + }, + { + "epoch": 0.32, + "grad_norm": 2.665134235943946, + "learning_rate": 7.99730348404296e-06, + "loss": 0.57, + "step": 3873 + }, + { + "epoch": 0.32, + "grad_norm": 4.284385585767888, + "learning_rate": 7.996243820631936e-06, + "loss": 1.1521, + "step": 3874 + }, + { + "epoch": 0.32, + "grad_norm": 3.167941665748867, + "learning_rate": 7.995183947197256e-06, + "loss": 0.8497, + "step": 3875 + }, + { + "epoch": 0.32, + "grad_norm": 4.1403467646995615, + "learning_rate": 7.994123863813217e-06, + "loss": 0.8655, + "step": 3876 + }, + { + "epoch": 0.32, + "grad_norm": 3.602304879232952, + "learning_rate": 7.993063570554128e-06, + "loss": 0.6588, + "step": 3877 + }, + { + "epoch": 0.32, + "grad_norm": 3.783080663293601, + "learning_rate": 7.992003067494307e-06, + "loss": 0.8908, + "step": 3878 + }, + { + "epoch": 0.32, + "grad_norm": 2.8033073441225937, + "learning_rate": 7.990942354708093e-06, + "loss": 0.6443, + "step": 3879 + }, + { + "epoch": 0.32, + "grad_norm": 3.077322224246064, + "learning_rate": 7.989881432269835e-06, + "loss": 0.8065, + "step": 3880 + }, + { + "epoch": 0.32, + "grad_norm": 3.7208368668094294, + "learning_rate": 7.988820300253902e-06, + "loss": 0.755, + "step": 3881 + }, + { + "epoch": 0.32, + "grad_norm": 3.6569711380052623, + "learning_rate": 7.987758958734672e-06, + "loss": 1.0911, + "step": 3882 + }, + { + "epoch": 0.32, + "grad_norm": 3.7112812737009873, + "learning_rate": 7.986697407786543e-06, + "loss": 1.043, + "step": 3883 + }, + { + "epoch": 0.32, + "grad_norm": 3.831832105716206, + "learning_rate": 7.985635647483922e-06, + "loss": 1.0517, + "step": 3884 + }, + { + "epoch": 0.32, + "grad_norm": 3.195462499961567, + "learning_rate": 7.984573677901238e-06, + "loss": 0.6196, + "step": 3885 + }, + { + "epoch": 0.32, + "grad_norm": 3.0927161199014352, + "learning_rate": 7.983511499112927e-06, + "loss": 0.747, + "step": 3886 + }, + { + "epoch": 0.32, + "grad_norm": 3.1604091144922686, + "learning_rate": 7.982449111193445e-06, + "loss": 0.7857, + "step": 3887 + }, + { + "epoch": 0.32, + "grad_norm": 4.184526063278354, + "learning_rate": 7.98138651421726e-06, + "loss": 0.7998, + "step": 3888 + }, + { + "epoch": 0.32, + "grad_norm": 5.414539531775396, + "learning_rate": 7.980323708258854e-06, + "loss": 1.2143, + "step": 3889 + }, + { + "epoch": 0.32, + "grad_norm": 5.0841779869278705, + "learning_rate": 7.979260693392726e-06, + "loss": 1.0425, + "step": 3890 + }, + { + "epoch": 0.32, + "grad_norm": 3.97584223991958, + "learning_rate": 7.978197469693392e-06, + "loss": 0.7008, + "step": 3891 + }, + { + "epoch": 0.32, + "grad_norm": 3.040826383131776, + "learning_rate": 7.977134037235375e-06, + "loss": 0.5522, + "step": 3892 + }, + { + "epoch": 0.32, + "grad_norm": 3.171089779923319, + "learning_rate": 7.976070396093217e-06, + "loss": 0.5813, + "step": 3893 + }, + { + "epoch": 0.32, + "grad_norm": 2.3970568499228593, + "learning_rate": 7.975006546341478e-06, + "loss": 0.366, + "step": 3894 + }, + { + "epoch": 0.32, + "grad_norm": 3.1551192192534327, + "learning_rate": 7.973942488054726e-06, + "loss": 0.6433, + "step": 3895 + }, + { + "epoch": 0.32, + "grad_norm": 2.4197719603725343, + "learning_rate": 7.97287822130755e-06, + "loss": 0.5926, + "step": 3896 + }, + { + "epoch": 0.32, + "grad_norm": 3.6648438074557177, + "learning_rate": 7.971813746174548e-06, + "loss": 0.5191, + "step": 3897 + }, + { + "epoch": 0.32, + "grad_norm": 4.228504648321421, + "learning_rate": 7.970749062730336e-06, + "loss": 0.8926, + "step": 3898 + }, + { + "epoch": 0.32, + "grad_norm": 4.388096166572964, + "learning_rate": 7.969684171049543e-06, + "loss": 0.7807, + "step": 3899 + }, + { + "epoch": 0.32, + "grad_norm": 5.4729634968726595, + "learning_rate": 7.968619071206813e-06, + "loss": 1.2072, + "step": 3900 + }, + { + "epoch": 0.32, + "grad_norm": 4.345484389252216, + "learning_rate": 7.967553763276808e-06, + "loss": 0.8457, + "step": 3901 + }, + { + "epoch": 0.32, + "grad_norm": 3.7935286469224163, + "learning_rate": 7.966488247334196e-06, + "loss": 0.8782, + "step": 3902 + }, + { + "epoch": 0.32, + "grad_norm": 4.29533451863232, + "learning_rate": 7.96542252345367e-06, + "loss": 0.58, + "step": 3903 + }, + { + "epoch": 0.32, + "grad_norm": 5.365676149628274, + "learning_rate": 7.96435659170993e-06, + "loss": 1.2744, + "step": 3904 + }, + { + "epoch": 0.32, + "grad_norm": 3.6534210181216107, + "learning_rate": 7.963290452177695e-06, + "loss": 0.9971, + "step": 3905 + }, + { + "epoch": 0.32, + "grad_norm": 1.6606709479755615, + "learning_rate": 7.962224104931692e-06, + "loss": 0.2075, + "step": 3906 + }, + { + "epoch": 0.32, + "grad_norm": 3.692509435384667, + "learning_rate": 7.961157550046674e-06, + "loss": 0.8298, + "step": 3907 + }, + { + "epoch": 0.32, + "grad_norm": 3.3064776406188825, + "learning_rate": 7.9600907875974e-06, + "loss": 0.6335, + "step": 3908 + }, + { + "epoch": 0.32, + "grad_norm": 3.2577188657508187, + "learning_rate": 7.959023817658642e-06, + "loss": 0.9199, + "step": 3909 + }, + { + "epoch": 0.32, + "grad_norm": 2.065028697756866, + "learning_rate": 7.957956640305192e-06, + "loss": 0.4054, + "step": 3910 + }, + { + "epoch": 0.32, + "grad_norm": 1.275405899307535, + "learning_rate": 7.956889255611855e-06, + "loss": 0.1956, + "step": 3911 + }, + { + "epoch": 0.32, + "grad_norm": 2.874605605122154, + "learning_rate": 7.955821663653448e-06, + "loss": 0.5685, + "step": 3912 + }, + { + "epoch": 0.32, + "grad_norm": 3.4400953132304206, + "learning_rate": 7.95475386450481e-06, + "loss": 0.8722, + "step": 3913 + }, + { + "epoch": 0.32, + "grad_norm": 4.918856789489256, + "learning_rate": 7.953685858240783e-06, + "loss": 1.1118, + "step": 3914 + }, + { + "epoch": 0.32, + "grad_norm": 5.0210892735943915, + "learning_rate": 7.952617644936231e-06, + "loss": 1.0947, + "step": 3915 + }, + { + "epoch": 0.32, + "grad_norm": 4.330205720746106, + "learning_rate": 7.951549224666034e-06, + "loss": 1.2001, + "step": 3916 + }, + { + "epoch": 0.32, + "grad_norm": 3.272606217468375, + "learning_rate": 7.950480597505079e-06, + "loss": 0.834, + "step": 3917 + }, + { + "epoch": 0.32, + "grad_norm": 3.6360691348276872, + "learning_rate": 7.949411763528276e-06, + "loss": 0.7878, + "step": 3918 + }, + { + "epoch": 0.32, + "grad_norm": 3.357258381230591, + "learning_rate": 7.948342722810544e-06, + "loss": 0.6788, + "step": 3919 + }, + { + "epoch": 0.32, + "grad_norm": 4.790471200518028, + "learning_rate": 7.94727347542682e-06, + "loss": 1.0941, + "step": 3920 + }, + { + "epoch": 0.32, + "grad_norm": 2.880762080944756, + "learning_rate": 7.946204021452049e-06, + "loss": 0.5526, + "step": 3921 + }, + { + "epoch": 0.32, + "grad_norm": 4.906717414163718, + "learning_rate": 7.9451343609612e-06, + "loss": 1.049, + "step": 3922 + }, + { + "epoch": 0.32, + "grad_norm": 3.4014886526948636, + "learning_rate": 7.94406449402925e-06, + "loss": 0.7849, + "step": 3923 + }, + { + "epoch": 0.32, + "grad_norm": 2.5413370736232705, + "learning_rate": 7.942994420731189e-06, + "loss": 0.402, + "step": 3924 + }, + { + "epoch": 0.32, + "grad_norm": 3.1905916450375775, + "learning_rate": 7.94192414114203e-06, + "loss": 0.9411, + "step": 3925 + }, + { + "epoch": 0.32, + "grad_norm": 2.654706974498102, + "learning_rate": 7.940853655336791e-06, + "loss": 0.5907, + "step": 3926 + }, + { + "epoch": 0.32, + "grad_norm": 3.672345696377473, + "learning_rate": 7.93978296339051e-06, + "loss": 0.7108, + "step": 3927 + }, + { + "epoch": 0.32, + "grad_norm": 3.6847837849040816, + "learning_rate": 7.938712065378235e-06, + "loss": 0.9222, + "step": 3928 + }, + { + "epoch": 0.32, + "grad_norm": 4.046999168227711, + "learning_rate": 7.937640961375036e-06, + "loss": 0.8271, + "step": 3929 + }, + { + "epoch": 0.32, + "grad_norm": 3.5800151195645653, + "learning_rate": 7.936569651455987e-06, + "loss": 0.8926, + "step": 3930 + }, + { + "epoch": 0.32, + "grad_norm": 3.6666933200400034, + "learning_rate": 7.93549813569619e-06, + "loss": 0.6696, + "step": 3931 + }, + { + "epoch": 0.32, + "grad_norm": 4.425401258509315, + "learning_rate": 7.934426414170747e-06, + "loss": 1.005, + "step": 3932 + }, + { + "epoch": 0.32, + "grad_norm": 3.191697810533967, + "learning_rate": 7.933354486954781e-06, + "loss": 0.9103, + "step": 3933 + }, + { + "epoch": 0.32, + "grad_norm": 2.755872248436024, + "learning_rate": 7.932282354123433e-06, + "loss": 0.4972, + "step": 3934 + }, + { + "epoch": 0.32, + "grad_norm": 5.279827015704212, + "learning_rate": 7.931210015751854e-06, + "loss": 0.9444, + "step": 3935 + }, + { + "epoch": 0.32, + "grad_norm": 1.8322074800850854, + "learning_rate": 7.93013747191521e-06, + "loss": 0.3787, + "step": 3936 + }, + { + "epoch": 0.32, + "grad_norm": 4.503534226337361, + "learning_rate": 7.92906472268868e-06, + "loss": 1.0117, + "step": 3937 + }, + { + "epoch": 0.32, + "grad_norm": 3.9678540703978524, + "learning_rate": 7.92799176814746e-06, + "loss": 0.7385, + "step": 3938 + }, + { + "epoch": 0.32, + "grad_norm": 3.4197151347721753, + "learning_rate": 7.92691860836676e-06, + "loss": 0.9999, + "step": 3939 + }, + { + "epoch": 0.32, + "grad_norm": 4.05089929572889, + "learning_rate": 7.925845243421803e-06, + "loss": 1.1175, + "step": 3940 + }, + { + "epoch": 0.32, + "grad_norm": 3.487274292627951, + "learning_rate": 7.924771673387828e-06, + "loss": 0.7277, + "step": 3941 + }, + { + "epoch": 0.32, + "grad_norm": 3.203089793147763, + "learning_rate": 7.92369789834009e-06, + "loss": 0.6635, + "step": 3942 + }, + { + "epoch": 0.32, + "grad_norm": 4.680738296993968, + "learning_rate": 7.92262391835385e-06, + "loss": 1.0936, + "step": 3943 + }, + { + "epoch": 0.32, + "grad_norm": 4.252418062665237, + "learning_rate": 7.921549733504394e-06, + "loss": 1.4216, + "step": 3944 + }, + { + "epoch": 0.32, + "grad_norm": 3.721036769804235, + "learning_rate": 7.920475343867016e-06, + "loss": 0.8752, + "step": 3945 + }, + { + "epoch": 0.32, + "grad_norm": 4.060450737205802, + "learning_rate": 7.919400749517026e-06, + "loss": 0.9217, + "step": 3946 + }, + { + "epoch": 0.32, + "grad_norm": 3.1952284512374827, + "learning_rate": 7.91832595052975e-06, + "loss": 0.6315, + "step": 3947 + }, + { + "epoch": 0.32, + "grad_norm": 2.6845820229727635, + "learning_rate": 7.917250946980525e-06, + "loss": 0.6788, + "step": 3948 + }, + { + "epoch": 0.32, + "grad_norm": 3.2111912846058237, + "learning_rate": 7.916175738944703e-06, + "loss": 0.962, + "step": 3949 + }, + { + "epoch": 0.32, + "grad_norm": 3.310775447454021, + "learning_rate": 7.915100326497655e-06, + "loss": 0.5742, + "step": 3950 + }, + { + "epoch": 0.32, + "grad_norm": 4.454874359101468, + "learning_rate": 7.91402470971476e-06, + "loss": 1.0579, + "step": 3951 + }, + { + "epoch": 0.32, + "grad_norm": 3.777140529355507, + "learning_rate": 7.912948888671415e-06, + "loss": 0.848, + "step": 3952 + }, + { + "epoch": 0.32, + "grad_norm": 4.917375205195893, + "learning_rate": 7.911872863443028e-06, + "loss": 1.0811, + "step": 3953 + }, + { + "epoch": 0.32, + "grad_norm": 4.20008656594547, + "learning_rate": 7.910796634105029e-06, + "loss": 0.8757, + "step": 3954 + }, + { + "epoch": 0.32, + "grad_norm": 3.3461871368791574, + "learning_rate": 7.90972020073285e-06, + "loss": 0.6789, + "step": 3955 + }, + { + "epoch": 0.32, + "grad_norm": 4.726416038016359, + "learning_rate": 7.90864356340195e-06, + "loss": 0.8677, + "step": 3956 + }, + { + "epoch": 0.32, + "grad_norm": 3.5161194158107385, + "learning_rate": 7.907566722187796e-06, + "loss": 0.8401, + "step": 3957 + }, + { + "epoch": 0.32, + "grad_norm": 4.44494707113555, + "learning_rate": 7.906489677165868e-06, + "loss": 0.765, + "step": 3958 + }, + { + "epoch": 0.32, + "grad_norm": 3.571669862485657, + "learning_rate": 7.905412428411663e-06, + "loss": 0.9774, + "step": 3959 + }, + { + "epoch": 0.32, + "grad_norm": 3.7726222805080134, + "learning_rate": 7.90433497600069e-06, + "loss": 0.8009, + "step": 3960 + }, + { + "epoch": 0.32, + "grad_norm": 2.904481349413059, + "learning_rate": 7.903257320008475e-06, + "loss": 0.6383, + "step": 3961 + }, + { + "epoch": 0.32, + "grad_norm": 4.329707161356198, + "learning_rate": 7.902179460510558e-06, + "loss": 1.1996, + "step": 3962 + }, + { + "epoch": 0.32, + "grad_norm": 4.031137913745396, + "learning_rate": 7.901101397582489e-06, + "loss": 1.0229, + "step": 3963 + }, + { + "epoch": 0.32, + "grad_norm": 5.233129840323534, + "learning_rate": 7.90002313129984e-06, + "loss": 1.5545, + "step": 3964 + }, + { + "epoch": 0.32, + "grad_norm": 3.5120920452991, + "learning_rate": 7.898944661738188e-06, + "loss": 0.8688, + "step": 3965 + }, + { + "epoch": 0.32, + "grad_norm": 3.8309770680435657, + "learning_rate": 7.897865988973133e-06, + "loss": 0.6724, + "step": 3966 + }, + { + "epoch": 0.32, + "grad_norm": 2.735828399323141, + "learning_rate": 7.896787113080284e-06, + "loss": 0.6444, + "step": 3967 + }, + { + "epoch": 0.32, + "grad_norm": 4.959729189806863, + "learning_rate": 7.895708034135265e-06, + "loss": 1.0035, + "step": 3968 + }, + { + "epoch": 0.32, + "grad_norm": 4.190551313321349, + "learning_rate": 7.894628752213715e-06, + "loss": 0.6192, + "step": 3969 + }, + { + "epoch": 0.32, + "grad_norm": 3.8311685842134064, + "learning_rate": 7.893549267391287e-06, + "loss": 0.8306, + "step": 3970 + }, + { + "epoch": 0.32, + "grad_norm": 4.819702650845054, + "learning_rate": 7.892469579743647e-06, + "loss": 0.7833, + "step": 3971 + }, + { + "epoch": 0.32, + "grad_norm": 2.5640523641815736, + "learning_rate": 7.891389689346479e-06, + "loss": 0.3882, + "step": 3972 + }, + { + "epoch": 0.32, + "grad_norm": 2.3126765364576305, + "learning_rate": 7.890309596275476e-06, + "loss": 0.5957, + "step": 3973 + }, + { + "epoch": 0.32, + "grad_norm": 2.89337526452459, + "learning_rate": 7.88922930060635e-06, + "loss": 0.6068, + "step": 3974 + }, + { + "epoch": 0.32, + "grad_norm": 3.134076773870964, + "learning_rate": 7.888148802414824e-06, + "loss": 0.8467, + "step": 3975 + }, + { + "epoch": 0.32, + "grad_norm": 4.898634541280175, + "learning_rate": 7.887068101776633e-06, + "loss": 0.9021, + "step": 3976 + }, + { + "epoch": 0.33, + "grad_norm": 2.921800857438866, + "learning_rate": 7.885987198767538e-06, + "loss": 0.4761, + "step": 3977 + }, + { + "epoch": 0.33, + "grad_norm": 3.193341180831775, + "learning_rate": 7.884906093463295e-06, + "loss": 0.585, + "step": 3978 + }, + { + "epoch": 0.33, + "grad_norm": 2.8833964995321897, + "learning_rate": 7.883824785939694e-06, + "loss": 0.4738, + "step": 3979 + }, + { + "epoch": 0.33, + "grad_norm": 3.5041822053988834, + "learning_rate": 7.882743276272524e-06, + "loss": 0.5757, + "step": 3980 + }, + { + "epoch": 0.33, + "grad_norm": 4.479653639530403, + "learning_rate": 7.881661564537598e-06, + "loss": 1.0564, + "step": 3981 + }, + { + "epoch": 0.33, + "grad_norm": 2.7477370824532636, + "learning_rate": 7.880579650810736e-06, + "loss": 0.7131, + "step": 3982 + }, + { + "epoch": 0.33, + "grad_norm": 2.8493434819124897, + "learning_rate": 7.879497535167776e-06, + "loss": 0.5089, + "step": 3983 + }, + { + "epoch": 0.33, + "grad_norm": 4.462570167528221, + "learning_rate": 7.87841521768457e-06, + "loss": 0.9516, + "step": 3984 + }, + { + "epoch": 0.33, + "grad_norm": 2.7688333887424483, + "learning_rate": 7.877332698436986e-06, + "loss": 0.4017, + "step": 3985 + }, + { + "epoch": 0.33, + "grad_norm": 1.9950498856678862, + "learning_rate": 7.876249977500902e-06, + "loss": 0.3658, + "step": 3986 + }, + { + "epoch": 0.33, + "grad_norm": 4.095597838558024, + "learning_rate": 7.875167054952211e-06, + "loss": 1.2446, + "step": 3987 + }, + { + "epoch": 0.33, + "grad_norm": 4.749989836918884, + "learning_rate": 7.874083930866822e-06, + "loss": 1.0899, + "step": 3988 + }, + { + "epoch": 0.33, + "grad_norm": 2.9102664158338305, + "learning_rate": 7.873000605320658e-06, + "loss": 0.6445, + "step": 3989 + }, + { + "epoch": 0.33, + "grad_norm": 4.269179065194626, + "learning_rate": 7.871917078389655e-06, + "loss": 1.2587, + "step": 3990 + }, + { + "epoch": 0.33, + "grad_norm": 4.166224022227581, + "learning_rate": 7.870833350149764e-06, + "loss": 0.941, + "step": 3991 + }, + { + "epoch": 0.33, + "grad_norm": 4.300253791870946, + "learning_rate": 7.869749420676949e-06, + "loss": 0.9979, + "step": 3992 + }, + { + "epoch": 0.33, + "grad_norm": 3.4175409176355664, + "learning_rate": 7.868665290047189e-06, + "loss": 0.7883, + "step": 3993 + }, + { + "epoch": 0.33, + "grad_norm": 4.183480038731687, + "learning_rate": 7.867580958336474e-06, + "loss": 1.0306, + "step": 3994 + }, + { + "epoch": 0.33, + "grad_norm": 3.974127330270877, + "learning_rate": 7.866496425620816e-06, + "loss": 0.9258, + "step": 3995 + }, + { + "epoch": 0.33, + "grad_norm": 5.185511981526038, + "learning_rate": 7.865411691976232e-06, + "loss": 1.3501, + "step": 3996 + }, + { + "epoch": 0.33, + "grad_norm": 1.2284723128580735, + "learning_rate": 7.864326757478762e-06, + "loss": 0.1758, + "step": 3997 + }, + { + "epoch": 0.33, + "grad_norm": 3.218909079487965, + "learning_rate": 7.86324162220445e-06, + "loss": 0.8576, + "step": 3998 + }, + { + "epoch": 0.33, + "grad_norm": 5.174994398303952, + "learning_rate": 7.86215628622936e-06, + "loss": 1.5497, + "step": 3999 + }, + { + "epoch": 0.33, + "grad_norm": 2.383769075116413, + "learning_rate": 7.86107074962957e-06, + "loss": 0.6589, + "step": 4000 + }, + { + "epoch": 0.33, + "grad_norm": 3.153776451524846, + "learning_rate": 7.859985012481174e-06, + "loss": 0.5676, + "step": 4001 + }, + { + "epoch": 0.33, + "grad_norm": 3.212803702193593, + "learning_rate": 7.858899074860276e-06, + "loss": 0.6885, + "step": 4002 + }, + { + "epoch": 0.33, + "grad_norm": 5.179651835380731, + "learning_rate": 7.857812936842992e-06, + "loss": 1.0661, + "step": 4003 + }, + { + "epoch": 0.33, + "grad_norm": 2.44607333850051, + "learning_rate": 7.856726598505459e-06, + "loss": 0.5944, + "step": 4004 + }, + { + "epoch": 0.33, + "grad_norm": 4.3703513487093195, + "learning_rate": 7.855640059923826e-06, + "loss": 0.9042, + "step": 4005 + }, + { + "epoch": 0.33, + "grad_norm": 4.3136300965208285, + "learning_rate": 7.85455332117425e-06, + "loss": 1.0606, + "step": 4006 + }, + { + "epoch": 0.33, + "grad_norm": 2.5904783749373816, + "learning_rate": 7.85346638233291e-06, + "loss": 0.2983, + "step": 4007 + }, + { + "epoch": 0.33, + "grad_norm": 3.986458941824462, + "learning_rate": 7.852379243475998e-06, + "loss": 1.0047, + "step": 4008 + }, + { + "epoch": 0.33, + "grad_norm": 3.4523716432618428, + "learning_rate": 7.851291904679712e-06, + "loss": 0.8155, + "step": 4009 + }, + { + "epoch": 0.33, + "grad_norm": 2.525587156791062, + "learning_rate": 7.850204366020271e-06, + "loss": 0.4656, + "step": 4010 + }, + { + "epoch": 0.33, + "grad_norm": 3.1498400625389915, + "learning_rate": 7.84911662757391e-06, + "loss": 0.9032, + "step": 4011 + }, + { + "epoch": 0.33, + "grad_norm": 4.740206562939246, + "learning_rate": 7.848028689416872e-06, + "loss": 1.2643, + "step": 4012 + }, + { + "epoch": 0.33, + "grad_norm": 3.0050724670801054, + "learning_rate": 7.846940551625417e-06, + "loss": 0.8215, + "step": 4013 + }, + { + "epoch": 0.33, + "grad_norm": 4.3537970585947905, + "learning_rate": 7.845852214275821e-06, + "loss": 1.0797, + "step": 4014 + }, + { + "epoch": 0.33, + "grad_norm": 3.0278943481498746, + "learning_rate": 7.84476367744437e-06, + "loss": 0.69, + "step": 4015 + }, + { + "epoch": 0.33, + "grad_norm": 3.741402674292105, + "learning_rate": 7.843674941207364e-06, + "loss": 0.9737, + "step": 4016 + }, + { + "epoch": 0.33, + "grad_norm": 4.492878113334946, + "learning_rate": 7.842586005641123e-06, + "loss": 0.8798, + "step": 4017 + }, + { + "epoch": 0.33, + "grad_norm": 3.533598324291426, + "learning_rate": 7.84149687082197e-06, + "loss": 0.7147, + "step": 4018 + }, + { + "epoch": 0.33, + "grad_norm": 4.769338538009378, + "learning_rate": 7.840407536826256e-06, + "loss": 0.8957, + "step": 4019 + }, + { + "epoch": 0.33, + "grad_norm": 4.860169527682215, + "learning_rate": 7.839318003730332e-06, + "loss": 0.9229, + "step": 4020 + }, + { + "epoch": 0.33, + "grad_norm": 3.5413965574010757, + "learning_rate": 7.838228271610575e-06, + "loss": 0.3215, + "step": 4021 + }, + { + "epoch": 0.33, + "grad_norm": 3.664172854252663, + "learning_rate": 7.837138340543368e-06, + "loss": 0.9405, + "step": 4022 + }, + { + "epoch": 0.33, + "grad_norm": 4.704049865569673, + "learning_rate": 7.836048210605109e-06, + "loss": 1.0005, + "step": 4023 + }, + { + "epoch": 0.33, + "grad_norm": 3.0229149140174143, + "learning_rate": 7.834957881872215e-06, + "loss": 0.5615, + "step": 4024 + }, + { + "epoch": 0.33, + "grad_norm": 3.293901840144184, + "learning_rate": 7.83386735442111e-06, + "loss": 0.8158, + "step": 4025 + }, + { + "epoch": 0.33, + "grad_norm": 2.9068154726580633, + "learning_rate": 7.832776628328237e-06, + "loss": 0.6568, + "step": 4026 + }, + { + "epoch": 0.33, + "grad_norm": 4.3477590814031055, + "learning_rate": 7.831685703670052e-06, + "loss": 0.8672, + "step": 4027 + }, + { + "epoch": 0.33, + "grad_norm": 2.9376357638709742, + "learning_rate": 7.83059458052302e-06, + "loss": 0.6062, + "step": 4028 + }, + { + "epoch": 0.33, + "grad_norm": 5.25054189403912, + "learning_rate": 7.829503258963629e-06, + "loss": 1.1562, + "step": 4029 + }, + { + "epoch": 0.33, + "grad_norm": 5.477460824502643, + "learning_rate": 7.82841173906837e-06, + "loss": 1.5191, + "step": 4030 + }, + { + "epoch": 0.33, + "grad_norm": 5.789155502998915, + "learning_rate": 7.827320020913762e-06, + "loss": 1.2872, + "step": 4031 + }, + { + "epoch": 0.33, + "grad_norm": 4.208558129868596, + "learning_rate": 7.826228104576324e-06, + "loss": 0.9987, + "step": 4032 + }, + { + "epoch": 0.33, + "grad_norm": 4.821842829169687, + "learning_rate": 7.825135990132592e-06, + "loss": 1.5145, + "step": 4033 + }, + { + "epoch": 0.33, + "grad_norm": 3.6215811257063426, + "learning_rate": 7.824043677659127e-06, + "loss": 1.0486, + "step": 4034 + }, + { + "epoch": 0.33, + "grad_norm": 3.879403051650573, + "learning_rate": 7.822951167232488e-06, + "loss": 1.1389, + "step": 4035 + }, + { + "epoch": 0.33, + "grad_norm": 3.3982667736288654, + "learning_rate": 7.821858458929256e-06, + "loss": 0.6997, + "step": 4036 + }, + { + "epoch": 0.33, + "grad_norm": 5.189642795215071, + "learning_rate": 7.82076555282603e-06, + "loss": 1.3593, + "step": 4037 + }, + { + "epoch": 0.33, + "grad_norm": 4.261351402183224, + "learning_rate": 7.819672448999413e-06, + "loss": 1.0783, + "step": 4038 + }, + { + "epoch": 0.33, + "grad_norm": 3.0582346994452942, + "learning_rate": 7.81857914752603e-06, + "loss": 0.8212, + "step": 4039 + }, + { + "epoch": 0.33, + "grad_norm": 5.128775511789336, + "learning_rate": 7.817485648482514e-06, + "loss": 1.4036, + "step": 4040 + }, + { + "epoch": 0.33, + "grad_norm": 3.1862480949901624, + "learning_rate": 7.816391951945517e-06, + "loss": 0.5946, + "step": 4041 + }, + { + "epoch": 0.33, + "grad_norm": 3.5393890511012724, + "learning_rate": 7.8152980579917e-06, + "loss": 0.5955, + "step": 4042 + }, + { + "epoch": 0.33, + "grad_norm": 4.291111423854695, + "learning_rate": 7.814203966697742e-06, + "loss": 1.3313, + "step": 4043 + }, + { + "epoch": 0.33, + "grad_norm": 3.198816817119858, + "learning_rate": 7.813109678140333e-06, + "loss": 0.6167, + "step": 4044 + }, + { + "epoch": 0.33, + "grad_norm": 5.178105362579311, + "learning_rate": 7.812015192396178e-06, + "loss": 1.0517, + "step": 4045 + }, + { + "epoch": 0.33, + "grad_norm": 2.2557654770972406, + "learning_rate": 7.810920509541997e-06, + "loss": 0.6081, + "step": 4046 + }, + { + "epoch": 0.33, + "grad_norm": 4.158509861411965, + "learning_rate": 7.80982562965452e-06, + "loss": 1.0021, + "step": 4047 + }, + { + "epoch": 0.33, + "grad_norm": 3.6075015660901797, + "learning_rate": 7.808730552810496e-06, + "loss": 0.5844, + "step": 4048 + }, + { + "epoch": 0.33, + "grad_norm": 2.118174912930761, + "learning_rate": 7.807635279086682e-06, + "loss": 0.5398, + "step": 4049 + }, + { + "epoch": 0.33, + "grad_norm": 4.087999613701567, + "learning_rate": 7.806539808559856e-06, + "loss": 0.7191, + "step": 4050 + }, + { + "epoch": 0.33, + "grad_norm": 1.2024083038375055, + "learning_rate": 7.805444141306804e-06, + "loss": 0.2323, + "step": 4051 + }, + { + "epoch": 0.33, + "grad_norm": 4.287541535163385, + "learning_rate": 7.804348277404324e-06, + "loss": 0.802, + "step": 4052 + }, + { + "epoch": 0.33, + "grad_norm": 3.3546151266802453, + "learning_rate": 7.803252216929236e-06, + "loss": 0.7602, + "step": 4053 + }, + { + "epoch": 0.33, + "grad_norm": 5.237050962673722, + "learning_rate": 7.802155959958368e-06, + "loss": 0.6012, + "step": 4054 + }, + { + "epoch": 0.33, + "grad_norm": 3.6069184684330327, + "learning_rate": 7.80105950656856e-06, + "loss": 0.8598, + "step": 4055 + }, + { + "epoch": 0.33, + "grad_norm": 2.9434142461101187, + "learning_rate": 7.799962856836674e-06, + "loss": 0.2683, + "step": 4056 + }, + { + "epoch": 0.33, + "grad_norm": 3.135090704148495, + "learning_rate": 7.798866010839577e-06, + "loss": 0.4931, + "step": 4057 + }, + { + "epoch": 0.33, + "grad_norm": 2.8851883209140317, + "learning_rate": 7.79776896865415e-06, + "loss": 0.4785, + "step": 4058 + }, + { + "epoch": 0.33, + "grad_norm": 4.578701701147169, + "learning_rate": 7.796671730357296e-06, + "loss": 1.5124, + "step": 4059 + }, + { + "epoch": 0.33, + "grad_norm": 4.839867870120802, + "learning_rate": 7.795574296025925e-06, + "loss": 1.0868, + "step": 4060 + }, + { + "epoch": 0.33, + "grad_norm": 4.327365478228468, + "learning_rate": 7.794476665736964e-06, + "loss": 1.2185, + "step": 4061 + }, + { + "epoch": 0.33, + "grad_norm": 3.7570060148375393, + "learning_rate": 7.793378839567348e-06, + "loss": 0.7382, + "step": 4062 + }, + { + "epoch": 0.33, + "grad_norm": 2.8834551320663824, + "learning_rate": 7.792280817594033e-06, + "loss": 0.775, + "step": 4063 + }, + { + "epoch": 0.33, + "grad_norm": 3.0132087833361663, + "learning_rate": 7.791182599893985e-06, + "loss": 0.5297, + "step": 4064 + }, + { + "epoch": 0.33, + "grad_norm": 6.041447540688191, + "learning_rate": 7.790084186544183e-06, + "loss": 1.5578, + "step": 4065 + }, + { + "epoch": 0.33, + "grad_norm": 1.3766809671719165, + "learning_rate": 7.788985577621623e-06, + "loss": 0.2116, + "step": 4066 + }, + { + "epoch": 0.33, + "grad_norm": 3.3972620375857447, + "learning_rate": 7.78788677320331e-06, + "loss": 0.7537, + "step": 4067 + }, + { + "epoch": 0.33, + "grad_norm": 2.9631226229157055, + "learning_rate": 7.78678777336627e-06, + "loss": 1.0203, + "step": 4068 + }, + { + "epoch": 0.33, + "grad_norm": 2.6928746766462877, + "learning_rate": 7.785688578187534e-06, + "loss": 0.6483, + "step": 4069 + }, + { + "epoch": 0.33, + "grad_norm": 2.1103911778195075, + "learning_rate": 7.784589187744151e-06, + "loss": 0.2568, + "step": 4070 + }, + { + "epoch": 0.33, + "grad_norm": 5.080635885354421, + "learning_rate": 7.783489602113185e-06, + "loss": 0.7317, + "step": 4071 + }, + { + "epoch": 0.33, + "grad_norm": 2.883177504009893, + "learning_rate": 7.782389821371712e-06, + "loss": 0.5972, + "step": 4072 + }, + { + "epoch": 0.33, + "grad_norm": 4.327020316159079, + "learning_rate": 7.781289845596821e-06, + "loss": 0.9701, + "step": 4073 + }, + { + "epoch": 0.33, + "grad_norm": 2.7278052835176005, + "learning_rate": 7.780189674865617e-06, + "loss": 0.5983, + "step": 4074 + }, + { + "epoch": 0.33, + "grad_norm": 4.160715692148022, + "learning_rate": 7.779089309255217e-06, + "loss": 0.7912, + "step": 4075 + }, + { + "epoch": 0.33, + "grad_norm": 4.248414112566387, + "learning_rate": 7.777988748842748e-06, + "loss": 1.1924, + "step": 4076 + }, + { + "epoch": 0.33, + "grad_norm": 4.836771315743967, + "learning_rate": 7.77688799370536e-06, + "loss": 1.2628, + "step": 4077 + }, + { + "epoch": 0.33, + "grad_norm": 4.348685058036905, + "learning_rate": 7.775787043920209e-06, + "loss": 0.9582, + "step": 4078 + }, + { + "epoch": 0.33, + "grad_norm": 4.85058506058486, + "learning_rate": 7.774685899564465e-06, + "loss": 0.8381, + "step": 4079 + }, + { + "epoch": 0.33, + "grad_norm": 3.9380355388535526, + "learning_rate": 7.773584560715315e-06, + "loss": 0.8012, + "step": 4080 + }, + { + "epoch": 0.33, + "grad_norm": 3.2877638211599227, + "learning_rate": 7.772483027449957e-06, + "loss": 0.7378, + "step": 4081 + }, + { + "epoch": 0.33, + "grad_norm": 3.58337541740198, + "learning_rate": 7.771381299845607e-06, + "loss": 0.7549, + "step": 4082 + }, + { + "epoch": 0.33, + "grad_norm": 2.431569297930181, + "learning_rate": 7.770279377979488e-06, + "loss": 0.349, + "step": 4083 + }, + { + "epoch": 0.33, + "grad_norm": 5.677928014296555, + "learning_rate": 7.76917726192884e-06, + "loss": 1.2377, + "step": 4084 + }, + { + "epoch": 0.33, + "grad_norm": 5.046569984878376, + "learning_rate": 7.768074951770919e-06, + "loss": 1.0108, + "step": 4085 + }, + { + "epoch": 0.33, + "grad_norm": 4.008142292994773, + "learning_rate": 7.76697244758299e-06, + "loss": 0.5098, + "step": 4086 + }, + { + "epoch": 0.33, + "grad_norm": 3.4447675454103566, + "learning_rate": 7.765869749442334e-06, + "loss": 0.7362, + "step": 4087 + }, + { + "epoch": 0.33, + "grad_norm": 5.213593544623172, + "learning_rate": 7.764766857426247e-06, + "loss": 1.1057, + "step": 4088 + }, + { + "epoch": 0.33, + "grad_norm": 3.7927402365139145, + "learning_rate": 7.763663771612033e-06, + "loss": 0.7684, + "step": 4089 + }, + { + "epoch": 0.33, + "grad_norm": 4.515848562656643, + "learning_rate": 7.762560492077018e-06, + "loss": 0.8077, + "step": 4090 + }, + { + "epoch": 0.33, + "grad_norm": 2.687411542813131, + "learning_rate": 7.761457018898536e-06, + "loss": 0.7698, + "step": 4091 + }, + { + "epoch": 0.33, + "grad_norm": 4.173316022459569, + "learning_rate": 7.760353352153933e-06, + "loss": 0.8834, + "step": 4092 + }, + { + "epoch": 0.33, + "grad_norm": 3.5145776395485453, + "learning_rate": 7.759249491920573e-06, + "loss": 0.7676, + "step": 4093 + }, + { + "epoch": 0.33, + "grad_norm": 5.379993757189861, + "learning_rate": 7.758145438275834e-06, + "loss": 0.9946, + "step": 4094 + }, + { + "epoch": 0.33, + "grad_norm": 5.071372107617467, + "learning_rate": 7.757041191297102e-06, + "loss": 0.9479, + "step": 4095 + }, + { + "epoch": 0.33, + "grad_norm": 3.354744362202982, + "learning_rate": 7.755936751061781e-06, + "loss": 0.6599, + "step": 4096 + }, + { + "epoch": 0.33, + "grad_norm": 3.615773819555657, + "learning_rate": 7.75483211764729e-06, + "loss": 0.7168, + "step": 4097 + }, + { + "epoch": 0.33, + "grad_norm": 4.520254715236886, + "learning_rate": 7.753727291131052e-06, + "loss": 0.8517, + "step": 4098 + }, + { + "epoch": 0.34, + "grad_norm": 4.185083001280689, + "learning_rate": 7.75262227159052e-06, + "loss": 0.9201, + "step": 4099 + }, + { + "epoch": 0.34, + "grad_norm": 3.808745561079665, + "learning_rate": 7.751517059103142e-06, + "loss": 0.9437, + "step": 4100 + }, + { + "epoch": 0.34, + "grad_norm": 1.7571468364421476, + "learning_rate": 7.750411653746395e-06, + "loss": 0.4474, + "step": 4101 + }, + { + "epoch": 0.34, + "grad_norm": 4.549902143678165, + "learning_rate": 7.74930605559776e-06, + "loss": 0.8425, + "step": 4102 + }, + { + "epoch": 0.34, + "grad_norm": 4.1746358152158685, + "learning_rate": 7.748200264734735e-06, + "loss": 1.2278, + "step": 4103 + }, + { + "epoch": 0.34, + "grad_norm": 3.987321493415824, + "learning_rate": 7.747094281234833e-06, + "loss": 0.4285, + "step": 4104 + }, + { + "epoch": 0.34, + "grad_norm": 2.0173645566645195, + "learning_rate": 7.745988105175577e-06, + "loss": 0.4033, + "step": 4105 + }, + { + "epoch": 0.34, + "grad_norm": 4.354547097155099, + "learning_rate": 7.744881736634506e-06, + "loss": 1.3625, + "step": 4106 + }, + { + "epoch": 0.34, + "grad_norm": 3.332371613972039, + "learning_rate": 7.74377517568917e-06, + "loss": 0.7414, + "step": 4107 + }, + { + "epoch": 0.34, + "grad_norm": 2.1920844245645, + "learning_rate": 7.742668422417137e-06, + "loss": 0.3366, + "step": 4108 + }, + { + "epoch": 0.34, + "grad_norm": 3.4605536417478153, + "learning_rate": 7.74156147689598e-06, + "loss": 0.7064, + "step": 4109 + }, + { + "epoch": 0.34, + "grad_norm": 3.4351461374391827, + "learning_rate": 7.740454339203298e-06, + "loss": 0.6305, + "step": 4110 + }, + { + "epoch": 0.34, + "grad_norm": 4.341168273700383, + "learning_rate": 7.739347009416693e-06, + "loss": 0.9239, + "step": 4111 + }, + { + "epoch": 0.34, + "grad_norm": 3.1896550569997317, + "learning_rate": 7.738239487613783e-06, + "loss": 0.4106, + "step": 4112 + }, + { + "epoch": 0.34, + "grad_norm": 2.772165868649677, + "learning_rate": 7.737131773872202e-06, + "loss": 0.7461, + "step": 4113 + }, + { + "epoch": 0.34, + "grad_norm": 3.172701606361016, + "learning_rate": 7.736023868269598e-06, + "loss": 0.572, + "step": 4114 + }, + { + "epoch": 0.34, + "grad_norm": 3.2955721559098285, + "learning_rate": 7.734915770883624e-06, + "loss": 0.6784, + "step": 4115 + }, + { + "epoch": 0.34, + "grad_norm": 5.013100997652645, + "learning_rate": 7.733807481791959e-06, + "loss": 1.4583, + "step": 4116 + }, + { + "epoch": 0.34, + "grad_norm": 3.8655354170040597, + "learning_rate": 7.732699001072288e-06, + "loss": 1.1896, + "step": 4117 + }, + { + "epoch": 0.34, + "grad_norm": 1.9395350071445177, + "learning_rate": 7.73159032880231e-06, + "loss": 0.4108, + "step": 4118 + }, + { + "epoch": 0.34, + "grad_norm": 3.7818826869175437, + "learning_rate": 7.730481465059736e-06, + "loss": 0.7055, + "step": 4119 + }, + { + "epoch": 0.34, + "grad_norm": 5.15552039543157, + "learning_rate": 7.729372409922295e-06, + "loss": 1.4362, + "step": 4120 + }, + { + "epoch": 0.34, + "grad_norm": 3.2196597029324914, + "learning_rate": 7.728263163467727e-06, + "loss": 0.6513, + "step": 4121 + }, + { + "epoch": 0.34, + "grad_norm": 4.255860561163699, + "learning_rate": 7.727153725773783e-06, + "loss": 0.8975, + "step": 4122 + }, + { + "epoch": 0.34, + "grad_norm": 3.043423662983045, + "learning_rate": 7.726044096918232e-06, + "loss": 0.6755, + "step": 4123 + }, + { + "epoch": 0.34, + "grad_norm": 4.680203207964453, + "learning_rate": 7.724934276978855e-06, + "loss": 1.0033, + "step": 4124 + }, + { + "epoch": 0.34, + "grad_norm": 2.746901777325029, + "learning_rate": 7.723824266033444e-06, + "loss": 0.5562, + "step": 4125 + }, + { + "epoch": 0.34, + "grad_norm": 2.272360035766652, + "learning_rate": 7.722714064159805e-06, + "loss": 0.3457, + "step": 4126 + }, + { + "epoch": 0.34, + "grad_norm": 4.7240643115152325, + "learning_rate": 7.72160367143576e-06, + "loss": 0.7799, + "step": 4127 + }, + { + "epoch": 0.34, + "grad_norm": 4.591078813080817, + "learning_rate": 7.720493087939143e-06, + "loss": 1.2883, + "step": 4128 + }, + { + "epoch": 0.34, + "grad_norm": 4.210409977210689, + "learning_rate": 7.719382313747799e-06, + "loss": 0.7096, + "step": 4129 + }, + { + "epoch": 0.34, + "grad_norm": 3.6817128508281134, + "learning_rate": 7.71827134893959e-06, + "loss": 0.6766, + "step": 4130 + }, + { + "epoch": 0.34, + "grad_norm": 1.7634823470854484, + "learning_rate": 7.717160193592387e-06, + "loss": 0.4359, + "step": 4131 + }, + { + "epoch": 0.34, + "grad_norm": 2.9420876984077293, + "learning_rate": 7.716048847784082e-06, + "loss": 0.7967, + "step": 4132 + }, + { + "epoch": 0.34, + "grad_norm": 1.148358437503598, + "learning_rate": 7.714937311592573e-06, + "loss": 0.1711, + "step": 4133 + }, + { + "epoch": 0.34, + "grad_norm": 4.250724443509678, + "learning_rate": 7.713825585095775e-06, + "loss": 1.2042, + "step": 4134 + }, + { + "epoch": 0.34, + "grad_norm": 4.758507214850353, + "learning_rate": 7.712713668371612e-06, + "loss": 1.0143, + "step": 4135 + }, + { + "epoch": 0.34, + "grad_norm": 2.7488317621633676, + "learning_rate": 7.711601561498027e-06, + "loss": 0.5658, + "step": 4136 + }, + { + "epoch": 0.34, + "grad_norm": 3.397262375524663, + "learning_rate": 7.710489264552974e-06, + "loss": 0.8973, + "step": 4137 + }, + { + "epoch": 0.34, + "grad_norm": 2.83739358311308, + "learning_rate": 7.709376777614418e-06, + "loss": 0.5373, + "step": 4138 + }, + { + "epoch": 0.34, + "grad_norm": 2.5305706042930973, + "learning_rate": 7.708264100760343e-06, + "loss": 0.4427, + "step": 4139 + }, + { + "epoch": 0.34, + "grad_norm": 3.6652675405330037, + "learning_rate": 7.707151234068741e-06, + "loss": 0.735, + "step": 4140 + }, + { + "epoch": 0.34, + "grad_norm": 3.988081527589675, + "learning_rate": 7.706038177617618e-06, + "loss": 0.7667, + "step": 4141 + }, + { + "epoch": 0.34, + "grad_norm": 3.477405865454765, + "learning_rate": 7.704924931484997e-06, + "loss": 0.613, + "step": 4142 + }, + { + "epoch": 0.34, + "grad_norm": 4.063093742168863, + "learning_rate": 7.703811495748908e-06, + "loss": 1.087, + "step": 4143 + }, + { + "epoch": 0.34, + "grad_norm": 2.503004822250554, + "learning_rate": 7.702697870487401e-06, + "loss": 0.4957, + "step": 4144 + }, + { + "epoch": 0.34, + "grad_norm": 5.030254929001747, + "learning_rate": 7.701584055778536e-06, + "loss": 1.126, + "step": 4145 + }, + { + "epoch": 0.34, + "grad_norm": 4.257522661426769, + "learning_rate": 7.700470051700385e-06, + "loss": 0.8968, + "step": 4146 + }, + { + "epoch": 0.34, + "grad_norm": 3.9125424198868415, + "learning_rate": 7.699355858331038e-06, + "loss": 0.5521, + "step": 4147 + }, + { + "epoch": 0.34, + "grad_norm": 3.844248185026583, + "learning_rate": 7.698241475748592e-06, + "loss": 0.6196, + "step": 4148 + }, + { + "epoch": 0.34, + "grad_norm": 3.9157845125845605, + "learning_rate": 7.69712690403116e-06, + "loss": 0.7622, + "step": 4149 + }, + { + "epoch": 0.34, + "grad_norm": 1.053419733377103, + "learning_rate": 7.696012143256873e-06, + "loss": 0.1924, + "step": 4150 + }, + { + "epoch": 0.34, + "grad_norm": 3.6960704618009794, + "learning_rate": 7.694897193503865e-06, + "loss": 1.0025, + "step": 4151 + }, + { + "epoch": 0.34, + "grad_norm": 3.4272805195111458, + "learning_rate": 7.693782054850293e-06, + "loss": 0.8893, + "step": 4152 + }, + { + "epoch": 0.34, + "grad_norm": 2.636670957115418, + "learning_rate": 7.692666727374321e-06, + "loss": 0.7311, + "step": 4153 + }, + { + "epoch": 0.34, + "grad_norm": 3.4288545667039547, + "learning_rate": 7.691551211154132e-06, + "loss": 0.7569, + "step": 4154 + }, + { + "epoch": 0.34, + "grad_norm": 3.996161453088842, + "learning_rate": 7.690435506267916e-06, + "loss": 1.0075, + "step": 4155 + }, + { + "epoch": 0.34, + "grad_norm": 4.473680046059575, + "learning_rate": 7.689319612793878e-06, + "loss": 0.964, + "step": 4156 + }, + { + "epoch": 0.34, + "grad_norm": 5.043930889076822, + "learning_rate": 7.68820353081024e-06, + "loss": 1.343, + "step": 4157 + }, + { + "epoch": 0.34, + "grad_norm": 3.8010199690411755, + "learning_rate": 7.687087260395237e-06, + "loss": 0.962, + "step": 4158 + }, + { + "epoch": 0.34, + "grad_norm": 4.846913545191014, + "learning_rate": 7.685970801627108e-06, + "loss": 0.9527, + "step": 4159 + }, + { + "epoch": 0.34, + "grad_norm": 2.7844959560021967, + "learning_rate": 7.684854154584117e-06, + "loss": 0.5735, + "step": 4160 + }, + { + "epoch": 0.34, + "grad_norm": 4.320667753043801, + "learning_rate": 7.68373731934453e-06, + "loss": 1.2572, + "step": 4161 + }, + { + "epoch": 0.34, + "grad_norm": 3.6840322336012488, + "learning_rate": 7.682620295986642e-06, + "loss": 0.7284, + "step": 4162 + }, + { + "epoch": 0.34, + "grad_norm": 2.9959981512529237, + "learning_rate": 7.681503084588743e-06, + "loss": 0.673, + "step": 4163 + }, + { + "epoch": 0.34, + "grad_norm": 2.1346317197548474, + "learning_rate": 7.680385685229148e-06, + "loss": 0.389, + "step": 4164 + }, + { + "epoch": 0.34, + "grad_norm": 5.405414090668094, + "learning_rate": 7.679268097986183e-06, + "loss": 1.4987, + "step": 4165 + }, + { + "epoch": 0.34, + "grad_norm": 3.157420352861571, + "learning_rate": 7.678150322938183e-06, + "loss": 0.5769, + "step": 4166 + }, + { + "epoch": 0.34, + "grad_norm": 3.3666285293683225, + "learning_rate": 7.6770323601635e-06, + "loss": 0.6687, + "step": 4167 + }, + { + "epoch": 0.34, + "grad_norm": 4.277406369736445, + "learning_rate": 7.675914209740503e-06, + "loss": 0.7282, + "step": 4168 + }, + { + "epoch": 0.34, + "grad_norm": 1.9396861983972804, + "learning_rate": 7.67479587174756e-06, + "loss": 0.2985, + "step": 4169 + }, + { + "epoch": 0.34, + "grad_norm": 4.241210008332523, + "learning_rate": 7.67367734626307e-06, + "loss": 1.2679, + "step": 4170 + }, + { + "epoch": 0.34, + "grad_norm": 3.780035706850577, + "learning_rate": 7.672558633365434e-06, + "loss": 0.8957, + "step": 4171 + }, + { + "epoch": 0.34, + "grad_norm": 4.875053635380888, + "learning_rate": 7.67143973313307e-06, + "loss": 1.4268, + "step": 4172 + }, + { + "epoch": 0.34, + "grad_norm": 2.7887924259311805, + "learning_rate": 7.670320645644404e-06, + "loss": 0.6755, + "step": 4173 + }, + { + "epoch": 0.34, + "grad_norm": 4.04293308169156, + "learning_rate": 7.669201370977885e-06, + "loss": 0.9438, + "step": 4174 + }, + { + "epoch": 0.34, + "grad_norm": 4.2374797347176925, + "learning_rate": 7.668081909211964e-06, + "loss": 0.8673, + "step": 4175 + }, + { + "epoch": 0.34, + "grad_norm": 3.8677793397185134, + "learning_rate": 7.666962260425113e-06, + "loss": 0.903, + "step": 4176 + }, + { + "epoch": 0.34, + "grad_norm": 4.348804643728993, + "learning_rate": 7.665842424695815e-06, + "loss": 0.9382, + "step": 4177 + }, + { + "epoch": 0.34, + "grad_norm": 2.9757059499309753, + "learning_rate": 7.664722402102564e-06, + "loss": 0.5758, + "step": 4178 + }, + { + "epoch": 0.34, + "grad_norm": 2.5385242534408654, + "learning_rate": 7.663602192723871e-06, + "loss": 0.6308, + "step": 4179 + }, + { + "epoch": 0.34, + "grad_norm": 4.389637679225512, + "learning_rate": 7.662481796638254e-06, + "loss": 1.2565, + "step": 4180 + }, + { + "epoch": 0.34, + "grad_norm": 4.035115559574408, + "learning_rate": 7.661361213924252e-06, + "loss": 0.8521, + "step": 4181 + }, + { + "epoch": 0.34, + "grad_norm": 3.824584879682278, + "learning_rate": 7.660240444660411e-06, + "loss": 0.8049, + "step": 4182 + }, + { + "epoch": 0.34, + "grad_norm": 4.586953981275565, + "learning_rate": 7.659119488925292e-06, + "loss": 1.082, + "step": 4183 + }, + { + "epoch": 0.34, + "grad_norm": 2.9302524382227393, + "learning_rate": 7.65799834679747e-06, + "loss": 0.7177, + "step": 4184 + }, + { + "epoch": 0.34, + "grad_norm": 4.019542010284757, + "learning_rate": 7.656877018355533e-06, + "loss": 0.9594, + "step": 4185 + }, + { + "epoch": 0.34, + "grad_norm": 2.68219286689825, + "learning_rate": 7.655755503678075e-06, + "loss": 0.4558, + "step": 4186 + }, + { + "epoch": 0.34, + "grad_norm": 4.0411454882488655, + "learning_rate": 7.654633802843718e-06, + "loss": 0.9238, + "step": 4187 + }, + { + "epoch": 0.34, + "grad_norm": 3.1291071195040256, + "learning_rate": 7.653511915931082e-06, + "loss": 0.485, + "step": 4188 + }, + { + "epoch": 0.34, + "grad_norm": 3.2706829407485616, + "learning_rate": 7.65238984301881e-06, + "loss": 0.9747, + "step": 4189 + }, + { + "epoch": 0.34, + "grad_norm": 4.027229745479082, + "learning_rate": 7.651267584185554e-06, + "loss": 1.2054, + "step": 4190 + }, + { + "epoch": 0.34, + "grad_norm": 3.778073601555418, + "learning_rate": 7.650145139509978e-06, + "loss": 0.7593, + "step": 4191 + }, + { + "epoch": 0.34, + "grad_norm": 4.897854125935847, + "learning_rate": 7.649022509070761e-06, + "loss": 0.6984, + "step": 4192 + }, + { + "epoch": 0.34, + "grad_norm": 2.233075336679899, + "learning_rate": 7.647899692946594e-06, + "loss": 0.4863, + "step": 4193 + }, + { + "epoch": 0.34, + "grad_norm": 3.915603508098223, + "learning_rate": 7.646776691216185e-06, + "loss": 0.681, + "step": 4194 + }, + { + "epoch": 0.34, + "grad_norm": 4.258355781356614, + "learning_rate": 7.645653503958246e-06, + "loss": 0.7473, + "step": 4195 + }, + { + "epoch": 0.34, + "grad_norm": 1.7927823875566085, + "learning_rate": 7.64453013125151e-06, + "loss": 0.2872, + "step": 4196 + }, + { + "epoch": 0.34, + "grad_norm": 3.508724132851226, + "learning_rate": 7.643406573174724e-06, + "loss": 0.7924, + "step": 4197 + }, + { + "epoch": 0.34, + "grad_norm": 5.050262457545431, + "learning_rate": 7.642282829806639e-06, + "loss": 1.0736, + "step": 4198 + }, + { + "epoch": 0.34, + "grad_norm": 4.029013736020635, + "learning_rate": 7.641158901226026e-06, + "loss": 0.8475, + "step": 4199 + }, + { + "epoch": 0.34, + "grad_norm": 3.696466610682216, + "learning_rate": 7.64003478751167e-06, + "loss": 0.6701, + "step": 4200 + }, + { + "epoch": 0.34, + "grad_norm": 3.143479928983292, + "learning_rate": 7.638910488742364e-06, + "loss": 0.729, + "step": 4201 + }, + { + "epoch": 0.34, + "grad_norm": 4.727763069861252, + "learning_rate": 7.637786004996918e-06, + "loss": 1.209, + "step": 4202 + }, + { + "epoch": 0.34, + "grad_norm": 4.851506905628155, + "learning_rate": 7.636661336354152e-06, + "loss": 1.0456, + "step": 4203 + }, + { + "epoch": 0.34, + "grad_norm": 2.9358162237417544, + "learning_rate": 7.635536482892902e-06, + "loss": 0.6617, + "step": 4204 + }, + { + "epoch": 0.34, + "grad_norm": 5.672038616038138, + "learning_rate": 7.634411444692014e-06, + "loss": 1.3754, + "step": 4205 + }, + { + "epoch": 0.34, + "grad_norm": 2.3060864557234773, + "learning_rate": 7.63328622183035e-06, + "loss": 0.5077, + "step": 4206 + }, + { + "epoch": 0.34, + "grad_norm": 5.1549988023402165, + "learning_rate": 7.63216081438678e-06, + "loss": 1.3093, + "step": 4207 + }, + { + "epoch": 0.34, + "grad_norm": 2.4609699622097643, + "learning_rate": 7.631035222440192e-06, + "loss": 0.3794, + "step": 4208 + }, + { + "epoch": 0.34, + "grad_norm": 3.602474189020254, + "learning_rate": 7.629909446069487e-06, + "loss": 0.7048, + "step": 4209 + }, + { + "epoch": 0.34, + "grad_norm": 3.785531242848229, + "learning_rate": 7.628783485353573e-06, + "loss": 0.8278, + "step": 4210 + }, + { + "epoch": 0.34, + "grad_norm": 3.5722162724562665, + "learning_rate": 7.6276573403713796e-06, + "loss": 0.8092, + "step": 4211 + }, + { + "epoch": 0.34, + "grad_norm": 3.458727519165083, + "learning_rate": 7.626531011201841e-06, + "loss": 0.9704, + "step": 4212 + }, + { + "epoch": 0.34, + "grad_norm": 3.8927187042148437, + "learning_rate": 7.625404497923909e-06, + "loss": 1.0258, + "step": 4213 + }, + { + "epoch": 0.34, + "grad_norm": 3.848434988336024, + "learning_rate": 7.6242778006165484e-06, + "loss": 0.7578, + "step": 4214 + }, + { + "epoch": 0.34, + "grad_norm": 1.755430091464229, + "learning_rate": 7.623150919358734e-06, + "loss": 0.4261, + "step": 4215 + }, + { + "epoch": 0.34, + "grad_norm": 4.131447734058058, + "learning_rate": 7.6220238542294565e-06, + "loss": 1.1711, + "step": 4216 + }, + { + "epoch": 0.34, + "grad_norm": 2.4337178438962437, + "learning_rate": 7.620896605307717e-06, + "loss": 0.6572, + "step": 4217 + }, + { + "epoch": 0.34, + "grad_norm": 4.2111164733993345, + "learning_rate": 7.619769172672533e-06, + "loss": 0.9089, + "step": 4218 + }, + { + "epoch": 0.34, + "grad_norm": 5.3934535906184164, + "learning_rate": 7.61864155640293e-06, + "loss": 1.6321, + "step": 4219 + }, + { + "epoch": 0.34, + "grad_norm": 4.380431114335264, + "learning_rate": 7.6175137565779524e-06, + "loss": 0.9619, + "step": 4220 + }, + { + "epoch": 0.35, + "grad_norm": 3.5949226198374613, + "learning_rate": 7.616385773276651e-06, + "loss": 0.707, + "step": 4221 + }, + { + "epoch": 0.35, + "grad_norm": 4.375986072874407, + "learning_rate": 7.615257606578093e-06, + "loss": 0.9955, + "step": 4222 + }, + { + "epoch": 0.35, + "grad_norm": 3.0945232970753622, + "learning_rate": 7.6141292565613574e-06, + "loss": 0.6822, + "step": 4223 + }, + { + "epoch": 0.35, + "grad_norm": 3.121369107003071, + "learning_rate": 7.613000723305539e-06, + "loss": 0.7806, + "step": 4224 + }, + { + "epoch": 0.35, + "grad_norm": 3.842882239042579, + "learning_rate": 7.611872006889741e-06, + "loss": 1.1877, + "step": 4225 + }, + { + "epoch": 0.35, + "grad_norm": 3.235155562415778, + "learning_rate": 7.610743107393083e-06, + "loss": 0.68, + "step": 4226 + }, + { + "epoch": 0.35, + "grad_norm": 5.197630275967312, + "learning_rate": 7.609614024894694e-06, + "loss": 0.9361, + "step": 4227 + }, + { + "epoch": 0.35, + "grad_norm": 1.8510247652966472, + "learning_rate": 7.60848475947372e-06, + "loss": 0.3807, + "step": 4228 + }, + { + "epoch": 0.35, + "grad_norm": 1.3552331105985607, + "learning_rate": 7.607355311209317e-06, + "loss": 0.1851, + "step": 4229 + }, + { + "epoch": 0.35, + "grad_norm": 2.2471461785944435, + "learning_rate": 7.606225680180652e-06, + "loss": 0.3754, + "step": 4230 + }, + { + "epoch": 0.35, + "grad_norm": 3.5961883792714393, + "learning_rate": 7.605095866466912e-06, + "loss": 0.6708, + "step": 4231 + }, + { + "epoch": 0.35, + "grad_norm": 3.8820743215669724, + "learning_rate": 7.603965870147285e-06, + "loss": 0.8477, + "step": 4232 + }, + { + "epoch": 0.35, + "grad_norm": 2.420318348105564, + "learning_rate": 7.602835691300986e-06, + "loss": 0.2926, + "step": 4233 + }, + { + "epoch": 0.35, + "grad_norm": 3.7725992633943646, + "learning_rate": 7.601705330007232e-06, + "loss": 0.8095, + "step": 4234 + }, + { + "epoch": 0.35, + "grad_norm": 2.561808281233337, + "learning_rate": 7.600574786345257e-06, + "loss": 0.5723, + "step": 4235 + }, + { + "epoch": 0.35, + "grad_norm": 5.3420721342527875, + "learning_rate": 7.599444060394308e-06, + "loss": 1.0933, + "step": 4236 + }, + { + "epoch": 0.35, + "grad_norm": 2.303114674088338, + "learning_rate": 7.598313152233643e-06, + "loss": 0.5405, + "step": 4237 + }, + { + "epoch": 0.35, + "grad_norm": 5.639902615736915, + "learning_rate": 7.597182061942533e-06, + "loss": 1.2133, + "step": 4238 + }, + { + "epoch": 0.35, + "grad_norm": 4.119971811972484, + "learning_rate": 7.5960507896002636e-06, + "loss": 1.0088, + "step": 4239 + }, + { + "epoch": 0.35, + "grad_norm": 2.9814954955260253, + "learning_rate": 7.594919335286133e-06, + "loss": 0.8402, + "step": 4240 + }, + { + "epoch": 0.35, + "grad_norm": 4.235203233182246, + "learning_rate": 7.593787699079449e-06, + "loss": 0.7345, + "step": 4241 + }, + { + "epoch": 0.35, + "grad_norm": 2.8363860198831965, + "learning_rate": 7.592655881059536e-06, + "loss": 0.6156, + "step": 4242 + }, + { + "epoch": 0.35, + "grad_norm": 4.137444392466568, + "learning_rate": 7.591523881305728e-06, + "loss": 1.2916, + "step": 4243 + }, + { + "epoch": 0.35, + "grad_norm": 3.376223279509348, + "learning_rate": 7.5903916998973745e-06, + "loss": 0.7987, + "step": 4244 + }, + { + "epoch": 0.35, + "grad_norm": 2.541874168567518, + "learning_rate": 7.589259336913839e-06, + "loss": 0.552, + "step": 4245 + }, + { + "epoch": 0.35, + "grad_norm": 3.0811400669956592, + "learning_rate": 7.588126792434489e-06, + "loss": 0.777, + "step": 4246 + }, + { + "epoch": 0.35, + "grad_norm": 3.6244134270135215, + "learning_rate": 7.586994066538715e-06, + "loss": 1.0474, + "step": 4247 + }, + { + "epoch": 0.35, + "grad_norm": 3.294865922920346, + "learning_rate": 7.585861159305917e-06, + "loss": 0.6377, + "step": 4248 + }, + { + "epoch": 0.35, + "grad_norm": 5.027424272181276, + "learning_rate": 7.584728070815504e-06, + "loss": 1.2547, + "step": 4249 + }, + { + "epoch": 0.35, + "grad_norm": 4.073784678794884, + "learning_rate": 7.583594801146903e-06, + "loss": 0.9806, + "step": 4250 + }, + { + "epoch": 0.35, + "grad_norm": 3.7825420370232674, + "learning_rate": 7.582461350379551e-06, + "loss": 0.8304, + "step": 4251 + }, + { + "epoch": 0.35, + "grad_norm": 3.0319950587374103, + "learning_rate": 7.581327718592896e-06, + "loss": 0.778, + "step": 4252 + }, + { + "epoch": 0.35, + "grad_norm": 2.0521053776449354, + "learning_rate": 7.580193905866402e-06, + "loss": 0.415, + "step": 4253 + }, + { + "epoch": 0.35, + "grad_norm": 2.885777423711224, + "learning_rate": 7.579059912279545e-06, + "loss": 0.78, + "step": 4254 + }, + { + "epoch": 0.35, + "grad_norm": 4.715809617328955, + "learning_rate": 7.577925737911811e-06, + "loss": 1.4035, + "step": 4255 + }, + { + "epoch": 0.35, + "grad_norm": 2.577394588141058, + "learning_rate": 7.576791382842702e-06, + "loss": 0.5588, + "step": 4256 + }, + { + "epoch": 0.35, + "grad_norm": 3.162114084015856, + "learning_rate": 7.575656847151732e-06, + "loss": 0.7035, + "step": 4257 + }, + { + "epoch": 0.35, + "grad_norm": 3.5025398505743817, + "learning_rate": 7.574522130918426e-06, + "loss": 0.9669, + "step": 4258 + }, + { + "epoch": 0.35, + "grad_norm": 4.689264485842858, + "learning_rate": 7.5733872342223235e-06, + "loss": 0.985, + "step": 4259 + }, + { + "epoch": 0.35, + "grad_norm": 3.1512518368625324, + "learning_rate": 7.572252157142976e-06, + "loss": 0.5427, + "step": 4260 + }, + { + "epoch": 0.35, + "grad_norm": 3.136962873078472, + "learning_rate": 7.571116899759945e-06, + "loss": 0.5495, + "step": 4261 + }, + { + "epoch": 0.35, + "grad_norm": 3.657164795694336, + "learning_rate": 7.56998146215281e-06, + "loss": 0.7432, + "step": 4262 + }, + { + "epoch": 0.35, + "grad_norm": 3.5593972705527133, + "learning_rate": 7.568845844401158e-06, + "loss": 0.9293, + "step": 4263 + }, + { + "epoch": 0.35, + "grad_norm": 4.104600859719557, + "learning_rate": 7.567710046584593e-06, + "loss": 0.937, + "step": 4264 + }, + { + "epoch": 0.35, + "grad_norm": 3.3107309830162897, + "learning_rate": 7.566574068782728e-06, + "loss": 0.8032, + "step": 4265 + }, + { + "epoch": 0.35, + "grad_norm": 3.597493941109041, + "learning_rate": 7.565437911075191e-06, + "loss": 1.1043, + "step": 4266 + }, + { + "epoch": 0.35, + "grad_norm": 2.8071170485716794, + "learning_rate": 7.564301573541621e-06, + "loss": 0.8017, + "step": 4267 + }, + { + "epoch": 0.35, + "grad_norm": 1.9680658468583032, + "learning_rate": 7.563165056261671e-06, + "loss": 0.4377, + "step": 4268 + }, + { + "epoch": 0.35, + "grad_norm": 4.4083963853423915, + "learning_rate": 7.562028359315005e-06, + "loss": 0.9813, + "step": 4269 + }, + { + "epoch": 0.35, + "grad_norm": 2.065016615189242, + "learning_rate": 7.560891482781301e-06, + "loss": 0.3513, + "step": 4270 + }, + { + "epoch": 0.35, + "grad_norm": 4.22180599717921, + "learning_rate": 7.559754426740249e-06, + "loss": 0.9598, + "step": 4271 + }, + { + "epoch": 0.35, + "grad_norm": 5.177975680038039, + "learning_rate": 7.558617191271551e-06, + "loss": 1.0541, + "step": 4272 + }, + { + "epoch": 0.35, + "grad_norm": 4.206282755330498, + "learning_rate": 7.557479776454923e-06, + "loss": 0.9705, + "step": 4273 + }, + { + "epoch": 0.35, + "grad_norm": 3.3772672282559864, + "learning_rate": 7.556342182370095e-06, + "loss": 0.7269, + "step": 4274 + }, + { + "epoch": 0.35, + "grad_norm": 4.815793144389638, + "learning_rate": 7.5552044090968035e-06, + "loss": 1.1305, + "step": 4275 + }, + { + "epoch": 0.35, + "grad_norm": 4.397789333338332, + "learning_rate": 7.554066456714804e-06, + "loss": 0.7765, + "step": 4276 + }, + { + "epoch": 0.35, + "grad_norm": 2.169891416483646, + "learning_rate": 7.552928325303861e-06, + "loss": 0.3282, + "step": 4277 + }, + { + "epoch": 0.35, + "grad_norm": 2.905886108680131, + "learning_rate": 7.551790014943752e-06, + "loss": 0.6804, + "step": 4278 + }, + { + "epoch": 0.35, + "grad_norm": 3.8025974684265913, + "learning_rate": 7.55065152571427e-06, + "loss": 0.7483, + "step": 4279 + }, + { + "epoch": 0.35, + "grad_norm": 3.7342712449571205, + "learning_rate": 7.549512857695216e-06, + "loss": 0.7249, + "step": 4280 + }, + { + "epoch": 0.35, + "grad_norm": 3.2459565813784925, + "learning_rate": 7.5483740109664036e-06, + "loss": 0.5911, + "step": 4281 + }, + { + "epoch": 0.35, + "grad_norm": 2.5258338374757656, + "learning_rate": 7.547234985607668e-06, + "loss": 0.5009, + "step": 4282 + }, + { + "epoch": 0.35, + "grad_norm": 5.486670744801288, + "learning_rate": 7.546095781698842e-06, + "loss": 1.0581, + "step": 4283 + }, + { + "epoch": 0.35, + "grad_norm": 4.9298991349988945, + "learning_rate": 7.544956399319785e-06, + "loss": 1.068, + "step": 4284 + }, + { + "epoch": 0.35, + "grad_norm": 4.172582064142233, + "learning_rate": 7.543816838550359e-06, + "loss": 1.1742, + "step": 4285 + }, + { + "epoch": 0.35, + "grad_norm": 4.201667663325446, + "learning_rate": 7.542677099470441e-06, + "loss": 0.8472, + "step": 4286 + }, + { + "epoch": 0.35, + "grad_norm": 3.1995176189199728, + "learning_rate": 7.541537182159926e-06, + "loss": 0.7892, + "step": 4287 + }, + { + "epoch": 0.35, + "grad_norm": 2.328526607337528, + "learning_rate": 7.540397086698716e-06, + "loss": 0.7107, + "step": 4288 + }, + { + "epoch": 0.35, + "grad_norm": 4.353118807139896, + "learning_rate": 7.5392568131667265e-06, + "loss": 0.8643, + "step": 4289 + }, + { + "epoch": 0.35, + "grad_norm": 3.1174315933767485, + "learning_rate": 7.538116361643883e-06, + "loss": 0.5618, + "step": 4290 + }, + { + "epoch": 0.35, + "grad_norm": 7.299874282257006, + "learning_rate": 7.53697573221013e-06, + "loss": 1.828, + "step": 4291 + }, + { + "epoch": 0.35, + "grad_norm": 2.045071910752177, + "learning_rate": 7.53583492494542e-06, + "loss": 0.3556, + "step": 4292 + }, + { + "epoch": 0.35, + "grad_norm": 4.386906087901318, + "learning_rate": 7.534693939929716e-06, + "loss": 0.9769, + "step": 4293 + }, + { + "epoch": 0.35, + "grad_norm": 4.1939267691785265, + "learning_rate": 7.533552777243e-06, + "loss": 1.0888, + "step": 4294 + }, + { + "epoch": 0.35, + "grad_norm": 2.7922405769680005, + "learning_rate": 7.532411436965258e-06, + "loss": 0.7094, + "step": 4295 + }, + { + "epoch": 0.35, + "grad_norm": 3.0949378799917553, + "learning_rate": 7.531269919176496e-06, + "loss": 0.7065, + "step": 4296 + }, + { + "epoch": 0.35, + "grad_norm": 3.3404622426864967, + "learning_rate": 7.530128223956729e-06, + "loss": 0.5209, + "step": 4297 + }, + { + "epoch": 0.35, + "grad_norm": 3.6828821634625255, + "learning_rate": 7.528986351385985e-06, + "loss": 1.0418, + "step": 4298 + }, + { + "epoch": 0.35, + "grad_norm": 2.3181216173240107, + "learning_rate": 7.527844301544304e-06, + "loss": 0.5293, + "step": 4299 + }, + { + "epoch": 0.35, + "grad_norm": 4.96720934548505, + "learning_rate": 7.526702074511738e-06, + "loss": 1.2176, + "step": 4300 + }, + { + "epoch": 0.35, + "grad_norm": 3.8152740806384444, + "learning_rate": 7.5255596703683535e-06, + "loss": 0.6621, + "step": 4301 + }, + { + "epoch": 0.35, + "grad_norm": 3.3656367937880436, + "learning_rate": 7.524417089194227e-06, + "loss": 0.77, + "step": 4302 + }, + { + "epoch": 0.35, + "grad_norm": 1.0161219249370117, + "learning_rate": 7.523274331069449e-06, + "loss": 0.1678, + "step": 4303 + }, + { + "epoch": 0.35, + "grad_norm": 4.962457911789098, + "learning_rate": 7.522131396074122e-06, + "loss": 1.2677, + "step": 4304 + }, + { + "epoch": 0.35, + "grad_norm": 3.7386987611806384, + "learning_rate": 7.52098828428836e-06, + "loss": 0.7596, + "step": 4305 + }, + { + "epoch": 0.35, + "grad_norm": 3.930736277089549, + "learning_rate": 7.5198449957922895e-06, + "loss": 0.955, + "step": 4306 + }, + { + "epoch": 0.35, + "grad_norm": 4.174764922991015, + "learning_rate": 7.5187015306660524e-06, + "loss": 0.7774, + "step": 4307 + }, + { + "epoch": 0.35, + "grad_norm": 3.139106980850027, + "learning_rate": 7.5175578889898016e-06, + "loss": 0.7638, + "step": 4308 + }, + { + "epoch": 0.35, + "grad_norm": 4.277844030429609, + "learning_rate": 7.516414070843696e-06, + "loss": 0.9127, + "step": 4309 + }, + { + "epoch": 0.35, + "grad_norm": 3.377312607254998, + "learning_rate": 7.515270076307917e-06, + "loss": 0.88, + "step": 4310 + }, + { + "epoch": 0.35, + "grad_norm": 4.753351823698591, + "learning_rate": 7.514125905462651e-06, + "loss": 1.3045, + "step": 4311 + }, + { + "epoch": 0.35, + "grad_norm": 4.380965467013008, + "learning_rate": 7.512981558388101e-06, + "loss": 0.7882, + "step": 4312 + }, + { + "epoch": 0.35, + "grad_norm": 4.472155010468368, + "learning_rate": 7.51183703516448e-06, + "loss": 0.7921, + "step": 4313 + }, + { + "epoch": 0.35, + "grad_norm": 4.192913037162404, + "learning_rate": 7.510692335872016e-06, + "loss": 1.0474, + "step": 4314 + }, + { + "epoch": 0.35, + "grad_norm": 3.2134565473107704, + "learning_rate": 7.509547460590945e-06, + "loss": 0.5618, + "step": 4315 + }, + { + "epoch": 0.35, + "grad_norm": 4.918952899825416, + "learning_rate": 7.508402409401519e-06, + "loss": 1.2301, + "step": 4316 + }, + { + "epoch": 0.35, + "grad_norm": 4.286764533220852, + "learning_rate": 7.507257182384e-06, + "loss": 0.6964, + "step": 4317 + }, + { + "epoch": 0.35, + "grad_norm": 3.4183487003326625, + "learning_rate": 7.506111779618663e-06, + "loss": 0.8308, + "step": 4318 + }, + { + "epoch": 0.35, + "grad_norm": 4.622961051435708, + "learning_rate": 7.504966201185798e-06, + "loss": 0.9083, + "step": 4319 + }, + { + "epoch": 0.35, + "grad_norm": 3.51977662181751, + "learning_rate": 7.503820447165705e-06, + "loss": 0.6034, + "step": 4320 + }, + { + "epoch": 0.35, + "grad_norm": 4.032466768649381, + "learning_rate": 7.502674517638694e-06, + "loss": 0.7514, + "step": 4321 + }, + { + "epoch": 0.35, + "grad_norm": 3.2389907475732223, + "learning_rate": 7.5015284126850915e-06, + "loss": 0.6963, + "step": 4322 + }, + { + "epoch": 0.35, + "grad_norm": 4.987600811240381, + "learning_rate": 7.500382132385234e-06, + "loss": 1.0207, + "step": 4323 + }, + { + "epoch": 0.35, + "grad_norm": 4.066603624193605, + "learning_rate": 7.499235676819471e-06, + "loss": 0.8864, + "step": 4324 + }, + { + "epoch": 0.35, + "grad_norm": 3.769839249998782, + "learning_rate": 7.498089046068163e-06, + "loss": 0.5207, + "step": 4325 + }, + { + "epoch": 0.35, + "grad_norm": 3.4436628569051204, + "learning_rate": 7.496942240211685e-06, + "loss": 1.22, + "step": 4326 + }, + { + "epoch": 0.35, + "grad_norm": 2.0388291037490998, + "learning_rate": 7.495795259330422e-06, + "loss": 0.6388, + "step": 4327 + }, + { + "epoch": 0.35, + "grad_norm": 3.42893247686087, + "learning_rate": 7.494648103504774e-06, + "loss": 0.8951, + "step": 4328 + }, + { + "epoch": 0.35, + "grad_norm": 2.7612822548289877, + "learning_rate": 7.49350077281515e-06, + "loss": 0.5806, + "step": 4329 + }, + { + "epoch": 0.35, + "grad_norm": 4.706151510063128, + "learning_rate": 7.492353267341974e-06, + "loss": 0.8698, + "step": 4330 + }, + { + "epoch": 0.35, + "grad_norm": 4.6983960971622984, + "learning_rate": 7.491205587165682e-06, + "loss": 1.1328, + "step": 4331 + }, + { + "epoch": 0.35, + "grad_norm": 3.319249197795334, + "learning_rate": 7.490057732366718e-06, + "loss": 0.9426, + "step": 4332 + }, + { + "epoch": 0.35, + "grad_norm": 3.5025170873982514, + "learning_rate": 7.4889097030255445e-06, + "loss": 0.5951, + "step": 4333 + }, + { + "epoch": 0.35, + "grad_norm": 3.4219007121333407, + "learning_rate": 7.487761499222632e-06, + "loss": 0.339, + "step": 4334 + }, + { + "epoch": 0.35, + "grad_norm": 4.367668946465433, + "learning_rate": 7.486613121038466e-06, + "loss": 1.0931, + "step": 4335 + }, + { + "epoch": 0.35, + "grad_norm": 2.9355104713653724, + "learning_rate": 7.485464568553541e-06, + "loss": 0.5703, + "step": 4336 + }, + { + "epoch": 0.35, + "grad_norm": 4.17807422897983, + "learning_rate": 7.484315841848368e-06, + "loss": 0.9853, + "step": 4337 + }, + { + "epoch": 0.35, + "grad_norm": 5.043315819974853, + "learning_rate": 7.483166941003466e-06, + "loss": 1.1789, + "step": 4338 + }, + { + "epoch": 0.35, + "grad_norm": 3.530950037640052, + "learning_rate": 7.482017866099367e-06, + "loss": 0.8051, + "step": 4339 + }, + { + "epoch": 0.35, + "grad_norm": 2.8380019274677557, + "learning_rate": 7.480868617216619e-06, + "loss": 0.3348, + "step": 4340 + }, + { + "epoch": 0.35, + "grad_norm": 3.6038676133727834, + "learning_rate": 7.479719194435776e-06, + "loss": 0.8044, + "step": 4341 + }, + { + "epoch": 0.35, + "grad_norm": 4.0925376126716975, + "learning_rate": 7.478569597837411e-06, + "loss": 0.9227, + "step": 4342 + }, + { + "epoch": 0.35, + "grad_norm": 3.696213275373554, + "learning_rate": 7.4774198275021014e-06, + "loss": 0.8679, + "step": 4343 + }, + { + "epoch": 0.36, + "grad_norm": 3.7817964848829764, + "learning_rate": 7.476269883510445e-06, + "loss": 0.935, + "step": 4344 + }, + { + "epoch": 0.36, + "grad_norm": 3.406957888921831, + "learning_rate": 7.475119765943049e-06, + "loss": 0.9647, + "step": 4345 + }, + { + "epoch": 0.36, + "grad_norm": 4.5175991949471355, + "learning_rate": 7.473969474880527e-06, + "loss": 1.0671, + "step": 4346 + }, + { + "epoch": 0.36, + "grad_norm": 4.442572440305692, + "learning_rate": 7.47281901040351e-06, + "loss": 1.2769, + "step": 4347 + }, + { + "epoch": 0.36, + "grad_norm": 3.039003798061688, + "learning_rate": 7.471668372592644e-06, + "loss": 0.7816, + "step": 4348 + }, + { + "epoch": 0.36, + "grad_norm": 3.356063338687145, + "learning_rate": 7.470517561528582e-06, + "loss": 0.8141, + "step": 4349 + }, + { + "epoch": 0.36, + "grad_norm": 3.627272893200747, + "learning_rate": 7.46936657729199e-06, + "loss": 0.8789, + "step": 4350 + }, + { + "epoch": 0.36, + "grad_norm": 3.165112986350278, + "learning_rate": 7.4682154199635475e-06, + "loss": 0.9576, + "step": 4351 + }, + { + "epoch": 0.36, + "grad_norm": 2.786959812117681, + "learning_rate": 7.467064089623945e-06, + "loss": 0.676, + "step": 4352 + }, + { + "epoch": 0.36, + "grad_norm": 4.344544708423713, + "learning_rate": 7.465912586353888e-06, + "loss": 1.0466, + "step": 4353 + }, + { + "epoch": 0.36, + "grad_norm": 5.141295381304714, + "learning_rate": 7.464760910234091e-06, + "loss": 1.1653, + "step": 4354 + }, + { + "epoch": 0.36, + "grad_norm": 5.106223819266664, + "learning_rate": 7.463609061345279e-06, + "loss": 1.2457, + "step": 4355 + }, + { + "epoch": 0.36, + "grad_norm": 5.190753808346917, + "learning_rate": 7.462457039768194e-06, + "loss": 1.1127, + "step": 4356 + }, + { + "epoch": 0.36, + "grad_norm": 2.536070663602797, + "learning_rate": 7.461304845583588e-06, + "loss": 0.5238, + "step": 4357 + }, + { + "epoch": 0.36, + "grad_norm": 3.067446549558834, + "learning_rate": 7.460152478872224e-06, + "loss": 0.7506, + "step": 4358 + }, + { + "epoch": 0.36, + "grad_norm": 4.767927207227846, + "learning_rate": 7.458999939714876e-06, + "loss": 0.849, + "step": 4359 + }, + { + "epoch": 0.36, + "grad_norm": 3.1358705863995686, + "learning_rate": 7.4578472281923356e-06, + "loss": 0.3897, + "step": 4360 + }, + { + "epoch": 0.36, + "grad_norm": 4.149045683865217, + "learning_rate": 7.456694344385401e-06, + "loss": 1.053, + "step": 4361 + }, + { + "epoch": 0.36, + "grad_norm": 3.297878415611068, + "learning_rate": 7.455541288374885e-06, + "loss": 0.5006, + "step": 4362 + }, + { + "epoch": 0.36, + "grad_norm": 4.493178783840786, + "learning_rate": 7.45438806024161e-06, + "loss": 0.8101, + "step": 4363 + }, + { + "epoch": 0.36, + "grad_norm": 3.92095481738485, + "learning_rate": 7.453234660066413e-06, + "loss": 0.9906, + "step": 4364 + }, + { + "epoch": 0.36, + "grad_norm": 2.954136088552313, + "learning_rate": 7.452081087930143e-06, + "loss": 0.81, + "step": 4365 + }, + { + "epoch": 0.36, + "grad_norm": 2.68244091356062, + "learning_rate": 7.450927343913661e-06, + "loss": 0.541, + "step": 4366 + }, + { + "epoch": 0.36, + "grad_norm": 4.627231427015078, + "learning_rate": 7.449773428097838e-06, + "loss": 0.9492, + "step": 4367 + }, + { + "epoch": 0.36, + "grad_norm": 5.8169653497783305, + "learning_rate": 7.44861934056356e-06, + "loss": 0.6861, + "step": 4368 + }, + { + "epoch": 0.36, + "grad_norm": 4.8882242598826835, + "learning_rate": 7.447465081391722e-06, + "loss": 1.1398, + "step": 4369 + }, + { + "epoch": 0.36, + "grad_norm": 4.067463057219432, + "learning_rate": 7.446310650663234e-06, + "loss": 0.7872, + "step": 4370 + }, + { + "epoch": 0.36, + "grad_norm": 2.0416188880316226, + "learning_rate": 7.445156048459016e-06, + "loss": 0.4258, + "step": 4371 + }, + { + "epoch": 0.36, + "grad_norm": 3.8461591247025497, + "learning_rate": 7.444001274859999e-06, + "loss": 1.0479, + "step": 4372 + }, + { + "epoch": 0.36, + "grad_norm": 2.6675086056877957, + "learning_rate": 7.4428463299471285e-06, + "loss": 0.4093, + "step": 4373 + }, + { + "epoch": 0.36, + "grad_norm": 2.8086978839082044, + "learning_rate": 7.441691213801363e-06, + "loss": 0.7284, + "step": 4374 + }, + { + "epoch": 0.36, + "grad_norm": 3.849463434709465, + "learning_rate": 7.440535926503669e-06, + "loss": 0.4524, + "step": 4375 + }, + { + "epoch": 0.36, + "grad_norm": 3.878760788811572, + "learning_rate": 7.439380468135029e-06, + "loss": 0.7494, + "step": 4376 + }, + { + "epoch": 0.36, + "grad_norm": 4.46426717186742, + "learning_rate": 7.4382248387764335e-06, + "loss": 0.8812, + "step": 4377 + }, + { + "epoch": 0.36, + "grad_norm": 3.307437375750391, + "learning_rate": 7.437069038508888e-06, + "loss": 0.6284, + "step": 4378 + }, + { + "epoch": 0.36, + "grad_norm": 5.498403905595587, + "learning_rate": 7.435913067413409e-06, + "loss": 1.0276, + "step": 4379 + }, + { + "epoch": 0.36, + "grad_norm": 2.338473989667431, + "learning_rate": 7.4347569255710254e-06, + "loss": 0.4613, + "step": 4380 + }, + { + "epoch": 0.36, + "grad_norm": 4.099509016808251, + "learning_rate": 7.433600613062777e-06, + "loss": 0.9861, + "step": 4381 + }, + { + "epoch": 0.36, + "grad_norm": 4.289778871357129, + "learning_rate": 7.432444129969717e-06, + "loss": 0.823, + "step": 4382 + }, + { + "epoch": 0.36, + "grad_norm": 4.002809802943864, + "learning_rate": 7.431287476372909e-06, + "loss": 0.9164, + "step": 4383 + }, + { + "epoch": 0.36, + "grad_norm": 3.526139381340818, + "learning_rate": 7.43013065235343e-06, + "loss": 0.7189, + "step": 4384 + }, + { + "epoch": 0.36, + "grad_norm": 3.7695362431841057, + "learning_rate": 7.4289736579923685e-06, + "loss": 0.7519, + "step": 4385 + }, + { + "epoch": 0.36, + "grad_norm": 3.6250804019629017, + "learning_rate": 7.427816493370825e-06, + "loss": 0.8639, + "step": 4386 + }, + { + "epoch": 0.36, + "grad_norm": 4.744309483105921, + "learning_rate": 7.426659158569911e-06, + "loss": 0.9766, + "step": 4387 + }, + { + "epoch": 0.36, + "grad_norm": 4.399109856726638, + "learning_rate": 7.425501653670751e-06, + "loss": 1.0083, + "step": 4388 + }, + { + "epoch": 0.36, + "grad_norm": 4.538105144605974, + "learning_rate": 7.4243439787544805e-06, + "loss": 1.0412, + "step": 4389 + }, + { + "epoch": 0.36, + "grad_norm": 4.200788192853599, + "learning_rate": 7.423186133902247e-06, + "loss": 1.1422, + "step": 4390 + }, + { + "epoch": 0.36, + "grad_norm": 3.5083238263558525, + "learning_rate": 7.422028119195213e-06, + "loss": 0.6474, + "step": 4391 + }, + { + "epoch": 0.36, + "grad_norm": 2.88018038458345, + "learning_rate": 7.420869934714548e-06, + "loss": 0.6862, + "step": 4392 + }, + { + "epoch": 0.36, + "grad_norm": 4.0792825826252725, + "learning_rate": 7.419711580541436e-06, + "loss": 1.0469, + "step": 4393 + }, + { + "epoch": 0.36, + "grad_norm": 1.1807818355369477, + "learning_rate": 7.418553056757072e-06, + "loss": 0.1271, + "step": 4394 + }, + { + "epoch": 0.36, + "grad_norm": 2.50969090143382, + "learning_rate": 7.417394363442665e-06, + "loss": 0.5851, + "step": 4395 + }, + { + "epoch": 0.36, + "grad_norm": 2.4359477881273057, + "learning_rate": 7.416235500679433e-06, + "loss": 0.5199, + "step": 4396 + }, + { + "epoch": 0.36, + "grad_norm": 3.6176376746670478, + "learning_rate": 7.41507646854861e-06, + "loss": 0.7529, + "step": 4397 + }, + { + "epoch": 0.36, + "grad_norm": 2.4386064681048185, + "learning_rate": 7.4139172671314344e-06, + "loss": 0.3244, + "step": 4398 + }, + { + "epoch": 0.36, + "grad_norm": 3.6355116762246937, + "learning_rate": 7.412757896509164e-06, + "loss": 0.9841, + "step": 4399 + }, + { + "epoch": 0.36, + "grad_norm": 4.632425199290458, + "learning_rate": 7.411598356763068e-06, + "loss": 1.095, + "step": 4400 + }, + { + "epoch": 0.36, + "grad_norm": 2.1940908728026596, + "learning_rate": 7.41043864797442e-06, + "loss": 0.568, + "step": 4401 + }, + { + "epoch": 0.36, + "grad_norm": 4.46000689781777, + "learning_rate": 7.409278770224515e-06, + "loss": 1.1823, + "step": 4402 + }, + { + "epoch": 0.36, + "grad_norm": 3.912980192437, + "learning_rate": 7.4081187235946515e-06, + "loss": 0.8146, + "step": 4403 + }, + { + "epoch": 0.36, + "grad_norm": 3.9160335596719977, + "learning_rate": 7.406958508166147e-06, + "loss": 0.8216, + "step": 4404 + }, + { + "epoch": 0.36, + "grad_norm": 4.55346932823823, + "learning_rate": 7.405798124020326e-06, + "loss": 0.9, + "step": 4405 + }, + { + "epoch": 0.36, + "grad_norm": 2.8839523637298212, + "learning_rate": 7.4046375712385256e-06, + "loss": 0.6489, + "step": 4406 + }, + { + "epoch": 0.36, + "grad_norm": 2.26364676153241, + "learning_rate": 7.403476849902096e-06, + "loss": 0.5996, + "step": 4407 + }, + { + "epoch": 0.36, + "grad_norm": 5.13212097104461, + "learning_rate": 7.402315960092401e-06, + "loss": 1.1889, + "step": 4408 + }, + { + "epoch": 0.36, + "grad_norm": 4.462942185439612, + "learning_rate": 7.401154901890812e-06, + "loss": 0.8067, + "step": 4409 + }, + { + "epoch": 0.36, + "grad_norm": 3.815555272242939, + "learning_rate": 7.399993675378714e-06, + "loss": 0.7739, + "step": 4410 + }, + { + "epoch": 0.36, + "grad_norm": 5.445708292478904, + "learning_rate": 7.398832280637504e-06, + "loss": 1.3827, + "step": 4411 + }, + { + "epoch": 0.36, + "grad_norm": 3.00619628143818, + "learning_rate": 7.397670717748591e-06, + "loss": 0.4527, + "step": 4412 + }, + { + "epoch": 0.36, + "grad_norm": 3.454879259467784, + "learning_rate": 7.396508986793393e-06, + "loss": 0.7629, + "step": 4413 + }, + { + "epoch": 0.36, + "grad_norm": 3.8921905481022745, + "learning_rate": 7.395347087853349e-06, + "loss": 0.7617, + "step": 4414 + }, + { + "epoch": 0.36, + "grad_norm": 3.457760151179242, + "learning_rate": 7.394185021009895e-06, + "loss": 0.8917, + "step": 4415 + }, + { + "epoch": 0.36, + "grad_norm": 2.1954274713742685, + "learning_rate": 7.393022786344492e-06, + "loss": 0.4957, + "step": 4416 + }, + { + "epoch": 0.36, + "grad_norm": 4.4738524987507375, + "learning_rate": 7.391860383938607e-06, + "loss": 0.8411, + "step": 4417 + }, + { + "epoch": 0.36, + "grad_norm": 3.476168980159589, + "learning_rate": 7.390697813873718e-06, + "loss": 0.6591, + "step": 4418 + }, + { + "epoch": 0.36, + "grad_norm": 4.836743690795182, + "learning_rate": 7.389535076231315e-06, + "loss": 1.1051, + "step": 4419 + }, + { + "epoch": 0.36, + "grad_norm": 3.70651116649257, + "learning_rate": 7.3883721710929045e-06, + "loss": 1.0457, + "step": 4420 + }, + { + "epoch": 0.36, + "grad_norm": 1.7965196368932832, + "learning_rate": 7.387209098539998e-06, + "loss": 0.2635, + "step": 4421 + }, + { + "epoch": 0.36, + "grad_norm": 4.633654664167891, + "learning_rate": 7.386045858654123e-06, + "loss": 1.1022, + "step": 4422 + }, + { + "epoch": 0.36, + "grad_norm": 3.2827126308298826, + "learning_rate": 7.384882451516817e-06, + "loss": 0.7331, + "step": 4423 + }, + { + "epoch": 0.36, + "grad_norm": 4.420810694777215, + "learning_rate": 7.383718877209631e-06, + "loss": 1.0322, + "step": 4424 + }, + { + "epoch": 0.36, + "grad_norm": 2.4038382834630596, + "learning_rate": 7.382555135814126e-06, + "loss": 0.4581, + "step": 4425 + }, + { + "epoch": 0.36, + "grad_norm": 6.085944848338009, + "learning_rate": 7.381391227411875e-06, + "loss": 1.3082, + "step": 4426 + }, + { + "epoch": 0.36, + "grad_norm": 3.6484173790981114, + "learning_rate": 7.380227152084461e-06, + "loss": 0.6663, + "step": 4427 + }, + { + "epoch": 0.36, + "grad_norm": 2.625304518689477, + "learning_rate": 7.379062909913484e-06, + "loss": 0.3384, + "step": 4428 + }, + { + "epoch": 0.36, + "grad_norm": 5.806506871861837, + "learning_rate": 7.37789850098055e-06, + "loss": 1.1255, + "step": 4429 + }, + { + "epoch": 0.36, + "grad_norm": 2.0405473109459544, + "learning_rate": 7.37673392536728e-06, + "loss": 0.5441, + "step": 4430 + }, + { + "epoch": 0.36, + "grad_norm": 3.217603664239242, + "learning_rate": 7.375569183155306e-06, + "loss": 0.6702, + "step": 4431 + }, + { + "epoch": 0.36, + "grad_norm": 3.465887043027505, + "learning_rate": 7.37440427442627e-06, + "loss": 0.8486, + "step": 4432 + }, + { + "epoch": 0.36, + "grad_norm": 4.5703859822195945, + "learning_rate": 7.373239199261828e-06, + "loss": 0.9924, + "step": 4433 + }, + { + "epoch": 0.36, + "grad_norm": 3.86403338734041, + "learning_rate": 7.372073957743646e-06, + "loss": 1.0849, + "step": 4434 + }, + { + "epoch": 0.36, + "grad_norm": 4.8492373869585315, + "learning_rate": 7.370908549953404e-06, + "loss": 0.9398, + "step": 4435 + }, + { + "epoch": 0.36, + "grad_norm": 4.111972033867977, + "learning_rate": 7.369742975972789e-06, + "loss": 0.7484, + "step": 4436 + }, + { + "epoch": 0.36, + "grad_norm": 3.7084509760487485, + "learning_rate": 7.368577235883508e-06, + "loss": 0.7273, + "step": 4437 + }, + { + "epoch": 0.36, + "grad_norm": 4.40040600697199, + "learning_rate": 7.367411329767267e-06, + "loss": 0.8338, + "step": 4438 + }, + { + "epoch": 0.36, + "grad_norm": 4.070106174285915, + "learning_rate": 7.366245257705798e-06, + "loss": 0.5525, + "step": 4439 + }, + { + "epoch": 0.36, + "grad_norm": 2.343881665537808, + "learning_rate": 7.365079019780832e-06, + "loss": 0.5944, + "step": 4440 + }, + { + "epoch": 0.36, + "grad_norm": 4.12321303109076, + "learning_rate": 7.36391261607412e-06, + "loss": 1.0556, + "step": 4441 + }, + { + "epoch": 0.36, + "grad_norm": 3.297429956371091, + "learning_rate": 7.3627460466674215e-06, + "loss": 0.8856, + "step": 4442 + }, + { + "epoch": 0.36, + "grad_norm": 4.004084573771542, + "learning_rate": 7.361579311642508e-06, + "loss": 0.8428, + "step": 4443 + }, + { + "epoch": 0.36, + "grad_norm": 4.137291119020999, + "learning_rate": 7.360412411081163e-06, + "loss": 0.8561, + "step": 4444 + }, + { + "epoch": 0.36, + "grad_norm": 4.425871984712271, + "learning_rate": 7.35924534506518e-06, + "loss": 1.0632, + "step": 4445 + }, + { + "epoch": 0.36, + "grad_norm": 4.263514033800204, + "learning_rate": 7.3580781136763656e-06, + "loss": 0.9843, + "step": 4446 + }, + { + "epoch": 0.36, + "grad_norm": 1.3638618591766134, + "learning_rate": 7.356910716996538e-06, + "loss": 0.2298, + "step": 4447 + }, + { + "epoch": 0.36, + "grad_norm": 2.188339227415263, + "learning_rate": 7.355743155107526e-06, + "loss": 0.3663, + "step": 4448 + }, + { + "epoch": 0.36, + "grad_norm": 3.7041795126120665, + "learning_rate": 7.354575428091172e-06, + "loss": 0.7665, + "step": 4449 + }, + { + "epoch": 0.36, + "grad_norm": 4.401883725066224, + "learning_rate": 7.353407536029327e-06, + "loss": 0.7469, + "step": 4450 + }, + { + "epoch": 0.36, + "grad_norm": 3.841773200375602, + "learning_rate": 7.352239479003857e-06, + "loss": 0.9938, + "step": 4451 + }, + { + "epoch": 0.36, + "grad_norm": 4.715651076061036, + "learning_rate": 7.351071257096634e-06, + "loss": 1.2306, + "step": 4452 + }, + { + "epoch": 0.36, + "grad_norm": 5.563175322043322, + "learning_rate": 7.349902870389549e-06, + "loss": 1.028, + "step": 4453 + }, + { + "epoch": 0.36, + "grad_norm": 2.465074526425466, + "learning_rate": 7.3487343189645e-06, + "loss": 0.3103, + "step": 4454 + }, + { + "epoch": 0.36, + "grad_norm": 3.055709724006543, + "learning_rate": 7.347565602903397e-06, + "loss": 0.6235, + "step": 4455 + }, + { + "epoch": 0.36, + "grad_norm": 3.656519516509362, + "learning_rate": 7.346396722288162e-06, + "loss": 0.679, + "step": 4456 + }, + { + "epoch": 0.36, + "grad_norm": 3.9727904646221956, + "learning_rate": 7.345227677200728e-06, + "loss": 0.5582, + "step": 4457 + }, + { + "epoch": 0.36, + "grad_norm": 2.824112013113073, + "learning_rate": 7.34405846772304e-06, + "loss": 0.798, + "step": 4458 + }, + { + "epoch": 0.36, + "grad_norm": 1.1629289610763556, + "learning_rate": 7.3428890939370545e-06, + "loss": 0.1277, + "step": 4459 + }, + { + "epoch": 0.36, + "grad_norm": 4.477409798493978, + "learning_rate": 7.341719555924741e-06, + "loss": 0.7387, + "step": 4460 + }, + { + "epoch": 0.36, + "grad_norm": 3.1155639227406398, + "learning_rate": 7.3405498537680765e-06, + "loss": 0.5929, + "step": 4461 + }, + { + "epoch": 0.36, + "grad_norm": 3.3179173250112637, + "learning_rate": 7.339379987549054e-06, + "loss": 0.5323, + "step": 4462 + }, + { + "epoch": 0.36, + "grad_norm": 3.758447610320383, + "learning_rate": 7.338209957349677e-06, + "loss": 1.1001, + "step": 4463 + }, + { + "epoch": 0.36, + "grad_norm": 2.9869551230787956, + "learning_rate": 7.337039763251956e-06, + "loss": 0.5098, + "step": 4464 + }, + { + "epoch": 0.36, + "grad_norm": 4.228207987999737, + "learning_rate": 7.335869405337919e-06, + "loss": 1.0302, + "step": 4465 + }, + { + "epoch": 0.37, + "grad_norm": 5.227773063389464, + "learning_rate": 7.334698883689601e-06, + "loss": 1.0155, + "step": 4466 + }, + { + "epoch": 0.37, + "grad_norm": 1.9999135138495103, + "learning_rate": 7.333528198389053e-06, + "loss": 0.3648, + "step": 4467 + }, + { + "epoch": 0.37, + "grad_norm": 4.16060025922408, + "learning_rate": 7.332357349518334e-06, + "loss": 0.6752, + "step": 4468 + }, + { + "epoch": 0.37, + "grad_norm": 4.04076814093737, + "learning_rate": 7.331186337159515e-06, + "loss": 1.1512, + "step": 4469 + }, + { + "epoch": 0.37, + "grad_norm": 4.613831500731791, + "learning_rate": 7.3300151613946805e-06, + "loss": 0.9678, + "step": 4470 + }, + { + "epoch": 0.37, + "grad_norm": 4.1838442374567535, + "learning_rate": 7.328843822305922e-06, + "loss": 0.7081, + "step": 4471 + }, + { + "epoch": 0.37, + "grad_norm": 3.4458421972312783, + "learning_rate": 7.327672319975348e-06, + "loss": 0.8107, + "step": 4472 + }, + { + "epoch": 0.37, + "grad_norm": 2.2583030157807977, + "learning_rate": 7.326500654485071e-06, + "loss": 0.5454, + "step": 4473 + }, + { + "epoch": 0.37, + "grad_norm": 4.715040107956233, + "learning_rate": 7.325328825917226e-06, + "loss": 1.1163, + "step": 4474 + }, + { + "epoch": 0.37, + "grad_norm": 2.0790611672845545, + "learning_rate": 7.3241568343539505e-06, + "loss": 0.5023, + "step": 4475 + }, + { + "epoch": 0.37, + "grad_norm": 2.4918898891044106, + "learning_rate": 7.322984679877394e-06, + "loss": 0.4306, + "step": 4476 + }, + { + "epoch": 0.37, + "grad_norm": 1.8286276573382796, + "learning_rate": 7.3218123625697225e-06, + "loss": 0.4407, + "step": 4477 + }, + { + "epoch": 0.37, + "grad_norm": 1.8752323357289453, + "learning_rate": 7.320639882513108e-06, + "loss": 0.4156, + "step": 4478 + }, + { + "epoch": 0.37, + "grad_norm": 3.1699152957715624, + "learning_rate": 7.319467239789738e-06, + "loss": 0.7109, + "step": 4479 + }, + { + "epoch": 0.37, + "grad_norm": 4.585118381742184, + "learning_rate": 7.318294434481808e-06, + "loss": 1.0924, + "step": 4480 + }, + { + "epoch": 0.37, + "grad_norm": 3.345711180050005, + "learning_rate": 7.317121466671528e-06, + "loss": 0.827, + "step": 4481 + }, + { + "epoch": 0.37, + "grad_norm": 2.9439381191619036, + "learning_rate": 7.3159483364411175e-06, + "loss": 0.6872, + "step": 4482 + }, + { + "epoch": 0.37, + "grad_norm": 2.0173118862391557, + "learning_rate": 7.314775043872807e-06, + "loss": 0.4207, + "step": 4483 + }, + { + "epoch": 0.37, + "grad_norm": 2.989630248205145, + "learning_rate": 7.31360158904884e-06, + "loss": 0.7898, + "step": 4484 + }, + { + "epoch": 0.37, + "grad_norm": 4.960866870194185, + "learning_rate": 7.3124279720514715e-06, + "loss": 0.8696, + "step": 4485 + }, + { + "epoch": 0.37, + "grad_norm": 3.2405716519452077, + "learning_rate": 7.311254192962964e-06, + "loss": 0.716, + "step": 4486 + }, + { + "epoch": 0.37, + "grad_norm": 3.4706557443470807, + "learning_rate": 7.3100802518655975e-06, + "loss": 0.8487, + "step": 4487 + }, + { + "epoch": 0.37, + "grad_norm": 4.719720262071688, + "learning_rate": 7.308906148841659e-06, + "loss": 1.1216, + "step": 4488 + }, + { + "epoch": 0.37, + "grad_norm": 4.172493649847887, + "learning_rate": 7.307731883973447e-06, + "loss": 0.6986, + "step": 4489 + }, + { + "epoch": 0.37, + "grad_norm": 2.934165336710921, + "learning_rate": 7.306557457343273e-06, + "loss": 0.554, + "step": 4490 + }, + { + "epoch": 0.37, + "grad_norm": 4.820002123608001, + "learning_rate": 7.30538286903346e-06, + "loss": 0.6866, + "step": 4491 + }, + { + "epoch": 0.37, + "grad_norm": 4.646670289136569, + "learning_rate": 7.3042081191263415e-06, + "loss": 1.1311, + "step": 4492 + }, + { + "epoch": 0.37, + "grad_norm": 2.7813068574299105, + "learning_rate": 7.30303320770426e-06, + "loss": 0.4627, + "step": 4493 + }, + { + "epoch": 0.37, + "grad_norm": 1.7240575636139102, + "learning_rate": 7.301858134849575e-06, + "loss": 0.2563, + "step": 4494 + }, + { + "epoch": 0.37, + "grad_norm": 4.305120771122525, + "learning_rate": 7.3006829006446535e-06, + "loss": 0.8042, + "step": 4495 + }, + { + "epoch": 0.37, + "grad_norm": 5.261801668048653, + "learning_rate": 7.299507505171871e-06, + "loss": 1.3942, + "step": 4496 + }, + { + "epoch": 0.37, + "grad_norm": 4.204142139929167, + "learning_rate": 7.298331948513622e-06, + "loss": 1.1927, + "step": 4497 + }, + { + "epoch": 0.37, + "grad_norm": 4.044871850015931, + "learning_rate": 7.297156230752303e-06, + "loss": 1.2654, + "step": 4498 + }, + { + "epoch": 0.37, + "grad_norm": 5.337850294767246, + "learning_rate": 7.295980351970331e-06, + "loss": 0.908, + "step": 4499 + }, + { + "epoch": 0.37, + "grad_norm": 3.053857052514148, + "learning_rate": 7.29480431225013e-06, + "loss": 0.6775, + "step": 4500 + }, + { + "epoch": 0.37, + "grad_norm": 4.949137459079822, + "learning_rate": 7.2936281116741314e-06, + "loss": 0.9686, + "step": 4501 + }, + { + "epoch": 0.37, + "grad_norm": 4.037736967540822, + "learning_rate": 7.292451750324785e-06, + "loss": 0.7931, + "step": 4502 + }, + { + "epoch": 0.37, + "grad_norm": 4.3779809612700555, + "learning_rate": 7.291275228284549e-06, + "loss": 0.9467, + "step": 4503 + }, + { + "epoch": 0.37, + "grad_norm": 3.418346099332919, + "learning_rate": 7.290098545635889e-06, + "loss": 0.7067, + "step": 4504 + }, + { + "epoch": 0.37, + "grad_norm": 2.232539189776654, + "learning_rate": 7.288921702461289e-06, + "loss": 0.3651, + "step": 4505 + }, + { + "epoch": 0.37, + "grad_norm": 3.5749360072395358, + "learning_rate": 7.287744698843237e-06, + "loss": 0.7257, + "step": 4506 + }, + { + "epoch": 0.37, + "grad_norm": 2.0033083083553005, + "learning_rate": 7.28656753486424e-06, + "loss": 0.3874, + "step": 4507 + }, + { + "epoch": 0.37, + "grad_norm": 5.740670590431987, + "learning_rate": 7.285390210606809e-06, + "loss": 1.3633, + "step": 4508 + }, + { + "epoch": 0.37, + "grad_norm": 3.5485675390190594, + "learning_rate": 7.28421272615347e-06, + "loss": 0.8501, + "step": 4509 + }, + { + "epoch": 0.37, + "grad_norm": 3.9958369179935214, + "learning_rate": 7.283035081586761e-06, + "loss": 0.9872, + "step": 4510 + }, + { + "epoch": 0.37, + "grad_norm": 4.60533465076516, + "learning_rate": 7.281857276989228e-06, + "loss": 1.1645, + "step": 4511 + }, + { + "epoch": 0.37, + "grad_norm": 3.431379925508597, + "learning_rate": 7.28067931244343e-06, + "loss": 0.8068, + "step": 4512 + }, + { + "epoch": 0.37, + "grad_norm": 4.452343209098598, + "learning_rate": 7.279501188031939e-06, + "loss": 1.1694, + "step": 4513 + }, + { + "epoch": 0.37, + "grad_norm": 3.6836445048120234, + "learning_rate": 7.278322903837334e-06, + "loss": 0.7585, + "step": 4514 + }, + { + "epoch": 0.37, + "grad_norm": 2.9059582544576688, + "learning_rate": 7.2771444599422096e-06, + "loss": 0.6519, + "step": 4515 + }, + { + "epoch": 0.37, + "grad_norm": 4.883169272294483, + "learning_rate": 7.275965856429167e-06, + "loss": 0.7735, + "step": 4516 + }, + { + "epoch": 0.37, + "grad_norm": 2.639805399214749, + "learning_rate": 7.274787093380825e-06, + "loss": 0.7228, + "step": 4517 + }, + { + "epoch": 0.37, + "grad_norm": 3.0174849334069562, + "learning_rate": 7.273608170879807e-06, + "loss": 0.8193, + "step": 4518 + }, + { + "epoch": 0.37, + "grad_norm": 4.095551984163171, + "learning_rate": 7.27242908900875e-06, + "loss": 0.9427, + "step": 4519 + }, + { + "epoch": 0.37, + "grad_norm": 1.704871089442316, + "learning_rate": 7.271249847850306e-06, + "loss": 0.429, + "step": 4520 + }, + { + "epoch": 0.37, + "grad_norm": 4.820630439574544, + "learning_rate": 7.27007044748713e-06, + "loss": 0.9946, + "step": 4521 + }, + { + "epoch": 0.37, + "grad_norm": 3.82734560676249, + "learning_rate": 7.268890888001896e-06, + "loss": 0.9962, + "step": 4522 + }, + { + "epoch": 0.37, + "grad_norm": 3.9606092407357973, + "learning_rate": 7.267711169477284e-06, + "loss": 0.8341, + "step": 4523 + }, + { + "epoch": 0.37, + "grad_norm": 3.245772471440562, + "learning_rate": 7.266531291995989e-06, + "loss": 0.6565, + "step": 4524 + }, + { + "epoch": 0.37, + "grad_norm": 3.211696537207009, + "learning_rate": 7.265351255640713e-06, + "loss": 0.983, + "step": 4525 + }, + { + "epoch": 0.37, + "grad_norm": 4.468724734753304, + "learning_rate": 7.2641710604941754e-06, + "loss": 0.8992, + "step": 4526 + }, + { + "epoch": 0.37, + "grad_norm": 4.586107456241061, + "learning_rate": 7.262990706639097e-06, + "loss": 1.0833, + "step": 4527 + }, + { + "epoch": 0.37, + "grad_norm": 4.473517836636879, + "learning_rate": 7.261810194158221e-06, + "loss": 0.8026, + "step": 4528 + }, + { + "epoch": 0.37, + "grad_norm": 3.502267514552507, + "learning_rate": 7.260629523134293e-06, + "loss": 0.8131, + "step": 4529 + }, + { + "epoch": 0.37, + "grad_norm": 2.220683416304137, + "learning_rate": 7.259448693650073e-06, + "loss": 0.42, + "step": 4530 + }, + { + "epoch": 0.37, + "grad_norm": 2.8593087635128094, + "learning_rate": 7.258267705788334e-06, + "loss": 0.3321, + "step": 4531 + }, + { + "epoch": 0.37, + "grad_norm": 2.6463886519185356, + "learning_rate": 7.2570865596318565e-06, + "loss": 0.5994, + "step": 4532 + }, + { + "epoch": 0.37, + "grad_norm": 3.4815018579928054, + "learning_rate": 7.255905255263434e-06, + "loss": 0.6225, + "step": 4533 + }, + { + "epoch": 0.37, + "grad_norm": 4.056716362932539, + "learning_rate": 7.254723792765872e-06, + "loss": 1.0677, + "step": 4534 + }, + { + "epoch": 0.37, + "grad_norm": 4.332364971141776, + "learning_rate": 7.253542172221982e-06, + "loss": 0.7248, + "step": 4535 + }, + { + "epoch": 0.37, + "grad_norm": 3.471393689780005, + "learning_rate": 7.252360393714595e-06, + "loss": 0.8082, + "step": 4536 + }, + { + "epoch": 0.37, + "grad_norm": 3.980817164222064, + "learning_rate": 7.251178457326547e-06, + "loss": 0.6879, + "step": 4537 + }, + { + "epoch": 0.37, + "grad_norm": 4.319603041963911, + "learning_rate": 7.249996363140686e-06, + "loss": 0.7517, + "step": 4538 + }, + { + "epoch": 0.37, + "grad_norm": 3.220525399600135, + "learning_rate": 7.24881411123987e-06, + "loss": 0.9474, + "step": 4539 + }, + { + "epoch": 0.37, + "grad_norm": 3.4638507168662445, + "learning_rate": 7.247631701706974e-06, + "loss": 0.6042, + "step": 4540 + }, + { + "epoch": 0.37, + "grad_norm": 2.2917495569563706, + "learning_rate": 7.246449134624878e-06, + "loss": 0.2501, + "step": 4541 + }, + { + "epoch": 0.37, + "grad_norm": 2.9933586455885357, + "learning_rate": 7.2452664100764725e-06, + "loss": 0.5591, + "step": 4542 + }, + { + "epoch": 0.37, + "grad_norm": 3.604732548760846, + "learning_rate": 7.244083528144663e-06, + "loss": 0.6803, + "step": 4543 + }, + { + "epoch": 0.37, + "grad_norm": 4.220440325230384, + "learning_rate": 7.242900488912364e-06, + "loss": 0.7553, + "step": 4544 + }, + { + "epoch": 0.37, + "grad_norm": 4.861147346123085, + "learning_rate": 7.241717292462505e-06, + "loss": 1.7752, + "step": 4545 + }, + { + "epoch": 0.37, + "grad_norm": 4.233779493000437, + "learning_rate": 7.240533938878016e-06, + "loss": 0.6548, + "step": 4546 + }, + { + "epoch": 0.37, + "grad_norm": 5.0855895495993035, + "learning_rate": 7.239350428241851e-06, + "loss": 1.3626, + "step": 4547 + }, + { + "epoch": 0.37, + "grad_norm": 4.136242258149596, + "learning_rate": 7.238166760636966e-06, + "loss": 1.0945, + "step": 4548 + }, + { + "epoch": 0.37, + "grad_norm": 2.536209408736868, + "learning_rate": 7.236982936146332e-06, + "loss": 0.3704, + "step": 4549 + }, + { + "epoch": 0.37, + "grad_norm": 2.679206527422662, + "learning_rate": 7.235798954852929e-06, + "loss": 0.4141, + "step": 4550 + }, + { + "epoch": 0.37, + "grad_norm": 5.609558168860129, + "learning_rate": 7.23461481683975e-06, + "loss": 1.0022, + "step": 4551 + }, + { + "epoch": 0.37, + "grad_norm": 3.9874756521860313, + "learning_rate": 7.233430522189797e-06, + "loss": 0.8539, + "step": 4552 + }, + { + "epoch": 0.37, + "grad_norm": 4.117818020127755, + "learning_rate": 7.232246070986084e-06, + "loss": 0.8997, + "step": 4553 + }, + { + "epoch": 0.37, + "grad_norm": 3.8538029708155297, + "learning_rate": 7.2310614633116376e-06, + "loss": 0.7597, + "step": 4554 + }, + { + "epoch": 0.37, + "grad_norm": 3.0481157383577426, + "learning_rate": 7.22987669924949e-06, + "loss": 0.6272, + "step": 4555 + }, + { + "epoch": 0.37, + "grad_norm": 3.4563412200333063, + "learning_rate": 7.2286917788826926e-06, + "loss": 0.8794, + "step": 4556 + }, + { + "epoch": 0.37, + "grad_norm": 5.077233686569749, + "learning_rate": 7.2275067022943005e-06, + "loss": 1.1632, + "step": 4557 + }, + { + "epoch": 0.37, + "grad_norm": 3.0498537414819245, + "learning_rate": 7.226321469567381e-06, + "loss": 0.4963, + "step": 4558 + }, + { + "epoch": 0.37, + "grad_norm": 4.4327224589889855, + "learning_rate": 7.225136080785016e-06, + "loss": 1.0744, + "step": 4559 + }, + { + "epoch": 0.37, + "grad_norm": 3.911606630606663, + "learning_rate": 7.223950536030297e-06, + "loss": 1.0902, + "step": 4560 + }, + { + "epoch": 0.37, + "grad_norm": 5.122772441786447, + "learning_rate": 7.2227648353863225e-06, + "loss": 1.3166, + "step": 4561 + }, + { + "epoch": 0.37, + "grad_norm": 4.281493251686354, + "learning_rate": 7.221578978936207e-06, + "loss": 0.7545, + "step": 4562 + }, + { + "epoch": 0.37, + "grad_norm": 3.8442070070178835, + "learning_rate": 7.220392966763072e-06, + "loss": 0.7848, + "step": 4563 + }, + { + "epoch": 0.37, + "grad_norm": 4.704511580743091, + "learning_rate": 7.219206798950056e-06, + "loss": 0.9293, + "step": 4564 + }, + { + "epoch": 0.37, + "grad_norm": 3.8800793017540376, + "learning_rate": 7.218020475580301e-06, + "loss": 0.7855, + "step": 4565 + }, + { + "epoch": 0.37, + "grad_norm": 4.285617047839879, + "learning_rate": 7.216833996736963e-06, + "loss": 0.8195, + "step": 4566 + }, + { + "epoch": 0.37, + "grad_norm": 5.473580101703317, + "learning_rate": 7.2156473625032075e-06, + "loss": 1.4403, + "step": 4567 + }, + { + "epoch": 0.37, + "grad_norm": 4.510327755197747, + "learning_rate": 7.2144605729622166e-06, + "loss": 1.1779, + "step": 4568 + }, + { + "epoch": 0.37, + "grad_norm": 3.0337100328946316, + "learning_rate": 7.213273628197176e-06, + "loss": 0.642, + "step": 4569 + }, + { + "epoch": 0.37, + "grad_norm": 4.993271610598595, + "learning_rate": 7.212086528291286e-06, + "loss": 0.9385, + "step": 4570 + }, + { + "epoch": 0.37, + "grad_norm": 3.5938483151970684, + "learning_rate": 7.2108992733277595e-06, + "loss": 0.5875, + "step": 4571 + }, + { + "epoch": 0.37, + "grad_norm": 2.905467746768345, + "learning_rate": 7.209711863389815e-06, + "loss": 0.7408, + "step": 4572 + }, + { + "epoch": 0.37, + "grad_norm": 4.3494342996801905, + "learning_rate": 7.208524298560684e-06, + "loss": 0.7769, + "step": 4573 + }, + { + "epoch": 0.37, + "grad_norm": 3.3883729246932255, + "learning_rate": 7.207336578923613e-06, + "loss": 0.5969, + "step": 4574 + }, + { + "epoch": 0.37, + "grad_norm": 1.1952773938067043, + "learning_rate": 7.206148704561853e-06, + "loss": 0.2003, + "step": 4575 + }, + { + "epoch": 0.37, + "grad_norm": 3.2361353989913852, + "learning_rate": 7.20496067555867e-06, + "loss": 0.7028, + "step": 4576 + }, + { + "epoch": 0.37, + "grad_norm": 3.4319513704107956, + "learning_rate": 7.20377249199734e-06, + "loss": 0.6851, + "step": 4577 + }, + { + "epoch": 0.37, + "grad_norm": 4.76818205154338, + "learning_rate": 7.2025841539611485e-06, + "loss": 0.7971, + "step": 4578 + }, + { + "epoch": 0.37, + "grad_norm": 2.088168301961054, + "learning_rate": 7.201395661533395e-06, + "loss": 0.3357, + "step": 4579 + }, + { + "epoch": 0.37, + "grad_norm": 4.965093420537595, + "learning_rate": 7.200207014797385e-06, + "loss": 0.8798, + "step": 4580 + }, + { + "epoch": 0.37, + "grad_norm": 3.1078580794068604, + "learning_rate": 7.19901821383644e-06, + "loss": 0.4883, + "step": 4581 + }, + { + "epoch": 0.37, + "grad_norm": 3.9574296718617967, + "learning_rate": 7.197829258733886e-06, + "loss": 1.0293, + "step": 4582 + }, + { + "epoch": 0.37, + "grad_norm": 3.736266789641212, + "learning_rate": 7.1966401495730675e-06, + "loss": 0.6452, + "step": 4583 + }, + { + "epoch": 0.37, + "grad_norm": 2.842006382916651, + "learning_rate": 7.195450886437334e-06, + "loss": 0.3723, + "step": 4584 + }, + { + "epoch": 0.37, + "grad_norm": 3.885450874992221, + "learning_rate": 7.1942614694100476e-06, + "loss": 0.8921, + "step": 4585 + }, + { + "epoch": 0.37, + "grad_norm": 4.33205324545829, + "learning_rate": 7.1930718985745815e-06, + "loss": 1.0317, + "step": 4586 + }, + { + "epoch": 0.37, + "grad_norm": 3.603549135962034, + "learning_rate": 7.1918821740143196e-06, + "loss": 0.88, + "step": 4587 + }, + { + "epoch": 0.38, + "grad_norm": 2.2702414115235223, + "learning_rate": 7.190692295812658e-06, + "loss": 0.4342, + "step": 4588 + }, + { + "epoch": 0.38, + "grad_norm": 3.7315755603115504, + "learning_rate": 7.189502264053e-06, + "loss": 0.8002, + "step": 4589 + }, + { + "epoch": 0.38, + "grad_norm": 5.350066189445501, + "learning_rate": 7.188312078818761e-06, + "loss": 1.4728, + "step": 4590 + }, + { + "epoch": 0.38, + "grad_norm": 3.6486634707171044, + "learning_rate": 7.18712174019337e-06, + "loss": 0.8281, + "step": 4591 + }, + { + "epoch": 0.38, + "grad_norm": 3.2919211998095594, + "learning_rate": 7.185931248260262e-06, + "loss": 0.78, + "step": 4592 + }, + { + "epoch": 0.38, + "grad_norm": 3.5585559243371536, + "learning_rate": 7.1847406031028866e-06, + "loss": 0.4944, + "step": 4593 + }, + { + "epoch": 0.38, + "grad_norm": 2.0813044061523454, + "learning_rate": 7.183549804804704e-06, + "loss": 0.3836, + "step": 4594 + }, + { + "epoch": 0.38, + "grad_norm": 2.3181687589362014, + "learning_rate": 7.182358853449183e-06, + "loss": 0.4667, + "step": 4595 + }, + { + "epoch": 0.38, + "grad_norm": 2.826918066825633, + "learning_rate": 7.181167749119804e-06, + "loss": 0.5078, + "step": 4596 + }, + { + "epoch": 0.38, + "grad_norm": 4.630806612184872, + "learning_rate": 7.179976491900058e-06, + "loss": 1.3874, + "step": 4597 + }, + { + "epoch": 0.38, + "grad_norm": 3.0602457597906483, + "learning_rate": 7.178785081873446e-06, + "loss": 0.5455, + "step": 4598 + }, + { + "epoch": 0.38, + "grad_norm": 2.5222743372079512, + "learning_rate": 7.177593519123483e-06, + "loss": 0.4905, + "step": 4599 + }, + { + "epoch": 0.38, + "grad_norm": 3.188427047383891, + "learning_rate": 7.176401803733691e-06, + "loss": 0.7658, + "step": 4600 + }, + { + "epoch": 0.38, + "grad_norm": 3.7162178644039856, + "learning_rate": 7.175209935787605e-06, + "loss": 0.9036, + "step": 4601 + }, + { + "epoch": 0.38, + "grad_norm": 4.602496845755888, + "learning_rate": 7.174017915368769e-06, + "loss": 0.9856, + "step": 4602 + }, + { + "epoch": 0.38, + "grad_norm": 3.381026091099391, + "learning_rate": 7.172825742560737e-06, + "loss": 0.6102, + "step": 4603 + }, + { + "epoch": 0.38, + "grad_norm": 4.774135585966469, + "learning_rate": 7.171633417447078e-06, + "loss": 0.8705, + "step": 4604 + }, + { + "epoch": 0.38, + "grad_norm": 2.7188104057779228, + "learning_rate": 7.170440940111367e-06, + "loss": 0.6872, + "step": 4605 + }, + { + "epoch": 0.38, + "grad_norm": 3.4582576816064883, + "learning_rate": 7.169248310637192e-06, + "loss": 0.9781, + "step": 4606 + }, + { + "epoch": 0.38, + "grad_norm": 3.6689730999616215, + "learning_rate": 7.168055529108151e-06, + "loss": 0.8557, + "step": 4607 + }, + { + "epoch": 0.38, + "grad_norm": 3.716459932166737, + "learning_rate": 7.166862595607853e-06, + "loss": 0.8339, + "step": 4608 + }, + { + "epoch": 0.38, + "grad_norm": 4.144471309239014, + "learning_rate": 7.165669510219917e-06, + "loss": 1.0876, + "step": 4609 + }, + { + "epoch": 0.38, + "grad_norm": 4.411698165275617, + "learning_rate": 7.164476273027973e-06, + "loss": 0.961, + "step": 4610 + }, + { + "epoch": 0.38, + "grad_norm": 3.257966379763284, + "learning_rate": 7.163282884115662e-06, + "loss": 0.6124, + "step": 4611 + }, + { + "epoch": 0.38, + "grad_norm": 4.2342756996710795, + "learning_rate": 7.1620893435666375e-06, + "loss": 0.8242, + "step": 4612 + }, + { + "epoch": 0.38, + "grad_norm": 1.9126104647795454, + "learning_rate": 7.160895651464557e-06, + "loss": 0.4754, + "step": 4613 + }, + { + "epoch": 0.38, + "grad_norm": 4.460995081548863, + "learning_rate": 7.159701807893097e-06, + "loss": 1.2291, + "step": 4614 + }, + { + "epoch": 0.38, + "grad_norm": 5.141431103320525, + "learning_rate": 7.1585078129359385e-06, + "loss": 0.9833, + "step": 4615 + }, + { + "epoch": 0.38, + "grad_norm": 4.277701753729068, + "learning_rate": 7.157313666676775e-06, + "loss": 0.9546, + "step": 4616 + }, + { + "epoch": 0.38, + "grad_norm": 2.7258342660221495, + "learning_rate": 7.156119369199315e-06, + "loss": 0.5565, + "step": 4617 + }, + { + "epoch": 0.38, + "grad_norm": 3.75708470979647, + "learning_rate": 7.154924920587269e-06, + "loss": 0.8413, + "step": 4618 + }, + { + "epoch": 0.38, + "grad_norm": 4.422246019971207, + "learning_rate": 7.153730320924365e-06, + "loss": 0.9829, + "step": 4619 + }, + { + "epoch": 0.38, + "grad_norm": 5.067999875100732, + "learning_rate": 7.152535570294339e-06, + "loss": 0.9425, + "step": 4620 + }, + { + "epoch": 0.38, + "grad_norm": 3.242856394561573, + "learning_rate": 7.151340668780935e-06, + "loss": 0.8913, + "step": 4621 + }, + { + "epoch": 0.38, + "grad_norm": 2.493206777102948, + "learning_rate": 7.150145616467916e-06, + "loss": 0.5349, + "step": 4622 + }, + { + "epoch": 0.38, + "grad_norm": 3.618796024916159, + "learning_rate": 7.148950413439044e-06, + "loss": 0.8845, + "step": 4623 + }, + { + "epoch": 0.38, + "grad_norm": 2.8203092018052547, + "learning_rate": 7.147755059778101e-06, + "loss": 0.4983, + "step": 4624 + }, + { + "epoch": 0.38, + "grad_norm": 4.312465249288361, + "learning_rate": 7.146559555568876e-06, + "loss": 0.6341, + "step": 4625 + }, + { + "epoch": 0.38, + "grad_norm": 5.007985232158787, + "learning_rate": 7.145363900895168e-06, + "loss": 0.8231, + "step": 4626 + }, + { + "epoch": 0.38, + "grad_norm": 3.36413579721577, + "learning_rate": 7.144168095840786e-06, + "loss": 0.6309, + "step": 4627 + }, + { + "epoch": 0.38, + "grad_norm": 3.760414558968082, + "learning_rate": 7.142972140489555e-06, + "loss": 0.9974, + "step": 4628 + }, + { + "epoch": 0.38, + "grad_norm": 3.611181049430095, + "learning_rate": 7.1417760349253005e-06, + "loss": 0.9156, + "step": 4629 + }, + { + "epoch": 0.38, + "grad_norm": 3.496988257817793, + "learning_rate": 7.140579779231866e-06, + "loss": 0.5388, + "step": 4630 + }, + { + "epoch": 0.38, + "grad_norm": 3.9493649618885285, + "learning_rate": 7.139383373493107e-06, + "loss": 0.5925, + "step": 4631 + }, + { + "epoch": 0.38, + "grad_norm": 4.630837403273287, + "learning_rate": 7.1381868177928834e-06, + "loss": 0.7775, + "step": 4632 + }, + { + "epoch": 0.38, + "grad_norm": 4.618547196665511, + "learning_rate": 7.1369901122150694e-06, + "loss": 0.8912, + "step": 4633 + }, + { + "epoch": 0.38, + "grad_norm": 3.5624447846221794, + "learning_rate": 7.1357932568435496e-06, + "loss": 0.9665, + "step": 4634 + }, + { + "epoch": 0.38, + "grad_norm": 3.767445364020535, + "learning_rate": 7.134596251762217e-06, + "loss": 1.3131, + "step": 4635 + }, + { + "epoch": 0.38, + "grad_norm": 4.482513277230487, + "learning_rate": 7.1333990970549764e-06, + "loss": 1.0189, + "step": 4636 + }, + { + "epoch": 0.38, + "grad_norm": 4.301321079555022, + "learning_rate": 7.132201792805744e-06, + "loss": 0.8224, + "step": 4637 + }, + { + "epoch": 0.38, + "grad_norm": 3.05399806054526, + "learning_rate": 7.131004339098445e-06, + "loss": 0.5469, + "step": 4638 + }, + { + "epoch": 0.38, + "grad_norm": 4.792266429876065, + "learning_rate": 7.129806736017015e-06, + "loss": 0.8165, + "step": 4639 + }, + { + "epoch": 0.38, + "grad_norm": 4.72525028465202, + "learning_rate": 7.128608983645404e-06, + "loss": 0.6924, + "step": 4640 + }, + { + "epoch": 0.38, + "grad_norm": 3.587819774226734, + "learning_rate": 7.127411082067566e-06, + "loss": 1.0171, + "step": 4641 + }, + { + "epoch": 0.38, + "grad_norm": 3.247042866915213, + "learning_rate": 7.126213031367471e-06, + "loss": 0.5919, + "step": 4642 + }, + { + "epoch": 0.38, + "grad_norm": 3.3236833483018917, + "learning_rate": 7.1250148316290936e-06, + "loss": 0.5729, + "step": 4643 + }, + { + "epoch": 0.38, + "grad_norm": 4.0220944780262355, + "learning_rate": 7.123816482936425e-06, + "loss": 0.9583, + "step": 4644 + }, + { + "epoch": 0.38, + "grad_norm": 3.455372502724746, + "learning_rate": 7.122617985373466e-06, + "loss": 0.6043, + "step": 4645 + }, + { + "epoch": 0.38, + "grad_norm": 4.784670669336466, + "learning_rate": 7.12141933902422e-06, + "loss": 1.4889, + "step": 4646 + }, + { + "epoch": 0.38, + "grad_norm": 3.5993468761311838, + "learning_rate": 7.120220543972714e-06, + "loss": 0.7301, + "step": 4647 + }, + { + "epoch": 0.38, + "grad_norm": 4.180978435085142, + "learning_rate": 7.119021600302973e-06, + "loss": 0.9515, + "step": 4648 + }, + { + "epoch": 0.38, + "grad_norm": 3.3315021353161502, + "learning_rate": 7.117822508099042e-06, + "loss": 0.7091, + "step": 4649 + }, + { + "epoch": 0.38, + "grad_norm": 3.7542941945456128, + "learning_rate": 7.116623267444969e-06, + "loss": 0.7678, + "step": 4650 + }, + { + "epoch": 0.38, + "grad_norm": 3.664733599625183, + "learning_rate": 7.115423878424817e-06, + "loss": 1.1193, + "step": 4651 + }, + { + "epoch": 0.38, + "grad_norm": 4.516391532806129, + "learning_rate": 7.114224341122655e-06, + "loss": 1.5861, + "step": 4652 + }, + { + "epoch": 0.38, + "grad_norm": 2.9343289636014185, + "learning_rate": 7.113024655622571e-06, + "loss": 0.4885, + "step": 4653 + }, + { + "epoch": 0.38, + "grad_norm": 3.356400620423507, + "learning_rate": 7.111824822008653e-06, + "loss": 0.6609, + "step": 4654 + }, + { + "epoch": 0.38, + "grad_norm": 4.128175145715575, + "learning_rate": 7.110624840365005e-06, + "loss": 0.9746, + "step": 4655 + }, + { + "epoch": 0.38, + "grad_norm": 3.2156393306671833, + "learning_rate": 7.109424710775742e-06, + "loss": 0.484, + "step": 4656 + }, + { + "epoch": 0.38, + "grad_norm": 3.1678849959490245, + "learning_rate": 7.108224433324987e-06, + "loss": 0.555, + "step": 4657 + }, + { + "epoch": 0.38, + "grad_norm": 2.5124720518160513, + "learning_rate": 7.107024008096874e-06, + "loss": 0.5308, + "step": 4658 + }, + { + "epoch": 0.38, + "grad_norm": 2.8215022406943118, + "learning_rate": 7.105823435175549e-06, + "loss": 0.4427, + "step": 4659 + }, + { + "epoch": 0.38, + "grad_norm": 4.791148483114985, + "learning_rate": 7.104622714645165e-06, + "loss": 1.3376, + "step": 4660 + }, + { + "epoch": 0.38, + "grad_norm": 3.2189910061655698, + "learning_rate": 7.103421846589888e-06, + "loss": 0.5059, + "step": 4661 + }, + { + "epoch": 0.38, + "grad_norm": 3.2099581856724844, + "learning_rate": 7.102220831093893e-06, + "loss": 0.5016, + "step": 4662 + }, + { + "epoch": 0.38, + "grad_norm": 3.551899396842402, + "learning_rate": 7.101019668241368e-06, + "loss": 0.7423, + "step": 4663 + }, + { + "epoch": 0.38, + "grad_norm": 2.839274902607117, + "learning_rate": 7.099818358116506e-06, + "loss": 0.8159, + "step": 4664 + }, + { + "epoch": 0.38, + "grad_norm": 3.091721458837044, + "learning_rate": 7.0986169008035175e-06, + "loss": 0.7227, + "step": 4665 + }, + { + "epoch": 0.38, + "grad_norm": 4.497212176209524, + "learning_rate": 7.097415296386617e-06, + "loss": 0.9619, + "step": 4666 + }, + { + "epoch": 0.38, + "grad_norm": 4.133435638548798, + "learning_rate": 7.096213544950032e-06, + "loss": 0.9201, + "step": 4667 + }, + { + "epoch": 0.38, + "grad_norm": 1.7868724618128153, + "learning_rate": 7.095011646578001e-06, + "loss": 0.3228, + "step": 4668 + }, + { + "epoch": 0.38, + "grad_norm": 2.6219947054937496, + "learning_rate": 7.093809601354769e-06, + "loss": 0.615, + "step": 4669 + }, + { + "epoch": 0.38, + "grad_norm": 3.4273523018159104, + "learning_rate": 7.092607409364597e-06, + "loss": 0.6523, + "step": 4670 + }, + { + "epoch": 0.38, + "grad_norm": 4.217834982538416, + "learning_rate": 7.0914050706917536e-06, + "loss": 0.7628, + "step": 4671 + }, + { + "epoch": 0.38, + "grad_norm": 4.023922645696487, + "learning_rate": 7.090202585420516e-06, + "loss": 1.1023, + "step": 4672 + }, + { + "epoch": 0.38, + "grad_norm": 3.1929940805848345, + "learning_rate": 7.088999953635174e-06, + "loss": 0.6617, + "step": 4673 + }, + { + "epoch": 0.38, + "grad_norm": 4.27375063412841, + "learning_rate": 7.087797175420028e-06, + "loss": 1.1346, + "step": 4674 + }, + { + "epoch": 0.38, + "grad_norm": 5.14383253909754, + "learning_rate": 7.086594250859383e-06, + "loss": 1.0806, + "step": 4675 + }, + { + "epoch": 0.38, + "grad_norm": 3.798510194907834, + "learning_rate": 7.085391180037564e-06, + "loss": 0.8079, + "step": 4676 + }, + { + "epoch": 0.38, + "grad_norm": 4.836075392491869, + "learning_rate": 7.084187963038899e-06, + "loss": 0.9677, + "step": 4677 + }, + { + "epoch": 0.38, + "grad_norm": 4.100173145603222, + "learning_rate": 7.082984599947727e-06, + "loss": 0.8584, + "step": 4678 + }, + { + "epoch": 0.38, + "grad_norm": 1.6888219478857118, + "learning_rate": 7.0817810908483995e-06, + "loss": 0.2592, + "step": 4679 + }, + { + "epoch": 0.38, + "grad_norm": 3.1288526522734137, + "learning_rate": 7.080577435825279e-06, + "loss": 0.8195, + "step": 4680 + }, + { + "epoch": 0.38, + "grad_norm": 3.1526606107911697, + "learning_rate": 7.079373634962735e-06, + "loss": 0.8578, + "step": 4681 + }, + { + "epoch": 0.38, + "grad_norm": 3.7193182479352145, + "learning_rate": 7.0781696883451486e-06, + "loss": 0.9729, + "step": 4682 + }, + { + "epoch": 0.38, + "grad_norm": 1.8929230926532878, + "learning_rate": 7.076965596056911e-06, + "loss": 0.4022, + "step": 4683 + }, + { + "epoch": 0.38, + "grad_norm": 2.720322440920711, + "learning_rate": 7.075761358182423e-06, + "loss": 0.5376, + "step": 4684 + }, + { + "epoch": 0.38, + "grad_norm": 4.45190880528166, + "learning_rate": 7.074556974806098e-06, + "loss": 0.9728, + "step": 4685 + }, + { + "epoch": 0.38, + "grad_norm": 3.503468316807604, + "learning_rate": 7.073352446012357e-06, + "loss": 0.7926, + "step": 4686 + }, + { + "epoch": 0.38, + "grad_norm": 5.194545270931051, + "learning_rate": 7.072147771885633e-06, + "loss": 1.4915, + "step": 4687 + }, + { + "epoch": 0.38, + "grad_norm": 5.941519706504127, + "learning_rate": 7.07094295251037e-06, + "loss": 1.0565, + "step": 4688 + }, + { + "epoch": 0.38, + "grad_norm": 4.2702531812199265, + "learning_rate": 7.069737987971017e-06, + "loss": 0.771, + "step": 4689 + }, + { + "epoch": 0.38, + "grad_norm": 2.864746525153197, + "learning_rate": 7.06853287835204e-06, + "loss": 0.6395, + "step": 4690 + }, + { + "epoch": 0.38, + "grad_norm": 4.320869939847818, + "learning_rate": 7.06732762373791e-06, + "loss": 0.8968, + "step": 4691 + }, + { + "epoch": 0.38, + "grad_norm": 3.8603919227263654, + "learning_rate": 7.06612222421311e-06, + "loss": 0.5715, + "step": 4692 + }, + { + "epoch": 0.38, + "grad_norm": 2.815299350955375, + "learning_rate": 7.064916679862134e-06, + "loss": 0.2505, + "step": 4693 + }, + { + "epoch": 0.38, + "grad_norm": 3.416622876392271, + "learning_rate": 7.0637109907694855e-06, + "loss": 0.6977, + "step": 4694 + }, + { + "epoch": 0.38, + "grad_norm": 4.325136418083608, + "learning_rate": 7.062505157019678e-06, + "loss": 1.155, + "step": 4695 + }, + { + "epoch": 0.38, + "grad_norm": 4.29304724423045, + "learning_rate": 7.061299178697234e-06, + "loss": 0.8864, + "step": 4696 + }, + { + "epoch": 0.38, + "grad_norm": 4.2348968803133, + "learning_rate": 7.06009305588669e-06, + "loss": 1.2064, + "step": 4697 + }, + { + "epoch": 0.38, + "grad_norm": 4.325225458239991, + "learning_rate": 7.058886788672588e-06, + "loss": 0.816, + "step": 4698 + }, + { + "epoch": 0.38, + "grad_norm": 2.7366570370091066, + "learning_rate": 7.057680377139482e-06, + "loss": 0.549, + "step": 4699 + }, + { + "epoch": 0.38, + "grad_norm": 3.6770758782743433, + "learning_rate": 7.056473821371936e-06, + "loss": 1.0477, + "step": 4700 + }, + { + "epoch": 0.38, + "grad_norm": 3.9101510171746092, + "learning_rate": 7.055267121454525e-06, + "loss": 1.0068, + "step": 4701 + }, + { + "epoch": 0.38, + "grad_norm": 4.387053921551004, + "learning_rate": 7.054060277471834e-06, + "loss": 1.2587, + "step": 4702 + }, + { + "epoch": 0.38, + "grad_norm": 3.8716796704049807, + "learning_rate": 7.052853289508458e-06, + "loss": 0.7437, + "step": 4703 + }, + { + "epoch": 0.38, + "grad_norm": 3.101094078305496, + "learning_rate": 7.051646157648998e-06, + "loss": 0.5624, + "step": 4704 + }, + { + "epoch": 0.38, + "grad_norm": 3.2949686093473995, + "learning_rate": 7.050438881978073e-06, + "loss": 0.5994, + "step": 4705 + }, + { + "epoch": 0.38, + "grad_norm": 4.680571162710411, + "learning_rate": 7.049231462580306e-06, + "loss": 0.984, + "step": 4706 + }, + { + "epoch": 0.38, + "grad_norm": 3.764122359448531, + "learning_rate": 7.0480238995403305e-06, + "loss": 1.0566, + "step": 4707 + }, + { + "epoch": 0.38, + "grad_norm": 3.3283288351922162, + "learning_rate": 7.046816192942794e-06, + "loss": 0.867, + "step": 4708 + }, + { + "epoch": 0.38, + "grad_norm": 3.8178504026156137, + "learning_rate": 7.045608342872349e-06, + "loss": 0.6565, + "step": 4709 + }, + { + "epoch": 0.38, + "grad_norm": 4.594460551811602, + "learning_rate": 7.044400349413661e-06, + "loss": 1.315, + "step": 4710 + }, + { + "epoch": 0.39, + "grad_norm": 2.49430484143887, + "learning_rate": 7.043192212651407e-06, + "loss": 0.326, + "step": 4711 + }, + { + "epoch": 0.39, + "grad_norm": 3.924263522486229, + "learning_rate": 7.041983932670271e-06, + "loss": 1.0455, + "step": 4712 + }, + { + "epoch": 0.39, + "grad_norm": 3.046648117170458, + "learning_rate": 7.040775509554948e-06, + "loss": 0.531, + "step": 4713 + }, + { + "epoch": 0.39, + "grad_norm": 3.7765304854224135, + "learning_rate": 7.039566943390144e-06, + "loss": 0.717, + "step": 4714 + }, + { + "epoch": 0.39, + "grad_norm": 3.7657904788378427, + "learning_rate": 7.038358234260572e-06, + "loss": 0.9202, + "step": 4715 + }, + { + "epoch": 0.39, + "grad_norm": 2.952201527018048, + "learning_rate": 7.037149382250959e-06, + "loss": 0.5853, + "step": 4716 + }, + { + "epoch": 0.39, + "grad_norm": 4.1458126725943245, + "learning_rate": 7.035940387446041e-06, + "loss": 0.6387, + "step": 4717 + }, + { + "epoch": 0.39, + "grad_norm": 4.345177420459916, + "learning_rate": 7.03473124993056e-06, + "loss": 0.7602, + "step": 4718 + }, + { + "epoch": 0.39, + "grad_norm": 1.9731307704986518, + "learning_rate": 7.033521969789275e-06, + "loss": 0.3163, + "step": 4719 + }, + { + "epoch": 0.39, + "grad_norm": 3.9952518683111844, + "learning_rate": 7.03231254710695e-06, + "loss": 0.7432, + "step": 4720 + }, + { + "epoch": 0.39, + "grad_norm": 2.6574995418647163, + "learning_rate": 7.031102981968361e-06, + "loss": 0.5555, + "step": 4721 + }, + { + "epoch": 0.39, + "grad_norm": 2.1569025298242925, + "learning_rate": 7.029893274458291e-06, + "loss": 0.4483, + "step": 4722 + }, + { + "epoch": 0.39, + "grad_norm": 3.535936462310264, + "learning_rate": 7.028683424661538e-06, + "loss": 0.9622, + "step": 4723 + }, + { + "epoch": 0.39, + "grad_norm": 2.7605565413311677, + "learning_rate": 7.0274734326629035e-06, + "loss": 0.6126, + "step": 4724 + }, + { + "epoch": 0.39, + "grad_norm": 2.8761513436527633, + "learning_rate": 7.026263298547207e-06, + "loss": 0.7268, + "step": 4725 + }, + { + "epoch": 0.39, + "grad_norm": 4.849127956931701, + "learning_rate": 7.025053022399271e-06, + "loss": 0.9776, + "step": 4726 + }, + { + "epoch": 0.39, + "grad_norm": 2.2426848629555685, + "learning_rate": 7.02384260430393e-06, + "loss": 0.4053, + "step": 4727 + }, + { + "epoch": 0.39, + "grad_norm": 4.547617522634692, + "learning_rate": 7.022632044346032e-06, + "loss": 1.2811, + "step": 4728 + }, + { + "epoch": 0.39, + "grad_norm": 2.7078112908147984, + "learning_rate": 7.0214213426104295e-06, + "loss": 0.6024, + "step": 4729 + }, + { + "epoch": 0.39, + "grad_norm": 3.5512443500532593, + "learning_rate": 7.020210499181988e-06, + "loss": 0.7873, + "step": 4730 + }, + { + "epoch": 0.39, + "grad_norm": 3.8017442415960017, + "learning_rate": 7.0189995141455836e-06, + "loss": 0.6312, + "step": 4731 + }, + { + "epoch": 0.39, + "grad_norm": 4.198275952054718, + "learning_rate": 7.017788387586097e-06, + "loss": 0.9264, + "step": 4732 + }, + { + "epoch": 0.39, + "grad_norm": 3.9401947156048407, + "learning_rate": 7.016577119588428e-06, + "loss": 0.8761, + "step": 4733 + }, + { + "epoch": 0.39, + "grad_norm": 3.407470671577025, + "learning_rate": 7.015365710237479e-06, + "loss": 0.8748, + "step": 4734 + }, + { + "epoch": 0.39, + "grad_norm": 5.3772780839984895, + "learning_rate": 7.0141541596181654e-06, + "loss": 1.3098, + "step": 4735 + }, + { + "epoch": 0.39, + "grad_norm": 4.125854112287967, + "learning_rate": 7.01294246781541e-06, + "loss": 0.606, + "step": 4736 + }, + { + "epoch": 0.39, + "grad_norm": 2.953373275840162, + "learning_rate": 7.0117306349141485e-06, + "loss": 0.5249, + "step": 4737 + }, + { + "epoch": 0.39, + "grad_norm": 4.831197244885152, + "learning_rate": 7.010518660999324e-06, + "loss": 1.3313, + "step": 4738 + }, + { + "epoch": 0.39, + "grad_norm": 3.08326069466575, + "learning_rate": 7.009306546155889e-06, + "loss": 0.6701, + "step": 4739 + }, + { + "epoch": 0.39, + "grad_norm": 3.6870448741867894, + "learning_rate": 7.008094290468813e-06, + "loss": 0.9529, + "step": 4740 + }, + { + "epoch": 0.39, + "grad_norm": 4.124650538400718, + "learning_rate": 7.006881894023065e-06, + "loss": 0.8456, + "step": 4741 + }, + { + "epoch": 0.39, + "grad_norm": 3.983367153795829, + "learning_rate": 7.005669356903631e-06, + "loss": 1.1963, + "step": 4742 + }, + { + "epoch": 0.39, + "grad_norm": 4.0868385441047606, + "learning_rate": 7.004456679195503e-06, + "loss": 0.8454, + "step": 4743 + }, + { + "epoch": 0.39, + "grad_norm": 3.0532609433396725, + "learning_rate": 7.003243860983686e-06, + "loss": 0.6778, + "step": 4744 + }, + { + "epoch": 0.39, + "grad_norm": 4.0415860533912085, + "learning_rate": 7.002030902353193e-06, + "loss": 0.8216, + "step": 4745 + }, + { + "epoch": 0.39, + "grad_norm": 4.584568112521553, + "learning_rate": 7.000817803389045e-06, + "loss": 0.9213, + "step": 4746 + }, + { + "epoch": 0.39, + "grad_norm": 3.4185218446827954, + "learning_rate": 6.999604564176277e-06, + "loss": 0.8299, + "step": 4747 + }, + { + "epoch": 0.39, + "grad_norm": 5.722918145577284, + "learning_rate": 6.998391184799932e-06, + "loss": 1.7364, + "step": 4748 + }, + { + "epoch": 0.39, + "grad_norm": 4.229748668312871, + "learning_rate": 6.99717766534506e-06, + "loss": 0.8441, + "step": 4749 + }, + { + "epoch": 0.39, + "grad_norm": 3.1861083971798494, + "learning_rate": 6.995964005896727e-06, + "loss": 0.4733, + "step": 4750 + }, + { + "epoch": 0.39, + "grad_norm": 3.2484214364248305, + "learning_rate": 6.994750206540004e-06, + "loss": 0.7676, + "step": 4751 + }, + { + "epoch": 0.39, + "grad_norm": 5.449813091236215, + "learning_rate": 6.993536267359974e-06, + "loss": 0.9787, + "step": 4752 + }, + { + "epoch": 0.39, + "grad_norm": 3.6520084624347993, + "learning_rate": 6.992322188441725e-06, + "loss": 0.9034, + "step": 4753 + }, + { + "epoch": 0.39, + "grad_norm": 2.1529575535292254, + "learning_rate": 6.991107969870363e-06, + "loss": 0.4434, + "step": 4754 + }, + { + "epoch": 0.39, + "grad_norm": 4.303898302529973, + "learning_rate": 6.989893611730996e-06, + "loss": 0.6798, + "step": 4755 + }, + { + "epoch": 0.39, + "grad_norm": 3.822231187335992, + "learning_rate": 6.988679114108747e-06, + "loss": 1.0074, + "step": 4756 + }, + { + "epoch": 0.39, + "grad_norm": 3.588487698818553, + "learning_rate": 6.987464477088748e-06, + "loss": 0.8526, + "step": 4757 + }, + { + "epoch": 0.39, + "grad_norm": 2.1697715748788413, + "learning_rate": 6.986249700756138e-06, + "loss": 0.3875, + "step": 4758 + }, + { + "epoch": 0.39, + "grad_norm": 4.4852047598958436, + "learning_rate": 6.985034785196069e-06, + "loss": 1.0415, + "step": 4759 + }, + { + "epoch": 0.39, + "grad_norm": 4.216136059924069, + "learning_rate": 6.983819730493699e-06, + "loss": 0.6872, + "step": 4760 + }, + { + "epoch": 0.39, + "grad_norm": 3.5697743111338145, + "learning_rate": 6.9826045367342e-06, + "loss": 0.6744, + "step": 4761 + }, + { + "epoch": 0.39, + "grad_norm": 3.286634141739833, + "learning_rate": 6.981389204002751e-06, + "loss": 0.7238, + "step": 4762 + }, + { + "epoch": 0.39, + "grad_norm": 3.5571073222616736, + "learning_rate": 6.980173732384543e-06, + "loss": 0.5277, + "step": 4763 + }, + { + "epoch": 0.39, + "grad_norm": 4.818274543221162, + "learning_rate": 6.978958121964773e-06, + "loss": 1.4853, + "step": 4764 + }, + { + "epoch": 0.39, + "grad_norm": 3.9200206363249097, + "learning_rate": 6.977742372828652e-06, + "loss": 0.9199, + "step": 4765 + }, + { + "epoch": 0.39, + "grad_norm": 3.176623938334342, + "learning_rate": 6.976526485061397e-06, + "loss": 0.6575, + "step": 4766 + }, + { + "epoch": 0.39, + "grad_norm": 5.174931941643446, + "learning_rate": 6.975310458748238e-06, + "loss": 1.1418, + "step": 4767 + }, + { + "epoch": 0.39, + "grad_norm": 2.2719845268830756, + "learning_rate": 6.9740942939744125e-06, + "loss": 0.5741, + "step": 4768 + }, + { + "epoch": 0.39, + "grad_norm": 5.788764754495829, + "learning_rate": 6.972877990825167e-06, + "loss": 0.7534, + "step": 4769 + }, + { + "epoch": 0.39, + "grad_norm": 3.6073263780058684, + "learning_rate": 6.971661549385762e-06, + "loss": 0.6959, + "step": 4770 + }, + { + "epoch": 0.39, + "grad_norm": 5.604762311779483, + "learning_rate": 6.970444969741462e-06, + "loss": 1.6464, + "step": 4771 + }, + { + "epoch": 0.39, + "grad_norm": 2.7151129366464, + "learning_rate": 6.969228251977545e-06, + "loss": 0.552, + "step": 4772 + }, + { + "epoch": 0.39, + "grad_norm": 2.972356743488502, + "learning_rate": 6.968011396179297e-06, + "loss": 0.7818, + "step": 4773 + }, + { + "epoch": 0.39, + "grad_norm": 2.365531384342218, + "learning_rate": 6.966794402432017e-06, + "loss": 0.469, + "step": 4774 + }, + { + "epoch": 0.39, + "grad_norm": 5.091976058428769, + "learning_rate": 6.965577270821008e-06, + "loss": 0.8084, + "step": 4775 + }, + { + "epoch": 0.39, + "grad_norm": 4.840710225808364, + "learning_rate": 6.964360001431586e-06, + "loss": 1.1058, + "step": 4776 + }, + { + "epoch": 0.39, + "grad_norm": 4.307707946660783, + "learning_rate": 6.963142594349077e-06, + "loss": 1.0741, + "step": 4777 + }, + { + "epoch": 0.39, + "grad_norm": 5.7821674714992835, + "learning_rate": 6.961925049658816e-06, + "loss": 1.1528, + "step": 4778 + }, + { + "epoch": 0.39, + "grad_norm": 3.610844148978979, + "learning_rate": 6.960707367446147e-06, + "loss": 0.9325, + "step": 4779 + }, + { + "epoch": 0.39, + "grad_norm": 3.9451373481729353, + "learning_rate": 6.959489547796426e-06, + "loss": 0.6211, + "step": 4780 + }, + { + "epoch": 0.39, + "grad_norm": 3.697845570706351, + "learning_rate": 6.958271590795014e-06, + "loss": 0.5125, + "step": 4781 + }, + { + "epoch": 0.39, + "grad_norm": 3.993889001188721, + "learning_rate": 6.957053496527286e-06, + "loss": 1.0208, + "step": 4782 + }, + { + "epoch": 0.39, + "grad_norm": 2.9045862533892004, + "learning_rate": 6.955835265078626e-06, + "loss": 1.0204, + "step": 4783 + }, + { + "epoch": 0.39, + "grad_norm": 3.574727633134075, + "learning_rate": 6.954616896534424e-06, + "loss": 0.8613, + "step": 4784 + }, + { + "epoch": 0.39, + "grad_norm": 3.0174056463238688, + "learning_rate": 6.953398390980086e-06, + "loss": 0.5144, + "step": 4785 + }, + { + "epoch": 0.39, + "grad_norm": 2.568291338854759, + "learning_rate": 6.95217974850102e-06, + "loss": 0.7935, + "step": 4786 + }, + { + "epoch": 0.39, + "grad_norm": 3.635262649776755, + "learning_rate": 6.950960969182649e-06, + "loss": 0.6803, + "step": 4787 + }, + { + "epoch": 0.39, + "grad_norm": 4.877628339574649, + "learning_rate": 6.949742053110408e-06, + "loss": 0.9124, + "step": 4788 + }, + { + "epoch": 0.39, + "grad_norm": 3.122801296062488, + "learning_rate": 6.9485230003697325e-06, + "loss": 0.6189, + "step": 4789 + }, + { + "epoch": 0.39, + "grad_norm": 3.4939221191245924, + "learning_rate": 6.947303811046074e-06, + "loss": 0.6689, + "step": 4790 + }, + { + "epoch": 0.39, + "grad_norm": 3.140755038586852, + "learning_rate": 6.946084485224895e-06, + "loss": 0.6994, + "step": 4791 + }, + { + "epoch": 0.39, + "grad_norm": 4.215834949502133, + "learning_rate": 6.944865022991661e-06, + "loss": 0.986, + "step": 4792 + }, + { + "epoch": 0.39, + "grad_norm": 3.4420112654537522, + "learning_rate": 6.943645424431854e-06, + "loss": 0.6215, + "step": 4793 + }, + { + "epoch": 0.39, + "grad_norm": 4.709803801079052, + "learning_rate": 6.942425689630962e-06, + "loss": 0.7751, + "step": 4794 + }, + { + "epoch": 0.39, + "grad_norm": 4.797934906655287, + "learning_rate": 6.941205818674482e-06, + "loss": 1.1691, + "step": 4795 + }, + { + "epoch": 0.39, + "grad_norm": 4.187981917818049, + "learning_rate": 6.93998581164792e-06, + "loss": 1.2553, + "step": 4796 + }, + { + "epoch": 0.39, + "grad_norm": 4.89920836804541, + "learning_rate": 6.9387656686367985e-06, + "loss": 1.244, + "step": 4797 + }, + { + "epoch": 0.39, + "grad_norm": 2.9961056564406827, + "learning_rate": 6.93754538972664e-06, + "loss": 0.5682, + "step": 4798 + }, + { + "epoch": 0.39, + "grad_norm": 3.6553136476349426, + "learning_rate": 6.936324975002983e-06, + "loss": 0.8373, + "step": 4799 + }, + { + "epoch": 0.39, + "grad_norm": 3.015444678582228, + "learning_rate": 6.935104424551372e-06, + "loss": 0.7753, + "step": 4800 + }, + { + "epoch": 0.39, + "grad_norm": 3.835661988834962, + "learning_rate": 6.933883738457361e-06, + "loss": 0.7163, + "step": 4801 + }, + { + "epoch": 0.39, + "grad_norm": 3.730630702598074, + "learning_rate": 6.932662916806516e-06, + "loss": 0.9613, + "step": 4802 + }, + { + "epoch": 0.39, + "grad_norm": 4.070507776187825, + "learning_rate": 6.931441959684414e-06, + "loss": 0.7724, + "step": 4803 + }, + { + "epoch": 0.39, + "grad_norm": 4.472275772354918, + "learning_rate": 6.930220867176633e-06, + "loss": 0.8967, + "step": 4804 + }, + { + "epoch": 0.39, + "grad_norm": 5.17464067657258, + "learning_rate": 6.928999639368773e-06, + "loss": 1.3153, + "step": 4805 + }, + { + "epoch": 0.39, + "grad_norm": 3.7710448689759484, + "learning_rate": 6.927778276346431e-06, + "loss": 0.7007, + "step": 4806 + }, + { + "epoch": 0.39, + "grad_norm": 2.7912421280625472, + "learning_rate": 6.926556778195224e-06, + "loss": 0.7555, + "step": 4807 + }, + { + "epoch": 0.39, + "grad_norm": 2.6303058642059765, + "learning_rate": 6.925335145000769e-06, + "loss": 0.4378, + "step": 4808 + }, + { + "epoch": 0.39, + "grad_norm": 3.552899346086931, + "learning_rate": 6.9241133768487005e-06, + "loss": 0.8362, + "step": 4809 + }, + { + "epoch": 0.39, + "grad_norm": 3.691047824294737, + "learning_rate": 6.922891473824655e-06, + "loss": 0.8027, + "step": 4810 + }, + { + "epoch": 0.39, + "grad_norm": 4.868971910272877, + "learning_rate": 6.92166943601429e-06, + "loss": 1.427, + "step": 4811 + }, + { + "epoch": 0.39, + "grad_norm": 3.197455573046435, + "learning_rate": 6.9204472635032586e-06, + "loss": 0.8535, + "step": 4812 + }, + { + "epoch": 0.39, + "grad_norm": 2.943066512717961, + "learning_rate": 6.9192249563772304e-06, + "loss": 0.7115, + "step": 4813 + }, + { + "epoch": 0.39, + "grad_norm": 2.7013442623838446, + "learning_rate": 6.918002514721887e-06, + "loss": 0.6296, + "step": 4814 + }, + { + "epoch": 0.39, + "grad_norm": 3.3205044878653345, + "learning_rate": 6.916779938622916e-06, + "loss": 0.6106, + "step": 4815 + }, + { + "epoch": 0.39, + "grad_norm": 2.576051596597386, + "learning_rate": 6.9155572281660114e-06, + "loss": 0.4611, + "step": 4816 + }, + { + "epoch": 0.39, + "grad_norm": 4.3346066642862535, + "learning_rate": 6.914334383436882e-06, + "loss": 1.1357, + "step": 4817 + }, + { + "epoch": 0.39, + "grad_norm": 4.324620550091956, + "learning_rate": 6.913111404521242e-06, + "loss": 0.8622, + "step": 4818 + }, + { + "epoch": 0.39, + "grad_norm": 5.50650194741537, + "learning_rate": 6.9118882915048204e-06, + "loss": 1.2789, + "step": 4819 + }, + { + "epoch": 0.39, + "grad_norm": 2.085677384828208, + "learning_rate": 6.9106650444733495e-06, + "loss": 0.3836, + "step": 4820 + }, + { + "epoch": 0.39, + "grad_norm": 5.459250967794102, + "learning_rate": 6.909441663512574e-06, + "loss": 0.9297, + "step": 4821 + }, + { + "epoch": 0.39, + "grad_norm": 3.723734873755664, + "learning_rate": 6.908218148708248e-06, + "loss": 1.0447, + "step": 4822 + }, + { + "epoch": 0.39, + "grad_norm": 3.082525983452769, + "learning_rate": 6.906994500146134e-06, + "loss": 0.7147, + "step": 4823 + }, + { + "epoch": 0.39, + "grad_norm": 4.72503396605867, + "learning_rate": 6.905770717912006e-06, + "loss": 1.0701, + "step": 4824 + }, + { + "epoch": 0.39, + "grad_norm": 4.2688936455944555, + "learning_rate": 6.904546802091644e-06, + "loss": 1.1725, + "step": 4825 + }, + { + "epoch": 0.39, + "grad_norm": 3.991151581247189, + "learning_rate": 6.903322752770839e-06, + "loss": 0.8043, + "step": 4826 + }, + { + "epoch": 0.39, + "grad_norm": 4.291152192141413, + "learning_rate": 6.9020985700353925e-06, + "loss": 1.0158, + "step": 4827 + }, + { + "epoch": 0.39, + "grad_norm": 3.054654981801179, + "learning_rate": 6.900874253971115e-06, + "loss": 0.7262, + "step": 4828 + }, + { + "epoch": 0.39, + "grad_norm": 3.352789982756976, + "learning_rate": 6.899649804663824e-06, + "loss": 0.4879, + "step": 4829 + }, + { + "epoch": 0.39, + "grad_norm": 4.69679378842051, + "learning_rate": 6.898425222199349e-06, + "loss": 0.7028, + "step": 4830 + }, + { + "epoch": 0.39, + "grad_norm": 4.316293810375166, + "learning_rate": 6.897200506663531e-06, + "loss": 1.0383, + "step": 4831 + }, + { + "epoch": 0.39, + "grad_norm": 3.9655308923279713, + "learning_rate": 6.89597565814221e-06, + "loss": 0.7912, + "step": 4832 + }, + { + "epoch": 0.4, + "grad_norm": 3.3248114459066294, + "learning_rate": 6.894750676721248e-06, + "loss": 0.6337, + "step": 4833 + }, + { + "epoch": 0.4, + "grad_norm": 4.877496016078539, + "learning_rate": 6.893525562486511e-06, + "loss": 1.5636, + "step": 4834 + }, + { + "epoch": 0.4, + "grad_norm": 2.0601769129749345, + "learning_rate": 6.89230031552387e-06, + "loss": 0.6569, + "step": 4835 + }, + { + "epoch": 0.4, + "grad_norm": 3.5687417670290493, + "learning_rate": 6.891074935919213e-06, + "loss": 0.9939, + "step": 4836 + }, + { + "epoch": 0.4, + "grad_norm": 4.141206013039817, + "learning_rate": 6.889849423758435e-06, + "loss": 1.0678, + "step": 4837 + }, + { + "epoch": 0.4, + "grad_norm": 4.782425765103793, + "learning_rate": 6.888623779127436e-06, + "loss": 0.8534, + "step": 4838 + }, + { + "epoch": 0.4, + "grad_norm": 3.249219968307424, + "learning_rate": 6.887398002112129e-06, + "loss": 1.0996, + "step": 4839 + }, + { + "epoch": 0.4, + "grad_norm": 4.089008116773115, + "learning_rate": 6.886172092798436e-06, + "loss": 1.0721, + "step": 4840 + }, + { + "epoch": 0.4, + "grad_norm": 2.914132826221444, + "learning_rate": 6.8849460512722874e-06, + "loss": 0.4047, + "step": 4841 + }, + { + "epoch": 0.4, + "grad_norm": 1.2686723684403072, + "learning_rate": 6.883719877619625e-06, + "loss": 0.2041, + "step": 4842 + }, + { + "epoch": 0.4, + "grad_norm": 5.565081196331538, + "learning_rate": 6.882493571926396e-06, + "loss": 1.047, + "step": 4843 + }, + { + "epoch": 0.4, + "grad_norm": 2.948217418062479, + "learning_rate": 6.881267134278562e-06, + "loss": 0.5319, + "step": 4844 + }, + { + "epoch": 0.4, + "grad_norm": 4.177452726225325, + "learning_rate": 6.880040564762089e-06, + "loss": 1.0535, + "step": 4845 + }, + { + "epoch": 0.4, + "grad_norm": 3.215548927000322, + "learning_rate": 6.878813863462953e-06, + "loss": 0.9454, + "step": 4846 + }, + { + "epoch": 0.4, + "grad_norm": 3.090912258099464, + "learning_rate": 6.877587030467142e-06, + "loss": 0.7074, + "step": 4847 + }, + { + "epoch": 0.4, + "grad_norm": 3.7783187665986246, + "learning_rate": 6.876360065860653e-06, + "loss": 0.7078, + "step": 4848 + }, + { + "epoch": 0.4, + "grad_norm": 3.5361133983410573, + "learning_rate": 6.875132969729488e-06, + "loss": 0.7058, + "step": 4849 + }, + { + "epoch": 0.4, + "grad_norm": 3.377776290693036, + "learning_rate": 6.873905742159661e-06, + "loss": 0.8566, + "step": 4850 + }, + { + "epoch": 0.4, + "grad_norm": 2.8147401543582333, + "learning_rate": 6.872678383237199e-06, + "loss": 0.5288, + "step": 4851 + }, + { + "epoch": 0.4, + "grad_norm": 4.080223092160162, + "learning_rate": 6.871450893048132e-06, + "loss": 1.1785, + "step": 4852 + }, + { + "epoch": 0.4, + "grad_norm": 3.8363511837021016, + "learning_rate": 6.8702232716785015e-06, + "loss": 1.0499, + "step": 4853 + }, + { + "epoch": 0.4, + "grad_norm": 3.670029911106436, + "learning_rate": 6.868995519214359e-06, + "loss": 1.0351, + "step": 4854 + }, + { + "epoch": 0.4, + "grad_norm": 4.94734432167073, + "learning_rate": 6.867767635741765e-06, + "loss": 1.0276, + "step": 4855 + }, + { + "epoch": 0.4, + "grad_norm": 3.5924768730461567, + "learning_rate": 6.866539621346786e-06, + "loss": 0.4919, + "step": 4856 + }, + { + "epoch": 0.4, + "grad_norm": 1.6637855824823422, + "learning_rate": 6.865311476115506e-06, + "loss": 0.3427, + "step": 4857 + }, + { + "epoch": 0.4, + "grad_norm": 4.41025917393053, + "learning_rate": 6.864083200134007e-06, + "loss": 0.7974, + "step": 4858 + }, + { + "epoch": 0.4, + "grad_norm": 2.333879761950064, + "learning_rate": 6.862854793488388e-06, + "loss": 0.4166, + "step": 4859 + }, + { + "epoch": 0.4, + "grad_norm": 2.7012765849009965, + "learning_rate": 6.861626256264757e-06, + "loss": 0.5799, + "step": 4860 + }, + { + "epoch": 0.4, + "grad_norm": 4.0504789769281935, + "learning_rate": 6.860397588549227e-06, + "loss": 1.1706, + "step": 4861 + }, + { + "epoch": 0.4, + "grad_norm": 2.9441842501381243, + "learning_rate": 6.859168790427921e-06, + "loss": 0.3816, + "step": 4862 + }, + { + "epoch": 0.4, + "grad_norm": 3.2684089653925614, + "learning_rate": 6.857939861986976e-06, + "loss": 0.7132, + "step": 4863 + }, + { + "epoch": 0.4, + "grad_norm": 4.049869237519961, + "learning_rate": 6.856710803312531e-06, + "loss": 0.99, + "step": 4864 + }, + { + "epoch": 0.4, + "grad_norm": 2.98686435202232, + "learning_rate": 6.85548161449074e-06, + "loss": 0.5555, + "step": 4865 + }, + { + "epoch": 0.4, + "grad_norm": 3.66733054774265, + "learning_rate": 6.854252295607761e-06, + "loss": 0.9083, + "step": 4866 + }, + { + "epoch": 0.4, + "grad_norm": 3.0811161745650453, + "learning_rate": 6.8530228467497685e-06, + "loss": 0.6796, + "step": 4867 + }, + { + "epoch": 0.4, + "grad_norm": 3.4239574539894875, + "learning_rate": 6.8517932680029374e-06, + "loss": 0.9727, + "step": 4868 + }, + { + "epoch": 0.4, + "grad_norm": 5.419282291306946, + "learning_rate": 6.850563559453458e-06, + "loss": 0.8212, + "step": 4869 + }, + { + "epoch": 0.4, + "grad_norm": 4.033435941073197, + "learning_rate": 6.849333721187525e-06, + "loss": 0.8775, + "step": 4870 + }, + { + "epoch": 0.4, + "grad_norm": 4.107942841853004, + "learning_rate": 6.848103753291349e-06, + "loss": 1.1675, + "step": 4871 + }, + { + "epoch": 0.4, + "grad_norm": 4.517224443664284, + "learning_rate": 6.8468736558511405e-06, + "loss": 1.021, + "step": 4872 + }, + { + "epoch": 0.4, + "grad_norm": 3.4227119961292236, + "learning_rate": 6.845643428953127e-06, + "loss": 0.9713, + "step": 4873 + }, + { + "epoch": 0.4, + "grad_norm": 4.666843444544057, + "learning_rate": 6.844413072683542e-06, + "loss": 0.7418, + "step": 4874 + }, + { + "epoch": 0.4, + "grad_norm": 3.315576569835842, + "learning_rate": 6.8431825871286275e-06, + "loss": 0.7384, + "step": 4875 + }, + { + "epoch": 0.4, + "grad_norm": 3.1521853040591217, + "learning_rate": 6.841951972374636e-06, + "loss": 0.5464, + "step": 4876 + }, + { + "epoch": 0.4, + "grad_norm": 3.3331402848579645, + "learning_rate": 6.840721228507826e-06, + "loss": 0.9008, + "step": 4877 + }, + { + "epoch": 0.4, + "grad_norm": 4.730462628966787, + "learning_rate": 6.839490355614468e-06, + "loss": 1.1883, + "step": 4878 + }, + { + "epoch": 0.4, + "grad_norm": 4.152897251668137, + "learning_rate": 6.838259353780843e-06, + "loss": 0.6253, + "step": 4879 + }, + { + "epoch": 0.4, + "grad_norm": 1.4914931988995255, + "learning_rate": 6.8370282230932375e-06, + "loss": 0.2271, + "step": 4880 + }, + { + "epoch": 0.4, + "grad_norm": 3.220985605585712, + "learning_rate": 6.835796963637947e-06, + "loss": 0.5998, + "step": 4881 + }, + { + "epoch": 0.4, + "grad_norm": 4.224144863216822, + "learning_rate": 6.83456557550128e-06, + "loss": 1.1766, + "step": 4882 + }, + { + "epoch": 0.4, + "grad_norm": 4.6436117844737055, + "learning_rate": 6.83333405876955e-06, + "loss": 1.1108, + "step": 4883 + }, + { + "epoch": 0.4, + "grad_norm": 4.825779692682705, + "learning_rate": 6.832102413529082e-06, + "loss": 0.8362, + "step": 4884 + }, + { + "epoch": 0.4, + "grad_norm": 3.0609627060824423, + "learning_rate": 6.830870639866207e-06, + "loss": 0.5508, + "step": 4885 + }, + { + "epoch": 0.4, + "grad_norm": 3.765868409693761, + "learning_rate": 6.829638737867268e-06, + "loss": 1.0675, + "step": 4886 + }, + { + "epoch": 0.4, + "grad_norm": 0.8755726868889399, + "learning_rate": 6.828406707618616e-06, + "loss": 0.1566, + "step": 4887 + }, + { + "epoch": 0.4, + "grad_norm": 3.431109974323921, + "learning_rate": 6.827174549206612e-06, + "loss": 0.9188, + "step": 4888 + }, + { + "epoch": 0.4, + "grad_norm": 3.6741186744873215, + "learning_rate": 6.825942262717623e-06, + "loss": 1.0093, + "step": 4889 + }, + { + "epoch": 0.4, + "grad_norm": 2.414868717211905, + "learning_rate": 6.824709848238028e-06, + "loss": 0.3435, + "step": 4890 + }, + { + "epoch": 0.4, + "grad_norm": 3.4953640571943225, + "learning_rate": 6.823477305854215e-06, + "loss": 0.7189, + "step": 4891 + }, + { + "epoch": 0.4, + "grad_norm": 3.071480562210039, + "learning_rate": 6.822244635652579e-06, + "loss": 0.5268, + "step": 4892 + }, + { + "epoch": 0.4, + "grad_norm": 3.9820593782485667, + "learning_rate": 6.821011837719522e-06, + "loss": 0.8389, + "step": 4893 + }, + { + "epoch": 0.4, + "grad_norm": 2.7425034378072586, + "learning_rate": 6.819778912141461e-06, + "loss": 0.7943, + "step": 4894 + }, + { + "epoch": 0.4, + "grad_norm": 1.5328573063544362, + "learning_rate": 6.818545859004819e-06, + "loss": 0.1973, + "step": 4895 + }, + { + "epoch": 0.4, + "grad_norm": 5.588916469323275, + "learning_rate": 6.817312678396026e-06, + "loss": 1.0831, + "step": 4896 + }, + { + "epoch": 0.4, + "grad_norm": 3.555749972340232, + "learning_rate": 6.8160793704015234e-06, + "loss": 0.6709, + "step": 4897 + }, + { + "epoch": 0.4, + "grad_norm": 2.8676428553444286, + "learning_rate": 6.81484593510776e-06, + "loss": 0.7756, + "step": 4898 + }, + { + "epoch": 0.4, + "grad_norm": 3.780370674790176, + "learning_rate": 6.813612372601196e-06, + "loss": 0.5553, + "step": 4899 + }, + { + "epoch": 0.4, + "grad_norm": 4.940664038442654, + "learning_rate": 6.812378682968297e-06, + "loss": 0.9143, + "step": 4900 + }, + { + "epoch": 0.4, + "grad_norm": 3.548389415177721, + "learning_rate": 6.811144866295541e-06, + "loss": 0.7957, + "step": 4901 + }, + { + "epoch": 0.4, + "grad_norm": 3.0121381310770756, + "learning_rate": 6.8099109226694095e-06, + "loss": 0.6322, + "step": 4902 + }, + { + "epoch": 0.4, + "grad_norm": 4.40491945442787, + "learning_rate": 6.8086768521764006e-06, + "loss": 1.3037, + "step": 4903 + }, + { + "epoch": 0.4, + "grad_norm": 3.4569409355456053, + "learning_rate": 6.807442654903015e-06, + "loss": 0.7545, + "step": 4904 + }, + { + "epoch": 0.4, + "grad_norm": 4.398706886637341, + "learning_rate": 6.806208330935766e-06, + "loss": 1.2965, + "step": 4905 + }, + { + "epoch": 0.4, + "grad_norm": 4.009674249846179, + "learning_rate": 6.804973880361172e-06, + "loss": 0.4626, + "step": 4906 + }, + { + "epoch": 0.4, + "grad_norm": 4.770078543481889, + "learning_rate": 6.8037393032657665e-06, + "loss": 1.2375, + "step": 4907 + }, + { + "epoch": 0.4, + "grad_norm": 3.9386078500281094, + "learning_rate": 6.802504599736085e-06, + "loss": 1.0183, + "step": 4908 + }, + { + "epoch": 0.4, + "grad_norm": 3.705145761727528, + "learning_rate": 6.801269769858676e-06, + "loss": 0.773, + "step": 4909 + }, + { + "epoch": 0.4, + "grad_norm": 3.7872201645622794, + "learning_rate": 6.800034813720093e-06, + "loss": 0.5834, + "step": 4910 + }, + { + "epoch": 0.4, + "grad_norm": 1.5884509932720416, + "learning_rate": 6.798799731406906e-06, + "loss": 0.222, + "step": 4911 + }, + { + "epoch": 0.4, + "grad_norm": 3.9759992294640036, + "learning_rate": 6.797564523005685e-06, + "loss": 0.691, + "step": 4912 + }, + { + "epoch": 0.4, + "grad_norm": 4.130717058700437, + "learning_rate": 6.796329188603015e-06, + "loss": 1.0033, + "step": 4913 + }, + { + "epoch": 0.4, + "grad_norm": 2.3091944709110024, + "learning_rate": 6.795093728285485e-06, + "loss": 0.5313, + "step": 4914 + }, + { + "epoch": 0.4, + "grad_norm": 4.183029532855425, + "learning_rate": 6.793858142139698e-06, + "loss": 0.9509, + "step": 4915 + }, + { + "epoch": 0.4, + "grad_norm": 3.924495972395091, + "learning_rate": 6.792622430252263e-06, + "loss": 0.9398, + "step": 4916 + }, + { + "epoch": 0.4, + "grad_norm": 4.319719235075423, + "learning_rate": 6.791386592709795e-06, + "loss": 0.906, + "step": 4917 + }, + { + "epoch": 0.4, + "grad_norm": 3.862168877419175, + "learning_rate": 6.790150629598924e-06, + "loss": 0.8313, + "step": 4918 + }, + { + "epoch": 0.4, + "grad_norm": 3.5343487433565732, + "learning_rate": 6.788914541006284e-06, + "loss": 0.8099, + "step": 4919 + }, + { + "epoch": 0.4, + "grad_norm": 6.140203940052827, + "learning_rate": 6.787678327018521e-06, + "loss": 1.2256, + "step": 4920 + }, + { + "epoch": 0.4, + "grad_norm": 5.050709535933509, + "learning_rate": 6.786441987722288e-06, + "loss": 0.8507, + "step": 4921 + }, + { + "epoch": 0.4, + "grad_norm": 2.55419400867438, + "learning_rate": 6.785205523204244e-06, + "loss": 0.4897, + "step": 4922 + }, + { + "epoch": 0.4, + "grad_norm": 2.2000464736683516, + "learning_rate": 6.783968933551064e-06, + "loss": 0.3428, + "step": 4923 + }, + { + "epoch": 0.4, + "grad_norm": 5.170422918668265, + "learning_rate": 6.782732218849425e-06, + "loss": 1.2579, + "step": 4924 + }, + { + "epoch": 0.4, + "grad_norm": 3.0477546435397898, + "learning_rate": 6.781495379186016e-06, + "loss": 0.7338, + "step": 4925 + }, + { + "epoch": 0.4, + "grad_norm": 4.076146588457267, + "learning_rate": 6.780258414647534e-06, + "loss": 0.9534, + "step": 4926 + }, + { + "epoch": 0.4, + "grad_norm": 3.711725811895843, + "learning_rate": 6.779021325320684e-06, + "loss": 0.6164, + "step": 4927 + }, + { + "epoch": 0.4, + "grad_norm": 2.6802883358122496, + "learning_rate": 6.7777841112921825e-06, + "loss": 0.3153, + "step": 4928 + }, + { + "epoch": 0.4, + "grad_norm": 2.8613066773022577, + "learning_rate": 6.776546772648751e-06, + "loss": 0.7101, + "step": 4929 + }, + { + "epoch": 0.4, + "grad_norm": 2.823045677823471, + "learning_rate": 6.775309309477123e-06, + "loss": 0.401, + "step": 4930 + }, + { + "epoch": 0.4, + "grad_norm": 3.2928797004917074, + "learning_rate": 6.7740717218640374e-06, + "loss": 0.6771, + "step": 4931 + }, + { + "epoch": 0.4, + "grad_norm": 3.8684327129382496, + "learning_rate": 6.772834009896248e-06, + "loss": 0.9103, + "step": 4932 + }, + { + "epoch": 0.4, + "grad_norm": 3.648521213162987, + "learning_rate": 6.771596173660506e-06, + "loss": 0.7071, + "step": 4933 + }, + { + "epoch": 0.4, + "grad_norm": 3.804648349814525, + "learning_rate": 6.770358213243584e-06, + "loss": 1.1199, + "step": 4934 + }, + { + "epoch": 0.4, + "grad_norm": 2.1312186278869145, + "learning_rate": 6.769120128732256e-06, + "loss": 0.3414, + "step": 4935 + }, + { + "epoch": 0.4, + "grad_norm": 2.662486515215794, + "learning_rate": 6.7678819202133054e-06, + "loss": 0.2188, + "step": 4936 + }, + { + "epoch": 0.4, + "grad_norm": 3.5205019556917874, + "learning_rate": 6.766643587773527e-06, + "loss": 0.5434, + "step": 4937 + }, + { + "epoch": 0.4, + "grad_norm": 3.6374269679557463, + "learning_rate": 6.76540513149972e-06, + "loss": 0.878, + "step": 4938 + }, + { + "epoch": 0.4, + "grad_norm": 4.7359192378862724, + "learning_rate": 6.764166551478699e-06, + "loss": 0.842, + "step": 4939 + }, + { + "epoch": 0.4, + "grad_norm": 4.778568949153722, + "learning_rate": 6.762927847797279e-06, + "loss": 1.1776, + "step": 4940 + }, + { + "epoch": 0.4, + "grad_norm": 4.048195079758534, + "learning_rate": 6.761689020542288e-06, + "loss": 0.6763, + "step": 4941 + }, + { + "epoch": 0.4, + "grad_norm": 1.911603612677543, + "learning_rate": 6.760450069800565e-06, + "loss": 0.4051, + "step": 4942 + }, + { + "epoch": 0.4, + "grad_norm": 3.2709982715047503, + "learning_rate": 6.759210995658953e-06, + "loss": 1.0407, + "step": 4943 + }, + { + "epoch": 0.4, + "grad_norm": 4.101935661507507, + "learning_rate": 6.757971798204307e-06, + "loss": 0.6369, + "step": 4944 + }, + { + "epoch": 0.4, + "grad_norm": 4.350693122627702, + "learning_rate": 6.756732477523489e-06, + "loss": 0.9884, + "step": 4945 + }, + { + "epoch": 0.4, + "grad_norm": 3.426764541479174, + "learning_rate": 6.755493033703367e-06, + "loss": 0.6377, + "step": 4946 + }, + { + "epoch": 0.4, + "grad_norm": 5.511797157726579, + "learning_rate": 6.754253466830827e-06, + "loss": 1.0663, + "step": 4947 + }, + { + "epoch": 0.4, + "grad_norm": 4.986812080177097, + "learning_rate": 6.753013776992752e-06, + "loss": 1.2665, + "step": 4948 + }, + { + "epoch": 0.4, + "grad_norm": 3.622379462419554, + "learning_rate": 6.751773964276039e-06, + "loss": 0.523, + "step": 4949 + }, + { + "epoch": 0.4, + "grad_norm": 4.52878001591544, + "learning_rate": 6.750534028767596e-06, + "loss": 0.6513, + "step": 4950 + }, + { + "epoch": 0.4, + "grad_norm": 5.057689733164265, + "learning_rate": 6.7492939705543355e-06, + "loss": 1.0922, + "step": 4951 + }, + { + "epoch": 0.4, + "grad_norm": 4.326429923094338, + "learning_rate": 6.748053789723181e-06, + "loss": 1.0292, + "step": 4952 + }, + { + "epoch": 0.4, + "grad_norm": 3.5933440981978935, + "learning_rate": 6.746813486361063e-06, + "loss": 0.7417, + "step": 4953 + }, + { + "epoch": 0.4, + "grad_norm": 3.0535524993846352, + "learning_rate": 6.745573060554922e-06, + "loss": 0.6412, + "step": 4954 + }, + { + "epoch": 0.41, + "grad_norm": 5.09612405979385, + "learning_rate": 6.744332512391707e-06, + "loss": 1.0326, + "step": 4955 + }, + { + "epoch": 0.41, + "grad_norm": 3.9193694002324544, + "learning_rate": 6.743091841958373e-06, + "loss": 0.8915, + "step": 4956 + }, + { + "epoch": 0.41, + "grad_norm": 3.4841669402172886, + "learning_rate": 6.741851049341888e-06, + "loss": 0.7924, + "step": 4957 + }, + { + "epoch": 0.41, + "grad_norm": 4.163209288640059, + "learning_rate": 6.740610134629224e-06, + "loss": 1.0377, + "step": 4958 + }, + { + "epoch": 0.41, + "grad_norm": 3.709614902603784, + "learning_rate": 6.739369097907365e-06, + "loss": 0.6028, + "step": 4959 + }, + { + "epoch": 0.41, + "grad_norm": 4.164803894961905, + "learning_rate": 6.7381279392633025e-06, + "loss": 0.6679, + "step": 4960 + }, + { + "epoch": 0.41, + "grad_norm": 3.2438255695122615, + "learning_rate": 6.736886658784034e-06, + "loss": 0.5955, + "step": 4961 + }, + { + "epoch": 0.41, + "grad_norm": 5.618511131153098, + "learning_rate": 6.735645256556572e-06, + "loss": 1.2125, + "step": 4962 + }, + { + "epoch": 0.41, + "grad_norm": 3.0953174041968565, + "learning_rate": 6.734403732667931e-06, + "loss": 0.6487, + "step": 4963 + }, + { + "epoch": 0.41, + "grad_norm": 3.3826746576217044, + "learning_rate": 6.733162087205135e-06, + "loss": 0.7166, + "step": 4964 + }, + { + "epoch": 0.41, + "grad_norm": 3.7470232526694387, + "learning_rate": 6.731920320255221e-06, + "loss": 0.7716, + "step": 4965 + }, + { + "epoch": 0.41, + "grad_norm": 3.0457182226061286, + "learning_rate": 6.730678431905228e-06, + "loss": 0.7626, + "step": 4966 + }, + { + "epoch": 0.41, + "grad_norm": 4.626770885537802, + "learning_rate": 6.7294364222422104e-06, + "loss": 1.2037, + "step": 4967 + }, + { + "epoch": 0.41, + "grad_norm": 4.745128912279462, + "learning_rate": 6.728194291353226e-06, + "loss": 1.2067, + "step": 4968 + }, + { + "epoch": 0.41, + "grad_norm": 2.6009434566161396, + "learning_rate": 6.7269520393253414e-06, + "loss": 0.3551, + "step": 4969 + }, + { + "epoch": 0.41, + "grad_norm": 4.0433053176479525, + "learning_rate": 6.725709666245637e-06, + "loss": 0.8137, + "step": 4970 + }, + { + "epoch": 0.41, + "grad_norm": 2.612255664240316, + "learning_rate": 6.7244671722011946e-06, + "loss": 0.5242, + "step": 4971 + }, + { + "epoch": 0.41, + "grad_norm": 5.227490852490809, + "learning_rate": 6.723224557279107e-06, + "loss": 1.0358, + "step": 4972 + }, + { + "epoch": 0.41, + "grad_norm": 3.9207008712420093, + "learning_rate": 6.721981821566476e-06, + "loss": 0.8894, + "step": 4973 + }, + { + "epoch": 0.41, + "grad_norm": 3.6462179783860873, + "learning_rate": 6.7207389651504175e-06, + "loss": 1.1477, + "step": 4974 + }, + { + "epoch": 0.41, + "grad_norm": 3.202301954200226, + "learning_rate": 6.719495988118043e-06, + "loss": 0.7103, + "step": 4975 + }, + { + "epoch": 0.41, + "grad_norm": 2.6300639065375386, + "learning_rate": 6.718252890556485e-06, + "loss": 0.5941, + "step": 4976 + }, + { + "epoch": 0.41, + "grad_norm": 2.548896662632624, + "learning_rate": 6.717009672552877e-06, + "loss": 0.6483, + "step": 4977 + }, + { + "epoch": 0.41, + "grad_norm": 3.513029543800947, + "learning_rate": 6.715766334194362e-06, + "loss": 0.7638, + "step": 4978 + }, + { + "epoch": 0.41, + "grad_norm": 4.354263444583725, + "learning_rate": 6.714522875568095e-06, + "loss": 0.8984, + "step": 4979 + }, + { + "epoch": 0.41, + "grad_norm": 2.937833410720314, + "learning_rate": 6.713279296761237e-06, + "loss": 0.4593, + "step": 4980 + }, + { + "epoch": 0.41, + "grad_norm": 2.705053803134201, + "learning_rate": 6.712035597860955e-06, + "loss": 0.6847, + "step": 4981 + }, + { + "epoch": 0.41, + "grad_norm": 4.107071080317969, + "learning_rate": 6.710791778954429e-06, + "loss": 1.013, + "step": 4982 + }, + { + "epoch": 0.41, + "grad_norm": 4.549944846724847, + "learning_rate": 6.709547840128844e-06, + "loss": 0.8946, + "step": 4983 + }, + { + "epoch": 0.41, + "grad_norm": 9.743906409309483, + "learning_rate": 6.708303781471396e-06, + "loss": 0.6407, + "step": 4984 + }, + { + "epoch": 0.41, + "grad_norm": 3.9118164028408535, + "learning_rate": 6.707059603069288e-06, + "loss": 0.947, + "step": 4985 + }, + { + "epoch": 0.41, + "grad_norm": 4.0791859941056, + "learning_rate": 6.705815305009731e-06, + "loss": 0.6561, + "step": 4986 + }, + { + "epoch": 0.41, + "grad_norm": 2.574974857317388, + "learning_rate": 6.7045708873799435e-06, + "loss": 0.6531, + "step": 4987 + }, + { + "epoch": 0.41, + "grad_norm": 4.563827132306292, + "learning_rate": 6.703326350267157e-06, + "loss": 1.0643, + "step": 4988 + }, + { + "epoch": 0.41, + "grad_norm": 3.784500592588589, + "learning_rate": 6.7020816937586046e-06, + "loss": 0.7969, + "step": 4989 + }, + { + "epoch": 0.41, + "grad_norm": 3.9799773996493015, + "learning_rate": 6.7008369179415324e-06, + "loss": 0.7725, + "step": 4990 + }, + { + "epoch": 0.41, + "grad_norm": 3.510790901955249, + "learning_rate": 6.699592022903197e-06, + "loss": 0.9653, + "step": 4991 + }, + { + "epoch": 0.41, + "grad_norm": 3.180373643015554, + "learning_rate": 6.698347008730854e-06, + "loss": 0.676, + "step": 4992 + }, + { + "epoch": 0.41, + "grad_norm": 2.9645250432962915, + "learning_rate": 6.697101875511779e-06, + "loss": 0.6309, + "step": 4993 + }, + { + "epoch": 0.41, + "grad_norm": 3.2167881441671975, + "learning_rate": 6.695856623333249e-06, + "loss": 0.5562, + "step": 4994 + }, + { + "epoch": 0.41, + "grad_norm": 1.7854948379887672, + "learning_rate": 6.694611252282549e-06, + "loss": 0.3688, + "step": 4995 + }, + { + "epoch": 0.41, + "grad_norm": 4.119302845105482, + "learning_rate": 6.693365762446975e-06, + "loss": 1.0407, + "step": 4996 + }, + { + "epoch": 0.41, + "grad_norm": 4.19548604602088, + "learning_rate": 6.692120153913831e-06, + "loss": 0.5511, + "step": 4997 + }, + { + "epoch": 0.41, + "grad_norm": 3.9396715204049366, + "learning_rate": 6.690874426770428e-06, + "loss": 0.7435, + "step": 4998 + }, + { + "epoch": 0.41, + "grad_norm": 5.067757458057823, + "learning_rate": 6.6896285811040865e-06, + "loss": 0.8161, + "step": 4999 + }, + { + "epoch": 0.41, + "grad_norm": 4.027032846338074, + "learning_rate": 6.688382617002135e-06, + "loss": 1.0669, + "step": 5000 + }, + { + "epoch": 0.41, + "grad_norm": 3.804959960690094, + "learning_rate": 6.687136534551909e-06, + "loss": 1.174, + "step": 5001 + }, + { + "epoch": 0.41, + "grad_norm": 3.4409453582294325, + "learning_rate": 6.685890333840757e-06, + "loss": 0.5652, + "step": 5002 + }, + { + "epoch": 0.41, + "grad_norm": 3.6361198348967276, + "learning_rate": 6.6846440149560276e-06, + "loss": 0.7007, + "step": 5003 + }, + { + "epoch": 0.41, + "grad_norm": 4.93626377499621, + "learning_rate": 6.683397577985084e-06, + "loss": 0.9234, + "step": 5004 + }, + { + "epoch": 0.41, + "grad_norm": 3.49474660728893, + "learning_rate": 6.6821510230152975e-06, + "loss": 0.4029, + "step": 5005 + }, + { + "epoch": 0.41, + "grad_norm": 2.615337291292612, + "learning_rate": 6.680904350134044e-06, + "loss": 0.5586, + "step": 5006 + }, + { + "epoch": 0.41, + "grad_norm": 1.4573510011122839, + "learning_rate": 6.679657559428712e-06, + "loss": 0.4093, + "step": 5007 + }, + { + "epoch": 0.41, + "grad_norm": 4.085463953356308, + "learning_rate": 6.678410650986694e-06, + "loss": 0.7689, + "step": 5008 + }, + { + "epoch": 0.41, + "grad_norm": 2.620326708654164, + "learning_rate": 6.677163624895393e-06, + "loss": 0.4192, + "step": 5009 + }, + { + "epoch": 0.41, + "grad_norm": 4.145993087796699, + "learning_rate": 6.6759164812422225e-06, + "loss": 0.6969, + "step": 5010 + }, + { + "epoch": 0.41, + "grad_norm": 4.507549152610467, + "learning_rate": 6.674669220114601e-06, + "loss": 1.1422, + "step": 5011 + }, + { + "epoch": 0.41, + "grad_norm": 3.4986053398734898, + "learning_rate": 6.673421841599954e-06, + "loss": 0.6505, + "step": 5012 + }, + { + "epoch": 0.41, + "grad_norm": 2.8082322958596393, + "learning_rate": 6.672174345785718e-06, + "loss": 0.3428, + "step": 5013 + }, + { + "epoch": 0.41, + "grad_norm": 3.544467121237856, + "learning_rate": 6.6709267327593396e-06, + "loss": 0.625, + "step": 5014 + }, + { + "epoch": 0.41, + "grad_norm": 3.9603559962796795, + "learning_rate": 6.669679002608267e-06, + "loss": 0.6185, + "step": 5015 + }, + { + "epoch": 0.41, + "grad_norm": 4.217658841777538, + "learning_rate": 6.668431155419963e-06, + "loss": 0.9385, + "step": 5016 + }, + { + "epoch": 0.41, + "grad_norm": 3.1587212821777464, + "learning_rate": 6.6671831912818985e-06, + "loss": 0.5103, + "step": 5017 + }, + { + "epoch": 0.41, + "grad_norm": 1.0508066768483084, + "learning_rate": 6.6659351102815475e-06, + "loss": 0.1753, + "step": 5018 + }, + { + "epoch": 0.41, + "grad_norm": 3.1266872495955336, + "learning_rate": 6.664686912506393e-06, + "loss": 0.3366, + "step": 5019 + }, + { + "epoch": 0.41, + "grad_norm": 3.31249644910684, + "learning_rate": 6.663438598043932e-06, + "loss": 0.6046, + "step": 5020 + }, + { + "epoch": 0.41, + "grad_norm": 3.1792768803257747, + "learning_rate": 6.662190166981665e-06, + "loss": 0.8227, + "step": 5021 + }, + { + "epoch": 0.41, + "grad_norm": 2.0539249160343376, + "learning_rate": 6.6609416194071e-06, + "loss": 0.3148, + "step": 5022 + }, + { + "epoch": 0.41, + "grad_norm": 2.899219170184907, + "learning_rate": 6.659692955407757e-06, + "loss": 0.6692, + "step": 5023 + }, + { + "epoch": 0.41, + "grad_norm": 4.542525499890373, + "learning_rate": 6.65844417507116e-06, + "loss": 0.9865, + "step": 5024 + }, + { + "epoch": 0.41, + "grad_norm": 2.938901666456711, + "learning_rate": 6.657195278484845e-06, + "loss": 0.8797, + "step": 5025 + }, + { + "epoch": 0.41, + "grad_norm": 3.461312687276681, + "learning_rate": 6.6559462657363525e-06, + "loss": 0.8987, + "step": 5026 + }, + { + "epoch": 0.41, + "grad_norm": 3.123690030958761, + "learning_rate": 6.654697136913233e-06, + "loss": 0.9183, + "step": 5027 + }, + { + "epoch": 0.41, + "grad_norm": 3.4249287381361073, + "learning_rate": 6.653447892103047e-06, + "loss": 0.4914, + "step": 5028 + }, + { + "epoch": 0.41, + "grad_norm": 3.428473580726565, + "learning_rate": 6.652198531393358e-06, + "loss": 0.8932, + "step": 5029 + }, + { + "epoch": 0.41, + "grad_norm": 5.264837748812746, + "learning_rate": 6.650949054871742e-06, + "loss": 1.3844, + "step": 5030 + }, + { + "epoch": 0.41, + "grad_norm": 2.8526595143421294, + "learning_rate": 6.649699462625784e-06, + "loss": 0.6511, + "step": 5031 + }, + { + "epoch": 0.41, + "grad_norm": 3.886729359381155, + "learning_rate": 6.648449754743072e-06, + "loss": 0.951, + "step": 5032 + }, + { + "epoch": 0.41, + "grad_norm": 4.10215164550763, + "learning_rate": 6.647199931311207e-06, + "loss": 0.7978, + "step": 5033 + }, + { + "epoch": 0.41, + "grad_norm": 3.4323786311499314, + "learning_rate": 6.645949992417795e-06, + "loss": 0.9083, + "step": 5034 + }, + { + "epoch": 0.41, + "grad_norm": 2.0846317512520756, + "learning_rate": 6.644699938150452e-06, + "loss": 0.333, + "step": 5035 + }, + { + "epoch": 0.41, + "grad_norm": 4.03665806830898, + "learning_rate": 6.6434497685968e-06, + "loss": 0.7356, + "step": 5036 + }, + { + "epoch": 0.41, + "grad_norm": 3.1071041268719446, + "learning_rate": 6.642199483844473e-06, + "loss": 0.4502, + "step": 5037 + }, + { + "epoch": 0.41, + "grad_norm": 3.7463856308086165, + "learning_rate": 6.640949083981108e-06, + "loss": 0.9525, + "step": 5038 + }, + { + "epoch": 0.41, + "grad_norm": 3.950570863817365, + "learning_rate": 6.639698569094353e-06, + "loss": 1.06, + "step": 5039 + }, + { + "epoch": 0.41, + "grad_norm": 4.304358286996472, + "learning_rate": 6.638447939271866e-06, + "loss": 0.7415, + "step": 5040 + }, + { + "epoch": 0.41, + "grad_norm": 4.653097435243491, + "learning_rate": 6.637197194601309e-06, + "loss": 1.3601, + "step": 5041 + }, + { + "epoch": 0.41, + "grad_norm": 3.391416070719546, + "learning_rate": 6.635946335170352e-06, + "loss": 0.8694, + "step": 5042 + }, + { + "epoch": 0.41, + "grad_norm": 2.807569406655368, + "learning_rate": 6.634695361066679e-06, + "loss": 0.643, + "step": 5043 + }, + { + "epoch": 0.41, + "grad_norm": 3.5645027321889433, + "learning_rate": 6.633444272377974e-06, + "loss": 0.7314, + "step": 5044 + }, + { + "epoch": 0.41, + "grad_norm": 1.8905815390481095, + "learning_rate": 6.632193069191934e-06, + "loss": 0.4264, + "step": 5045 + }, + { + "epoch": 0.41, + "grad_norm": 4.678692969410927, + "learning_rate": 6.630941751596264e-06, + "loss": 0.9051, + "step": 5046 + }, + { + "epoch": 0.41, + "grad_norm": 2.0461687288099317, + "learning_rate": 6.629690319678674e-06, + "loss": 0.3569, + "step": 5047 + }, + { + "epoch": 0.41, + "grad_norm": 3.4543454838653753, + "learning_rate": 6.6284387735268865e-06, + "loss": 0.8512, + "step": 5048 + }, + { + "epoch": 0.41, + "grad_norm": 3.1412107825446887, + "learning_rate": 6.627187113228627e-06, + "loss": 0.7086, + "step": 5049 + }, + { + "epoch": 0.41, + "grad_norm": 2.3819860329330944, + "learning_rate": 6.625935338871632e-06, + "loss": 0.4417, + "step": 5050 + }, + { + "epoch": 0.41, + "grad_norm": 3.70029031761103, + "learning_rate": 6.624683450543647e-06, + "loss": 0.7293, + "step": 5051 + }, + { + "epoch": 0.41, + "grad_norm": 2.5089283676811505, + "learning_rate": 6.623431448332421e-06, + "loss": 0.3025, + "step": 5052 + }, + { + "epoch": 0.41, + "grad_norm": 2.9728192523556984, + "learning_rate": 6.622179332325718e-06, + "loss": 0.7548, + "step": 5053 + }, + { + "epoch": 0.41, + "grad_norm": 2.8230702830268473, + "learning_rate": 6.620927102611302e-06, + "loss": 0.7466, + "step": 5054 + }, + { + "epoch": 0.41, + "grad_norm": 4.773342740786333, + "learning_rate": 6.619674759276951e-06, + "loss": 1.38, + "step": 5055 + }, + { + "epoch": 0.41, + "grad_norm": 4.359982031316425, + "learning_rate": 6.6184223024104474e-06, + "loss": 1.1946, + "step": 5056 + }, + { + "epoch": 0.41, + "grad_norm": 4.312823363429766, + "learning_rate": 6.6171697320995855e-06, + "loss": 1.1666, + "step": 5057 + }, + { + "epoch": 0.41, + "grad_norm": 4.862686956689697, + "learning_rate": 6.615917048432161e-06, + "loss": 1.3333, + "step": 5058 + }, + { + "epoch": 0.41, + "grad_norm": 5.1330910039124475, + "learning_rate": 6.614664251495986e-06, + "loss": 1.6148, + "step": 5059 + }, + { + "epoch": 0.41, + "grad_norm": 3.906527461127112, + "learning_rate": 6.613411341378872e-06, + "loss": 0.7501, + "step": 5060 + }, + { + "epoch": 0.41, + "grad_norm": 4.627117957234848, + "learning_rate": 6.6121583181686466e-06, + "loss": 1.197, + "step": 5061 + }, + { + "epoch": 0.41, + "grad_norm": 4.1981226308634, + "learning_rate": 6.610905181953138e-06, + "loss": 0.9891, + "step": 5062 + }, + { + "epoch": 0.41, + "grad_norm": 3.7675032094973404, + "learning_rate": 6.609651932820187e-06, + "loss": 0.4652, + "step": 5063 + }, + { + "epoch": 0.41, + "grad_norm": 4.3054489860527765, + "learning_rate": 6.608398570857642e-06, + "loss": 0.9609, + "step": 5064 + }, + { + "epoch": 0.41, + "grad_norm": 4.036451784571845, + "learning_rate": 6.607145096153355e-06, + "loss": 0.8836, + "step": 5065 + }, + { + "epoch": 0.41, + "grad_norm": 2.7342217509523232, + "learning_rate": 6.605891508795193e-06, + "loss": 0.4318, + "step": 5066 + }, + { + "epoch": 0.41, + "grad_norm": 4.884013713134834, + "learning_rate": 6.604637808871023e-06, + "loss": 0.915, + "step": 5067 + }, + { + "epoch": 0.41, + "grad_norm": 2.360546766458889, + "learning_rate": 6.603383996468727e-06, + "loss": 0.3974, + "step": 5068 + }, + { + "epoch": 0.41, + "grad_norm": 1.9703620324064468, + "learning_rate": 6.602130071676191e-06, + "loss": 0.4134, + "step": 5069 + }, + { + "epoch": 0.41, + "grad_norm": 3.6625200237642317, + "learning_rate": 6.600876034581308e-06, + "loss": 0.9043, + "step": 5070 + }, + { + "epoch": 0.41, + "grad_norm": 3.4910179839017763, + "learning_rate": 6.599621885271984e-06, + "loss": 0.6769, + "step": 5071 + }, + { + "epoch": 0.41, + "grad_norm": 4.013895703767678, + "learning_rate": 6.5983676238361284e-06, + "loss": 1.139, + "step": 5072 + }, + { + "epoch": 0.41, + "grad_norm": 2.950931574451492, + "learning_rate": 6.5971132503616554e-06, + "loss": 0.5599, + "step": 5073 + }, + { + "epoch": 0.41, + "grad_norm": 3.736885945190558, + "learning_rate": 6.5958587649364955e-06, + "loss": 1.0086, + "step": 5074 + }, + { + "epoch": 0.41, + "grad_norm": 4.896035336353438, + "learning_rate": 6.5946041676485815e-06, + "loss": 1.1428, + "step": 5075 + }, + { + "epoch": 0.41, + "grad_norm": 3.677453534155014, + "learning_rate": 6.593349458585855e-06, + "loss": 0.864, + "step": 5076 + }, + { + "epoch": 0.41, + "grad_norm": 1.7658904975909975, + "learning_rate": 6.592094637836266e-06, + "loss": 0.4789, + "step": 5077 + }, + { + "epoch": 0.42, + "grad_norm": 3.702326669703679, + "learning_rate": 6.5908397054877715e-06, + "loss": 0.8829, + "step": 5078 + }, + { + "epoch": 0.42, + "grad_norm": 3.204164682234299, + "learning_rate": 6.589584661628338e-06, + "loss": 0.7511, + "step": 5079 + }, + { + "epoch": 0.42, + "grad_norm": 2.0722343576460274, + "learning_rate": 6.588329506345936e-06, + "loss": 0.3892, + "step": 5080 + }, + { + "epoch": 0.42, + "grad_norm": 1.3100602640670984, + "learning_rate": 6.587074239728549e-06, + "loss": 0.2382, + "step": 5081 + }, + { + "epoch": 0.42, + "grad_norm": 3.720024571628034, + "learning_rate": 6.585818861864164e-06, + "loss": 0.9731, + "step": 5082 + }, + { + "epoch": 0.42, + "grad_norm": 4.2744365581092225, + "learning_rate": 6.584563372840779e-06, + "loss": 1.0198, + "step": 5083 + }, + { + "epoch": 0.42, + "grad_norm": 3.324752110388792, + "learning_rate": 6.583307772746397e-06, + "loss": 0.5498, + "step": 5084 + }, + { + "epoch": 0.42, + "grad_norm": 5.008820777817378, + "learning_rate": 6.582052061669032e-06, + "loss": 1.1406, + "step": 5085 + }, + { + "epoch": 0.42, + "grad_norm": 3.971044782311768, + "learning_rate": 6.580796239696701e-06, + "loss": 1.0095, + "step": 5086 + }, + { + "epoch": 0.42, + "grad_norm": 3.7736435523199208, + "learning_rate": 6.579540306917434e-06, + "loss": 0.9718, + "step": 5087 + }, + { + "epoch": 0.42, + "grad_norm": 1.449587139629432, + "learning_rate": 6.578284263419266e-06, + "loss": 0.1805, + "step": 5088 + }, + { + "epoch": 0.42, + "grad_norm": 4.2797921454680115, + "learning_rate": 6.5770281092902385e-06, + "loss": 0.7411, + "step": 5089 + }, + { + "epoch": 0.42, + "grad_norm": 2.885074877905152, + "learning_rate": 6.575771844618405e-06, + "loss": 0.5535, + "step": 5090 + }, + { + "epoch": 0.42, + "grad_norm": 3.8296362782063436, + "learning_rate": 6.574515469491823e-06, + "loss": 0.7637, + "step": 5091 + }, + { + "epoch": 0.42, + "grad_norm": 3.861826826046879, + "learning_rate": 6.573258983998558e-06, + "loss": 0.612, + "step": 5092 + }, + { + "epoch": 0.42, + "grad_norm": 4.555017757308554, + "learning_rate": 6.572002388226686e-06, + "loss": 1.2151, + "step": 5093 + }, + { + "epoch": 0.42, + "grad_norm": 4.497326683154025, + "learning_rate": 6.570745682264288e-06, + "loss": 0.7634, + "step": 5094 + }, + { + "epoch": 0.42, + "grad_norm": 3.187260057733963, + "learning_rate": 6.569488866199454e-06, + "loss": 0.5992, + "step": 5095 + }, + { + "epoch": 0.42, + "grad_norm": 2.917652628497841, + "learning_rate": 6.568231940120279e-06, + "loss": 0.9032, + "step": 5096 + }, + { + "epoch": 0.42, + "grad_norm": 5.302888393164354, + "learning_rate": 6.566974904114871e-06, + "loss": 0.9961, + "step": 5097 + }, + { + "epoch": 0.42, + "grad_norm": 1.9869652572565435, + "learning_rate": 6.565717758271342e-06, + "loss": 0.488, + "step": 5098 + }, + { + "epoch": 0.42, + "grad_norm": 3.894664576934961, + "learning_rate": 6.5644605026778115e-06, + "loss": 0.9216, + "step": 5099 + }, + { + "epoch": 0.42, + "grad_norm": 6.1909555246544485, + "learning_rate": 6.563203137422409e-06, + "loss": 1.1481, + "step": 5100 + }, + { + "epoch": 0.42, + "grad_norm": 4.448891006913339, + "learning_rate": 6.561945662593268e-06, + "loss": 1.0079, + "step": 5101 + }, + { + "epoch": 0.42, + "grad_norm": 3.68256971253895, + "learning_rate": 6.5606880782785365e-06, + "loss": 0.9523, + "step": 5102 + }, + { + "epoch": 0.42, + "grad_norm": 2.2626027984745805, + "learning_rate": 6.559430384566361e-06, + "loss": 0.4791, + "step": 5103 + }, + { + "epoch": 0.42, + "grad_norm": 2.2812898718664627, + "learning_rate": 6.558172581544904e-06, + "loss": 0.6137, + "step": 5104 + }, + { + "epoch": 0.42, + "grad_norm": 2.9689861187037994, + "learning_rate": 6.5569146693023285e-06, + "loss": 0.4125, + "step": 5105 + }, + { + "epoch": 0.42, + "grad_norm": 3.3895446517519527, + "learning_rate": 6.5556566479268105e-06, + "loss": 0.7106, + "step": 5106 + }, + { + "epoch": 0.42, + "grad_norm": 4.13831278802137, + "learning_rate": 6.5543985175065315e-06, + "loss": 1.0889, + "step": 5107 + }, + { + "epoch": 0.42, + "grad_norm": 3.307667915755631, + "learning_rate": 6.553140278129683e-06, + "loss": 0.4637, + "step": 5108 + }, + { + "epoch": 0.42, + "grad_norm": 4.620203642781845, + "learning_rate": 6.551881929884458e-06, + "loss": 1.3451, + "step": 5109 + }, + { + "epoch": 0.42, + "grad_norm": 2.818227667612198, + "learning_rate": 6.550623472859063e-06, + "loss": 0.4817, + "step": 5110 + }, + { + "epoch": 0.42, + "grad_norm": 2.744101031433427, + "learning_rate": 6.549364907141713e-06, + "loss": 0.4434, + "step": 5111 + }, + { + "epoch": 0.42, + "grad_norm": 2.6282621358430904, + "learning_rate": 6.5481062328206265e-06, + "loss": 0.5363, + "step": 5112 + }, + { + "epoch": 0.42, + "grad_norm": 3.7443038737454217, + "learning_rate": 6.546847449984028e-06, + "loss": 0.8433, + "step": 5113 + }, + { + "epoch": 0.42, + "grad_norm": 3.5481004467431294, + "learning_rate": 6.5455885587201574e-06, + "loss": 0.5598, + "step": 5114 + }, + { + "epoch": 0.42, + "grad_norm": 3.827802691559211, + "learning_rate": 6.544329559117254e-06, + "loss": 0.896, + "step": 5115 + }, + { + "epoch": 0.42, + "grad_norm": 2.65077598090812, + "learning_rate": 6.543070451263569e-06, + "loss": 0.4954, + "step": 5116 + }, + { + "epoch": 0.42, + "grad_norm": 3.689678199470181, + "learning_rate": 6.5418112352473616e-06, + "loss": 0.6008, + "step": 5117 + }, + { + "epoch": 0.42, + "grad_norm": 2.3606194456689513, + "learning_rate": 6.540551911156896e-06, + "loss": 0.4282, + "step": 5118 + }, + { + "epoch": 0.42, + "grad_norm": 3.317531169578305, + "learning_rate": 6.5392924790804475e-06, + "loss": 0.7708, + "step": 5119 + }, + { + "epoch": 0.42, + "grad_norm": 2.7304560986881934, + "learning_rate": 6.538032939106295e-06, + "loss": 0.6627, + "step": 5120 + }, + { + "epoch": 0.42, + "grad_norm": 2.198971090333942, + "learning_rate": 6.536773291322726e-06, + "loss": 0.3177, + "step": 5121 + }, + { + "epoch": 0.42, + "grad_norm": 3.217417961186106, + "learning_rate": 6.5355135358180365e-06, + "loss": 0.5996, + "step": 5122 + }, + { + "epoch": 0.42, + "grad_norm": 3.115620532143598, + "learning_rate": 6.5342536726805325e-06, + "loss": 0.7306, + "step": 5123 + }, + { + "epoch": 0.42, + "grad_norm": 2.446784294414177, + "learning_rate": 6.532993701998522e-06, + "loss": 0.4508, + "step": 5124 + }, + { + "epoch": 0.42, + "grad_norm": 2.8874074345338547, + "learning_rate": 6.531733623860326e-06, + "loss": 0.4286, + "step": 5125 + }, + { + "epoch": 0.42, + "grad_norm": 4.661011299093553, + "learning_rate": 6.5304734383542664e-06, + "loss": 1.1787, + "step": 5126 + }, + { + "epoch": 0.42, + "grad_norm": 2.880231091993628, + "learning_rate": 6.5292131455686825e-06, + "loss": 0.6272, + "step": 5127 + }, + { + "epoch": 0.42, + "grad_norm": 4.639270364858419, + "learning_rate": 6.527952745591911e-06, + "loss": 1.0348, + "step": 5128 + }, + { + "epoch": 0.42, + "grad_norm": 3.2309889550281254, + "learning_rate": 6.526692238512301e-06, + "loss": 0.7215, + "step": 5129 + }, + { + "epoch": 0.42, + "grad_norm": 2.240868644277337, + "learning_rate": 6.5254316244182096e-06, + "loss": 0.4796, + "step": 5130 + }, + { + "epoch": 0.42, + "grad_norm": 4.479293937527797, + "learning_rate": 6.524170903398001e-06, + "loss": 0.9256, + "step": 5131 + }, + { + "epoch": 0.42, + "grad_norm": 3.3407275596290247, + "learning_rate": 6.522910075540043e-06, + "loss": 0.9344, + "step": 5132 + }, + { + "epoch": 0.42, + "grad_norm": 2.091374171071857, + "learning_rate": 6.5216491409327174e-06, + "loss": 0.3943, + "step": 5133 + }, + { + "epoch": 0.42, + "grad_norm": 3.700049334269497, + "learning_rate": 6.5203880996644105e-06, + "loss": 1.0703, + "step": 5134 + }, + { + "epoch": 0.42, + "grad_norm": 2.604011669234343, + "learning_rate": 6.519126951823516e-06, + "loss": 0.4824, + "step": 5135 + }, + { + "epoch": 0.42, + "grad_norm": 3.522206485399803, + "learning_rate": 6.51786569749843e-06, + "loss": 0.4807, + "step": 5136 + }, + { + "epoch": 0.42, + "grad_norm": 3.886140827984647, + "learning_rate": 6.516604336777565e-06, + "loss": 0.8842, + "step": 5137 + }, + { + "epoch": 0.42, + "grad_norm": 5.020753694642666, + "learning_rate": 6.515342869749337e-06, + "loss": 0.9779, + "step": 5138 + }, + { + "epoch": 0.42, + "grad_norm": 2.913893544134915, + "learning_rate": 6.5140812965021685e-06, + "loss": 0.6798, + "step": 5139 + }, + { + "epoch": 0.42, + "grad_norm": 3.839134577631026, + "learning_rate": 6.512819617124491e-06, + "loss": 0.9442, + "step": 5140 + }, + { + "epoch": 0.42, + "grad_norm": 3.5516407268408408, + "learning_rate": 6.511557831704741e-06, + "loss": 0.7877, + "step": 5141 + }, + { + "epoch": 0.42, + "grad_norm": 0.8731293715755902, + "learning_rate": 6.510295940331367e-06, + "loss": 0.1423, + "step": 5142 + }, + { + "epoch": 0.42, + "grad_norm": 5.0190805563422805, + "learning_rate": 6.509033943092819e-06, + "loss": 0.8424, + "step": 5143 + }, + { + "epoch": 0.42, + "grad_norm": 4.14674799879688, + "learning_rate": 6.50777184007756e-06, + "loss": 0.6919, + "step": 5144 + }, + { + "epoch": 0.42, + "grad_norm": 2.113716663688054, + "learning_rate": 6.506509631374056e-06, + "loss": 0.419, + "step": 5145 + }, + { + "epoch": 0.42, + "grad_norm": 3.937709065235011, + "learning_rate": 6.5052473170707844e-06, + "loss": 1.1737, + "step": 5146 + }, + { + "epoch": 0.42, + "grad_norm": 4.277353913858537, + "learning_rate": 6.5039848972562246e-06, + "loss": 0.7087, + "step": 5147 + }, + { + "epoch": 0.42, + "grad_norm": 3.3910715221425365, + "learning_rate": 6.50272237201887e-06, + "loss": 0.7598, + "step": 5148 + }, + { + "epoch": 0.42, + "grad_norm": 3.939373388917265, + "learning_rate": 6.501459741447217e-06, + "loss": 0.8829, + "step": 5149 + }, + { + "epoch": 0.42, + "grad_norm": 3.0479883517194137, + "learning_rate": 6.500197005629772e-06, + "loss": 0.5932, + "step": 5150 + }, + { + "epoch": 0.42, + "grad_norm": 4.306912013821991, + "learning_rate": 6.498934164655044e-06, + "loss": 1.0467, + "step": 5151 + }, + { + "epoch": 0.42, + "grad_norm": 2.8791463716466894, + "learning_rate": 6.4976712186115545e-06, + "loss": 0.5539, + "step": 5152 + }, + { + "epoch": 0.42, + "grad_norm": 4.319894481396109, + "learning_rate": 6.49640816758783e-06, + "loss": 0.813, + "step": 5153 + }, + { + "epoch": 0.42, + "grad_norm": 3.2850483337946716, + "learning_rate": 6.495145011672406e-06, + "loss": 0.8408, + "step": 5154 + }, + { + "epoch": 0.42, + "grad_norm": 3.7827482983780927, + "learning_rate": 6.493881750953823e-06, + "loss": 0.9218, + "step": 5155 + }, + { + "epoch": 0.42, + "grad_norm": 3.6812886117281316, + "learning_rate": 6.49261838552063e-06, + "loss": 0.7084, + "step": 5156 + }, + { + "epoch": 0.42, + "grad_norm": 3.422633886344834, + "learning_rate": 6.491354915461387e-06, + "loss": 0.907, + "step": 5157 + }, + { + "epoch": 0.42, + "grad_norm": 3.8343098758964165, + "learning_rate": 6.490091340864654e-06, + "loss": 1.0196, + "step": 5158 + }, + { + "epoch": 0.42, + "grad_norm": 4.662496099515557, + "learning_rate": 6.488827661819002e-06, + "loss": 0.9483, + "step": 5159 + }, + { + "epoch": 0.42, + "grad_norm": 5.168576346105122, + "learning_rate": 6.48756387841301e-06, + "loss": 1.0537, + "step": 5160 + }, + { + "epoch": 0.42, + "grad_norm": 4.17476297436785, + "learning_rate": 6.486299990735263e-06, + "loss": 1.0399, + "step": 5161 + }, + { + "epoch": 0.42, + "grad_norm": 2.3953438108264753, + "learning_rate": 6.485035998874356e-06, + "loss": 0.3847, + "step": 5162 + }, + { + "epoch": 0.42, + "grad_norm": 4.156931565798611, + "learning_rate": 6.48377190291889e-06, + "loss": 0.9419, + "step": 5163 + }, + { + "epoch": 0.42, + "grad_norm": 3.7204080529191734, + "learning_rate": 6.482507702957469e-06, + "loss": 0.804, + "step": 5164 + }, + { + "epoch": 0.42, + "grad_norm": 4.306466886058456, + "learning_rate": 6.481243399078712e-06, + "loss": 0.8862, + "step": 5165 + }, + { + "epoch": 0.42, + "grad_norm": 1.9172894698382654, + "learning_rate": 6.479978991371239e-06, + "loss": 0.3973, + "step": 5166 + }, + { + "epoch": 0.42, + "grad_norm": 3.6456024896313104, + "learning_rate": 6.478714479923677e-06, + "loss": 0.7912, + "step": 5167 + }, + { + "epoch": 0.42, + "grad_norm": 1.752028880061259, + "learning_rate": 6.4774498648246675e-06, + "loss": 0.3674, + "step": 5168 + }, + { + "epoch": 0.42, + "grad_norm": 3.2718369483666523, + "learning_rate": 6.4761851461628514e-06, + "loss": 0.5094, + "step": 5169 + }, + { + "epoch": 0.42, + "grad_norm": 3.5861334535536775, + "learning_rate": 6.47492032402688e-06, + "loss": 0.8346, + "step": 5170 + }, + { + "epoch": 0.42, + "grad_norm": 3.514568973146496, + "learning_rate": 6.473655398505414e-06, + "loss": 0.9248, + "step": 5171 + }, + { + "epoch": 0.42, + "grad_norm": 5.151764429495408, + "learning_rate": 6.472390369687118e-06, + "loss": 0.8877, + "step": 5172 + }, + { + "epoch": 0.42, + "grad_norm": 2.7227006795204702, + "learning_rate": 6.471125237660665e-06, + "loss": 0.532, + "step": 5173 + }, + { + "epoch": 0.42, + "grad_norm": 4.548160455981264, + "learning_rate": 6.469860002514736e-06, + "loss": 0.7771, + "step": 5174 + }, + { + "epoch": 0.42, + "grad_norm": 2.223658539674634, + "learning_rate": 6.468594664338016e-06, + "loss": 0.4935, + "step": 5175 + }, + { + "epoch": 0.42, + "grad_norm": 1.4522936109620626, + "learning_rate": 6.467329223219201e-06, + "loss": 0.2192, + "step": 5176 + }, + { + "epoch": 0.42, + "grad_norm": 4.913587802323358, + "learning_rate": 6.4660636792469955e-06, + "loss": 1.3289, + "step": 5177 + }, + { + "epoch": 0.42, + "grad_norm": 3.732764437814705, + "learning_rate": 6.464798032510104e-06, + "loss": 0.9273, + "step": 5178 + }, + { + "epoch": 0.42, + "grad_norm": 3.9186169441628973, + "learning_rate": 6.4635322830972465e-06, + "loss": 0.9121, + "step": 5179 + }, + { + "epoch": 0.42, + "grad_norm": 4.9273731689932845, + "learning_rate": 6.462266431097146e-06, + "loss": 0.7626, + "step": 5180 + }, + { + "epoch": 0.42, + "grad_norm": 4.427628339205233, + "learning_rate": 6.461000476598532e-06, + "loss": 1.3493, + "step": 5181 + }, + { + "epoch": 0.42, + "grad_norm": 2.0967838122151363, + "learning_rate": 6.459734419690143e-06, + "loss": 0.2514, + "step": 5182 + }, + { + "epoch": 0.42, + "grad_norm": 1.88698206894186, + "learning_rate": 6.458468260460724e-06, + "loss": 0.4262, + "step": 5183 + }, + { + "epoch": 0.42, + "grad_norm": 3.9352789215009603, + "learning_rate": 6.457201998999025e-06, + "loss": 1.0131, + "step": 5184 + }, + { + "epoch": 0.42, + "grad_norm": 3.9065289517117976, + "learning_rate": 6.455935635393811e-06, + "loss": 0.7477, + "step": 5185 + }, + { + "epoch": 0.42, + "grad_norm": 3.289868895267918, + "learning_rate": 6.454669169733843e-06, + "loss": 0.8929, + "step": 5186 + }, + { + "epoch": 0.42, + "grad_norm": 2.8693499507499465, + "learning_rate": 6.4534026021078966e-06, + "loss": 0.5043, + "step": 5187 + }, + { + "epoch": 0.42, + "grad_norm": 5.947055324633199, + "learning_rate": 6.452135932604755e-06, + "loss": 1.3338, + "step": 5188 + }, + { + "epoch": 0.42, + "grad_norm": 4.990863485491423, + "learning_rate": 6.450869161313205e-06, + "loss": 1.1023, + "step": 5189 + }, + { + "epoch": 0.42, + "grad_norm": 2.6732958963792015, + "learning_rate": 6.4496022883220376e-06, + "loss": 0.4853, + "step": 5190 + }, + { + "epoch": 0.42, + "grad_norm": 4.5877289700187065, + "learning_rate": 6.448335313720061e-06, + "loss": 1.2412, + "step": 5191 + }, + { + "epoch": 0.42, + "grad_norm": 3.922699463637487, + "learning_rate": 6.447068237596081e-06, + "loss": 0.7607, + "step": 5192 + }, + { + "epoch": 0.42, + "grad_norm": 2.6481712138131246, + "learning_rate": 6.445801060038915e-06, + "loss": 0.5285, + "step": 5193 + }, + { + "epoch": 0.42, + "grad_norm": 4.319125009354408, + "learning_rate": 6.444533781137387e-06, + "loss": 0.9255, + "step": 5194 + }, + { + "epoch": 0.42, + "grad_norm": 3.0171710686961, + "learning_rate": 6.443266400980328e-06, + "loss": 0.8706, + "step": 5195 + }, + { + "epoch": 0.42, + "grad_norm": 4.344644252269986, + "learning_rate": 6.441998919656575e-06, + "loss": 1.2445, + "step": 5196 + }, + { + "epoch": 0.42, + "grad_norm": 2.909068690829836, + "learning_rate": 6.440731337254975e-06, + "loss": 0.7838, + "step": 5197 + }, + { + "epoch": 0.42, + "grad_norm": 1.7922868575018684, + "learning_rate": 6.439463653864376e-06, + "loss": 0.318, + "step": 5198 + }, + { + "epoch": 0.42, + "grad_norm": 4.713543045431059, + "learning_rate": 6.43819586957364e-06, + "loss": 0.693, + "step": 5199 + }, + { + "epoch": 0.43, + "grad_norm": 3.86145422753484, + "learning_rate": 6.436927984471634e-06, + "loss": 1.1019, + "step": 5200 + }, + { + "epoch": 0.43, + "grad_norm": 2.9079994710944934, + "learning_rate": 6.435659998647228e-06, + "loss": 0.8679, + "step": 5201 + }, + { + "epoch": 0.43, + "grad_norm": 4.172939734126654, + "learning_rate": 6.434391912189304e-06, + "loss": 0.6581, + "step": 5202 + }, + { + "epoch": 0.43, + "grad_norm": 2.971402010277261, + "learning_rate": 6.433123725186752e-06, + "loss": 0.8514, + "step": 5203 + }, + { + "epoch": 0.43, + "grad_norm": 4.570102971289309, + "learning_rate": 6.431855437728463e-06, + "loss": 1.2454, + "step": 5204 + }, + { + "epoch": 0.43, + "grad_norm": 2.120223974832511, + "learning_rate": 6.430587049903336e-06, + "loss": 0.743, + "step": 5205 + }, + { + "epoch": 0.43, + "grad_norm": 4.162178464204561, + "learning_rate": 6.429318561800286e-06, + "loss": 1.0354, + "step": 5206 + }, + { + "epoch": 0.43, + "grad_norm": 3.501800227045963, + "learning_rate": 6.428049973508225e-06, + "loss": 0.7937, + "step": 5207 + }, + { + "epoch": 0.43, + "grad_norm": 2.94871567949659, + "learning_rate": 6.426781285116075e-06, + "loss": 0.6766, + "step": 5208 + }, + { + "epoch": 0.43, + "grad_norm": 2.466894583246494, + "learning_rate": 6.4255124967127665e-06, + "loss": 0.4047, + "step": 5209 + }, + { + "epoch": 0.43, + "grad_norm": 5.43354768501383, + "learning_rate": 6.424243608387235e-06, + "loss": 0.734, + "step": 5210 + }, + { + "epoch": 0.43, + "grad_norm": 5.646302110542977, + "learning_rate": 6.422974620228426e-06, + "loss": 1.5648, + "step": 5211 + }, + { + "epoch": 0.43, + "grad_norm": 2.5900228242446808, + "learning_rate": 6.421705532325289e-06, + "loss": 0.6069, + "step": 5212 + }, + { + "epoch": 0.43, + "grad_norm": 4.402590196811873, + "learning_rate": 6.420436344766781e-06, + "loss": 0.8138, + "step": 5213 + }, + { + "epoch": 0.43, + "grad_norm": 4.417897697444654, + "learning_rate": 6.419167057641868e-06, + "loss": 0.6705, + "step": 5214 + }, + { + "epoch": 0.43, + "grad_norm": 2.35920113079431, + "learning_rate": 6.417897671039519e-06, + "loss": 0.4583, + "step": 5215 + }, + { + "epoch": 0.43, + "grad_norm": 3.834644706331093, + "learning_rate": 6.4166281850487135e-06, + "loss": 0.9184, + "step": 5216 + }, + { + "epoch": 0.43, + "grad_norm": 4.201190354437537, + "learning_rate": 6.415358599758439e-06, + "loss": 0.6057, + "step": 5217 + }, + { + "epoch": 0.43, + "grad_norm": 2.498817582268024, + "learning_rate": 6.414088915257686e-06, + "loss": 0.4364, + "step": 5218 + }, + { + "epoch": 0.43, + "grad_norm": 2.101896399093929, + "learning_rate": 6.4128191316354525e-06, + "loss": 0.4627, + "step": 5219 + }, + { + "epoch": 0.43, + "grad_norm": 3.7323505431124886, + "learning_rate": 6.411549248980748e-06, + "loss": 0.8131, + "step": 5220 + }, + { + "epoch": 0.43, + "grad_norm": 3.0423803714767366, + "learning_rate": 6.410279267382585e-06, + "loss": 0.7822, + "step": 5221 + }, + { + "epoch": 0.43, + "grad_norm": 2.115267896822398, + "learning_rate": 6.409009186929982e-06, + "loss": 0.4279, + "step": 5222 + }, + { + "epoch": 0.43, + "grad_norm": 2.012951896962024, + "learning_rate": 6.407739007711969e-06, + "loss": 0.3176, + "step": 5223 + }, + { + "epoch": 0.43, + "grad_norm": 2.608909053480998, + "learning_rate": 6.406468729817574e-06, + "loss": 0.5814, + "step": 5224 + }, + { + "epoch": 0.43, + "grad_norm": 3.4885622809302306, + "learning_rate": 6.405198353335844e-06, + "loss": 0.7136, + "step": 5225 + }, + { + "epoch": 0.43, + "grad_norm": 3.275506002331668, + "learning_rate": 6.403927878355825e-06, + "loss": 0.5962, + "step": 5226 + }, + { + "epoch": 0.43, + "grad_norm": 3.760382784091925, + "learning_rate": 6.402657304966572e-06, + "loss": 1.0417, + "step": 5227 + }, + { + "epoch": 0.43, + "grad_norm": 3.418888169136374, + "learning_rate": 6.401386633257146e-06, + "loss": 0.6734, + "step": 5228 + }, + { + "epoch": 0.43, + "grad_norm": 2.440478521109242, + "learning_rate": 6.400115863316616e-06, + "loss": 0.4783, + "step": 5229 + }, + { + "epoch": 0.43, + "grad_norm": 4.641165967006061, + "learning_rate": 6.398844995234057e-06, + "loss": 0.8536, + "step": 5230 + }, + { + "epoch": 0.43, + "grad_norm": 3.7431611684166457, + "learning_rate": 6.397574029098552e-06, + "loss": 0.5524, + "step": 5231 + }, + { + "epoch": 0.43, + "grad_norm": 3.040278877345919, + "learning_rate": 6.39630296499919e-06, + "loss": 0.9451, + "step": 5232 + }, + { + "epoch": 0.43, + "grad_norm": 3.409153375979241, + "learning_rate": 6.3950318030250654e-06, + "loss": 0.6427, + "step": 5233 + }, + { + "epoch": 0.43, + "grad_norm": 3.7850491565280184, + "learning_rate": 6.393760543265285e-06, + "loss": 0.8784, + "step": 5234 + }, + { + "epoch": 0.43, + "grad_norm": 3.452661896158122, + "learning_rate": 6.392489185808954e-06, + "loss": 0.7516, + "step": 5235 + }, + { + "epoch": 0.43, + "grad_norm": 4.916918257978019, + "learning_rate": 6.391217730745193e-06, + "loss": 0.9683, + "step": 5236 + }, + { + "epoch": 0.43, + "grad_norm": 4.432841017110807, + "learning_rate": 6.3899461781631225e-06, + "loss": 1.1874, + "step": 5237 + }, + { + "epoch": 0.43, + "grad_norm": 3.5810353135567894, + "learning_rate": 6.388674528151875e-06, + "loss": 0.8334, + "step": 5238 + }, + { + "epoch": 0.43, + "grad_norm": 3.7601699256718675, + "learning_rate": 6.387402780800585e-06, + "loss": 0.7938, + "step": 5239 + }, + { + "epoch": 0.43, + "grad_norm": 3.490327124441589, + "learning_rate": 6.386130936198399e-06, + "loss": 0.8184, + "step": 5240 + }, + { + "epoch": 0.43, + "grad_norm": 3.657950700172878, + "learning_rate": 6.384858994434467e-06, + "loss": 0.7991, + "step": 5241 + }, + { + "epoch": 0.43, + "grad_norm": 5.360273681567106, + "learning_rate": 6.383586955597945e-06, + "loss": 1.0199, + "step": 5242 + }, + { + "epoch": 0.43, + "grad_norm": 3.221993123701766, + "learning_rate": 6.382314819778e-06, + "loss": 0.4936, + "step": 5243 + }, + { + "epoch": 0.43, + "grad_norm": 3.0920500320981583, + "learning_rate": 6.381042587063803e-06, + "loss": 0.5727, + "step": 5244 + }, + { + "epoch": 0.43, + "grad_norm": 1.2347677618701909, + "learning_rate": 6.379770257544529e-06, + "loss": 0.1976, + "step": 5245 + }, + { + "epoch": 0.43, + "grad_norm": 3.033352049988148, + "learning_rate": 6.378497831309367e-06, + "loss": 0.6842, + "step": 5246 + }, + { + "epoch": 0.43, + "grad_norm": 4.4052660445723, + "learning_rate": 6.377225308447503e-06, + "loss": 0.8753, + "step": 5247 + }, + { + "epoch": 0.43, + "grad_norm": 4.323082370180678, + "learning_rate": 6.375952689048141e-06, + "loss": 1.2976, + "step": 5248 + }, + { + "epoch": 0.43, + "grad_norm": 2.142553012636817, + "learning_rate": 6.374679973200484e-06, + "loss": 0.3026, + "step": 5249 + }, + { + "epoch": 0.43, + "grad_norm": 2.790216347839119, + "learning_rate": 6.373407160993742e-06, + "loss": 0.6913, + "step": 5250 + }, + { + "epoch": 0.43, + "grad_norm": 3.9260276299107923, + "learning_rate": 6.372134252517136e-06, + "loss": 1.0458, + "step": 5251 + }, + { + "epoch": 0.43, + "grad_norm": 3.4639919518325244, + "learning_rate": 6.370861247859891e-06, + "loss": 0.4821, + "step": 5252 + }, + { + "epoch": 0.43, + "grad_norm": 3.3230537659127353, + "learning_rate": 6.369588147111236e-06, + "loss": 0.5219, + "step": 5253 + }, + { + "epoch": 0.43, + "grad_norm": 4.368258655052682, + "learning_rate": 6.368314950360416e-06, + "loss": 0.8872, + "step": 5254 + }, + { + "epoch": 0.43, + "grad_norm": 2.3266571921352304, + "learning_rate": 6.36704165769667e-06, + "loss": 0.3647, + "step": 5255 + }, + { + "epoch": 0.43, + "grad_norm": 3.475615810880121, + "learning_rate": 6.365768269209254e-06, + "loss": 1.0257, + "step": 5256 + }, + { + "epoch": 0.43, + "grad_norm": 4.926406979750642, + "learning_rate": 6.364494784987427e-06, + "loss": 1.1206, + "step": 5257 + }, + { + "epoch": 0.43, + "grad_norm": 2.387279390308762, + "learning_rate": 6.363221205120452e-06, + "loss": 0.5091, + "step": 5258 + }, + { + "epoch": 0.43, + "grad_norm": 4.051417370757732, + "learning_rate": 6.361947529697605e-06, + "loss": 1.0578, + "step": 5259 + }, + { + "epoch": 0.43, + "grad_norm": 4.161788346493241, + "learning_rate": 6.360673758808163e-06, + "loss": 0.781, + "step": 5260 + }, + { + "epoch": 0.43, + "grad_norm": 3.948150103760491, + "learning_rate": 6.359399892541412e-06, + "loss": 0.726, + "step": 5261 + }, + { + "epoch": 0.43, + "grad_norm": 3.113461144271678, + "learning_rate": 6.358125930986645e-06, + "loss": 0.3269, + "step": 5262 + }, + { + "epoch": 0.43, + "grad_norm": 2.04931175796063, + "learning_rate": 6.356851874233161e-06, + "loss": 0.5767, + "step": 5263 + }, + { + "epoch": 0.43, + "grad_norm": 2.9176332686041966, + "learning_rate": 6.355577722370264e-06, + "loss": 0.7563, + "step": 5264 + }, + { + "epoch": 0.43, + "grad_norm": 4.59725461401139, + "learning_rate": 6.354303475487269e-06, + "loss": 1.1339, + "step": 5265 + }, + { + "epoch": 0.43, + "grad_norm": 2.889559779448481, + "learning_rate": 6.353029133673496e-06, + "loss": 0.9163, + "step": 5266 + }, + { + "epoch": 0.43, + "grad_norm": 4.026137753654765, + "learning_rate": 6.351754697018269e-06, + "loss": 1.1465, + "step": 5267 + }, + { + "epoch": 0.43, + "grad_norm": 3.1978137486717033, + "learning_rate": 6.3504801656109195e-06, + "loss": 0.6188, + "step": 5268 + }, + { + "epoch": 0.43, + "grad_norm": 2.443129720076294, + "learning_rate": 6.349205539540786e-06, + "loss": 0.444, + "step": 5269 + }, + { + "epoch": 0.43, + "grad_norm": 2.5323700976741597, + "learning_rate": 6.3479308188972175e-06, + "loss": 0.6203, + "step": 5270 + }, + { + "epoch": 0.43, + "grad_norm": 4.506653631760839, + "learning_rate": 6.346656003769565e-06, + "loss": 0.8935, + "step": 5271 + }, + { + "epoch": 0.43, + "grad_norm": 4.51929605166405, + "learning_rate": 6.345381094247188e-06, + "loss": 0.8745, + "step": 5272 + }, + { + "epoch": 0.43, + "grad_norm": 2.7618715703681103, + "learning_rate": 6.34410609041945e-06, + "loss": 0.7381, + "step": 5273 + }, + { + "epoch": 0.43, + "grad_norm": 2.9031600701897586, + "learning_rate": 6.342830992375725e-06, + "loss": 0.8362, + "step": 5274 + }, + { + "epoch": 0.43, + "grad_norm": 4.069650157218168, + "learning_rate": 6.341555800205392e-06, + "loss": 1.0269, + "step": 5275 + }, + { + "epoch": 0.43, + "grad_norm": 2.452049394631239, + "learning_rate": 6.340280513997835e-06, + "loss": 0.2514, + "step": 5276 + }, + { + "epoch": 0.43, + "grad_norm": 4.893706571124117, + "learning_rate": 6.3390051338424485e-06, + "loss": 1.5367, + "step": 5277 + }, + { + "epoch": 0.43, + "grad_norm": 3.659535833865813, + "learning_rate": 6.337729659828627e-06, + "loss": 0.9222, + "step": 5278 + }, + { + "epoch": 0.43, + "grad_norm": 3.2514738051998213, + "learning_rate": 6.33645409204578e-06, + "loss": 0.7972, + "step": 5279 + }, + { + "epoch": 0.43, + "grad_norm": 3.6843164318194446, + "learning_rate": 6.3351784305833175e-06, + "loss": 0.6152, + "step": 5280 + }, + { + "epoch": 0.43, + "grad_norm": 3.5362471559949156, + "learning_rate": 6.333902675530657e-06, + "loss": 1.0854, + "step": 5281 + }, + { + "epoch": 0.43, + "grad_norm": 3.556924473924575, + "learning_rate": 6.332626826977224e-06, + "loss": 1.0025, + "step": 5282 + }, + { + "epoch": 0.43, + "grad_norm": 3.0381830710652, + "learning_rate": 6.33135088501245e-06, + "loss": 0.6271, + "step": 5283 + }, + { + "epoch": 0.43, + "grad_norm": 4.457149041496543, + "learning_rate": 6.330074849725774e-06, + "loss": 0.7812, + "step": 5284 + }, + { + "epoch": 0.43, + "grad_norm": 2.794346012715579, + "learning_rate": 6.328798721206638e-06, + "loss": 0.4939, + "step": 5285 + }, + { + "epoch": 0.43, + "grad_norm": 2.158328439921532, + "learning_rate": 6.327522499544496e-06, + "loss": 0.4347, + "step": 5286 + }, + { + "epoch": 0.43, + "grad_norm": 5.111621810371288, + "learning_rate": 6.3262461848288034e-06, + "loss": 1.3857, + "step": 5287 + }, + { + "epoch": 0.43, + "grad_norm": 2.8505774832006874, + "learning_rate": 6.324969777149026e-06, + "loss": 0.6636, + "step": 5288 + }, + { + "epoch": 0.43, + "grad_norm": 3.506604162373283, + "learning_rate": 6.323693276594632e-06, + "loss": 0.5305, + "step": 5289 + }, + { + "epoch": 0.43, + "grad_norm": 2.8964055593354967, + "learning_rate": 6.322416683255103e-06, + "loss": 0.5667, + "step": 5290 + }, + { + "epoch": 0.43, + "grad_norm": 2.76693745022548, + "learning_rate": 6.321139997219917e-06, + "loss": 0.5249, + "step": 5291 + }, + { + "epoch": 0.43, + "grad_norm": 2.3431205458148905, + "learning_rate": 6.319863218578568e-06, + "loss": 0.6691, + "step": 5292 + }, + { + "epoch": 0.43, + "grad_norm": 4.317627231571605, + "learning_rate": 6.31858634742055e-06, + "loss": 1.1668, + "step": 5293 + }, + { + "epoch": 0.43, + "grad_norm": 6.154345697947195, + "learning_rate": 6.317309383835368e-06, + "loss": 1.2021, + "step": 5294 + }, + { + "epoch": 0.43, + "grad_norm": 2.437607805714208, + "learning_rate": 6.316032327912532e-06, + "loss": 0.4886, + "step": 5295 + }, + { + "epoch": 0.43, + "grad_norm": 2.6188452267712865, + "learning_rate": 6.314755179741556e-06, + "loss": 0.4352, + "step": 5296 + }, + { + "epoch": 0.43, + "grad_norm": 3.2353476662810428, + "learning_rate": 6.313477939411965e-06, + "loss": 0.4746, + "step": 5297 + }, + { + "epoch": 0.43, + "grad_norm": 3.243223467921312, + "learning_rate": 6.312200607013287e-06, + "loss": 1.0017, + "step": 5298 + }, + { + "epoch": 0.43, + "grad_norm": 5.310191103900768, + "learning_rate": 6.310923182635056e-06, + "loss": 1.0909, + "step": 5299 + }, + { + "epoch": 0.43, + "grad_norm": 3.4573373782516903, + "learning_rate": 6.309645666366816e-06, + "loss": 0.9911, + "step": 5300 + }, + { + "epoch": 0.43, + "grad_norm": 3.5494617356408997, + "learning_rate": 6.308368058298114e-06, + "loss": 0.6491, + "step": 5301 + }, + { + "epoch": 0.43, + "grad_norm": 1.0475818745256136, + "learning_rate": 6.307090358518504e-06, + "loss": 0.1533, + "step": 5302 + }, + { + "epoch": 0.43, + "grad_norm": 4.288289841775113, + "learning_rate": 6.30581256711755e-06, + "loss": 0.9734, + "step": 5303 + }, + { + "epoch": 0.43, + "grad_norm": 3.3351673026973065, + "learning_rate": 6.304534684184816e-06, + "loss": 0.5136, + "step": 5304 + }, + { + "epoch": 0.43, + "grad_norm": 4.0374178588832, + "learning_rate": 6.303256709809879e-06, + "loss": 0.8494, + "step": 5305 + }, + { + "epoch": 0.43, + "grad_norm": 1.5981949932303068, + "learning_rate": 6.301978644082321e-06, + "loss": 0.3312, + "step": 5306 + }, + { + "epoch": 0.43, + "grad_norm": 1.8521095385310853, + "learning_rate": 6.300700487091723e-06, + "loss": 0.3675, + "step": 5307 + }, + { + "epoch": 0.43, + "grad_norm": 4.542418793742306, + "learning_rate": 6.299422238927683e-06, + "loss": 1.0892, + "step": 5308 + }, + { + "epoch": 0.43, + "grad_norm": 3.363744970945015, + "learning_rate": 6.298143899679798e-06, + "loss": 0.6085, + "step": 5309 + }, + { + "epoch": 0.43, + "grad_norm": 3.358168052292183, + "learning_rate": 6.296865469437675e-06, + "loss": 0.8132, + "step": 5310 + }, + { + "epoch": 0.43, + "grad_norm": 3.9215527271968007, + "learning_rate": 6.295586948290928e-06, + "loss": 1.1532, + "step": 5311 + }, + { + "epoch": 0.43, + "grad_norm": 4.222417314593268, + "learning_rate": 6.294308336329174e-06, + "loss": 0.7616, + "step": 5312 + }, + { + "epoch": 0.43, + "grad_norm": 2.7126852976017393, + "learning_rate": 6.293029633642038e-06, + "loss": 0.5614, + "step": 5313 + }, + { + "epoch": 0.43, + "grad_norm": 2.361404869047012, + "learning_rate": 6.291750840319152e-06, + "loss": 0.495, + "step": 5314 + }, + { + "epoch": 0.43, + "grad_norm": 2.676970942660408, + "learning_rate": 6.2904719564501545e-06, + "loss": 0.5076, + "step": 5315 + }, + { + "epoch": 0.43, + "grad_norm": 3.7089502354975656, + "learning_rate": 6.2891929821246875e-06, + "loss": 0.7755, + "step": 5316 + }, + { + "epoch": 0.43, + "grad_norm": 6.006691414153541, + "learning_rate": 6.287913917432405e-06, + "loss": 1.0736, + "step": 5317 + }, + { + "epoch": 0.43, + "grad_norm": 3.859693190142713, + "learning_rate": 6.286634762462961e-06, + "loss": 0.6757, + "step": 5318 + }, + { + "epoch": 0.43, + "grad_norm": 4.372561771884962, + "learning_rate": 6.285355517306019e-06, + "loss": 0.9634, + "step": 5319 + }, + { + "epoch": 0.43, + "grad_norm": 5.077137875843918, + "learning_rate": 6.2840761820512505e-06, + "loss": 1.1677, + "step": 5320 + }, + { + "epoch": 0.43, + "grad_norm": 1.1370738600649346, + "learning_rate": 6.282796756788328e-06, + "loss": 0.1414, + "step": 5321 + }, + { + "epoch": 0.43, + "grad_norm": 3.220375852691493, + "learning_rate": 6.281517241606938e-06, + "loss": 0.4744, + "step": 5322 + }, + { + "epoch": 0.44, + "grad_norm": 3.5808255674719076, + "learning_rate": 6.280237636596765e-06, + "loss": 0.5416, + "step": 5323 + }, + { + "epoch": 0.44, + "grad_norm": 3.384436992824313, + "learning_rate": 6.278957941847506e-06, + "loss": 0.708, + "step": 5324 + }, + { + "epoch": 0.44, + "grad_norm": 3.3301604915033787, + "learning_rate": 6.27767815744886e-06, + "loss": 0.8014, + "step": 5325 + }, + { + "epoch": 0.44, + "grad_norm": 2.8885651689869096, + "learning_rate": 6.276398283490537e-06, + "loss": 0.6231, + "step": 5326 + }, + { + "epoch": 0.44, + "grad_norm": 4.355467328726323, + "learning_rate": 6.275118320062248e-06, + "loss": 1.1165, + "step": 5327 + }, + { + "epoch": 0.44, + "grad_norm": 4.197077206685935, + "learning_rate": 6.273838267253716e-06, + "loss": 1.0601, + "step": 5328 + }, + { + "epoch": 0.44, + "grad_norm": 5.5896109313530795, + "learning_rate": 6.272558125154663e-06, + "loss": 1.3009, + "step": 5329 + }, + { + "epoch": 0.44, + "grad_norm": 2.9650801372699567, + "learning_rate": 6.271277893854825e-06, + "loss": 0.6123, + "step": 5330 + }, + { + "epoch": 0.44, + "grad_norm": 3.2756477223518687, + "learning_rate": 6.26999757344394e-06, + "loss": 0.5752, + "step": 5331 + }, + { + "epoch": 0.44, + "grad_norm": 2.172137842304576, + "learning_rate": 6.268717164011751e-06, + "loss": 0.44, + "step": 5332 + }, + { + "epoch": 0.44, + "grad_norm": 4.484492362826107, + "learning_rate": 6.2674366656480105e-06, + "loss": 1.3716, + "step": 5333 + }, + { + "epoch": 0.44, + "grad_norm": 5.409549960962839, + "learning_rate": 6.266156078442476e-06, + "loss": 1.2073, + "step": 5334 + }, + { + "epoch": 0.44, + "grad_norm": 2.4379919669246224, + "learning_rate": 6.264875402484909e-06, + "loss": 0.6082, + "step": 5335 + }, + { + "epoch": 0.44, + "grad_norm": 2.4813981703057624, + "learning_rate": 6.263594637865081e-06, + "loss": 0.5548, + "step": 5336 + }, + { + "epoch": 0.44, + "grad_norm": 5.174097987149481, + "learning_rate": 6.262313784672771e-06, + "loss": 0.7561, + "step": 5337 + }, + { + "epoch": 0.44, + "grad_norm": 4.338098544074793, + "learning_rate": 6.261032842997756e-06, + "loss": 0.9866, + "step": 5338 + }, + { + "epoch": 0.44, + "grad_norm": 3.5408474628918287, + "learning_rate": 6.259751812929829e-06, + "loss": 0.7631, + "step": 5339 + }, + { + "epoch": 0.44, + "grad_norm": 3.8321054319008008, + "learning_rate": 6.25847069455878e-06, + "loss": 0.6125, + "step": 5340 + }, + { + "epoch": 0.44, + "grad_norm": 2.8415935691356777, + "learning_rate": 6.257189487974414e-06, + "loss": 0.6153, + "step": 5341 + }, + { + "epoch": 0.44, + "grad_norm": 4.96059793607359, + "learning_rate": 6.255908193266533e-06, + "loss": 0.7248, + "step": 5342 + }, + { + "epoch": 0.44, + "grad_norm": 4.367845793763824, + "learning_rate": 6.254626810524956e-06, + "loss": 0.8438, + "step": 5343 + }, + { + "epoch": 0.44, + "grad_norm": 2.9224084609222465, + "learning_rate": 6.2533453398395e-06, + "loss": 0.6552, + "step": 5344 + }, + { + "epoch": 0.44, + "grad_norm": 3.990580969392164, + "learning_rate": 6.25206378129999e-06, + "loss": 0.9274, + "step": 5345 + }, + { + "epoch": 0.44, + "grad_norm": 3.6908493507615985, + "learning_rate": 6.250782134996257e-06, + "loss": 0.6803, + "step": 5346 + }, + { + "epoch": 0.44, + "grad_norm": 4.576204374110863, + "learning_rate": 6.24950040101814e-06, + "loss": 1.2582, + "step": 5347 + }, + { + "epoch": 0.44, + "grad_norm": 1.4265933546341147, + "learning_rate": 6.248218579455484e-06, + "loss": 0.2474, + "step": 5348 + }, + { + "epoch": 0.44, + "grad_norm": 3.1413602211082585, + "learning_rate": 6.246936670398136e-06, + "loss": 0.9275, + "step": 5349 + }, + { + "epoch": 0.44, + "grad_norm": 3.004104808189576, + "learning_rate": 6.245654673935955e-06, + "loss": 0.7209, + "step": 5350 + }, + { + "epoch": 0.44, + "grad_norm": 4.363007538770154, + "learning_rate": 6.244372590158802e-06, + "loss": 1.1466, + "step": 5351 + }, + { + "epoch": 0.44, + "grad_norm": 5.362132366906052, + "learning_rate": 6.243090419156547e-06, + "loss": 1.5448, + "step": 5352 + }, + { + "epoch": 0.44, + "grad_norm": 2.5829805256999094, + "learning_rate": 6.241808161019063e-06, + "loss": 0.5098, + "step": 5353 + }, + { + "epoch": 0.44, + "grad_norm": 3.414527589241339, + "learning_rate": 6.2405258158362315e-06, + "loss": 0.7025, + "step": 5354 + }, + { + "epoch": 0.44, + "grad_norm": 5.349450647296863, + "learning_rate": 6.239243383697938e-06, + "loss": 1.1286, + "step": 5355 + }, + { + "epoch": 0.44, + "grad_norm": 3.115393927866742, + "learning_rate": 6.237960864694077e-06, + "loss": 0.913, + "step": 5356 + }, + { + "epoch": 0.44, + "grad_norm": 3.3695423402768703, + "learning_rate": 6.236678258914548e-06, + "loss": 1.0175, + "step": 5357 + }, + { + "epoch": 0.44, + "grad_norm": 3.415987708355679, + "learning_rate": 6.235395566449253e-06, + "loss": 0.9599, + "step": 5358 + }, + { + "epoch": 0.44, + "grad_norm": 3.2467334640139955, + "learning_rate": 6.234112787388107e-06, + "loss": 0.448, + "step": 5359 + }, + { + "epoch": 0.44, + "grad_norm": 3.8813058250264225, + "learning_rate": 6.232829921821025e-06, + "loss": 0.8177, + "step": 5360 + }, + { + "epoch": 0.44, + "grad_norm": 4.876710756602804, + "learning_rate": 6.2315469698379316e-06, + "loss": 0.6011, + "step": 5361 + }, + { + "epoch": 0.44, + "grad_norm": 2.4807173168245664, + "learning_rate": 6.230263931528755e-06, + "loss": 0.5266, + "step": 5362 + }, + { + "epoch": 0.44, + "grad_norm": 3.781292797993209, + "learning_rate": 6.2289808069834315e-06, + "loss": 0.7123, + "step": 5363 + }, + { + "epoch": 0.44, + "grad_norm": 2.1198830841349747, + "learning_rate": 6.227697596291899e-06, + "loss": 0.4698, + "step": 5364 + }, + { + "epoch": 0.44, + "grad_norm": 3.6855322557577477, + "learning_rate": 6.22641429954411e-06, + "loss": 0.73, + "step": 5365 + }, + { + "epoch": 0.44, + "grad_norm": 3.8384085488713446, + "learning_rate": 6.225130916830017e-06, + "loss": 0.8108, + "step": 5366 + }, + { + "epoch": 0.44, + "grad_norm": 2.4435267183396716, + "learning_rate": 6.223847448239577e-06, + "loss": 0.5509, + "step": 5367 + }, + { + "epoch": 0.44, + "grad_norm": 4.270433996869262, + "learning_rate": 6.222563893862758e-06, + "loss": 0.9696, + "step": 5368 + }, + { + "epoch": 0.44, + "grad_norm": 2.5873863164036046, + "learning_rate": 6.22128025378953e-06, + "loss": 0.6469, + "step": 5369 + }, + { + "epoch": 0.44, + "grad_norm": 3.631306525128808, + "learning_rate": 6.219996528109872e-06, + "loss": 0.6957, + "step": 5370 + }, + { + "epoch": 0.44, + "grad_norm": 3.8230625464072774, + "learning_rate": 6.218712716913766e-06, + "loss": 0.7993, + "step": 5371 + }, + { + "epoch": 0.44, + "grad_norm": 3.009789421544894, + "learning_rate": 6.217428820291202e-06, + "loss": 0.3, + "step": 5372 + }, + { + "epoch": 0.44, + "grad_norm": 3.322712395684982, + "learning_rate": 6.2161448383321766e-06, + "loss": 0.7037, + "step": 5373 + }, + { + "epoch": 0.44, + "grad_norm": 5.018911762920762, + "learning_rate": 6.214860771126692e-06, + "loss": 1.1273, + "step": 5374 + }, + { + "epoch": 0.44, + "grad_norm": 5.427465808351938, + "learning_rate": 6.213576618764752e-06, + "loss": 1.2406, + "step": 5375 + }, + { + "epoch": 0.44, + "grad_norm": 3.7019203466619572, + "learning_rate": 6.2122923813363746e-06, + "loss": 0.6044, + "step": 5376 + }, + { + "epoch": 0.44, + "grad_norm": 3.4004793599648444, + "learning_rate": 6.211008058931577e-06, + "loss": 0.5663, + "step": 5377 + }, + { + "epoch": 0.44, + "grad_norm": 5.592096067149256, + "learning_rate": 6.209723651640383e-06, + "loss": 1.1116, + "step": 5378 + }, + { + "epoch": 0.44, + "grad_norm": 3.6651568911112204, + "learning_rate": 6.208439159552826e-06, + "loss": 0.7627, + "step": 5379 + }, + { + "epoch": 0.44, + "grad_norm": 2.456885830028118, + "learning_rate": 6.207154582758945e-06, + "loss": 0.4029, + "step": 5380 + }, + { + "epoch": 0.44, + "grad_norm": 1.2173491774046696, + "learning_rate": 6.205869921348779e-06, + "loss": 0.1786, + "step": 5381 + }, + { + "epoch": 0.44, + "grad_norm": 3.2110055439490357, + "learning_rate": 6.204585175412381e-06, + "loss": 0.7859, + "step": 5382 + }, + { + "epoch": 0.44, + "grad_norm": 4.813704688609762, + "learning_rate": 6.203300345039804e-06, + "loss": 1.3696, + "step": 5383 + }, + { + "epoch": 0.44, + "grad_norm": 2.059360791876233, + "learning_rate": 6.202015430321111e-06, + "loss": 0.2938, + "step": 5384 + }, + { + "epoch": 0.44, + "grad_norm": 4.339930811234842, + "learning_rate": 6.200730431346366e-06, + "loss": 1.0934, + "step": 5385 + }, + { + "epoch": 0.44, + "grad_norm": 2.948427648322673, + "learning_rate": 6.1994453482056436e-06, + "loss": 0.642, + "step": 5386 + }, + { + "epoch": 0.44, + "grad_norm": 3.2495473070357863, + "learning_rate": 6.198160180989022e-06, + "loss": 0.8292, + "step": 5387 + }, + { + "epoch": 0.44, + "grad_norm": 3.579186010590315, + "learning_rate": 6.196874929786587e-06, + "loss": 0.5365, + "step": 5388 + }, + { + "epoch": 0.44, + "grad_norm": 5.12675507132254, + "learning_rate": 6.195589594688428e-06, + "loss": 0.8934, + "step": 5389 + }, + { + "epoch": 0.44, + "grad_norm": 3.3617677981629064, + "learning_rate": 6.194304175784641e-06, + "loss": 0.6774, + "step": 5390 + }, + { + "epoch": 0.44, + "grad_norm": 4.848370066636426, + "learning_rate": 6.19301867316533e-06, + "loss": 1.2827, + "step": 5391 + }, + { + "epoch": 0.44, + "grad_norm": 4.417159823089846, + "learning_rate": 6.1917330869206015e-06, + "loss": 1.1245, + "step": 5392 + }, + { + "epoch": 0.44, + "grad_norm": 3.440488032675081, + "learning_rate": 6.190447417140569e-06, + "loss": 0.7665, + "step": 5393 + }, + { + "epoch": 0.44, + "grad_norm": 3.7880391211632882, + "learning_rate": 6.189161663915355e-06, + "loss": 0.6494, + "step": 5394 + }, + { + "epoch": 0.44, + "grad_norm": 4.784454644022496, + "learning_rate": 6.187875827335082e-06, + "loss": 0.8974, + "step": 5395 + }, + { + "epoch": 0.44, + "grad_norm": 2.6564929135237456, + "learning_rate": 6.186589907489884e-06, + "loss": 0.4535, + "step": 5396 + }, + { + "epoch": 0.44, + "grad_norm": 3.6595258116723364, + "learning_rate": 6.185303904469898e-06, + "loss": 0.7704, + "step": 5397 + }, + { + "epoch": 0.44, + "grad_norm": 4.558221882463503, + "learning_rate": 6.1840178183652665e-06, + "loss": 0.9053, + "step": 5398 + }, + { + "epoch": 0.44, + "grad_norm": 4.009016678164901, + "learning_rate": 6.1827316492661395e-06, + "loss": 0.934, + "step": 5399 + }, + { + "epoch": 0.44, + "grad_norm": 2.933120148917259, + "learning_rate": 6.181445397262671e-06, + "loss": 0.5614, + "step": 5400 + }, + { + "epoch": 0.44, + "grad_norm": 4.563426634437689, + "learning_rate": 6.180159062445021e-06, + "loss": 1.2745, + "step": 5401 + }, + { + "epoch": 0.44, + "grad_norm": 3.930257212761975, + "learning_rate": 6.178872644903355e-06, + "loss": 0.8114, + "step": 5402 + }, + { + "epoch": 0.44, + "grad_norm": 3.4791722485318206, + "learning_rate": 6.177586144727851e-06, + "loss": 0.85, + "step": 5403 + }, + { + "epoch": 0.44, + "grad_norm": 4.404753109780831, + "learning_rate": 6.17629956200868e-06, + "loss": 0.9254, + "step": 5404 + }, + { + "epoch": 0.44, + "grad_norm": 3.9481351872811214, + "learning_rate": 6.17501289683603e-06, + "loss": 0.8583, + "step": 5405 + }, + { + "epoch": 0.44, + "grad_norm": 2.9426272635727204, + "learning_rate": 6.17372614930009e-06, + "loss": 0.9742, + "step": 5406 + }, + { + "epoch": 0.44, + "grad_norm": 4.7767531685817355, + "learning_rate": 6.172439319491055e-06, + "loss": 1.239, + "step": 5407 + }, + { + "epoch": 0.44, + "grad_norm": 4.140978720203091, + "learning_rate": 6.171152407499127e-06, + "loss": 1.0022, + "step": 5408 + }, + { + "epoch": 0.44, + "grad_norm": 3.5839408873445144, + "learning_rate": 6.16986541341451e-06, + "loss": 0.4733, + "step": 5409 + }, + { + "epoch": 0.44, + "grad_norm": 2.6971096050248082, + "learning_rate": 6.168578337327419e-06, + "loss": 0.6547, + "step": 5410 + }, + { + "epoch": 0.44, + "grad_norm": 3.608322605111382, + "learning_rate": 6.167291179328074e-06, + "loss": 0.6168, + "step": 5411 + }, + { + "epoch": 0.44, + "grad_norm": 3.6416010801486935, + "learning_rate": 6.166003939506696e-06, + "loss": 0.7506, + "step": 5412 + }, + { + "epoch": 0.44, + "grad_norm": 4.090256949971821, + "learning_rate": 6.164716617953515e-06, + "loss": 0.8866, + "step": 5413 + }, + { + "epoch": 0.44, + "grad_norm": 4.722911518681974, + "learning_rate": 6.163429214758772e-06, + "loss": 0.7578, + "step": 5414 + }, + { + "epoch": 0.44, + "grad_norm": 3.9354120613448753, + "learning_rate": 6.1621417300127015e-06, + "loss": 0.6877, + "step": 5415 + }, + { + "epoch": 0.44, + "grad_norm": 4.216105671503533, + "learning_rate": 6.160854163805554e-06, + "loss": 0.9651, + "step": 5416 + }, + { + "epoch": 0.44, + "grad_norm": 4.631811758347738, + "learning_rate": 6.159566516227582e-06, + "loss": 1.1827, + "step": 5417 + }, + { + "epoch": 0.44, + "grad_norm": 3.0660770200994407, + "learning_rate": 6.158278787369043e-06, + "loss": 0.7919, + "step": 5418 + }, + { + "epoch": 0.44, + "grad_norm": 4.432485108761104, + "learning_rate": 6.156990977320201e-06, + "loss": 1.1963, + "step": 5419 + }, + { + "epoch": 0.44, + "grad_norm": 3.591645022195573, + "learning_rate": 6.155703086171328e-06, + "loss": 0.9741, + "step": 5420 + }, + { + "epoch": 0.44, + "grad_norm": 3.2928527957734675, + "learning_rate": 6.154415114012697e-06, + "loss": 0.5989, + "step": 5421 + }, + { + "epoch": 0.44, + "grad_norm": 4.5588751254931115, + "learning_rate": 6.1531270609345915e-06, + "loss": 1.0163, + "step": 5422 + }, + { + "epoch": 0.44, + "grad_norm": 4.696751098953714, + "learning_rate": 6.151838927027299e-06, + "loss": 1.1034, + "step": 5423 + }, + { + "epoch": 0.44, + "grad_norm": 5.01743567800604, + "learning_rate": 6.150550712381109e-06, + "loss": 0.7967, + "step": 5424 + }, + { + "epoch": 0.44, + "grad_norm": 4.214745265293675, + "learning_rate": 6.149262417086321e-06, + "loss": 0.9011, + "step": 5425 + }, + { + "epoch": 0.44, + "grad_norm": 3.5396276536538127, + "learning_rate": 6.14797404123324e-06, + "loss": 0.9816, + "step": 5426 + }, + { + "epoch": 0.44, + "grad_norm": 2.9307304526840565, + "learning_rate": 6.146685584912174e-06, + "loss": 0.3536, + "step": 5427 + }, + { + "epoch": 0.44, + "grad_norm": 3.9738710837402182, + "learning_rate": 6.1453970482134395e-06, + "loss": 1.1024, + "step": 5428 + }, + { + "epoch": 0.44, + "grad_norm": 3.634119645787071, + "learning_rate": 6.1441084312273555e-06, + "loss": 0.6975, + "step": 5429 + }, + { + "epoch": 0.44, + "grad_norm": 3.9498673560304196, + "learning_rate": 6.142819734044251e-06, + "loss": 1.0039, + "step": 5430 + }, + { + "epoch": 0.44, + "grad_norm": 3.557682544770897, + "learning_rate": 6.141530956754457e-06, + "loss": 0.8699, + "step": 5431 + }, + { + "epoch": 0.44, + "grad_norm": 3.506189181916139, + "learning_rate": 6.1402420994483104e-06, + "loss": 0.7495, + "step": 5432 + }, + { + "epoch": 0.44, + "grad_norm": 4.374658163440085, + "learning_rate": 6.138953162216154e-06, + "loss": 0.6774, + "step": 5433 + }, + { + "epoch": 0.44, + "grad_norm": 2.5169033704099713, + "learning_rate": 6.137664145148339e-06, + "loss": 0.6085, + "step": 5434 + }, + { + "epoch": 0.44, + "grad_norm": 3.7937381584883245, + "learning_rate": 6.136375048335217e-06, + "loss": 0.6849, + "step": 5435 + }, + { + "epoch": 0.44, + "grad_norm": 4.520608587272846, + "learning_rate": 6.13508587186715e-06, + "loss": 0.9342, + "step": 5436 + }, + { + "epoch": 0.44, + "grad_norm": 4.906253332892987, + "learning_rate": 6.133796615834504e-06, + "loss": 1.0895, + "step": 5437 + }, + { + "epoch": 0.44, + "grad_norm": 5.032434857952008, + "learning_rate": 6.13250728032765e-06, + "loss": 1.4765, + "step": 5438 + }, + { + "epoch": 0.44, + "grad_norm": 4.800388318250968, + "learning_rate": 6.131217865436964e-06, + "loss": 1.2376, + "step": 5439 + }, + { + "epoch": 0.44, + "grad_norm": 4.600806621811746, + "learning_rate": 6.129928371252829e-06, + "loss": 0.8767, + "step": 5440 + }, + { + "epoch": 0.44, + "grad_norm": 5.008348225563147, + "learning_rate": 6.128638797865631e-06, + "loss": 1.0546, + "step": 5441 + }, + { + "epoch": 0.44, + "grad_norm": 4.12472089271931, + "learning_rate": 6.127349145365766e-06, + "loss": 1.1961, + "step": 5442 + }, + { + "epoch": 0.44, + "grad_norm": 4.319833628907693, + "learning_rate": 6.126059413843633e-06, + "loss": 1.1684, + "step": 5443 + }, + { + "epoch": 0.44, + "grad_norm": 3.48468306527288, + "learning_rate": 6.1247696033896345e-06, + "loss": 0.9538, + "step": 5444 + }, + { + "epoch": 0.45, + "grad_norm": 2.1059854657164463, + "learning_rate": 6.123479714094181e-06, + "loss": 0.3268, + "step": 5445 + }, + { + "epoch": 0.45, + "grad_norm": 2.9646551102691605, + "learning_rate": 6.122189746047691e-06, + "loss": 0.7663, + "step": 5446 + }, + { + "epoch": 0.45, + "grad_norm": 3.4451680586833144, + "learning_rate": 6.1208996993405835e-06, + "loss": 1.1906, + "step": 5447 + }, + { + "epoch": 0.45, + "grad_norm": 3.69245122401176, + "learning_rate": 6.119609574063285e-06, + "loss": 0.9925, + "step": 5448 + }, + { + "epoch": 0.45, + "grad_norm": 4.7749565835361505, + "learning_rate": 6.118319370306227e-06, + "loss": 1.0335, + "step": 5449 + }, + { + "epoch": 0.45, + "grad_norm": 3.74924286006014, + "learning_rate": 6.117029088159849e-06, + "loss": 0.7541, + "step": 5450 + }, + { + "epoch": 0.45, + "grad_norm": 3.710865070074655, + "learning_rate": 6.115738727714593e-06, + "loss": 0.7989, + "step": 5451 + }, + { + "epoch": 0.45, + "grad_norm": 3.197289043979481, + "learning_rate": 6.114448289060908e-06, + "loss": 0.539, + "step": 5452 + }, + { + "epoch": 0.45, + "grad_norm": 3.880621264997749, + "learning_rate": 6.113157772289246e-06, + "loss": 0.8333, + "step": 5453 + }, + { + "epoch": 0.45, + "grad_norm": 3.72748977598308, + "learning_rate": 6.111867177490072e-06, + "loss": 0.9621, + "step": 5454 + }, + { + "epoch": 0.45, + "grad_norm": 3.43236946265601, + "learning_rate": 6.1105765047538465e-06, + "loss": 0.9359, + "step": 5455 + }, + { + "epoch": 0.45, + "grad_norm": 3.8075159373402525, + "learning_rate": 6.10928575417104e-06, + "loss": 1.1357, + "step": 5456 + }, + { + "epoch": 0.45, + "grad_norm": 4.3112380522958835, + "learning_rate": 6.107994925832131e-06, + "loss": 0.6632, + "step": 5457 + }, + { + "epoch": 0.45, + "grad_norm": 5.6203833931869545, + "learning_rate": 6.106704019827599e-06, + "loss": 1.3663, + "step": 5458 + }, + { + "epoch": 0.45, + "grad_norm": 2.6398419742734265, + "learning_rate": 6.105413036247933e-06, + "loss": 0.6478, + "step": 5459 + }, + { + "epoch": 0.45, + "grad_norm": 4.119880358959511, + "learning_rate": 6.104121975183623e-06, + "loss": 0.9093, + "step": 5460 + }, + { + "epoch": 0.45, + "grad_norm": 3.932584298045523, + "learning_rate": 6.102830836725167e-06, + "loss": 1.1778, + "step": 5461 + }, + { + "epoch": 0.45, + "grad_norm": 3.117612576846759, + "learning_rate": 6.10153962096307e-06, + "loss": 0.9118, + "step": 5462 + }, + { + "epoch": 0.45, + "grad_norm": 3.6671893745898414, + "learning_rate": 6.100248327987839e-06, + "loss": 1.0006, + "step": 5463 + }, + { + "epoch": 0.45, + "grad_norm": 6.39738255545773, + "learning_rate": 6.0989569578899885e-06, + "loss": 1.1004, + "step": 5464 + }, + { + "epoch": 0.45, + "grad_norm": 2.3645479788769417, + "learning_rate": 6.097665510760037e-06, + "loss": 0.4135, + "step": 5465 + }, + { + "epoch": 0.45, + "grad_norm": 2.8817906212282347, + "learning_rate": 6.096373986688512e-06, + "loss": 0.499, + "step": 5466 + }, + { + "epoch": 0.45, + "grad_norm": 3.431268970812689, + "learning_rate": 6.09508238576594e-06, + "loss": 0.9452, + "step": 5467 + }, + { + "epoch": 0.45, + "grad_norm": 3.3163539589645685, + "learning_rate": 6.093790708082861e-06, + "loss": 0.4327, + "step": 5468 + }, + { + "epoch": 0.45, + "grad_norm": 3.8907058897093836, + "learning_rate": 6.092498953729812e-06, + "loss": 1.168, + "step": 5469 + }, + { + "epoch": 0.45, + "grad_norm": 2.771897691342272, + "learning_rate": 6.091207122797341e-06, + "loss": 0.4317, + "step": 5470 + }, + { + "epoch": 0.45, + "grad_norm": 4.400698623834575, + "learning_rate": 6.089915215376001e-06, + "loss": 0.9681, + "step": 5471 + }, + { + "epoch": 0.45, + "grad_norm": 2.2667063679311665, + "learning_rate": 6.088623231556345e-06, + "loss": 0.5894, + "step": 5472 + }, + { + "epoch": 0.45, + "grad_norm": 3.1379010219634256, + "learning_rate": 6.087331171428941e-06, + "loss": 0.581, + "step": 5473 + }, + { + "epoch": 0.45, + "grad_norm": 2.992031232407942, + "learning_rate": 6.086039035084353e-06, + "loss": 0.6718, + "step": 5474 + }, + { + "epoch": 0.45, + "grad_norm": 4.954322977592264, + "learning_rate": 6.084746822613154e-06, + "loss": 1.194, + "step": 5475 + }, + { + "epoch": 0.45, + "grad_norm": 2.707176933117051, + "learning_rate": 6.083454534105924e-06, + "loss": 0.4213, + "step": 5476 + }, + { + "epoch": 0.45, + "grad_norm": 6.327659835343956, + "learning_rate": 6.082162169653247e-06, + "loss": 1.5028, + "step": 5477 + }, + { + "epoch": 0.45, + "grad_norm": 4.502937909209937, + "learning_rate": 6.080869729345712e-06, + "loss": 0.5243, + "step": 5478 + }, + { + "epoch": 0.45, + "grad_norm": 3.2022708381766205, + "learning_rate": 6.079577213273911e-06, + "loss": 0.9597, + "step": 5479 + }, + { + "epoch": 0.45, + "grad_norm": 3.902687279888713, + "learning_rate": 6.078284621528448e-06, + "loss": 1.1178, + "step": 5480 + }, + { + "epoch": 0.45, + "grad_norm": 2.3045093198536692, + "learning_rate": 6.076991954199923e-06, + "loss": 0.3561, + "step": 5481 + }, + { + "epoch": 0.45, + "grad_norm": 2.5323094331506057, + "learning_rate": 6.0756992113789514e-06, + "loss": 0.7133, + "step": 5482 + }, + { + "epoch": 0.45, + "grad_norm": 4.904239608362607, + "learning_rate": 6.074406393156146e-06, + "loss": 1.0177, + "step": 5483 + }, + { + "epoch": 0.45, + "grad_norm": 2.155375095499139, + "learning_rate": 6.073113499622127e-06, + "loss": 0.3451, + "step": 5484 + }, + { + "epoch": 0.45, + "grad_norm": 2.6957454743955696, + "learning_rate": 6.071820530867524e-06, + "loss": 0.4902, + "step": 5485 + }, + { + "epoch": 0.45, + "grad_norm": 3.7827946076692256, + "learning_rate": 6.070527486982965e-06, + "loss": 0.782, + "step": 5486 + }, + { + "epoch": 0.45, + "grad_norm": 4.670415124750267, + "learning_rate": 6.0692343680590894e-06, + "loss": 1.0284, + "step": 5487 + }, + { + "epoch": 0.45, + "grad_norm": 2.632010818389444, + "learning_rate": 6.067941174186537e-06, + "loss": 0.72, + "step": 5488 + }, + { + "epoch": 0.45, + "grad_norm": 4.421765823790791, + "learning_rate": 6.066647905455955e-06, + "loss": 1.3704, + "step": 5489 + }, + { + "epoch": 0.45, + "grad_norm": 3.4884903471133044, + "learning_rate": 6.065354561957998e-06, + "loss": 0.9789, + "step": 5490 + }, + { + "epoch": 0.45, + "grad_norm": 4.738458251237575, + "learning_rate": 6.064061143783323e-06, + "loss": 0.7934, + "step": 5491 + }, + { + "epoch": 0.45, + "grad_norm": 4.025182361176615, + "learning_rate": 6.0627676510225915e-06, + "loss": 0.8666, + "step": 5492 + }, + { + "epoch": 0.45, + "grad_norm": 3.467433330714114, + "learning_rate": 6.061474083766475e-06, + "loss": 0.6233, + "step": 5493 + }, + { + "epoch": 0.45, + "grad_norm": 3.3534751064256008, + "learning_rate": 6.060180442105643e-06, + "loss": 0.5152, + "step": 5494 + }, + { + "epoch": 0.45, + "grad_norm": 5.071042799742737, + "learning_rate": 6.058886726130776e-06, + "loss": 1.062, + "step": 5495 + }, + { + "epoch": 0.45, + "grad_norm": 3.362853383119304, + "learning_rate": 6.057592935932557e-06, + "loss": 0.4955, + "step": 5496 + }, + { + "epoch": 0.45, + "grad_norm": 4.536563141462291, + "learning_rate": 6.056299071601678e-06, + "loss": 0.9823, + "step": 5497 + }, + { + "epoch": 0.45, + "grad_norm": 2.8697421361151076, + "learning_rate": 6.055005133228829e-06, + "loss": 0.6875, + "step": 5498 + }, + { + "epoch": 0.45, + "grad_norm": 3.109542289630285, + "learning_rate": 6.0537111209047115e-06, + "loss": 0.4757, + "step": 5499 + }, + { + "epoch": 0.45, + "grad_norm": 4.5212861195477325, + "learning_rate": 6.052417034720032e-06, + "loss": 0.8824, + "step": 5500 + }, + { + "epoch": 0.45, + "grad_norm": 4.002153356204538, + "learning_rate": 6.0511228747654985e-06, + "loss": 0.9321, + "step": 5501 + }, + { + "epoch": 0.45, + "grad_norm": 3.0857396379637962, + "learning_rate": 6.0498286411318255e-06, + "loss": 0.9694, + "step": 5502 + }, + { + "epoch": 0.45, + "grad_norm": 2.853963101535627, + "learning_rate": 6.0485343339097326e-06, + "loss": 0.8505, + "step": 5503 + }, + { + "epoch": 0.45, + "grad_norm": 3.3368021160183443, + "learning_rate": 6.047239953189947e-06, + "loss": 0.9013, + "step": 5504 + }, + { + "epoch": 0.45, + "grad_norm": 4.310716106274053, + "learning_rate": 6.045945499063197e-06, + "loss": 0.8569, + "step": 5505 + }, + { + "epoch": 0.45, + "grad_norm": 4.6831018651989345, + "learning_rate": 6.044650971620222e-06, + "loss": 1.0725, + "step": 5506 + }, + { + "epoch": 0.45, + "grad_norm": 2.9558960430298806, + "learning_rate": 6.043356370951757e-06, + "loss": 0.5028, + "step": 5507 + }, + { + "epoch": 0.45, + "grad_norm": 0.8734062377687183, + "learning_rate": 6.042061697148555e-06, + "loss": 0.1543, + "step": 5508 + }, + { + "epoch": 0.45, + "grad_norm": 4.0318544937798455, + "learning_rate": 6.040766950301361e-06, + "loss": 0.8827, + "step": 5509 + }, + { + "epoch": 0.45, + "grad_norm": 4.356019493312616, + "learning_rate": 6.039472130500933e-06, + "loss": 0.6415, + "step": 5510 + }, + { + "epoch": 0.45, + "grad_norm": 0.9896690571284249, + "learning_rate": 6.038177237838034e-06, + "loss": 0.1408, + "step": 5511 + }, + { + "epoch": 0.45, + "grad_norm": 3.614899518715243, + "learning_rate": 6.036882272403426e-06, + "loss": 0.8803, + "step": 5512 + }, + { + "epoch": 0.45, + "grad_norm": 4.004401352590362, + "learning_rate": 6.035587234287884e-06, + "loss": 0.7291, + "step": 5513 + }, + { + "epoch": 0.45, + "grad_norm": 4.7206832528416385, + "learning_rate": 6.034292123582185e-06, + "loss": 1.5733, + "step": 5514 + }, + { + "epoch": 0.45, + "grad_norm": 5.143042002035592, + "learning_rate": 6.032996940377108e-06, + "loss": 1.1401, + "step": 5515 + }, + { + "epoch": 0.45, + "grad_norm": 3.4042392691211365, + "learning_rate": 6.031701684763443e-06, + "loss": 1.0504, + "step": 5516 + }, + { + "epoch": 0.45, + "grad_norm": 3.4200691856860685, + "learning_rate": 6.030406356831979e-06, + "loss": 0.9079, + "step": 5517 + }, + { + "epoch": 0.45, + "grad_norm": 3.323923364132357, + "learning_rate": 6.029110956673513e-06, + "loss": 0.6282, + "step": 5518 + }, + { + "epoch": 0.45, + "grad_norm": 4.098465470441731, + "learning_rate": 6.027815484378848e-06, + "loss": 0.9255, + "step": 5519 + }, + { + "epoch": 0.45, + "grad_norm": 2.5134793061531457, + "learning_rate": 6.0265199400387904e-06, + "loss": 0.6663, + "step": 5520 + }, + { + "epoch": 0.45, + "grad_norm": 3.517252099688381, + "learning_rate": 6.025224323744153e-06, + "loss": 0.9122, + "step": 5521 + }, + { + "epoch": 0.45, + "grad_norm": 3.236853991100827, + "learning_rate": 6.023928635585752e-06, + "loss": 0.5387, + "step": 5522 + }, + { + "epoch": 0.45, + "grad_norm": 2.0916988080878007, + "learning_rate": 6.0226328756544105e-06, + "loss": 0.3724, + "step": 5523 + }, + { + "epoch": 0.45, + "grad_norm": 4.777582158576558, + "learning_rate": 6.021337044040954e-06, + "loss": 0.889, + "step": 5524 + }, + { + "epoch": 0.45, + "grad_norm": 2.314986998203574, + "learning_rate": 6.020041140836217e-06, + "loss": 0.3922, + "step": 5525 + }, + { + "epoch": 0.45, + "grad_norm": 4.836377061609312, + "learning_rate": 6.0187451661310345e-06, + "loss": 0.7884, + "step": 5526 + }, + { + "epoch": 0.45, + "grad_norm": 4.28583804011794, + "learning_rate": 6.017449120016249e-06, + "loss": 0.9602, + "step": 5527 + }, + { + "epoch": 0.45, + "grad_norm": 2.8415936348387962, + "learning_rate": 6.016153002582708e-06, + "loss": 0.3775, + "step": 5528 + }, + { + "epoch": 0.45, + "grad_norm": 4.3430676896119875, + "learning_rate": 6.014856813921264e-06, + "loss": 1.0213, + "step": 5529 + }, + { + "epoch": 0.45, + "grad_norm": 2.3657222141846157, + "learning_rate": 6.013560554122773e-06, + "loss": 0.3764, + "step": 5530 + }, + { + "epoch": 0.45, + "grad_norm": 2.845682187224199, + "learning_rate": 6.0122642232781e-06, + "loss": 0.472, + "step": 5531 + }, + { + "epoch": 0.45, + "grad_norm": 2.7079274159398072, + "learning_rate": 6.01096782147811e-06, + "loss": 0.469, + "step": 5532 + }, + { + "epoch": 0.45, + "grad_norm": 1.345960476458339, + "learning_rate": 6.009671348813675e-06, + "loss": 0.2141, + "step": 5533 + }, + { + "epoch": 0.45, + "grad_norm": 3.052975998276666, + "learning_rate": 6.008374805375674e-06, + "loss": 0.595, + "step": 5534 + }, + { + "epoch": 0.45, + "grad_norm": 4.3505565706139375, + "learning_rate": 6.0070781912549855e-06, + "loss": 1.0399, + "step": 5535 + }, + { + "epoch": 0.45, + "grad_norm": 2.2468596412968216, + "learning_rate": 6.005781506542498e-06, + "loss": 0.6398, + "step": 5536 + }, + { + "epoch": 0.45, + "grad_norm": 4.073450939851316, + "learning_rate": 6.004484751329107e-06, + "loss": 0.8609, + "step": 5537 + }, + { + "epoch": 0.45, + "grad_norm": 4.93717082896763, + "learning_rate": 6.003187925705704e-06, + "loss": 1.2229, + "step": 5538 + }, + { + "epoch": 0.45, + "grad_norm": 3.492738577572085, + "learning_rate": 6.001891029763194e-06, + "loss": 0.7971, + "step": 5539 + }, + { + "epoch": 0.45, + "grad_norm": 5.101721588026555, + "learning_rate": 6.000594063592484e-06, + "loss": 0.7578, + "step": 5540 + }, + { + "epoch": 0.45, + "grad_norm": 3.8164989870699646, + "learning_rate": 5.999297027284484e-06, + "loss": 0.7848, + "step": 5541 + }, + { + "epoch": 0.45, + "grad_norm": 3.4856305895260355, + "learning_rate": 5.997999920930111e-06, + "loss": 0.8738, + "step": 5542 + }, + { + "epoch": 0.45, + "grad_norm": 2.6225289921817803, + "learning_rate": 5.9967027446202885e-06, + "loss": 0.7648, + "step": 5543 + }, + { + "epoch": 0.45, + "grad_norm": 1.766273112541162, + "learning_rate": 5.995405498445939e-06, + "loss": 0.3674, + "step": 5544 + }, + { + "epoch": 0.45, + "grad_norm": 2.7646141585134343, + "learning_rate": 5.994108182497997e-06, + "loss": 0.7503, + "step": 5545 + }, + { + "epoch": 0.45, + "grad_norm": 3.8461361156409706, + "learning_rate": 5.992810796867398e-06, + "loss": 0.7506, + "step": 5546 + }, + { + "epoch": 0.45, + "grad_norm": 5.003624900489704, + "learning_rate": 5.991513341645082e-06, + "loss": 0.9627, + "step": 5547 + }, + { + "epoch": 0.45, + "grad_norm": 4.417943959323835, + "learning_rate": 5.990215816921998e-06, + "loss": 1.0619, + "step": 5548 + }, + { + "epoch": 0.45, + "grad_norm": 4.733192583725168, + "learning_rate": 5.988918222789093e-06, + "loss": 0.8055, + "step": 5549 + }, + { + "epoch": 0.45, + "grad_norm": 3.890782699748269, + "learning_rate": 5.987620559337325e-06, + "loss": 0.7575, + "step": 5550 + }, + { + "epoch": 0.45, + "grad_norm": 4.791496139999864, + "learning_rate": 5.9863228266576535e-06, + "loss": 1.1452, + "step": 5551 + }, + { + "epoch": 0.45, + "grad_norm": 1.8890858540861521, + "learning_rate": 5.985025024841043e-06, + "loss": 0.3557, + "step": 5552 + }, + { + "epoch": 0.45, + "grad_norm": 2.912391990086859, + "learning_rate": 5.983727153978467e-06, + "loss": 0.5329, + "step": 5553 + }, + { + "epoch": 0.45, + "grad_norm": 2.0390393911337132, + "learning_rate": 5.982429214160899e-06, + "loss": 0.5287, + "step": 5554 + }, + { + "epoch": 0.45, + "grad_norm": 2.299533614708435, + "learning_rate": 5.981131205479317e-06, + "loss": 0.3959, + "step": 5555 + }, + { + "epoch": 0.45, + "grad_norm": 5.61051813457963, + "learning_rate": 5.9798331280247094e-06, + "loss": 1.3886, + "step": 5556 + }, + { + "epoch": 0.45, + "grad_norm": 3.8045804554973506, + "learning_rate": 5.9785349818880626e-06, + "loss": 0.7375, + "step": 5557 + }, + { + "epoch": 0.45, + "grad_norm": 3.170660867053804, + "learning_rate": 5.9772367671603715e-06, + "loss": 0.5208, + "step": 5558 + }, + { + "epoch": 0.45, + "grad_norm": 2.786348074967104, + "learning_rate": 5.975938483932636e-06, + "loss": 0.7823, + "step": 5559 + }, + { + "epoch": 0.45, + "grad_norm": 2.6829597297233416, + "learning_rate": 5.974640132295862e-06, + "loss": 0.5209, + "step": 5560 + }, + { + "epoch": 0.45, + "grad_norm": 3.311060390431268, + "learning_rate": 5.973341712341054e-06, + "loss": 0.5302, + "step": 5561 + }, + { + "epoch": 0.45, + "grad_norm": 3.122646154918164, + "learning_rate": 5.9720432241592285e-06, + "loss": 0.835, + "step": 5562 + }, + { + "epoch": 0.45, + "grad_norm": 4.768375739659902, + "learning_rate": 5.970744667841404e-06, + "loss": 1.0994, + "step": 5563 + }, + { + "epoch": 0.45, + "grad_norm": 3.145289986517489, + "learning_rate": 5.9694460434786035e-06, + "loss": 0.7075, + "step": 5564 + }, + { + "epoch": 0.45, + "grad_norm": 3.2116496176499525, + "learning_rate": 5.968147351161854e-06, + "loss": 0.3714, + "step": 5565 + }, + { + "epoch": 0.45, + "grad_norm": 4.668740981570834, + "learning_rate": 5.9668485909821886e-06, + "loss": 0.9293, + "step": 5566 + }, + { + "epoch": 0.46, + "grad_norm": 3.489957152442339, + "learning_rate": 5.965549763030643e-06, + "loss": 0.438, + "step": 5567 + }, + { + "epoch": 0.46, + "grad_norm": 1.2246574479366155, + "learning_rate": 5.9642508673982634e-06, + "loss": 0.225, + "step": 5568 + }, + { + "epoch": 0.46, + "grad_norm": 1.3514524458334403, + "learning_rate": 5.9629519041760934e-06, + "loss": 0.1995, + "step": 5569 + }, + { + "epoch": 0.46, + "grad_norm": 3.902275863099431, + "learning_rate": 5.961652873455186e-06, + "loss": 0.7944, + "step": 5570 + }, + { + "epoch": 0.46, + "grad_norm": 3.4541717446544973, + "learning_rate": 5.9603537753265975e-06, + "loss": 0.6926, + "step": 5571 + }, + { + "epoch": 0.46, + "grad_norm": 3.036517138546423, + "learning_rate": 5.959054609881388e-06, + "loss": 0.7789, + "step": 5572 + }, + { + "epoch": 0.46, + "grad_norm": 3.843901626875687, + "learning_rate": 5.957755377210624e-06, + "loss": 0.7952, + "step": 5573 + }, + { + "epoch": 0.46, + "grad_norm": 3.240485021356367, + "learning_rate": 5.956456077405378e-06, + "loss": 0.4053, + "step": 5574 + }, + { + "epoch": 0.46, + "grad_norm": 4.836154350544739, + "learning_rate": 5.955156710556722e-06, + "loss": 1.0861, + "step": 5575 + }, + { + "epoch": 0.46, + "grad_norm": 4.504086906465851, + "learning_rate": 5.953857276755737e-06, + "loss": 1.4191, + "step": 5576 + }, + { + "epoch": 0.46, + "grad_norm": 2.627717249324198, + "learning_rate": 5.95255777609351e-06, + "loss": 0.4167, + "step": 5577 + }, + { + "epoch": 0.46, + "grad_norm": 3.0684441155294637, + "learning_rate": 5.951258208661126e-06, + "loss": 0.4953, + "step": 5578 + }, + { + "epoch": 0.46, + "grad_norm": 2.802080977568901, + "learning_rate": 5.949958574549683e-06, + "loss": 0.5834, + "step": 5579 + }, + { + "epoch": 0.46, + "grad_norm": 4.077596077455358, + "learning_rate": 5.948658873850279e-06, + "loss": 0.9969, + "step": 5580 + }, + { + "epoch": 0.46, + "grad_norm": 4.116797514430086, + "learning_rate": 5.947359106654016e-06, + "loss": 0.7344, + "step": 5581 + }, + { + "epoch": 0.46, + "grad_norm": 4.413224521799694, + "learning_rate": 5.946059273052001e-06, + "loss": 1.4048, + "step": 5582 + }, + { + "epoch": 0.46, + "grad_norm": 4.876379618779488, + "learning_rate": 5.944759373135349e-06, + "loss": 0.8151, + "step": 5583 + }, + { + "epoch": 0.46, + "grad_norm": 3.721767456197241, + "learning_rate": 5.943459406995177e-06, + "loss": 0.7432, + "step": 5584 + }, + { + "epoch": 0.46, + "grad_norm": 3.366332111754268, + "learning_rate": 5.942159374722606e-06, + "loss": 0.7185, + "step": 5585 + }, + { + "epoch": 0.46, + "grad_norm": 4.0003025244338435, + "learning_rate": 5.940859276408764e-06, + "loss": 0.8708, + "step": 5586 + }, + { + "epoch": 0.46, + "grad_norm": 3.4982341148824743, + "learning_rate": 5.939559112144781e-06, + "loss": 0.7583, + "step": 5587 + }, + { + "epoch": 0.46, + "grad_norm": 2.4006679307302226, + "learning_rate": 5.938258882021793e-06, + "loss": 0.3947, + "step": 5588 + }, + { + "epoch": 0.46, + "grad_norm": 4.548102068329933, + "learning_rate": 5.936958586130941e-06, + "loss": 0.8525, + "step": 5589 + }, + { + "epoch": 0.46, + "grad_norm": 4.541643842186359, + "learning_rate": 5.935658224563369e-06, + "loss": 1.1481, + "step": 5590 + }, + { + "epoch": 0.46, + "grad_norm": 3.6352525308965022, + "learning_rate": 5.934357797410229e-06, + "loss": 0.6617, + "step": 5591 + }, + { + "epoch": 0.46, + "grad_norm": 3.956310662055934, + "learning_rate": 5.933057304762672e-06, + "loss": 0.7801, + "step": 5592 + }, + { + "epoch": 0.46, + "grad_norm": 3.0028046951097123, + "learning_rate": 5.9317567467118585e-06, + "loss": 0.5619, + "step": 5593 + }, + { + "epoch": 0.46, + "grad_norm": 3.959436889256036, + "learning_rate": 5.930456123348953e-06, + "loss": 0.9281, + "step": 5594 + }, + { + "epoch": 0.46, + "grad_norm": 1.907500813877162, + "learning_rate": 5.929155434765122e-06, + "loss": 0.4004, + "step": 5595 + }, + { + "epoch": 0.46, + "grad_norm": 4.106542104042912, + "learning_rate": 5.927854681051539e-06, + "loss": 0.622, + "step": 5596 + }, + { + "epoch": 0.46, + "grad_norm": 1.5786093027769945, + "learning_rate": 5.926553862299382e-06, + "loss": 0.3419, + "step": 5597 + }, + { + "epoch": 0.46, + "grad_norm": 3.5457708150946003, + "learning_rate": 5.92525297859983e-06, + "loss": 0.7155, + "step": 5598 + }, + { + "epoch": 0.46, + "grad_norm": 3.684123186030232, + "learning_rate": 5.923952030044071e-06, + "loss": 0.6645, + "step": 5599 + }, + { + "epoch": 0.46, + "grad_norm": 3.085103688310422, + "learning_rate": 5.922651016723298e-06, + "loss": 0.4691, + "step": 5600 + }, + { + "epoch": 0.46, + "grad_norm": 3.366456362481405, + "learning_rate": 5.9213499387287025e-06, + "loss": 0.4627, + "step": 5601 + }, + { + "epoch": 0.46, + "grad_norm": 4.945239115251609, + "learning_rate": 5.9200487961514855e-06, + "loss": 1.3532, + "step": 5602 + }, + { + "epoch": 0.46, + "grad_norm": 4.0888492208054945, + "learning_rate": 5.918747589082853e-06, + "loss": 1.133, + "step": 5603 + }, + { + "epoch": 0.46, + "grad_norm": 3.8174920053002035, + "learning_rate": 5.917446317614012e-06, + "loss": 0.9695, + "step": 5604 + }, + { + "epoch": 0.46, + "grad_norm": 3.6787831396291586, + "learning_rate": 5.916144981836177e-06, + "loss": 0.5809, + "step": 5605 + }, + { + "epoch": 0.46, + "grad_norm": 1.2059337241792771, + "learning_rate": 5.914843581840566e-06, + "loss": 0.2091, + "step": 5606 + }, + { + "epoch": 0.46, + "grad_norm": 3.134426566217191, + "learning_rate": 5.913542117718401e-06, + "loss": 0.556, + "step": 5607 + }, + { + "epoch": 0.46, + "grad_norm": 2.488473168302814, + "learning_rate": 5.91224058956091e-06, + "loss": 0.4394, + "step": 5608 + }, + { + "epoch": 0.46, + "grad_norm": 4.674172646517885, + "learning_rate": 5.9109389974593234e-06, + "loss": 1.0567, + "step": 5609 + }, + { + "epoch": 0.46, + "grad_norm": 3.098756970166008, + "learning_rate": 5.909637341504878e-06, + "loss": 0.731, + "step": 5610 + }, + { + "epoch": 0.46, + "grad_norm": 3.020079420394541, + "learning_rate": 5.908335621788814e-06, + "loss": 0.7689, + "step": 5611 + }, + { + "epoch": 0.46, + "grad_norm": 3.9346032106283833, + "learning_rate": 5.907033838402375e-06, + "loss": 0.8292, + "step": 5612 + }, + { + "epoch": 0.46, + "grad_norm": 3.3910071745667127, + "learning_rate": 5.90573199143681e-06, + "loss": 0.8922, + "step": 5613 + }, + { + "epoch": 0.46, + "grad_norm": 2.5611325907497147, + "learning_rate": 5.904430080983378e-06, + "loss": 0.5135, + "step": 5614 + }, + { + "epoch": 0.46, + "grad_norm": 4.210320865357581, + "learning_rate": 5.9031281071333305e-06, + "loss": 0.87, + "step": 5615 + }, + { + "epoch": 0.46, + "grad_norm": 2.9804731507309135, + "learning_rate": 5.901826069977933e-06, + "loss": 0.5034, + "step": 5616 + }, + { + "epoch": 0.46, + "grad_norm": 1.7768985879771326, + "learning_rate": 5.900523969608454e-06, + "loss": 0.3578, + "step": 5617 + }, + { + "epoch": 0.46, + "grad_norm": 4.0134309205375125, + "learning_rate": 5.8992218061161645e-06, + "loss": 0.6261, + "step": 5618 + }, + { + "epoch": 0.46, + "grad_norm": 4.525221805410668, + "learning_rate": 5.897919579592337e-06, + "loss": 1.1076, + "step": 5619 + }, + { + "epoch": 0.46, + "grad_norm": 4.80410281650793, + "learning_rate": 5.896617290128258e-06, + "loss": 1.1153, + "step": 5620 + }, + { + "epoch": 0.46, + "grad_norm": 3.8945697187673267, + "learning_rate": 5.895314937815206e-06, + "loss": 0.9155, + "step": 5621 + }, + { + "epoch": 0.46, + "grad_norm": 2.4692131851393646, + "learning_rate": 5.894012522744474e-06, + "loss": 0.3466, + "step": 5622 + }, + { + "epoch": 0.46, + "grad_norm": 4.439566624699234, + "learning_rate": 5.892710045007357e-06, + "loss": 1.1799, + "step": 5623 + }, + { + "epoch": 0.46, + "grad_norm": 1.3540267704180882, + "learning_rate": 5.891407504695149e-06, + "loss": 0.2322, + "step": 5624 + }, + { + "epoch": 0.46, + "grad_norm": 3.8034982849943217, + "learning_rate": 5.8901049018991564e-06, + "loss": 0.7363, + "step": 5625 + }, + { + "epoch": 0.46, + "grad_norm": 2.844189826181136, + "learning_rate": 5.888802236710681e-06, + "loss": 0.568, + "step": 5626 + }, + { + "epoch": 0.46, + "grad_norm": 4.288461366974695, + "learning_rate": 5.88749950922104e-06, + "loss": 1.1385, + "step": 5627 + }, + { + "epoch": 0.46, + "grad_norm": 5.448119701622255, + "learning_rate": 5.886196719521544e-06, + "loss": 1.1424, + "step": 5628 + }, + { + "epoch": 0.46, + "grad_norm": 3.2093745245117726, + "learning_rate": 5.884893867703515e-06, + "loss": 0.6699, + "step": 5629 + }, + { + "epoch": 0.46, + "grad_norm": 2.6306971758348965, + "learning_rate": 5.883590953858276e-06, + "loss": 0.4229, + "step": 5630 + }, + { + "epoch": 0.46, + "grad_norm": 3.804745123512171, + "learning_rate": 5.882287978077158e-06, + "loss": 0.4848, + "step": 5631 + }, + { + "epoch": 0.46, + "grad_norm": 5.331107191514581, + "learning_rate": 5.880984940451491e-06, + "loss": 1.6346, + "step": 5632 + }, + { + "epoch": 0.46, + "grad_norm": 3.233822282708715, + "learning_rate": 5.879681841072614e-06, + "loss": 0.7985, + "step": 5633 + }, + { + "epoch": 0.46, + "grad_norm": 3.374574431792657, + "learning_rate": 5.87837868003187e-06, + "loss": 0.5632, + "step": 5634 + }, + { + "epoch": 0.46, + "grad_norm": 3.36991586919913, + "learning_rate": 5.877075457420602e-06, + "loss": 0.7271, + "step": 5635 + }, + { + "epoch": 0.46, + "grad_norm": 1.221505425581795, + "learning_rate": 5.875772173330162e-06, + "loss": 0.1695, + "step": 5636 + }, + { + "epoch": 0.46, + "grad_norm": 3.6084175650892627, + "learning_rate": 5.874468827851903e-06, + "loss": 0.5352, + "step": 5637 + }, + { + "epoch": 0.46, + "grad_norm": 3.3842514466431557, + "learning_rate": 5.873165421077186e-06, + "loss": 0.6894, + "step": 5638 + }, + { + "epoch": 0.46, + "grad_norm": 4.907343104798318, + "learning_rate": 5.871861953097372e-06, + "loss": 1.0439, + "step": 5639 + }, + { + "epoch": 0.46, + "grad_norm": 1.8433580518949468, + "learning_rate": 5.87055842400383e-06, + "loss": 0.3318, + "step": 5640 + }, + { + "epoch": 0.46, + "grad_norm": 4.193401909883628, + "learning_rate": 5.869254833887931e-06, + "loss": 0.6984, + "step": 5641 + }, + { + "epoch": 0.46, + "grad_norm": 3.215790357501764, + "learning_rate": 5.867951182841052e-06, + "loss": 0.9167, + "step": 5642 + }, + { + "epoch": 0.46, + "grad_norm": 3.2390874229381965, + "learning_rate": 5.866647470954572e-06, + "loss": 0.4804, + "step": 5643 + }, + { + "epoch": 0.46, + "grad_norm": 2.5895580562867244, + "learning_rate": 5.8653436983198755e-06, + "loss": 0.4501, + "step": 5644 + }, + { + "epoch": 0.46, + "grad_norm": 3.5130031528387895, + "learning_rate": 5.864039865028351e-06, + "loss": 0.64, + "step": 5645 + }, + { + "epoch": 0.46, + "grad_norm": 3.5725355905213174, + "learning_rate": 5.862735971171394e-06, + "loss": 0.8459, + "step": 5646 + }, + { + "epoch": 0.46, + "grad_norm": 2.600874218765597, + "learning_rate": 5.8614320168403986e-06, + "loss": 0.5834, + "step": 5647 + }, + { + "epoch": 0.46, + "grad_norm": 3.7204376203085254, + "learning_rate": 5.860128002126769e-06, + "loss": 0.8484, + "step": 5648 + }, + { + "epoch": 0.46, + "grad_norm": 2.766384604117466, + "learning_rate": 5.858823927121908e-06, + "loss": 0.5566, + "step": 5649 + }, + { + "epoch": 0.46, + "grad_norm": 6.210756108565881, + "learning_rate": 5.85751979191723e-06, + "loss": 1.1673, + "step": 5650 + }, + { + "epoch": 0.46, + "grad_norm": 4.199498809687956, + "learning_rate": 5.856215596604146e-06, + "loss": 0.7733, + "step": 5651 + }, + { + "epoch": 0.46, + "grad_norm": 3.9279068313710535, + "learning_rate": 5.854911341274074e-06, + "loss": 0.9522, + "step": 5652 + }, + { + "epoch": 0.46, + "grad_norm": 2.0719494374336302, + "learning_rate": 5.853607026018435e-06, + "loss": 0.519, + "step": 5653 + }, + { + "epoch": 0.46, + "grad_norm": 3.687955377889996, + "learning_rate": 5.852302650928663e-06, + "loss": 0.8059, + "step": 5654 + }, + { + "epoch": 0.46, + "grad_norm": 4.218504325638327, + "learning_rate": 5.850998216096181e-06, + "loss": 0.8211, + "step": 5655 + }, + { + "epoch": 0.46, + "grad_norm": 5.226698442187045, + "learning_rate": 5.849693721612428e-06, + "loss": 1.1432, + "step": 5656 + }, + { + "epoch": 0.46, + "grad_norm": 4.8147250107784245, + "learning_rate": 5.848389167568845e-06, + "loss": 1.0348, + "step": 5657 + }, + { + "epoch": 0.46, + "grad_norm": 3.9113101562177413, + "learning_rate": 5.847084554056873e-06, + "loss": 0.8063, + "step": 5658 + }, + { + "epoch": 0.46, + "grad_norm": 4.512599912384629, + "learning_rate": 5.845779881167959e-06, + "loss": 0.9087, + "step": 5659 + }, + { + "epoch": 0.46, + "grad_norm": 3.254628577072085, + "learning_rate": 5.844475148993558e-06, + "loss": 0.7667, + "step": 5660 + }, + { + "epoch": 0.46, + "grad_norm": 3.965014543590636, + "learning_rate": 5.843170357625122e-06, + "loss": 0.6832, + "step": 5661 + }, + { + "epoch": 0.46, + "grad_norm": 3.922702768592842, + "learning_rate": 5.8418655071541145e-06, + "loss": 0.5666, + "step": 5662 + }, + { + "epoch": 0.46, + "grad_norm": 3.1475215219336645, + "learning_rate": 5.840560597671999e-06, + "loss": 0.3893, + "step": 5663 + }, + { + "epoch": 0.46, + "grad_norm": 2.9612076985975904, + "learning_rate": 5.8392556292702425e-06, + "loss": 0.5127, + "step": 5664 + }, + { + "epoch": 0.46, + "grad_norm": 3.8902169840323704, + "learning_rate": 5.837950602040321e-06, + "loss": 0.6394, + "step": 5665 + }, + { + "epoch": 0.46, + "grad_norm": 5.313920807203621, + "learning_rate": 5.836645516073709e-06, + "loss": 1.2423, + "step": 5666 + }, + { + "epoch": 0.46, + "grad_norm": 4.533505413128344, + "learning_rate": 5.835340371461886e-06, + "loss": 0.7282, + "step": 5667 + }, + { + "epoch": 0.46, + "grad_norm": 3.180132481219037, + "learning_rate": 5.83403516829634e-06, + "loss": 0.5309, + "step": 5668 + }, + { + "epoch": 0.46, + "grad_norm": 3.42936288406953, + "learning_rate": 5.832729906668556e-06, + "loss": 0.7588, + "step": 5669 + }, + { + "epoch": 0.46, + "grad_norm": 5.619655411931857, + "learning_rate": 5.83142458667003e-06, + "loss": 1.2244, + "step": 5670 + }, + { + "epoch": 0.46, + "grad_norm": 3.3241952225594344, + "learning_rate": 5.83011920839226e-06, + "loss": 0.6462, + "step": 5671 + }, + { + "epoch": 0.46, + "grad_norm": 3.3524127689735126, + "learning_rate": 5.828813771926746e-06, + "loss": 0.563, + "step": 5672 + }, + { + "epoch": 0.46, + "grad_norm": 3.7958909598404076, + "learning_rate": 5.827508277364994e-06, + "loss": 0.9086, + "step": 5673 + }, + { + "epoch": 0.46, + "grad_norm": 3.6682499503574486, + "learning_rate": 5.826202724798513e-06, + "loss": 1.0416, + "step": 5674 + }, + { + "epoch": 0.46, + "grad_norm": 4.167000503301076, + "learning_rate": 5.824897114318815e-06, + "loss": 0.9161, + "step": 5675 + }, + { + "epoch": 0.46, + "grad_norm": 2.1926145434582027, + "learning_rate": 5.82359144601742e-06, + "loss": 0.4624, + "step": 5676 + }, + { + "epoch": 0.46, + "grad_norm": 4.785094822337679, + "learning_rate": 5.8222857199858495e-06, + "loss": 1.0595, + "step": 5677 + }, + { + "epoch": 0.46, + "grad_norm": 3.5222900636213748, + "learning_rate": 5.820979936315628e-06, + "loss": 0.3336, + "step": 5678 + }, + { + "epoch": 0.46, + "grad_norm": 3.0167878091794575, + "learning_rate": 5.819674095098286e-06, + "loss": 0.4347, + "step": 5679 + }, + { + "epoch": 0.46, + "grad_norm": 2.9541820770494716, + "learning_rate": 5.818368196425358e-06, + "loss": 0.5592, + "step": 5680 + }, + { + "epoch": 0.46, + "grad_norm": 5.438792495249288, + "learning_rate": 5.8170622403883815e-06, + "loss": 1.0044, + "step": 5681 + }, + { + "epoch": 0.46, + "grad_norm": 2.7398885866053737, + "learning_rate": 5.815756227078896e-06, + "loss": 0.4634, + "step": 5682 + }, + { + "epoch": 0.46, + "grad_norm": 3.352284171432852, + "learning_rate": 5.814450156588451e-06, + "loss": 0.8685, + "step": 5683 + }, + { + "epoch": 0.46, + "grad_norm": 3.1546835878369515, + "learning_rate": 5.813144029008593e-06, + "loss": 0.7393, + "step": 5684 + }, + { + "epoch": 0.46, + "grad_norm": 3.0705438955166504, + "learning_rate": 5.811837844430877e-06, + "loss": 0.6707, + "step": 5685 + }, + { + "epoch": 0.46, + "grad_norm": 5.250858309091342, + "learning_rate": 5.810531602946863e-06, + "loss": 0.9572, + "step": 5686 + }, + { + "epoch": 0.46, + "grad_norm": 2.595430117641484, + "learning_rate": 5.8092253046481095e-06, + "loss": 0.2611, + "step": 5687 + }, + { + "epoch": 0.46, + "grad_norm": 4.0958849916024, + "learning_rate": 5.807918949626184e-06, + "loss": 0.7519, + "step": 5688 + }, + { + "epoch": 0.46, + "grad_norm": 2.858938120379014, + "learning_rate": 5.806612537972658e-06, + "loss": 0.3707, + "step": 5689 + }, + { + "epoch": 0.47, + "grad_norm": 2.710846347541895, + "learning_rate": 5.805306069779102e-06, + "loss": 0.7015, + "step": 5690 + }, + { + "epoch": 0.47, + "grad_norm": 3.6940713544638384, + "learning_rate": 5.803999545137096e-06, + "loss": 0.8145, + "step": 5691 + }, + { + "epoch": 0.47, + "grad_norm": 2.9538177719718988, + "learning_rate": 5.80269296413822e-06, + "loss": 0.5931, + "step": 5692 + }, + { + "epoch": 0.47, + "grad_norm": 4.266763180046466, + "learning_rate": 5.80138632687406e-06, + "loss": 0.7787, + "step": 5693 + }, + { + "epoch": 0.47, + "grad_norm": 3.5477191117174707, + "learning_rate": 5.8000796334362074e-06, + "loss": 0.5738, + "step": 5694 + }, + { + "epoch": 0.47, + "grad_norm": 2.7226809880218266, + "learning_rate": 5.798772883916254e-06, + "loss": 0.6034, + "step": 5695 + }, + { + "epoch": 0.47, + "grad_norm": 1.7352513352651564, + "learning_rate": 5.797466078405798e-06, + "loss": 0.4274, + "step": 5696 + }, + { + "epoch": 0.47, + "grad_norm": 3.044027972306153, + "learning_rate": 5.796159216996441e-06, + "loss": 0.4223, + "step": 5697 + }, + { + "epoch": 0.47, + "grad_norm": 2.2023661997985142, + "learning_rate": 5.794852299779787e-06, + "loss": 0.3714, + "step": 5698 + }, + { + "epoch": 0.47, + "grad_norm": 4.603670980340674, + "learning_rate": 5.7935453268474454e-06, + "loss": 1.1581, + "step": 5699 + }, + { + "epoch": 0.47, + "grad_norm": 3.592738406110725, + "learning_rate": 5.792238298291031e-06, + "loss": 1.0247, + "step": 5700 + }, + { + "epoch": 0.47, + "grad_norm": 3.7736571140746995, + "learning_rate": 5.790931214202159e-06, + "loss": 0.7871, + "step": 5701 + }, + { + "epoch": 0.47, + "grad_norm": 3.3027765022500475, + "learning_rate": 5.7896240746724505e-06, + "loss": 0.7136, + "step": 5702 + }, + { + "epoch": 0.47, + "grad_norm": 3.1869716893594364, + "learning_rate": 5.788316879793533e-06, + "loss": 0.5946, + "step": 5703 + }, + { + "epoch": 0.47, + "grad_norm": 4.43303866822753, + "learning_rate": 5.787009629657032e-06, + "loss": 0.6631, + "step": 5704 + }, + { + "epoch": 0.47, + "grad_norm": 4.369661125838084, + "learning_rate": 5.78570232435458e-06, + "loss": 0.8293, + "step": 5705 + }, + { + "epoch": 0.47, + "grad_norm": 4.144263937869944, + "learning_rate": 5.784394963977815e-06, + "loss": 0.9573, + "step": 5706 + }, + { + "epoch": 0.47, + "grad_norm": 3.1146427071289295, + "learning_rate": 5.783087548618377e-06, + "loss": 0.6525, + "step": 5707 + }, + { + "epoch": 0.47, + "grad_norm": 4.547824438666656, + "learning_rate": 5.78178007836791e-06, + "loss": 0.966, + "step": 5708 + }, + { + "epoch": 0.47, + "grad_norm": 4.950261807048099, + "learning_rate": 5.7804725533180615e-06, + "loss": 1.0529, + "step": 5709 + }, + { + "epoch": 0.47, + "grad_norm": 2.9747306031892378, + "learning_rate": 5.779164973560483e-06, + "loss": 0.6283, + "step": 5710 + }, + { + "epoch": 0.47, + "grad_norm": 4.529311563326823, + "learning_rate": 5.777857339186832e-06, + "loss": 0.7368, + "step": 5711 + }, + { + "epoch": 0.47, + "grad_norm": 4.246827561234806, + "learning_rate": 5.776549650288767e-06, + "loss": 1.0219, + "step": 5712 + }, + { + "epoch": 0.47, + "grad_norm": 4.408726329418388, + "learning_rate": 5.775241906957949e-06, + "loss": 0.8025, + "step": 5713 + }, + { + "epoch": 0.47, + "grad_norm": 4.653669605183592, + "learning_rate": 5.7739341092860505e-06, + "loss": 1.1074, + "step": 5714 + }, + { + "epoch": 0.47, + "grad_norm": 3.2976226358559413, + "learning_rate": 5.772626257364736e-06, + "loss": 0.503, + "step": 5715 + }, + { + "epoch": 0.47, + "grad_norm": 3.2205688378314914, + "learning_rate": 5.771318351285684e-06, + "loss": 0.4389, + "step": 5716 + }, + { + "epoch": 0.47, + "grad_norm": 4.054750227078472, + "learning_rate": 5.7700103911405735e-06, + "loss": 0.8102, + "step": 5717 + }, + { + "epoch": 0.47, + "grad_norm": 3.245210152402887, + "learning_rate": 5.7687023770210835e-06, + "loss": 0.57, + "step": 5718 + }, + { + "epoch": 0.47, + "grad_norm": 4.907066907446562, + "learning_rate": 5.767394309018905e-06, + "loss": 0.8984, + "step": 5719 + }, + { + "epoch": 0.47, + "grad_norm": 4.286741875986198, + "learning_rate": 5.766086187225725e-06, + "loss": 0.9761, + "step": 5720 + }, + { + "epoch": 0.47, + "grad_norm": 4.224340502273579, + "learning_rate": 5.764778011733235e-06, + "loss": 1.224, + "step": 5721 + }, + { + "epoch": 0.47, + "grad_norm": 3.559377456310667, + "learning_rate": 5.763469782633136e-06, + "loss": 0.9564, + "step": 5722 + }, + { + "epoch": 0.47, + "grad_norm": 2.497098893514583, + "learning_rate": 5.762161500017128e-06, + "loss": 0.5805, + "step": 5723 + }, + { + "epoch": 0.47, + "grad_norm": 4.434473663235255, + "learning_rate": 5.760853163976915e-06, + "loss": 0.8909, + "step": 5724 + }, + { + "epoch": 0.47, + "grad_norm": 3.9448167601439676, + "learning_rate": 5.759544774604207e-06, + "loss": 0.8595, + "step": 5725 + }, + { + "epoch": 0.47, + "grad_norm": 3.743227996145874, + "learning_rate": 5.758236331990717e-06, + "loss": 0.7532, + "step": 5726 + }, + { + "epoch": 0.47, + "grad_norm": 4.024226223243954, + "learning_rate": 5.756927836228158e-06, + "loss": 0.8766, + "step": 5727 + }, + { + "epoch": 0.47, + "grad_norm": 2.5246596586966805, + "learning_rate": 5.755619287408253e-06, + "loss": 0.4241, + "step": 5728 + }, + { + "epoch": 0.47, + "grad_norm": 2.005558144090475, + "learning_rate": 5.754310685622724e-06, + "loss": 0.2407, + "step": 5729 + }, + { + "epoch": 0.47, + "grad_norm": 3.4679414027430684, + "learning_rate": 5.753002030963298e-06, + "loss": 0.7409, + "step": 5730 + }, + { + "epoch": 0.47, + "grad_norm": 3.7261723466337284, + "learning_rate": 5.751693323521709e-06, + "loss": 0.7915, + "step": 5731 + }, + { + "epoch": 0.47, + "grad_norm": 3.9821460241633777, + "learning_rate": 5.750384563389687e-06, + "loss": 0.8028, + "step": 5732 + }, + { + "epoch": 0.47, + "grad_norm": 4.857185247382448, + "learning_rate": 5.749075750658973e-06, + "loss": 0.8735, + "step": 5733 + }, + { + "epoch": 0.47, + "grad_norm": 3.846167569519588, + "learning_rate": 5.747766885421309e-06, + "loss": 0.5309, + "step": 5734 + }, + { + "epoch": 0.47, + "grad_norm": 3.7474054280595746, + "learning_rate": 5.7464579677684415e-06, + "loss": 0.8645, + "step": 5735 + }, + { + "epoch": 0.47, + "grad_norm": 4.295387737306311, + "learning_rate": 5.745148997792119e-06, + "loss": 0.8833, + "step": 5736 + }, + { + "epoch": 0.47, + "grad_norm": 3.1438553701194087, + "learning_rate": 5.743839975584096e-06, + "loss": 0.4879, + "step": 5737 + }, + { + "epoch": 0.47, + "grad_norm": 4.732617652793853, + "learning_rate": 5.7425309012361255e-06, + "loss": 1.003, + "step": 5738 + }, + { + "epoch": 0.47, + "grad_norm": 4.8092772920083044, + "learning_rate": 5.741221774839971e-06, + "loss": 0.903, + "step": 5739 + }, + { + "epoch": 0.47, + "grad_norm": 3.2358834200477284, + "learning_rate": 5.739912596487396e-06, + "loss": 0.7134, + "step": 5740 + }, + { + "epoch": 0.47, + "grad_norm": 2.775149360538276, + "learning_rate": 5.738603366270168e-06, + "loss": 0.436, + "step": 5741 + }, + { + "epoch": 0.47, + "grad_norm": 3.6307972809973466, + "learning_rate": 5.737294084280058e-06, + "loss": 0.7147, + "step": 5742 + }, + { + "epoch": 0.47, + "grad_norm": 3.3848807124944083, + "learning_rate": 5.735984750608843e-06, + "loss": 1.0273, + "step": 5743 + }, + { + "epoch": 0.47, + "grad_norm": 2.759591221597147, + "learning_rate": 5.734675365348299e-06, + "loss": 0.5455, + "step": 5744 + }, + { + "epoch": 0.47, + "grad_norm": 4.205319482526289, + "learning_rate": 5.733365928590208e-06, + "loss": 0.9674, + "step": 5745 + }, + { + "epoch": 0.47, + "grad_norm": 3.888076685253536, + "learning_rate": 5.732056440426359e-06, + "loss": 0.8197, + "step": 5746 + }, + { + "epoch": 0.47, + "grad_norm": 4.375645006216542, + "learning_rate": 5.730746900948538e-06, + "loss": 1.0402, + "step": 5747 + }, + { + "epoch": 0.47, + "grad_norm": 3.742123547621015, + "learning_rate": 5.729437310248541e-06, + "loss": 0.4881, + "step": 5748 + }, + { + "epoch": 0.47, + "grad_norm": 4.354753309388454, + "learning_rate": 5.728127668418162e-06, + "loss": 0.9882, + "step": 5749 + }, + { + "epoch": 0.47, + "grad_norm": 4.141328133970809, + "learning_rate": 5.726817975549201e-06, + "loss": 1.0981, + "step": 5750 + }, + { + "epoch": 0.47, + "grad_norm": 4.362439718903292, + "learning_rate": 5.7255082317334665e-06, + "loss": 0.895, + "step": 5751 + }, + { + "epoch": 0.47, + "grad_norm": 3.465193722877705, + "learning_rate": 5.72419843706276e-06, + "loss": 0.9494, + "step": 5752 + }, + { + "epoch": 0.47, + "grad_norm": 2.394136592870638, + "learning_rate": 5.722888591628895e-06, + "loss": 0.6267, + "step": 5753 + }, + { + "epoch": 0.47, + "grad_norm": 2.902125420421986, + "learning_rate": 5.7215786955236865e-06, + "loss": 0.6764, + "step": 5754 + }, + { + "epoch": 0.47, + "grad_norm": 7.066351910581949, + "learning_rate": 5.72026874883895e-06, + "loss": 1.0081, + "step": 5755 + }, + { + "epoch": 0.47, + "grad_norm": 4.120000950336103, + "learning_rate": 5.7189587516665105e-06, + "loss": 0.5777, + "step": 5756 + }, + { + "epoch": 0.47, + "grad_norm": 3.7120891216701657, + "learning_rate": 5.717648704098191e-06, + "loss": 0.8645, + "step": 5757 + }, + { + "epoch": 0.47, + "grad_norm": 3.5467828767445257, + "learning_rate": 5.716338606225821e-06, + "loss": 0.5317, + "step": 5758 + }, + { + "epoch": 0.47, + "grad_norm": 3.9582341832144654, + "learning_rate": 5.715028458141232e-06, + "loss": 0.948, + "step": 5759 + }, + { + "epoch": 0.47, + "grad_norm": 3.9280643667834787, + "learning_rate": 5.71371825993626e-06, + "loss": 0.7351, + "step": 5760 + }, + { + "epoch": 0.47, + "grad_norm": 4.238562911035449, + "learning_rate": 5.7124080117027435e-06, + "loss": 0.6953, + "step": 5761 + }, + { + "epoch": 0.47, + "grad_norm": 4.01743293033794, + "learning_rate": 5.711097713532525e-06, + "loss": 0.92, + "step": 5762 + }, + { + "epoch": 0.47, + "grad_norm": 1.9946077563575941, + "learning_rate": 5.709787365517453e-06, + "loss": 0.2586, + "step": 5763 + }, + { + "epoch": 0.47, + "grad_norm": 3.9150789608805256, + "learning_rate": 5.708476967749375e-06, + "loss": 0.6597, + "step": 5764 + }, + { + "epoch": 0.47, + "grad_norm": 1.8986227363890973, + "learning_rate": 5.7071665203201444e-06, + "loss": 0.4035, + "step": 5765 + }, + { + "epoch": 0.47, + "grad_norm": 5.552870877519428, + "learning_rate": 5.70585602332162e-06, + "loss": 1.4425, + "step": 5766 + }, + { + "epoch": 0.47, + "grad_norm": 3.336830414117216, + "learning_rate": 5.704545476845659e-06, + "loss": 0.8516, + "step": 5767 + }, + { + "epoch": 0.47, + "grad_norm": 2.513803551281201, + "learning_rate": 5.703234880984126e-06, + "loss": 0.454, + "step": 5768 + }, + { + "epoch": 0.47, + "grad_norm": 2.9759675928845812, + "learning_rate": 5.70192423582889e-06, + "loss": 0.5834, + "step": 5769 + }, + { + "epoch": 0.47, + "grad_norm": 3.596271191049582, + "learning_rate": 5.700613541471818e-06, + "loss": 0.5559, + "step": 5770 + }, + { + "epoch": 0.47, + "grad_norm": 4.074366001164237, + "learning_rate": 5.6993027980047866e-06, + "loss": 0.6883, + "step": 5771 + }, + { + "epoch": 0.47, + "grad_norm": 4.394754836340234, + "learning_rate": 5.6979920055196725e-06, + "loss": 1.3086, + "step": 5772 + }, + { + "epoch": 0.47, + "grad_norm": 3.611169159436164, + "learning_rate": 5.696681164108355e-06, + "loss": 0.5912, + "step": 5773 + }, + { + "epoch": 0.47, + "grad_norm": 4.738071640705117, + "learning_rate": 5.6953702738627215e-06, + "loss": 0.8901, + "step": 5774 + }, + { + "epoch": 0.47, + "grad_norm": 4.026026575450983, + "learning_rate": 5.694059334874658e-06, + "loss": 0.8682, + "step": 5775 + }, + { + "epoch": 0.47, + "grad_norm": 3.7881691134910755, + "learning_rate": 5.692748347236055e-06, + "loss": 0.5597, + "step": 5776 + }, + { + "epoch": 0.47, + "grad_norm": 4.318696413394652, + "learning_rate": 5.69143731103881e-06, + "loss": 1.2268, + "step": 5777 + }, + { + "epoch": 0.47, + "grad_norm": 4.482657348304525, + "learning_rate": 5.6901262263748155e-06, + "loss": 0.7588, + "step": 5778 + }, + { + "epoch": 0.47, + "grad_norm": 4.181824507035459, + "learning_rate": 5.6888150933359765e-06, + "loss": 1.1801, + "step": 5779 + }, + { + "epoch": 0.47, + "grad_norm": 4.050095910926002, + "learning_rate": 5.687503912014199e-06, + "loss": 0.9628, + "step": 5780 + }, + { + "epoch": 0.47, + "grad_norm": 4.63451128404456, + "learning_rate": 5.686192682501388e-06, + "loss": 1.1779, + "step": 5781 + }, + { + "epoch": 0.47, + "grad_norm": 2.2602723144577888, + "learning_rate": 5.684881404889456e-06, + "loss": 0.4366, + "step": 5782 + }, + { + "epoch": 0.47, + "grad_norm": 4.9475423541631125, + "learning_rate": 5.68357007927032e-06, + "loss": 0.968, + "step": 5783 + }, + { + "epoch": 0.47, + "grad_norm": 2.903104647376368, + "learning_rate": 5.682258705735895e-06, + "loss": 0.5192, + "step": 5784 + }, + { + "epoch": 0.47, + "grad_norm": 4.125000797006016, + "learning_rate": 5.680947284378102e-06, + "loss": 1.1176, + "step": 5785 + }, + { + "epoch": 0.47, + "grad_norm": 3.9315500926892883, + "learning_rate": 5.679635815288871e-06, + "loss": 0.7608, + "step": 5786 + }, + { + "epoch": 0.47, + "grad_norm": 4.367998612990884, + "learning_rate": 5.678324298560125e-06, + "loss": 0.9233, + "step": 5787 + }, + { + "epoch": 0.47, + "grad_norm": 3.4874247781562575, + "learning_rate": 5.677012734283799e-06, + "loss": 0.6668, + "step": 5788 + }, + { + "epoch": 0.47, + "grad_norm": 2.371404386512218, + "learning_rate": 5.675701122551827e-06, + "loss": 0.4671, + "step": 5789 + }, + { + "epoch": 0.47, + "grad_norm": 3.789388752179784, + "learning_rate": 5.674389463456146e-06, + "loss": 0.5927, + "step": 5790 + }, + { + "epoch": 0.47, + "grad_norm": 3.8202185280672785, + "learning_rate": 5.6730777570887e-06, + "loss": 0.6714, + "step": 5791 + }, + { + "epoch": 0.47, + "grad_norm": 3.733175702563129, + "learning_rate": 5.67176600354143e-06, + "loss": 0.8003, + "step": 5792 + }, + { + "epoch": 0.47, + "grad_norm": 4.072108180130317, + "learning_rate": 5.670454202906288e-06, + "loss": 0.7722, + "step": 5793 + }, + { + "epoch": 0.47, + "grad_norm": 3.9407350730100346, + "learning_rate": 5.669142355275225e-06, + "loss": 0.5922, + "step": 5794 + }, + { + "epoch": 0.47, + "grad_norm": 3.7537621502854615, + "learning_rate": 5.6678304607401934e-06, + "loss": 0.7926, + "step": 5795 + }, + { + "epoch": 0.47, + "grad_norm": 3.5741520687458026, + "learning_rate": 5.6665185193931535e-06, + "loss": 0.6796, + "step": 5796 + }, + { + "epoch": 0.47, + "grad_norm": 5.1724419860360955, + "learning_rate": 5.6652065313260675e-06, + "loss": 0.8818, + "step": 5797 + }, + { + "epoch": 0.47, + "grad_norm": 5.183680157685205, + "learning_rate": 5.663894496630898e-06, + "loss": 1.269, + "step": 5798 + }, + { + "epoch": 0.47, + "grad_norm": 3.298214517678515, + "learning_rate": 5.662582415399612e-06, + "loss": 0.6199, + "step": 5799 + }, + { + "epoch": 0.47, + "grad_norm": 3.2366070249034333, + "learning_rate": 5.661270287724184e-06, + "loss": 0.5728, + "step": 5800 + }, + { + "epoch": 0.47, + "grad_norm": 3.9758275887289702, + "learning_rate": 5.6599581136965855e-06, + "loss": 0.4086, + "step": 5801 + }, + { + "epoch": 0.47, + "grad_norm": 3.251838524436833, + "learning_rate": 5.658645893408795e-06, + "loss": 0.6051, + "step": 5802 + }, + { + "epoch": 0.47, + "grad_norm": 3.1888373776623515, + "learning_rate": 5.657333626952796e-06, + "loss": 0.893, + "step": 5803 + }, + { + "epoch": 0.47, + "grad_norm": 3.505630791060191, + "learning_rate": 5.656021314420568e-06, + "loss": 0.8322, + "step": 5804 + }, + { + "epoch": 0.47, + "grad_norm": 3.5915373058970728, + "learning_rate": 5.6547089559041025e-06, + "loss": 0.6709, + "step": 5805 + }, + { + "epoch": 0.47, + "grad_norm": 2.9530410208834805, + "learning_rate": 5.65339655149539e-06, + "loss": 0.6155, + "step": 5806 + }, + { + "epoch": 0.47, + "grad_norm": 3.0412472608721006, + "learning_rate": 5.652084101286419e-06, + "loss": 0.553, + "step": 5807 + }, + { + "epoch": 0.47, + "grad_norm": 1.7818611388354708, + "learning_rate": 5.6507716053691916e-06, + "loss": 0.2103, + "step": 5808 + }, + { + "epoch": 0.47, + "grad_norm": 1.8957158313893403, + "learning_rate": 5.649459063835708e-06, + "loss": 0.3198, + "step": 5809 + }, + { + "epoch": 0.47, + "grad_norm": 4.033966939716265, + "learning_rate": 5.648146476777969e-06, + "loss": 0.5737, + "step": 5810 + }, + { + "epoch": 0.47, + "grad_norm": 4.0922371158650925, + "learning_rate": 5.646833844287985e-06, + "loss": 1.1988, + "step": 5811 + }, + { + "epoch": 0.48, + "grad_norm": 4.005342843854818, + "learning_rate": 5.6455211664577615e-06, + "loss": 0.8087, + "step": 5812 + }, + { + "epoch": 0.48, + "grad_norm": 2.9302402031710386, + "learning_rate": 5.644208443379315e-06, + "loss": 0.5335, + "step": 5813 + }, + { + "epoch": 0.48, + "grad_norm": 5.093569074041109, + "learning_rate": 5.642895675144659e-06, + "loss": 0.9548, + "step": 5814 + }, + { + "epoch": 0.48, + "grad_norm": 5.564522932067168, + "learning_rate": 5.641582861845815e-06, + "loss": 1.2332, + "step": 5815 + }, + { + "epoch": 0.48, + "grad_norm": 2.296403499703234, + "learning_rate": 5.640270003574804e-06, + "loss": 0.3742, + "step": 5816 + }, + { + "epoch": 0.48, + "grad_norm": 2.5994409059679557, + "learning_rate": 5.638957100423652e-06, + "loss": 0.3849, + "step": 5817 + }, + { + "epoch": 0.48, + "grad_norm": 4.550878428964589, + "learning_rate": 5.637644152484389e-06, + "loss": 1.1475, + "step": 5818 + }, + { + "epoch": 0.48, + "grad_norm": 4.33694740793171, + "learning_rate": 5.6363311598490444e-06, + "loss": 0.8275, + "step": 5819 + }, + { + "epoch": 0.48, + "grad_norm": 3.4951912625209527, + "learning_rate": 5.635018122609656e-06, + "loss": 0.5433, + "step": 5820 + }, + { + "epoch": 0.48, + "grad_norm": 2.888249678559443, + "learning_rate": 5.633705040858262e-06, + "loss": 0.4817, + "step": 5821 + }, + { + "epoch": 0.48, + "grad_norm": 1.9934104155114218, + "learning_rate": 5.6323919146869e-06, + "loss": 0.4254, + "step": 5822 + }, + { + "epoch": 0.48, + "grad_norm": 2.9285482506530185, + "learning_rate": 5.631078744187618e-06, + "loss": 0.6149, + "step": 5823 + }, + { + "epoch": 0.48, + "grad_norm": 4.701760545520466, + "learning_rate": 5.629765529452463e-06, + "loss": 1.0096, + "step": 5824 + }, + { + "epoch": 0.48, + "grad_norm": 3.097522815975042, + "learning_rate": 5.628452270573483e-06, + "loss": 0.8303, + "step": 5825 + }, + { + "epoch": 0.48, + "grad_norm": 3.130182625184013, + "learning_rate": 5.6271389676427365e-06, + "loss": 0.6292, + "step": 5826 + }, + { + "epoch": 0.48, + "grad_norm": 3.911329866629304, + "learning_rate": 5.625825620752277e-06, + "loss": 0.9716, + "step": 5827 + }, + { + "epoch": 0.48, + "grad_norm": 3.1791773291730485, + "learning_rate": 5.624512229994165e-06, + "loss": 0.5456, + "step": 5828 + }, + { + "epoch": 0.48, + "grad_norm": 4.022555284755302, + "learning_rate": 5.623198795460463e-06, + "loss": 0.7795, + "step": 5829 + }, + { + "epoch": 0.48, + "grad_norm": 4.251633314084586, + "learning_rate": 5.621885317243238e-06, + "loss": 0.9052, + "step": 5830 + }, + { + "epoch": 0.48, + "grad_norm": 3.1305021462405858, + "learning_rate": 5.620571795434559e-06, + "loss": 0.8813, + "step": 5831 + }, + { + "epoch": 0.48, + "grad_norm": 4.017284324772185, + "learning_rate": 5.619258230126497e-06, + "loss": 0.7426, + "step": 5832 + }, + { + "epoch": 0.48, + "grad_norm": 3.6937448691485972, + "learning_rate": 5.617944621411128e-06, + "loss": 1.0898, + "step": 5833 + }, + { + "epoch": 0.48, + "grad_norm": 4.990890063867452, + "learning_rate": 5.616630969380532e-06, + "loss": 1.0355, + "step": 5834 + }, + { + "epoch": 0.48, + "grad_norm": 3.1770042370908653, + "learning_rate": 5.615317274126787e-06, + "loss": 0.4759, + "step": 5835 + }, + { + "epoch": 0.48, + "grad_norm": 3.3327796202482585, + "learning_rate": 5.614003535741979e-06, + "loss": 0.4879, + "step": 5836 + }, + { + "epoch": 0.48, + "grad_norm": 3.616353182769598, + "learning_rate": 5.612689754318196e-06, + "loss": 0.899, + "step": 5837 + }, + { + "epoch": 0.48, + "grad_norm": 3.9915988752037297, + "learning_rate": 5.611375929947528e-06, + "loss": 0.5136, + "step": 5838 + }, + { + "epoch": 0.48, + "grad_norm": 4.015772413753597, + "learning_rate": 5.610062062722067e-06, + "loss": 0.9687, + "step": 5839 + }, + { + "epoch": 0.48, + "grad_norm": 3.8811532612639135, + "learning_rate": 5.608748152733911e-06, + "loss": 0.8158, + "step": 5840 + }, + { + "epoch": 0.48, + "grad_norm": 2.7737396464658843, + "learning_rate": 5.607434200075159e-06, + "loss": 0.5984, + "step": 5841 + }, + { + "epoch": 0.48, + "grad_norm": 2.641508917195879, + "learning_rate": 5.6061202048379125e-06, + "loss": 0.6941, + "step": 5842 + }, + { + "epoch": 0.48, + "grad_norm": 2.8913397236093714, + "learning_rate": 5.6048061671142784e-06, + "loss": 0.5939, + "step": 5843 + }, + { + "epoch": 0.48, + "grad_norm": 3.8629156087235854, + "learning_rate": 5.603492086996362e-06, + "loss": 0.7298, + "step": 5844 + }, + { + "epoch": 0.48, + "grad_norm": 2.405977959179416, + "learning_rate": 5.602177964576279e-06, + "loss": 0.5936, + "step": 5845 + }, + { + "epoch": 0.48, + "grad_norm": 3.454230608280289, + "learning_rate": 5.600863799946142e-06, + "loss": 0.7115, + "step": 5846 + }, + { + "epoch": 0.48, + "grad_norm": 3.032858118379392, + "learning_rate": 5.599549593198066e-06, + "loss": 0.7614, + "step": 5847 + }, + { + "epoch": 0.48, + "grad_norm": 1.8293281378783064, + "learning_rate": 5.598235344424172e-06, + "loss": 0.3763, + "step": 5848 + }, + { + "epoch": 0.48, + "grad_norm": 3.9752145053909422, + "learning_rate": 5.596921053716585e-06, + "loss": 0.5239, + "step": 5849 + }, + { + "epoch": 0.48, + "grad_norm": 3.8362708691057366, + "learning_rate": 5.59560672116743e-06, + "loss": 0.8695, + "step": 5850 + }, + { + "epoch": 0.48, + "grad_norm": 2.093146394011919, + "learning_rate": 5.594292346868836e-06, + "loss": 0.3435, + "step": 5851 + }, + { + "epoch": 0.48, + "grad_norm": 4.654731671404846, + "learning_rate": 5.592977930912934e-06, + "loss": 0.9871, + "step": 5852 + }, + { + "epoch": 0.48, + "grad_norm": 2.566857605599453, + "learning_rate": 5.5916634733918604e-06, + "loss": 0.3263, + "step": 5853 + }, + { + "epoch": 0.48, + "grad_norm": 4.165932241309278, + "learning_rate": 5.590348974397754e-06, + "loss": 0.7467, + "step": 5854 + }, + { + "epoch": 0.48, + "grad_norm": 4.921543603924678, + "learning_rate": 5.589034434022751e-06, + "loss": 0.878, + "step": 5855 + }, + { + "epoch": 0.48, + "grad_norm": 3.4275927453103914, + "learning_rate": 5.587719852358998e-06, + "loss": 0.7094, + "step": 5856 + }, + { + "epoch": 0.48, + "grad_norm": 3.42932349890855, + "learning_rate": 5.586405229498641e-06, + "loss": 0.717, + "step": 5857 + }, + { + "epoch": 0.48, + "grad_norm": 2.4348472387267925, + "learning_rate": 5.58509056553383e-06, + "loss": 0.3761, + "step": 5858 + }, + { + "epoch": 0.48, + "grad_norm": 5.504870214130669, + "learning_rate": 5.583775860556717e-06, + "loss": 1.402, + "step": 5859 + }, + { + "epoch": 0.48, + "grad_norm": 2.994133857225235, + "learning_rate": 5.582461114659456e-06, + "loss": 0.6329, + "step": 5860 + }, + { + "epoch": 0.48, + "grad_norm": 2.549347230680468, + "learning_rate": 5.581146327934207e-06, + "loss": 0.5622, + "step": 5861 + }, + { + "epoch": 0.48, + "grad_norm": 1.511334130940213, + "learning_rate": 5.579831500473129e-06, + "loss": 0.3354, + "step": 5862 + }, + { + "epoch": 0.48, + "grad_norm": 3.567918857164272, + "learning_rate": 5.578516632368387e-06, + "loss": 0.7942, + "step": 5863 + }, + { + "epoch": 0.48, + "grad_norm": 4.184583826317246, + "learning_rate": 5.577201723712145e-06, + "loss": 0.9476, + "step": 5864 + }, + { + "epoch": 0.48, + "grad_norm": 3.20165301221488, + "learning_rate": 5.575886774596574e-06, + "loss": 0.5796, + "step": 5865 + }, + { + "epoch": 0.48, + "grad_norm": 3.294679591078616, + "learning_rate": 5.574571785113848e-06, + "loss": 0.7292, + "step": 5866 + }, + { + "epoch": 0.48, + "grad_norm": 3.3593511678914845, + "learning_rate": 5.57325675535614e-06, + "loss": 0.7505, + "step": 5867 + }, + { + "epoch": 0.48, + "grad_norm": 3.96636084138706, + "learning_rate": 5.571941685415628e-06, + "loss": 0.9361, + "step": 5868 + }, + { + "epoch": 0.48, + "grad_norm": 4.014532399988285, + "learning_rate": 5.570626575384494e-06, + "loss": 1.1275, + "step": 5869 + }, + { + "epoch": 0.48, + "grad_norm": 3.261551974745053, + "learning_rate": 5.569311425354918e-06, + "loss": 0.4729, + "step": 5870 + }, + { + "epoch": 0.48, + "grad_norm": 2.3052957847977624, + "learning_rate": 5.567996235419092e-06, + "loss": 0.428, + "step": 5871 + }, + { + "epoch": 0.48, + "grad_norm": 2.925265009700976, + "learning_rate": 5.566681005669199e-06, + "loss": 0.7657, + "step": 5872 + }, + { + "epoch": 0.48, + "grad_norm": 4.014876049573892, + "learning_rate": 5.565365736197434e-06, + "loss": 0.904, + "step": 5873 + }, + { + "epoch": 0.48, + "grad_norm": 4.136471844997838, + "learning_rate": 5.564050427095993e-06, + "loss": 0.6476, + "step": 5874 + }, + { + "epoch": 0.48, + "grad_norm": 3.9040217539038466, + "learning_rate": 5.56273507845707e-06, + "loss": 1.1665, + "step": 5875 + }, + { + "epoch": 0.48, + "grad_norm": 2.9449207137628903, + "learning_rate": 5.561419690372869e-06, + "loss": 0.5986, + "step": 5876 + }, + { + "epoch": 0.48, + "grad_norm": 2.207911771550932, + "learning_rate": 5.56010426293559e-06, + "loss": 0.3904, + "step": 5877 + }, + { + "epoch": 0.48, + "grad_norm": 4.728565173368823, + "learning_rate": 5.55878879623744e-06, + "loss": 1.4535, + "step": 5878 + }, + { + "epoch": 0.48, + "grad_norm": 2.8645212034986347, + "learning_rate": 5.557473290370626e-06, + "loss": 0.552, + "step": 5879 + }, + { + "epoch": 0.48, + "grad_norm": 6.05123044463225, + "learning_rate": 5.556157745427362e-06, + "loss": 1.2705, + "step": 5880 + }, + { + "epoch": 0.48, + "grad_norm": 2.530542306620343, + "learning_rate": 5.554842161499859e-06, + "loss": 0.3485, + "step": 5881 + }, + { + "epoch": 0.48, + "grad_norm": 2.2538968371779498, + "learning_rate": 5.553526538680336e-06, + "loss": 0.4511, + "step": 5882 + }, + { + "epoch": 0.48, + "grad_norm": 4.075364276320732, + "learning_rate": 5.552210877061013e-06, + "loss": 0.8227, + "step": 5883 + }, + { + "epoch": 0.48, + "grad_norm": 3.914795662493378, + "learning_rate": 5.550895176734109e-06, + "loss": 0.7729, + "step": 5884 + }, + { + "epoch": 0.48, + "grad_norm": 5.610903928405641, + "learning_rate": 5.549579437791851e-06, + "loss": 0.959, + "step": 5885 + }, + { + "epoch": 0.48, + "grad_norm": 1.9201182277283213, + "learning_rate": 5.548263660326466e-06, + "loss": 0.3125, + "step": 5886 + }, + { + "epoch": 0.48, + "grad_norm": 3.6869314822047303, + "learning_rate": 5.546947844430185e-06, + "loss": 0.9981, + "step": 5887 + }, + { + "epoch": 0.48, + "grad_norm": 4.61344356519624, + "learning_rate": 5.5456319901952395e-06, + "loss": 1.2744, + "step": 5888 + }, + { + "epoch": 0.48, + "grad_norm": 4.566978454501438, + "learning_rate": 5.5443160977138665e-06, + "loss": 1.2574, + "step": 5889 + }, + { + "epoch": 0.48, + "grad_norm": 5.499805201646772, + "learning_rate": 5.543000167078304e-06, + "loss": 1.0968, + "step": 5890 + }, + { + "epoch": 0.48, + "grad_norm": 4.3004879299099406, + "learning_rate": 5.541684198380793e-06, + "loss": 0.7849, + "step": 5891 + }, + { + "epoch": 0.48, + "grad_norm": 2.486444310642127, + "learning_rate": 5.5403681917135785e-06, + "loss": 0.2836, + "step": 5892 + }, + { + "epoch": 0.48, + "grad_norm": 3.346837915740214, + "learning_rate": 5.539052147168903e-06, + "loss": 0.4871, + "step": 5893 + }, + { + "epoch": 0.48, + "grad_norm": 4.654557335604403, + "learning_rate": 5.53773606483902e-06, + "loss": 0.5943, + "step": 5894 + }, + { + "epoch": 0.48, + "grad_norm": 5.12528844521019, + "learning_rate": 5.536419944816177e-06, + "loss": 0.8638, + "step": 5895 + }, + { + "epoch": 0.48, + "grad_norm": 4.340640069400724, + "learning_rate": 5.535103787192631e-06, + "loss": 0.8913, + "step": 5896 + }, + { + "epoch": 0.48, + "grad_norm": 3.315891110519818, + "learning_rate": 5.53378759206064e-06, + "loss": 0.6577, + "step": 5897 + }, + { + "epoch": 0.48, + "grad_norm": 4.025794917047234, + "learning_rate": 5.53247135951246e-06, + "loss": 0.851, + "step": 5898 + }, + { + "epoch": 0.48, + "grad_norm": 3.9692707729717536, + "learning_rate": 5.531155089640357e-06, + "loss": 0.8836, + "step": 5899 + }, + { + "epoch": 0.48, + "grad_norm": 3.5622011013882013, + "learning_rate": 5.529838782536591e-06, + "loss": 0.5705, + "step": 5900 + }, + { + "epoch": 0.48, + "grad_norm": 4.680175791348038, + "learning_rate": 5.528522438293434e-06, + "loss": 0.7379, + "step": 5901 + }, + { + "epoch": 0.48, + "grad_norm": 3.8857911704118298, + "learning_rate": 5.527206057003154e-06, + "loss": 0.9183, + "step": 5902 + }, + { + "epoch": 0.48, + "grad_norm": 1.635674392930614, + "learning_rate": 5.525889638758024e-06, + "loss": 0.3244, + "step": 5903 + }, + { + "epoch": 0.48, + "grad_norm": 5.16657170536579, + "learning_rate": 5.524573183650318e-06, + "loss": 1.1687, + "step": 5904 + }, + { + "epoch": 0.48, + "grad_norm": 3.6165556998196884, + "learning_rate": 5.523256691772315e-06, + "loss": 0.9416, + "step": 5905 + }, + { + "epoch": 0.48, + "grad_norm": 3.649624359146059, + "learning_rate": 5.521940163216296e-06, + "loss": 0.7414, + "step": 5906 + }, + { + "epoch": 0.48, + "grad_norm": 3.278093059658078, + "learning_rate": 5.5206235980745435e-06, + "loss": 0.8448, + "step": 5907 + }, + { + "epoch": 0.48, + "grad_norm": 2.904017485476948, + "learning_rate": 5.519306996439342e-06, + "loss": 0.5803, + "step": 5908 + }, + { + "epoch": 0.48, + "grad_norm": 2.9049980254893626, + "learning_rate": 5.5179903584029805e-06, + "loss": 0.4991, + "step": 5909 + }, + { + "epoch": 0.48, + "grad_norm": 4.362250538158217, + "learning_rate": 5.516673684057747e-06, + "loss": 0.8793, + "step": 5910 + }, + { + "epoch": 0.48, + "grad_norm": 3.8059950212682, + "learning_rate": 5.515356973495939e-06, + "loss": 0.9024, + "step": 5911 + }, + { + "epoch": 0.48, + "grad_norm": 3.2889491763754126, + "learning_rate": 5.514040226809849e-06, + "loss": 0.8642, + "step": 5912 + }, + { + "epoch": 0.48, + "grad_norm": 4.014061500307177, + "learning_rate": 5.512723444091776e-06, + "loss": 0.8543, + "step": 5913 + }, + { + "epoch": 0.48, + "grad_norm": 2.317930909121491, + "learning_rate": 5.5114066254340215e-06, + "loss": 0.3756, + "step": 5914 + }, + { + "epoch": 0.48, + "grad_norm": 4.296118836384158, + "learning_rate": 5.510089770928889e-06, + "loss": 0.7751, + "step": 5915 + }, + { + "epoch": 0.48, + "grad_norm": 4.17047831966923, + "learning_rate": 5.508772880668682e-06, + "loss": 0.9673, + "step": 5916 + }, + { + "epoch": 0.48, + "grad_norm": 3.36608965530601, + "learning_rate": 5.507455954745712e-06, + "loss": 0.7936, + "step": 5917 + }, + { + "epoch": 0.48, + "grad_norm": 3.3478663979393133, + "learning_rate": 5.506138993252285e-06, + "loss": 0.7502, + "step": 5918 + }, + { + "epoch": 0.48, + "grad_norm": 2.997363442657447, + "learning_rate": 5.504821996280719e-06, + "loss": 0.8172, + "step": 5919 + }, + { + "epoch": 0.48, + "grad_norm": 4.639148310727434, + "learning_rate": 5.50350496392333e-06, + "loss": 0.7692, + "step": 5920 + }, + { + "epoch": 0.48, + "grad_norm": 4.705950501431014, + "learning_rate": 5.502187896272432e-06, + "loss": 0.8484, + "step": 5921 + }, + { + "epoch": 0.48, + "grad_norm": 5.888276588328582, + "learning_rate": 5.500870793420349e-06, + "loss": 1.4463, + "step": 5922 + }, + { + "epoch": 0.48, + "grad_norm": 6.138729432535756, + "learning_rate": 5.4995536554594035e-06, + "loss": 1.5104, + "step": 5923 + }, + { + "epoch": 0.48, + "grad_norm": 3.424694881068357, + "learning_rate": 5.498236482481919e-06, + "loss": 0.6552, + "step": 5924 + }, + { + "epoch": 0.48, + "grad_norm": 2.7475488823187413, + "learning_rate": 5.496919274580226e-06, + "loss": 0.449, + "step": 5925 + }, + { + "epoch": 0.48, + "grad_norm": 3.8115374734444227, + "learning_rate": 5.495602031846655e-06, + "loss": 0.8435, + "step": 5926 + }, + { + "epoch": 0.48, + "grad_norm": 3.8308256770443916, + "learning_rate": 5.494284754373538e-06, + "loss": 0.56, + "step": 5927 + }, + { + "epoch": 0.48, + "grad_norm": 4.414466118736038, + "learning_rate": 5.492967442253211e-06, + "loss": 0.9561, + "step": 5928 + }, + { + "epoch": 0.48, + "grad_norm": 3.5556606784233122, + "learning_rate": 5.491650095578013e-06, + "loss": 0.5788, + "step": 5929 + }, + { + "epoch": 0.48, + "grad_norm": 5.00760104064909, + "learning_rate": 5.4903327144402814e-06, + "loss": 0.9597, + "step": 5930 + }, + { + "epoch": 0.48, + "grad_norm": 3.699846439981498, + "learning_rate": 5.489015298932362e-06, + "loss": 0.8504, + "step": 5931 + }, + { + "epoch": 0.48, + "grad_norm": 2.5765579870648723, + "learning_rate": 5.487697849146596e-06, + "loss": 0.7532, + "step": 5932 + }, + { + "epoch": 0.48, + "grad_norm": 4.194937056636086, + "learning_rate": 5.4863803651753345e-06, + "loss": 0.5019, + "step": 5933 + }, + { + "epoch": 0.49, + "grad_norm": 2.387994818204489, + "learning_rate": 5.485062847110927e-06, + "loss": 0.3907, + "step": 5934 + }, + { + "epoch": 0.49, + "grad_norm": 3.7829552978663625, + "learning_rate": 5.483745295045724e-06, + "loss": 0.8088, + "step": 5935 + }, + { + "epoch": 0.49, + "grad_norm": 2.194828419721978, + "learning_rate": 5.48242770907208e-06, + "loss": 0.3704, + "step": 5936 + }, + { + "epoch": 0.49, + "grad_norm": 4.16157216863073, + "learning_rate": 5.481110089282355e-06, + "loss": 1.063, + "step": 5937 + }, + { + "epoch": 0.49, + "grad_norm": 5.462414446566765, + "learning_rate": 5.4797924357689045e-06, + "loss": 0.9265, + "step": 5938 + }, + { + "epoch": 0.49, + "grad_norm": 3.1440044922376944, + "learning_rate": 5.478474748624095e-06, + "loss": 0.6075, + "step": 5939 + }, + { + "epoch": 0.49, + "grad_norm": 2.715496593630727, + "learning_rate": 5.477157027940286e-06, + "loss": 0.5834, + "step": 5940 + }, + { + "epoch": 0.49, + "grad_norm": 4.899814377063645, + "learning_rate": 5.475839273809846e-06, + "loss": 1.2268, + "step": 5941 + }, + { + "epoch": 0.49, + "grad_norm": 3.6442247505119956, + "learning_rate": 5.474521486325145e-06, + "loss": 0.5574, + "step": 5942 + }, + { + "epoch": 0.49, + "grad_norm": 2.2709048047438967, + "learning_rate": 5.473203665578553e-06, + "loss": 0.411, + "step": 5943 + }, + { + "epoch": 0.49, + "grad_norm": 3.2479962450882995, + "learning_rate": 5.471885811662442e-06, + "loss": 0.5546, + "step": 5944 + }, + { + "epoch": 0.49, + "grad_norm": 3.4924594522157797, + "learning_rate": 5.470567924669189e-06, + "loss": 0.9061, + "step": 5945 + }, + { + "epoch": 0.49, + "grad_norm": 4.707626979273108, + "learning_rate": 5.469250004691174e-06, + "loss": 1.1444, + "step": 5946 + }, + { + "epoch": 0.49, + "grad_norm": 3.7467684133691357, + "learning_rate": 5.467932051820776e-06, + "loss": 0.8675, + "step": 5947 + }, + { + "epoch": 0.49, + "grad_norm": 4.129339723411537, + "learning_rate": 5.466614066150375e-06, + "loss": 1.0813, + "step": 5948 + }, + { + "epoch": 0.49, + "grad_norm": 3.2643072965740694, + "learning_rate": 5.465296047772362e-06, + "loss": 0.6233, + "step": 5949 + }, + { + "epoch": 0.49, + "grad_norm": 3.1355986530047826, + "learning_rate": 5.463977996779119e-06, + "loss": 0.8856, + "step": 5950 + }, + { + "epoch": 0.49, + "grad_norm": 4.443501834756944, + "learning_rate": 5.4626599132630384e-06, + "loss": 1.0806, + "step": 5951 + }, + { + "epoch": 0.49, + "grad_norm": 3.627958424176721, + "learning_rate": 5.46134179731651e-06, + "loss": 0.659, + "step": 5952 + }, + { + "epoch": 0.49, + "grad_norm": 3.309435183852325, + "learning_rate": 5.4600236490319305e-06, + "loss": 0.7406, + "step": 5953 + }, + { + "epoch": 0.49, + "grad_norm": 4.533187208048489, + "learning_rate": 5.458705468501696e-06, + "loss": 0.6347, + "step": 5954 + }, + { + "epoch": 0.49, + "grad_norm": 1.5210903979167285, + "learning_rate": 5.457387255818204e-06, + "loss": 0.39, + "step": 5955 + }, + { + "epoch": 0.49, + "grad_norm": 3.4005814640182117, + "learning_rate": 5.456069011073854e-06, + "loss": 0.753, + "step": 5956 + }, + { + "epoch": 0.49, + "grad_norm": 5.28949496031527, + "learning_rate": 5.454750734361054e-06, + "loss": 1.3707, + "step": 5957 + }, + { + "epoch": 0.49, + "grad_norm": 4.28649999961873, + "learning_rate": 5.453432425772205e-06, + "loss": 0.6708, + "step": 5958 + }, + { + "epoch": 0.49, + "grad_norm": 5.050799746874854, + "learning_rate": 5.4521140853997166e-06, + "loss": 0.8119, + "step": 5959 + }, + { + "epoch": 0.49, + "grad_norm": 3.9711602718109136, + "learning_rate": 5.450795713335999e-06, + "loss": 0.8896, + "step": 5960 + }, + { + "epoch": 0.49, + "grad_norm": 2.5967403665073654, + "learning_rate": 5.449477309673462e-06, + "loss": 0.4791, + "step": 5961 + }, + { + "epoch": 0.49, + "grad_norm": 4.957273264260848, + "learning_rate": 5.4481588745045245e-06, + "loss": 1.1205, + "step": 5962 + }, + { + "epoch": 0.49, + "grad_norm": 2.5226503055402056, + "learning_rate": 5.446840407921599e-06, + "loss": 0.3371, + "step": 5963 + }, + { + "epoch": 0.49, + "grad_norm": 2.6492716318407035, + "learning_rate": 5.445521910017104e-06, + "loss": 0.7335, + "step": 5964 + }, + { + "epoch": 0.49, + "grad_norm": 4.534855989455732, + "learning_rate": 5.444203380883464e-06, + "loss": 0.5681, + "step": 5965 + }, + { + "epoch": 0.49, + "grad_norm": 3.8778627977130533, + "learning_rate": 5.442884820613099e-06, + "loss": 0.5447, + "step": 5966 + }, + { + "epoch": 0.49, + "grad_norm": 5.641037162912989, + "learning_rate": 5.441566229298436e-06, + "loss": 0.8469, + "step": 5967 + }, + { + "epoch": 0.49, + "grad_norm": 5.211550326231087, + "learning_rate": 5.440247607031901e-06, + "loss": 1.1026, + "step": 5968 + }, + { + "epoch": 0.49, + "grad_norm": 5.769302466784294, + "learning_rate": 5.438928953905926e-06, + "loss": 1.2086, + "step": 5969 + }, + { + "epoch": 0.49, + "grad_norm": 4.350234579846934, + "learning_rate": 5.437610270012943e-06, + "loss": 0.889, + "step": 5970 + }, + { + "epoch": 0.49, + "grad_norm": 3.712562852846676, + "learning_rate": 5.436291555445383e-06, + "loss": 1.0308, + "step": 5971 + }, + { + "epoch": 0.49, + "grad_norm": 3.5171631949861393, + "learning_rate": 5.434972810295683e-06, + "loss": 0.671, + "step": 5972 + }, + { + "epoch": 0.49, + "grad_norm": 3.662786074086821, + "learning_rate": 5.433654034656283e-06, + "loss": 0.932, + "step": 5973 + }, + { + "epoch": 0.49, + "grad_norm": 4.598717293441438, + "learning_rate": 5.4323352286196215e-06, + "loss": 0.695, + "step": 5974 + }, + { + "epoch": 0.49, + "grad_norm": 5.191268318394292, + "learning_rate": 5.431016392278142e-06, + "loss": 1.0992, + "step": 5975 + }, + { + "epoch": 0.49, + "grad_norm": 1.7364795990950455, + "learning_rate": 5.429697525724289e-06, + "loss": 0.3564, + "step": 5976 + }, + { + "epoch": 0.49, + "grad_norm": 2.4752863215518532, + "learning_rate": 5.428378629050511e-06, + "loss": 0.5215, + "step": 5977 + }, + { + "epoch": 0.49, + "grad_norm": 2.6326295764189065, + "learning_rate": 5.427059702349255e-06, + "loss": 0.7813, + "step": 5978 + }, + { + "epoch": 0.49, + "grad_norm": 3.4500554392551557, + "learning_rate": 5.425740745712972e-06, + "loss": 0.5694, + "step": 5979 + }, + { + "epoch": 0.49, + "grad_norm": 3.664928275209959, + "learning_rate": 5.4244217592341165e-06, + "loss": 0.7722, + "step": 5980 + }, + { + "epoch": 0.49, + "grad_norm": 3.2221681323965026, + "learning_rate": 5.423102743005141e-06, + "loss": 0.4881, + "step": 5981 + }, + { + "epoch": 0.49, + "grad_norm": 5.414440890088172, + "learning_rate": 5.421783697118506e-06, + "loss": 1.3149, + "step": 5982 + }, + { + "epoch": 0.49, + "grad_norm": 4.183253117450436, + "learning_rate": 5.420464621666669e-06, + "loss": 1.2179, + "step": 5983 + }, + { + "epoch": 0.49, + "grad_norm": 3.7352526455230937, + "learning_rate": 5.4191455167420905e-06, + "loss": 0.581, + "step": 5984 + }, + { + "epoch": 0.49, + "grad_norm": 3.8724811227526703, + "learning_rate": 5.417826382437238e-06, + "loss": 0.9634, + "step": 5985 + }, + { + "epoch": 0.49, + "grad_norm": 6.3110195488406875, + "learning_rate": 5.4165072188445734e-06, + "loss": 1.7039, + "step": 5986 + }, + { + "epoch": 0.49, + "grad_norm": 3.598222072883564, + "learning_rate": 5.415188026056565e-06, + "loss": 0.6402, + "step": 5987 + }, + { + "epoch": 0.49, + "grad_norm": 5.553321277530451, + "learning_rate": 5.413868804165682e-06, + "loss": 1.097, + "step": 5988 + }, + { + "epoch": 0.49, + "grad_norm": 3.4588707286392295, + "learning_rate": 5.412549553264399e-06, + "loss": 0.6014, + "step": 5989 + }, + { + "epoch": 0.49, + "grad_norm": 4.733512858828318, + "learning_rate": 5.411230273445186e-06, + "loss": 1.325, + "step": 5990 + }, + { + "epoch": 0.49, + "grad_norm": 4.39984972508648, + "learning_rate": 5.409910964800522e-06, + "loss": 0.8622, + "step": 5991 + }, + { + "epoch": 0.49, + "grad_norm": 2.2789657345602468, + "learning_rate": 5.4085916274228825e-06, + "loss": 0.5902, + "step": 5992 + }, + { + "epoch": 0.49, + "grad_norm": 2.1865544219152837, + "learning_rate": 5.407272261404748e-06, + "loss": 0.4441, + "step": 5993 + }, + { + "epoch": 0.49, + "grad_norm": 2.593265950272095, + "learning_rate": 5.405952866838602e-06, + "loss": 0.384, + "step": 5994 + }, + { + "epoch": 0.49, + "grad_norm": 4.557730997750645, + "learning_rate": 5.4046334438169245e-06, + "loss": 1.0818, + "step": 5995 + }, + { + "epoch": 0.49, + "grad_norm": 3.413490038438569, + "learning_rate": 5.403313992432203e-06, + "loss": 0.5347, + "step": 5996 + }, + { + "epoch": 0.49, + "grad_norm": 6.295959641215288, + "learning_rate": 5.401994512776928e-06, + "loss": 1.3955, + "step": 5997 + }, + { + "epoch": 0.49, + "grad_norm": 2.796602048676265, + "learning_rate": 5.4006750049435864e-06, + "loss": 0.3992, + "step": 5998 + }, + { + "epoch": 0.49, + "grad_norm": 5.21319372425049, + "learning_rate": 5.3993554690246695e-06, + "loss": 1.3535, + "step": 5999 + }, + { + "epoch": 0.49, + "grad_norm": 3.4363423550782217, + "learning_rate": 5.398035905112675e-06, + "loss": 0.9095, + "step": 6000 + }, + { + "epoch": 0.49, + "grad_norm": 4.160294196177696, + "learning_rate": 5.396716313300094e-06, + "loss": 0.6002, + "step": 6001 + }, + { + "epoch": 0.49, + "grad_norm": 4.867758171193, + "learning_rate": 5.395396693679427e-06, + "loss": 0.987, + "step": 6002 + }, + { + "epoch": 0.49, + "grad_norm": 1.9501210659651882, + "learning_rate": 5.394077046343172e-06, + "loss": 0.4098, + "step": 6003 + }, + { + "epoch": 0.49, + "grad_norm": 3.628940380104001, + "learning_rate": 5.39275737138383e-06, + "loss": 0.8241, + "step": 6004 + }, + { + "epoch": 0.49, + "grad_norm": 3.718599098761617, + "learning_rate": 5.3914376688939065e-06, + "loss": 0.919, + "step": 6005 + }, + { + "epoch": 0.49, + "grad_norm": 3.045665200502083, + "learning_rate": 5.390117938965906e-06, + "loss": 0.7792, + "step": 6006 + }, + { + "epoch": 0.49, + "grad_norm": 3.8960198644132213, + "learning_rate": 5.388798181692335e-06, + "loss": 0.8319, + "step": 6007 + }, + { + "epoch": 0.49, + "grad_norm": 4.9175820202923095, + "learning_rate": 5.387478397165704e-06, + "loss": 1.3319, + "step": 6008 + }, + { + "epoch": 0.49, + "grad_norm": 2.8390832555859196, + "learning_rate": 5.386158585478525e-06, + "loss": 0.6011, + "step": 6009 + }, + { + "epoch": 0.49, + "grad_norm": 3.63872663669692, + "learning_rate": 5.384838746723308e-06, + "loss": 0.6307, + "step": 6010 + }, + { + "epoch": 0.49, + "grad_norm": 3.2631288297706647, + "learning_rate": 5.383518880992571e-06, + "loss": 0.45, + "step": 6011 + }, + { + "epoch": 0.49, + "grad_norm": 4.06846177820315, + "learning_rate": 5.382198988378829e-06, + "loss": 0.9493, + "step": 6012 + }, + { + "epoch": 0.49, + "grad_norm": 2.471610145989451, + "learning_rate": 5.380879068974599e-06, + "loss": 0.426, + "step": 6013 + }, + { + "epoch": 0.49, + "grad_norm": 1.05154167912491, + "learning_rate": 5.3795591228724065e-06, + "loss": 0.1397, + "step": 6014 + }, + { + "epoch": 0.49, + "grad_norm": 4.269178799749522, + "learning_rate": 5.37823915016477e-06, + "loss": 0.868, + "step": 6015 + }, + { + "epoch": 0.49, + "grad_norm": 2.6985714026759426, + "learning_rate": 5.376919150944218e-06, + "loss": 0.3655, + "step": 6016 + }, + { + "epoch": 0.49, + "grad_norm": 3.5445661109934754, + "learning_rate": 5.375599125303272e-06, + "loss": 0.5883, + "step": 6017 + }, + { + "epoch": 0.49, + "grad_norm": 1.865373846194751, + "learning_rate": 5.3742790733344604e-06, + "loss": 0.3618, + "step": 6018 + }, + { + "epoch": 0.49, + "grad_norm": 2.484862095770465, + "learning_rate": 5.372958995130315e-06, + "loss": 0.5057, + "step": 6019 + }, + { + "epoch": 0.49, + "grad_norm": 4.6606923982642225, + "learning_rate": 5.37163889078337e-06, + "loss": 0.748, + "step": 6020 + }, + { + "epoch": 0.49, + "grad_norm": 4.877526635623892, + "learning_rate": 5.3703187603861525e-06, + "loss": 0.7487, + "step": 6021 + }, + { + "epoch": 0.49, + "grad_norm": 3.2999462796503423, + "learning_rate": 5.368998604031202e-06, + "loss": 0.8011, + "step": 6022 + }, + { + "epoch": 0.49, + "grad_norm": 3.3706799983308726, + "learning_rate": 5.367678421811058e-06, + "loss": 0.4402, + "step": 6023 + }, + { + "epoch": 0.49, + "grad_norm": 2.3353806291212935, + "learning_rate": 5.366358213818256e-06, + "loss": 0.4826, + "step": 6024 + }, + { + "epoch": 0.49, + "grad_norm": 4.558573600171047, + "learning_rate": 5.365037980145337e-06, + "loss": 1.0699, + "step": 6025 + }, + { + "epoch": 0.49, + "grad_norm": 3.7952480984435892, + "learning_rate": 5.3637177208848435e-06, + "loss": 0.5923, + "step": 6026 + }, + { + "epoch": 0.49, + "grad_norm": 3.223010186727015, + "learning_rate": 5.362397436129321e-06, + "loss": 0.6329, + "step": 6027 + }, + { + "epoch": 0.49, + "grad_norm": 3.1927538308642323, + "learning_rate": 5.361077125971316e-06, + "loss": 0.5016, + "step": 6028 + }, + { + "epoch": 0.49, + "grad_norm": 4.116713591518483, + "learning_rate": 5.359756790503376e-06, + "loss": 0.6132, + "step": 6029 + }, + { + "epoch": 0.49, + "grad_norm": 4.433978801843281, + "learning_rate": 5.358436429818049e-06, + "loss": 0.6, + "step": 6030 + }, + { + "epoch": 0.49, + "grad_norm": 1.227780056767419, + "learning_rate": 5.357116044007889e-06, + "loss": 0.2047, + "step": 6031 + }, + { + "epoch": 0.49, + "grad_norm": 2.257502964558977, + "learning_rate": 5.35579563316545e-06, + "loss": 0.3571, + "step": 6032 + }, + { + "epoch": 0.49, + "grad_norm": 5.06788660150286, + "learning_rate": 5.354475197383284e-06, + "loss": 1.581, + "step": 6033 + }, + { + "epoch": 0.49, + "grad_norm": 3.744145590496548, + "learning_rate": 5.353154736753951e-06, + "loss": 0.5749, + "step": 6034 + }, + { + "epoch": 0.49, + "grad_norm": 4.09242606935554, + "learning_rate": 5.351834251370006e-06, + "loss": 0.9254, + "step": 6035 + }, + { + "epoch": 0.49, + "grad_norm": 1.771394588947226, + "learning_rate": 5.350513741324011e-06, + "loss": 0.3295, + "step": 6036 + }, + { + "epoch": 0.49, + "grad_norm": 5.021615405626378, + "learning_rate": 5.349193206708529e-06, + "loss": 1.1829, + "step": 6037 + }, + { + "epoch": 0.49, + "grad_norm": 5.202720037583286, + "learning_rate": 5.347872647616122e-06, + "loss": 1.2606, + "step": 6038 + }, + { + "epoch": 0.49, + "grad_norm": 3.6572563355449743, + "learning_rate": 5.3465520641393585e-06, + "loss": 0.7677, + "step": 6039 + }, + { + "epoch": 0.49, + "grad_norm": 3.8552302348051444, + "learning_rate": 5.345231456370802e-06, + "loss": 0.7232, + "step": 6040 + }, + { + "epoch": 0.49, + "grad_norm": 4.806366339065954, + "learning_rate": 5.3439108244030234e-06, + "loss": 1.2052, + "step": 6041 + }, + { + "epoch": 0.49, + "grad_norm": 4.29929892416941, + "learning_rate": 5.342590168328592e-06, + "loss": 0.7961, + "step": 6042 + }, + { + "epoch": 0.49, + "grad_norm": 2.1248953723976363, + "learning_rate": 5.341269488240082e-06, + "loss": 0.4013, + "step": 6043 + }, + { + "epoch": 0.49, + "grad_norm": 5.355464824501211, + "learning_rate": 5.3399487842300646e-06, + "loss": 1.2155, + "step": 6044 + }, + { + "epoch": 0.49, + "grad_norm": 4.124892612547487, + "learning_rate": 5.338628056391118e-06, + "loss": 1.2099, + "step": 6045 + }, + { + "epoch": 0.49, + "grad_norm": 2.18600294308645, + "learning_rate": 5.337307304815817e-06, + "loss": 0.343, + "step": 6046 + }, + { + "epoch": 0.49, + "grad_norm": 2.844076954517568, + "learning_rate": 5.335986529596743e-06, + "loss": 0.6046, + "step": 6047 + }, + { + "epoch": 0.49, + "grad_norm": 6.3044142195789785, + "learning_rate": 5.334665730826476e-06, + "loss": 1.1133, + "step": 6048 + }, + { + "epoch": 0.49, + "grad_norm": 1.3205392762697725, + "learning_rate": 5.333344908597597e-06, + "loss": 0.1942, + "step": 6049 + }, + { + "epoch": 0.49, + "grad_norm": 4.773085470119928, + "learning_rate": 5.332024063002691e-06, + "loss": 0.9705, + "step": 6050 + }, + { + "epoch": 0.49, + "grad_norm": 5.0438150380204245, + "learning_rate": 5.330703194134342e-06, + "loss": 1.1824, + "step": 6051 + }, + { + "epoch": 0.49, + "grad_norm": 5.62512093898664, + "learning_rate": 5.32938230208514e-06, + "loss": 1.051, + "step": 6052 + }, + { + "epoch": 0.49, + "grad_norm": 4.944231217257914, + "learning_rate": 5.328061386947671e-06, + "loss": 0.9748, + "step": 6053 + }, + { + "epoch": 0.49, + "grad_norm": 4.627745480283834, + "learning_rate": 5.326740448814527e-06, + "loss": 0.8678, + "step": 6054 + }, + { + "epoch": 0.49, + "grad_norm": 3.288780733394661, + "learning_rate": 5.325419487778299e-06, + "loss": 0.7457, + "step": 6055 + }, + { + "epoch": 0.49, + "grad_norm": 3.2709540185919006, + "learning_rate": 5.324098503931581e-06, + "loss": 0.4442, + "step": 6056 + }, + { + "epoch": 0.5, + "grad_norm": 4.528635346577336, + "learning_rate": 5.3227774973669695e-06, + "loss": 1.0207, + "step": 6057 + }, + { + "epoch": 0.5, + "grad_norm": 3.666482261079773, + "learning_rate": 5.3214564681770585e-06, + "loss": 0.9279, + "step": 6058 + }, + { + "epoch": 0.5, + "grad_norm": 3.9690261119748724, + "learning_rate": 5.320135416454448e-06, + "loss": 0.9664, + "step": 6059 + }, + { + "epoch": 0.5, + "grad_norm": 3.266908773209643, + "learning_rate": 5.3188143422917405e-06, + "loss": 0.8157, + "step": 6060 + }, + { + "epoch": 0.5, + "grad_norm": 3.098677175359244, + "learning_rate": 5.317493245781533e-06, + "loss": 0.5497, + "step": 6061 + }, + { + "epoch": 0.5, + "grad_norm": 4.276565373552879, + "learning_rate": 5.316172127016431e-06, + "loss": 0.7332, + "step": 6062 + }, + { + "epoch": 0.5, + "grad_norm": 1.2610912101476668, + "learning_rate": 5.31485098608904e-06, + "loss": 0.2074, + "step": 6063 + }, + { + "epoch": 0.5, + "grad_norm": 2.128417890513426, + "learning_rate": 5.313529823091964e-06, + "loss": 0.3317, + "step": 6064 + }, + { + "epoch": 0.5, + "grad_norm": 3.5045172622483607, + "learning_rate": 5.312208638117812e-06, + "loss": 1.0077, + "step": 6065 + }, + { + "epoch": 0.5, + "grad_norm": 4.508505709824206, + "learning_rate": 5.310887431259194e-06, + "loss": 0.9134, + "step": 6066 + }, + { + "epoch": 0.5, + "grad_norm": 2.28128242779539, + "learning_rate": 5.309566202608719e-06, + "loss": 0.3332, + "step": 6067 + }, + { + "epoch": 0.5, + "grad_norm": 3.8888855207366695, + "learning_rate": 5.3082449522590005e-06, + "loss": 1.0126, + "step": 6068 + }, + { + "epoch": 0.5, + "grad_norm": 3.5042120031476163, + "learning_rate": 5.306923680302654e-06, + "loss": 1.0205, + "step": 6069 + }, + { + "epoch": 0.5, + "grad_norm": 3.3608982135572427, + "learning_rate": 5.30560238683229e-06, + "loss": 0.6756, + "step": 6070 + }, + { + "epoch": 0.5, + "grad_norm": 3.1357296635241734, + "learning_rate": 5.304281071940532e-06, + "loss": 0.6604, + "step": 6071 + }, + { + "epoch": 0.5, + "grad_norm": 3.487126975007773, + "learning_rate": 5.302959735719995e-06, + "loss": 0.7766, + "step": 6072 + }, + { + "epoch": 0.5, + "grad_norm": 4.043631275556565, + "learning_rate": 5.301638378263296e-06, + "loss": 0.8359, + "step": 6073 + }, + { + "epoch": 0.5, + "grad_norm": 3.251135154978727, + "learning_rate": 5.300316999663062e-06, + "loss": 0.7924, + "step": 6074 + }, + { + "epoch": 0.5, + "grad_norm": 3.0227537491324274, + "learning_rate": 5.298995600011912e-06, + "loss": 0.5663, + "step": 6075 + }, + { + "epoch": 0.5, + "grad_norm": 2.749566650478806, + "learning_rate": 5.2976741794024725e-06, + "loss": 0.4459, + "step": 6076 + }, + { + "epoch": 0.5, + "grad_norm": 2.2032780087289554, + "learning_rate": 5.296352737927368e-06, + "loss": 0.2584, + "step": 6077 + }, + { + "epoch": 0.5, + "grad_norm": 2.4978819867980775, + "learning_rate": 5.295031275679226e-06, + "loss": 0.5388, + "step": 6078 + }, + { + "epoch": 0.5, + "grad_norm": 3.7285154432660215, + "learning_rate": 5.293709792750677e-06, + "loss": 0.7644, + "step": 6079 + }, + { + "epoch": 0.5, + "grad_norm": 3.7192776482024046, + "learning_rate": 5.292388289234349e-06, + "loss": 0.9788, + "step": 6080 + }, + { + "epoch": 0.5, + "grad_norm": 4.711842440350777, + "learning_rate": 5.2910667652228735e-06, + "loss": 0.7359, + "step": 6081 + }, + { + "epoch": 0.5, + "grad_norm": 3.673850306310747, + "learning_rate": 5.289745220808885e-06, + "loss": 0.924, + "step": 6082 + }, + { + "epoch": 0.5, + "grad_norm": 4.4479694632064355, + "learning_rate": 5.288423656085018e-06, + "loss": 0.8344, + "step": 6083 + }, + { + "epoch": 0.5, + "grad_norm": 2.3718104486576355, + "learning_rate": 5.287102071143907e-06, + "loss": 0.3449, + "step": 6084 + }, + { + "epoch": 0.5, + "grad_norm": 1.9814963008856785, + "learning_rate": 5.28578046607819e-06, + "loss": 0.2176, + "step": 6085 + }, + { + "epoch": 0.5, + "grad_norm": 3.0309721955972324, + "learning_rate": 5.284458840980507e-06, + "loss": 0.7822, + "step": 6086 + }, + { + "epoch": 0.5, + "grad_norm": 3.8376790697897962, + "learning_rate": 5.283137195943499e-06, + "loss": 0.6084, + "step": 6087 + }, + { + "epoch": 0.5, + "grad_norm": 4.032045832585798, + "learning_rate": 5.281815531059803e-06, + "loss": 0.7281, + "step": 6088 + }, + { + "epoch": 0.5, + "grad_norm": 2.844734848963236, + "learning_rate": 5.280493846422066e-06, + "loss": 0.5976, + "step": 6089 + }, + { + "epoch": 0.5, + "grad_norm": 4.2656608970985594, + "learning_rate": 5.27917214212293e-06, + "loss": 1.0984, + "step": 6090 + }, + { + "epoch": 0.5, + "grad_norm": 4.5982297716675875, + "learning_rate": 5.2778504182550436e-06, + "loss": 1.1228, + "step": 6091 + }, + { + "epoch": 0.5, + "grad_norm": 3.9160003755177946, + "learning_rate": 5.27652867491105e-06, + "loss": 0.9268, + "step": 6092 + }, + { + "epoch": 0.5, + "grad_norm": 2.7159953624083575, + "learning_rate": 5.2752069121836e-06, + "loss": 0.7194, + "step": 6093 + }, + { + "epoch": 0.5, + "grad_norm": 3.542153783435038, + "learning_rate": 5.273885130165345e-06, + "loss": 0.7737, + "step": 6094 + }, + { + "epoch": 0.5, + "grad_norm": 2.4585385374304933, + "learning_rate": 5.2725633289489345e-06, + "loss": 0.5026, + "step": 6095 + }, + { + "epoch": 0.5, + "grad_norm": 3.0917108234195774, + "learning_rate": 5.2712415086270185e-06, + "loss": 0.655, + "step": 6096 + }, + { + "epoch": 0.5, + "grad_norm": 5.3591110112259255, + "learning_rate": 5.2699196692922546e-06, + "loss": 0.8652, + "step": 6097 + }, + { + "epoch": 0.5, + "grad_norm": 4.300957812764537, + "learning_rate": 5.268597811037296e-06, + "loss": 0.538, + "step": 6098 + }, + { + "epoch": 0.5, + "grad_norm": 3.2486258407781285, + "learning_rate": 5.2672759339547995e-06, + "loss": 0.6235, + "step": 6099 + }, + { + "epoch": 0.5, + "grad_norm": 4.393035122156463, + "learning_rate": 5.265954038137424e-06, + "loss": 0.7664, + "step": 6100 + }, + { + "epoch": 0.5, + "grad_norm": 3.9881075428557233, + "learning_rate": 5.264632123677827e-06, + "loss": 0.5406, + "step": 6101 + }, + { + "epoch": 0.5, + "grad_norm": 4.355037993476339, + "learning_rate": 5.2633101906686715e-06, + "loss": 1.1116, + "step": 6102 + }, + { + "epoch": 0.5, + "grad_norm": 1.8713903279823412, + "learning_rate": 5.261988239202617e-06, + "loss": 0.396, + "step": 6103 + }, + { + "epoch": 0.5, + "grad_norm": 2.868093306142094, + "learning_rate": 5.260666269372327e-06, + "loss": 0.3755, + "step": 6104 + }, + { + "epoch": 0.5, + "grad_norm": 3.782449515322387, + "learning_rate": 5.259344281270464e-06, + "loss": 0.8487, + "step": 6105 + }, + { + "epoch": 0.5, + "grad_norm": 4.327406827144394, + "learning_rate": 5.258022274989698e-06, + "loss": 0.8965, + "step": 6106 + }, + { + "epoch": 0.5, + "grad_norm": 5.280276130996807, + "learning_rate": 5.256700250622692e-06, + "loss": 1.35, + "step": 6107 + }, + { + "epoch": 0.5, + "grad_norm": 2.8376123252688825, + "learning_rate": 5.2553782082621155e-06, + "loss": 0.6479, + "step": 6108 + }, + { + "epoch": 0.5, + "grad_norm": 4.390103833093076, + "learning_rate": 5.2540561480006395e-06, + "loss": 0.9966, + "step": 6109 + }, + { + "epoch": 0.5, + "grad_norm": 3.1779116496556297, + "learning_rate": 5.252734069930933e-06, + "loss": 0.5502, + "step": 6110 + }, + { + "epoch": 0.5, + "grad_norm": 4.844221005521417, + "learning_rate": 5.251411974145667e-06, + "loss": 1.1597, + "step": 6111 + }, + { + "epoch": 0.5, + "grad_norm": 3.0666922562733765, + "learning_rate": 5.250089860737516e-06, + "loss": 0.7088, + "step": 6112 + }, + { + "epoch": 0.5, + "grad_norm": 2.681093129070991, + "learning_rate": 5.248767729799153e-06, + "loss": 0.567, + "step": 6113 + }, + { + "epoch": 0.5, + "grad_norm": 5.172353609353667, + "learning_rate": 5.247445581423257e-06, + "loss": 1.5891, + "step": 6114 + }, + { + "epoch": 0.5, + "grad_norm": 4.01038345251461, + "learning_rate": 5.246123415702502e-06, + "loss": 1.059, + "step": 6115 + }, + { + "epoch": 0.5, + "grad_norm": 3.3530754017556252, + "learning_rate": 5.244801232729566e-06, + "loss": 0.9773, + "step": 6116 + }, + { + "epoch": 0.5, + "grad_norm": 5.0136245644424715, + "learning_rate": 5.2434790325971295e-06, + "loss": 0.901, + "step": 6117 + }, + { + "epoch": 0.5, + "grad_norm": 2.13231353398788, + "learning_rate": 5.242156815397873e-06, + "loss": 0.3978, + "step": 6118 + }, + { + "epoch": 0.5, + "grad_norm": 1.7810045992983448, + "learning_rate": 5.240834581224476e-06, + "loss": 0.3124, + "step": 6119 + }, + { + "epoch": 0.5, + "grad_norm": 4.194978801765924, + "learning_rate": 5.239512330169625e-06, + "loss": 0.5603, + "step": 6120 + }, + { + "epoch": 0.5, + "grad_norm": 4.131501788834688, + "learning_rate": 5.238190062326001e-06, + "loss": 1.1013, + "step": 6121 + }, + { + "epoch": 0.5, + "grad_norm": 2.673214352109646, + "learning_rate": 5.23686777778629e-06, + "loss": 0.3919, + "step": 6122 + }, + { + "epoch": 0.5, + "grad_norm": 2.9256462722989505, + "learning_rate": 5.235545476643179e-06, + "loss": 0.8098, + "step": 6123 + }, + { + "epoch": 0.5, + "grad_norm": 4.373331473673059, + "learning_rate": 5.234223158989354e-06, + "loss": 1.2181, + "step": 6124 + }, + { + "epoch": 0.5, + "grad_norm": 2.1702997037447873, + "learning_rate": 5.232900824917507e-06, + "loss": 0.5741, + "step": 6125 + }, + { + "epoch": 0.5, + "grad_norm": 4.186281271164072, + "learning_rate": 5.231578474520324e-06, + "loss": 0.8313, + "step": 6126 + }, + { + "epoch": 0.5, + "grad_norm": 4.9746730564601425, + "learning_rate": 5.230256107890499e-06, + "loss": 0.8272, + "step": 6127 + }, + { + "epoch": 0.5, + "grad_norm": 2.1480789590876506, + "learning_rate": 5.228933725120722e-06, + "loss": 0.2823, + "step": 6128 + }, + { + "epoch": 0.5, + "grad_norm": 4.664596301551104, + "learning_rate": 5.227611326303688e-06, + "loss": 1.1076, + "step": 6129 + }, + { + "epoch": 0.5, + "grad_norm": 3.57939184414976, + "learning_rate": 5.2262889115320895e-06, + "loss": 0.8099, + "step": 6130 + }, + { + "epoch": 0.5, + "grad_norm": 3.7668009677088365, + "learning_rate": 5.224966480898624e-06, + "loss": 0.772, + "step": 6131 + }, + { + "epoch": 0.5, + "grad_norm": 4.9670168505602925, + "learning_rate": 5.2236440344959875e-06, + "loss": 1.3492, + "step": 6132 + }, + { + "epoch": 0.5, + "grad_norm": 3.797135588049277, + "learning_rate": 5.2223215724168764e-06, + "loss": 0.8794, + "step": 6133 + }, + { + "epoch": 0.5, + "grad_norm": 3.8553143869273296, + "learning_rate": 5.220999094753992e-06, + "loss": 0.4619, + "step": 6134 + }, + { + "epoch": 0.5, + "grad_norm": 3.0458033099846755, + "learning_rate": 5.2196766016000325e-06, + "loss": 0.7381, + "step": 6135 + }, + { + "epoch": 0.5, + "grad_norm": 3.306552937733528, + "learning_rate": 5.218354093047697e-06, + "loss": 0.6208, + "step": 6136 + }, + { + "epoch": 0.5, + "grad_norm": 3.4600195846023656, + "learning_rate": 5.217031569189692e-06, + "loss": 0.6653, + "step": 6137 + }, + { + "epoch": 0.5, + "grad_norm": 1.174761429229127, + "learning_rate": 5.215709030118718e-06, + "loss": 0.1854, + "step": 6138 + }, + { + "epoch": 0.5, + "grad_norm": 2.469828478962, + "learning_rate": 5.21438647592748e-06, + "loss": 0.425, + "step": 6139 + }, + { + "epoch": 0.5, + "grad_norm": 4.028015358329215, + "learning_rate": 5.213063906708683e-06, + "loss": 0.7259, + "step": 6140 + }, + { + "epoch": 0.5, + "grad_norm": 3.181936284915569, + "learning_rate": 5.211741322555034e-06, + "loss": 0.6182, + "step": 6141 + }, + { + "epoch": 0.5, + "grad_norm": 3.5836489621305465, + "learning_rate": 5.2104187235592395e-06, + "loss": 0.797, + "step": 6142 + }, + { + "epoch": 0.5, + "grad_norm": 3.5992488846652266, + "learning_rate": 5.209096109814008e-06, + "loss": 0.6928, + "step": 6143 + }, + { + "epoch": 0.5, + "grad_norm": 3.4981195911857967, + "learning_rate": 5.207773481412049e-06, + "loss": 0.8769, + "step": 6144 + }, + { + "epoch": 0.5, + "grad_norm": 3.6780818571798677, + "learning_rate": 5.206450838446072e-06, + "loss": 0.5248, + "step": 6145 + }, + { + "epoch": 0.5, + "grad_norm": 3.7670184348930027, + "learning_rate": 5.205128181008791e-06, + "loss": 0.9068, + "step": 6146 + }, + { + "epoch": 0.5, + "grad_norm": 3.5566362645193568, + "learning_rate": 5.203805509192917e-06, + "loss": 0.6849, + "step": 6147 + }, + { + "epoch": 0.5, + "grad_norm": 2.4882825346129476, + "learning_rate": 5.202482823091165e-06, + "loss": 0.8195, + "step": 6148 + }, + { + "epoch": 0.5, + "grad_norm": 2.611746586855792, + "learning_rate": 5.201160122796247e-06, + "loss": 0.3935, + "step": 6149 + }, + { + "epoch": 0.5, + "grad_norm": 3.2290167217233137, + "learning_rate": 5.19983740840088e-06, + "loss": 0.3995, + "step": 6150 + }, + { + "epoch": 0.5, + "grad_norm": 2.734378775660485, + "learning_rate": 5.198514679997782e-06, + "loss": 0.6287, + "step": 6151 + }, + { + "epoch": 0.5, + "grad_norm": 4.602912475170529, + "learning_rate": 5.197191937679667e-06, + "loss": 0.9427, + "step": 6152 + }, + { + "epoch": 0.5, + "grad_norm": 3.735567133731998, + "learning_rate": 5.195869181539255e-06, + "loss": 0.8277, + "step": 6153 + }, + { + "epoch": 0.5, + "grad_norm": 6.010553948935826, + "learning_rate": 5.194546411669267e-06, + "loss": 0.8849, + "step": 6154 + }, + { + "epoch": 0.5, + "grad_norm": 4.871751178871961, + "learning_rate": 5.193223628162421e-06, + "loss": 0.8736, + "step": 6155 + }, + { + "epoch": 0.5, + "grad_norm": 0.9287174402242686, + "learning_rate": 5.19190083111144e-06, + "loss": 0.1051, + "step": 6156 + }, + { + "epoch": 0.5, + "grad_norm": 2.7633676823644064, + "learning_rate": 5.190578020609047e-06, + "loss": 0.7295, + "step": 6157 + }, + { + "epoch": 0.5, + "grad_norm": 4.490580213260556, + "learning_rate": 5.189255196747964e-06, + "loss": 0.7785, + "step": 6158 + }, + { + "epoch": 0.5, + "grad_norm": 3.2016378207556, + "learning_rate": 5.187932359620914e-06, + "loss": 0.7724, + "step": 6159 + }, + { + "epoch": 0.5, + "grad_norm": 3.911124554425165, + "learning_rate": 5.186609509320625e-06, + "loss": 0.5837, + "step": 6160 + }, + { + "epoch": 0.5, + "grad_norm": 2.103299272865019, + "learning_rate": 5.18528664593982e-06, + "loss": 0.3336, + "step": 6161 + }, + { + "epoch": 0.5, + "grad_norm": 3.763653361213138, + "learning_rate": 5.183963769571227e-06, + "loss": 0.8515, + "step": 6162 + }, + { + "epoch": 0.5, + "grad_norm": 2.0625643276009, + "learning_rate": 5.1826408803075765e-06, + "loss": 0.4117, + "step": 6163 + }, + { + "epoch": 0.5, + "grad_norm": 5.418750418805527, + "learning_rate": 5.181317978241595e-06, + "loss": 1.5736, + "step": 6164 + }, + { + "epoch": 0.5, + "grad_norm": 3.5187906029484766, + "learning_rate": 5.179995063466011e-06, + "loss": 0.7462, + "step": 6165 + }, + { + "epoch": 0.5, + "grad_norm": 4.098670287517148, + "learning_rate": 5.178672136073558e-06, + "loss": 0.7015, + "step": 6166 + }, + { + "epoch": 0.5, + "grad_norm": 4.194162905885539, + "learning_rate": 5.177349196156964e-06, + "loss": 0.7261, + "step": 6167 + }, + { + "epoch": 0.5, + "grad_norm": 4.153913117205848, + "learning_rate": 5.1760262438089636e-06, + "loss": 0.8085, + "step": 6168 + }, + { + "epoch": 0.5, + "grad_norm": 4.997762346294353, + "learning_rate": 5.174703279122291e-06, + "loss": 0.8826, + "step": 6169 + }, + { + "epoch": 0.5, + "grad_norm": 3.476328227990508, + "learning_rate": 5.173380302189676e-06, + "loss": 0.9064, + "step": 6170 + }, + { + "epoch": 0.5, + "grad_norm": 5.570371625510055, + "learning_rate": 5.172057313103859e-06, + "loss": 1.4817, + "step": 6171 + }, + { + "epoch": 0.5, + "grad_norm": 1.9306079358977413, + "learning_rate": 5.170734311957572e-06, + "loss": 0.3788, + "step": 6172 + }, + { + "epoch": 0.5, + "grad_norm": 4.549389996259477, + "learning_rate": 5.169411298843554e-06, + "loss": 0.7813, + "step": 6173 + }, + { + "epoch": 0.5, + "grad_norm": 5.499649837486986, + "learning_rate": 5.16808827385454e-06, + "loss": 0.7076, + "step": 6174 + }, + { + "epoch": 0.5, + "grad_norm": 3.989391726629326, + "learning_rate": 5.16676523708327e-06, + "loss": 0.9742, + "step": 6175 + }, + { + "epoch": 0.5, + "grad_norm": 3.67099707567816, + "learning_rate": 5.165442188622482e-06, + "loss": 0.879, + "step": 6176 + }, + { + "epoch": 0.5, + "grad_norm": 4.558072060843167, + "learning_rate": 5.164119128564917e-06, + "loss": 0.8436, + "step": 6177 + }, + { + "epoch": 0.5, + "grad_norm": 1.8061979255149667, + "learning_rate": 5.162796057003316e-06, + "loss": 0.4058, + "step": 6178 + }, + { + "epoch": 0.51, + "grad_norm": 3.1751579642441854, + "learning_rate": 5.161472974030418e-06, + "loss": 0.7461, + "step": 6179 + }, + { + "epoch": 0.51, + "grad_norm": 4.176957644498074, + "learning_rate": 5.1601498797389695e-06, + "loss": 0.758, + "step": 6180 + }, + { + "epoch": 0.51, + "grad_norm": 3.112975382854141, + "learning_rate": 5.158826774221711e-06, + "loss": 0.5926, + "step": 6181 + }, + { + "epoch": 0.51, + "grad_norm": 2.8319967477483363, + "learning_rate": 5.157503657571386e-06, + "loss": 0.6073, + "step": 6182 + }, + { + "epoch": 0.51, + "grad_norm": 0.9127903859105102, + "learning_rate": 5.156180529880741e-06, + "loss": 0.1528, + "step": 6183 + }, + { + "epoch": 0.51, + "grad_norm": 3.757988523389512, + "learning_rate": 5.15485739124252e-06, + "loss": 0.8195, + "step": 6184 + }, + { + "epoch": 0.51, + "grad_norm": 5.149178222477253, + "learning_rate": 5.153534241749468e-06, + "loss": 1.2277, + "step": 6185 + }, + { + "epoch": 0.51, + "grad_norm": 3.1744426702739177, + "learning_rate": 5.152211081494336e-06, + "loss": 0.5887, + "step": 6186 + }, + { + "epoch": 0.51, + "grad_norm": 2.2991471817823994, + "learning_rate": 5.150887910569868e-06, + "loss": 0.3999, + "step": 6187 + }, + { + "epoch": 0.51, + "grad_norm": 3.973632128969777, + "learning_rate": 5.149564729068816e-06, + "loss": 0.6487, + "step": 6188 + }, + { + "epoch": 0.51, + "grad_norm": 2.4212412938391443, + "learning_rate": 5.148241537083928e-06, + "loss": 0.5501, + "step": 6189 + }, + { + "epoch": 0.51, + "grad_norm": 3.9912273125515885, + "learning_rate": 5.146918334707952e-06, + "loss": 0.9781, + "step": 6190 + }, + { + "epoch": 0.51, + "grad_norm": 1.7338531351318034, + "learning_rate": 5.145595122033641e-06, + "loss": 0.5279, + "step": 6191 + }, + { + "epoch": 0.51, + "grad_norm": 3.797698382201967, + "learning_rate": 5.144271899153743e-06, + "loss": 1.1141, + "step": 6192 + }, + { + "epoch": 0.51, + "grad_norm": 5.051888552737093, + "learning_rate": 5.142948666161015e-06, + "loss": 0.962, + "step": 6193 + }, + { + "epoch": 0.51, + "grad_norm": 2.695575880327883, + "learning_rate": 5.1416254231482075e-06, + "loss": 0.7048, + "step": 6194 + }, + { + "epoch": 0.51, + "grad_norm": 4.149596836313373, + "learning_rate": 5.140302170208073e-06, + "loss": 0.7707, + "step": 6195 + }, + { + "epoch": 0.51, + "grad_norm": 2.8069664635200313, + "learning_rate": 5.138978907433368e-06, + "loss": 0.4488, + "step": 6196 + }, + { + "epoch": 0.51, + "grad_norm": 1.8215426670677781, + "learning_rate": 5.137655634916847e-06, + "loss": 0.3643, + "step": 6197 + }, + { + "epoch": 0.51, + "grad_norm": 3.3808154312253578, + "learning_rate": 5.136332352751264e-06, + "loss": 0.6463, + "step": 6198 + }, + { + "epoch": 0.51, + "grad_norm": 4.444167956178485, + "learning_rate": 5.1350090610293765e-06, + "loss": 0.7728, + "step": 6199 + }, + { + "epoch": 0.51, + "grad_norm": 3.866901181736248, + "learning_rate": 5.133685759843942e-06, + "loss": 0.7861, + "step": 6200 + }, + { + "epoch": 0.51, + "grad_norm": 3.2801548514287724, + "learning_rate": 5.132362449287717e-06, + "loss": 0.8588, + "step": 6201 + }, + { + "epoch": 0.51, + "grad_norm": 3.5811069303386147, + "learning_rate": 5.13103912945346e-06, + "loss": 0.9135, + "step": 6202 + }, + { + "epoch": 0.51, + "grad_norm": 3.447064912064372, + "learning_rate": 5.129715800433931e-06, + "loss": 0.6497, + "step": 6203 + }, + { + "epoch": 0.51, + "grad_norm": 3.5511670926001306, + "learning_rate": 5.128392462321889e-06, + "loss": 0.621, + "step": 6204 + }, + { + "epoch": 0.51, + "grad_norm": 3.2851645891357455, + "learning_rate": 5.127069115210094e-06, + "loss": 0.6323, + "step": 6205 + }, + { + "epoch": 0.51, + "grad_norm": 3.0718547433639816, + "learning_rate": 5.125745759191307e-06, + "loss": 0.4425, + "step": 6206 + }, + { + "epoch": 0.51, + "grad_norm": 4.205788564576086, + "learning_rate": 5.124422394358289e-06, + "loss": 1.1922, + "step": 6207 + }, + { + "epoch": 0.51, + "grad_norm": 4.570194497022711, + "learning_rate": 5.123099020803803e-06, + "loss": 1.1588, + "step": 6208 + }, + { + "epoch": 0.51, + "grad_norm": 4.123753768682675, + "learning_rate": 5.1217756386206115e-06, + "loss": 0.9596, + "step": 6209 + }, + { + "epoch": 0.51, + "grad_norm": 3.1374146278778574, + "learning_rate": 5.120452247901477e-06, + "loss": 0.7494, + "step": 6210 + }, + { + "epoch": 0.51, + "grad_norm": 5.131343355026282, + "learning_rate": 5.119128848739165e-06, + "loss": 0.8035, + "step": 6211 + }, + { + "epoch": 0.51, + "grad_norm": 5.085041690104158, + "learning_rate": 5.11780544122644e-06, + "loss": 0.8113, + "step": 6212 + }, + { + "epoch": 0.51, + "grad_norm": 2.71463247425855, + "learning_rate": 5.116482025456066e-06, + "loss": 0.6242, + "step": 6213 + }, + { + "epoch": 0.51, + "grad_norm": 3.9995805170970105, + "learning_rate": 5.115158601520807e-06, + "loss": 1.2192, + "step": 6214 + }, + { + "epoch": 0.51, + "grad_norm": 4.687765821380948, + "learning_rate": 5.1138351695134325e-06, + "loss": 1.2597, + "step": 6215 + }, + { + "epoch": 0.51, + "grad_norm": 4.4346026177724935, + "learning_rate": 5.112511729526708e-06, + "loss": 1.0865, + "step": 6216 + }, + { + "epoch": 0.51, + "grad_norm": 2.6694227711778957, + "learning_rate": 5.111188281653401e-06, + "loss": 0.3491, + "step": 6217 + }, + { + "epoch": 0.51, + "grad_norm": 2.883057852039323, + "learning_rate": 5.109864825986278e-06, + "loss": 0.4154, + "step": 6218 + }, + { + "epoch": 0.51, + "grad_norm": 4.028272430272317, + "learning_rate": 5.1085413626181115e-06, + "loss": 1.1474, + "step": 6219 + }, + { + "epoch": 0.51, + "grad_norm": 6.025093841644022, + "learning_rate": 5.107217891641666e-06, + "loss": 1.2059, + "step": 6220 + }, + { + "epoch": 0.51, + "grad_norm": 3.402164700317038, + "learning_rate": 5.1058944131497136e-06, + "loss": 0.5818, + "step": 6221 + }, + { + "epoch": 0.51, + "grad_norm": 2.7144367551172395, + "learning_rate": 5.104570927235022e-06, + "loss": 0.7003, + "step": 6222 + }, + { + "epoch": 0.51, + "grad_norm": 3.4100266315263363, + "learning_rate": 5.103247433990366e-06, + "loss": 0.7946, + "step": 6223 + }, + { + "epoch": 0.51, + "grad_norm": 5.265470392284644, + "learning_rate": 5.1019239335085125e-06, + "loss": 1.2367, + "step": 6224 + }, + { + "epoch": 0.51, + "grad_norm": 3.9032990061558483, + "learning_rate": 5.100600425882235e-06, + "loss": 0.713, + "step": 6225 + }, + { + "epoch": 0.51, + "grad_norm": 3.8693250297510664, + "learning_rate": 5.099276911204306e-06, + "loss": 0.7159, + "step": 6226 + }, + { + "epoch": 0.51, + "grad_norm": 4.0870859928610015, + "learning_rate": 5.097953389567498e-06, + "loss": 0.8153, + "step": 6227 + }, + { + "epoch": 0.51, + "grad_norm": 4.029878114306394, + "learning_rate": 5.096629861064582e-06, + "loss": 0.9994, + "step": 6228 + }, + { + "epoch": 0.51, + "grad_norm": 3.951793258420478, + "learning_rate": 5.095306325788335e-06, + "loss": 0.7461, + "step": 6229 + }, + { + "epoch": 0.51, + "grad_norm": 5.776917444102281, + "learning_rate": 5.093982783831528e-06, + "loss": 1.5689, + "step": 6230 + }, + { + "epoch": 0.51, + "grad_norm": 2.812739997250496, + "learning_rate": 5.092659235286938e-06, + "loss": 0.5859, + "step": 6231 + }, + { + "epoch": 0.51, + "grad_norm": 3.7644079508415316, + "learning_rate": 5.091335680247339e-06, + "loss": 0.6422, + "step": 6232 + }, + { + "epoch": 0.51, + "grad_norm": 0.9220303257829688, + "learning_rate": 5.090012118805505e-06, + "loss": 0.1435, + "step": 6233 + }, + { + "epoch": 0.51, + "grad_norm": 2.62449968094465, + "learning_rate": 5.088688551054214e-06, + "loss": 0.5911, + "step": 6234 + }, + { + "epoch": 0.51, + "grad_norm": 3.742055006600364, + "learning_rate": 5.0873649770862425e-06, + "loss": 0.7453, + "step": 6235 + }, + { + "epoch": 0.51, + "grad_norm": 3.9706011107453003, + "learning_rate": 5.086041396994365e-06, + "loss": 0.9127, + "step": 6236 + }, + { + "epoch": 0.51, + "grad_norm": 3.6868597009845514, + "learning_rate": 5.08471781087136e-06, + "loss": 0.8972, + "step": 6237 + }, + { + "epoch": 0.51, + "grad_norm": 3.5694862211533263, + "learning_rate": 5.083394218810006e-06, + "loss": 0.6879, + "step": 6238 + }, + { + "epoch": 0.51, + "grad_norm": 2.4643485370094247, + "learning_rate": 5.082070620903079e-06, + "loss": 0.5668, + "step": 6239 + }, + { + "epoch": 0.51, + "grad_norm": 5.66966876203938, + "learning_rate": 5.0807470172433594e-06, + "loss": 1.0946, + "step": 6240 + }, + { + "epoch": 0.51, + "grad_norm": 4.886788766329076, + "learning_rate": 5.079423407923625e-06, + "loss": 0.9192, + "step": 6241 + }, + { + "epoch": 0.51, + "grad_norm": 2.5662657650469645, + "learning_rate": 5.078099793036656e-06, + "loss": 0.5687, + "step": 6242 + }, + { + "epoch": 0.51, + "grad_norm": 2.2423877659804696, + "learning_rate": 5.076776172675232e-06, + "loss": 0.284, + "step": 6243 + }, + { + "epoch": 0.51, + "grad_norm": 3.700604243524799, + "learning_rate": 5.0754525469321305e-06, + "loss": 0.6439, + "step": 6244 + }, + { + "epoch": 0.51, + "grad_norm": 3.5641119785064723, + "learning_rate": 5.074128915900134e-06, + "loss": 1.022, + "step": 6245 + }, + { + "epoch": 0.51, + "grad_norm": 3.13662079686435, + "learning_rate": 5.072805279672025e-06, + "loss": 0.6375, + "step": 6246 + }, + { + "epoch": 0.51, + "grad_norm": 2.752477270526784, + "learning_rate": 5.071481638340581e-06, + "loss": 0.5996, + "step": 6247 + }, + { + "epoch": 0.51, + "grad_norm": 4.72802022968144, + "learning_rate": 5.070157991998586e-06, + "loss": 0.9262, + "step": 6248 + }, + { + "epoch": 0.51, + "grad_norm": 3.94412339728263, + "learning_rate": 5.06883434073882e-06, + "loss": 0.9916, + "step": 6249 + }, + { + "epoch": 0.51, + "grad_norm": 2.737079520523553, + "learning_rate": 5.067510684654069e-06, + "loss": 0.459, + "step": 6250 + }, + { + "epoch": 0.51, + "grad_norm": 2.5092207035448744, + "learning_rate": 5.06618702383711e-06, + "loss": 0.5877, + "step": 6251 + }, + { + "epoch": 0.51, + "grad_norm": 3.1853215059093363, + "learning_rate": 5.06486335838073e-06, + "loss": 0.6728, + "step": 6252 + }, + { + "epoch": 0.51, + "grad_norm": 4.408146409003843, + "learning_rate": 5.06353968837771e-06, + "loss": 0.8563, + "step": 6253 + }, + { + "epoch": 0.51, + "grad_norm": 3.7415881893911864, + "learning_rate": 5.062216013920836e-06, + "loss": 0.6281, + "step": 6254 + }, + { + "epoch": 0.51, + "grad_norm": 5.423419492453897, + "learning_rate": 5.060892335102888e-06, + "loss": 1.5519, + "step": 6255 + }, + { + "epoch": 0.51, + "grad_norm": 2.1369108485311883, + "learning_rate": 5.0595686520166535e-06, + "loss": 0.4109, + "step": 6256 + }, + { + "epoch": 0.51, + "grad_norm": 2.6276554918514528, + "learning_rate": 5.058244964754916e-06, + "loss": 0.4997, + "step": 6257 + }, + { + "epoch": 0.51, + "grad_norm": 4.678298545740144, + "learning_rate": 5.056921273410459e-06, + "loss": 1.0561, + "step": 6258 + }, + { + "epoch": 0.51, + "grad_norm": 5.04000145001499, + "learning_rate": 5.05559757807607e-06, + "loss": 0.6706, + "step": 6259 + }, + { + "epoch": 0.51, + "grad_norm": 4.713966562253011, + "learning_rate": 5.054273878844532e-06, + "loss": 0.8137, + "step": 6260 + }, + { + "epoch": 0.51, + "grad_norm": 1.8093419343800716, + "learning_rate": 5.052950175808631e-06, + "loss": 0.217, + "step": 6261 + }, + { + "epoch": 0.51, + "grad_norm": 8.562365238511848, + "learning_rate": 5.051626469061153e-06, + "loss": 0.5402, + "step": 6262 + }, + { + "epoch": 0.51, + "grad_norm": 5.0469827554424525, + "learning_rate": 5.050302758694885e-06, + "loss": 1.1309, + "step": 6263 + }, + { + "epoch": 0.51, + "grad_norm": 4.855241797223289, + "learning_rate": 5.048979044802611e-06, + "loss": 1.0927, + "step": 6264 + }, + { + "epoch": 0.51, + "grad_norm": 3.7073773922637976, + "learning_rate": 5.047655327477119e-06, + "loss": 0.8027, + "step": 6265 + }, + { + "epoch": 0.51, + "grad_norm": 3.4058276992984235, + "learning_rate": 5.0463316068111975e-06, + "loss": 0.8036, + "step": 6266 + }, + { + "epoch": 0.51, + "grad_norm": 4.921621492746431, + "learning_rate": 5.0450078828976326e-06, + "loss": 1.1416, + "step": 6267 + }, + { + "epoch": 0.51, + "grad_norm": 2.8577221966367548, + "learning_rate": 5.0436841558292096e-06, + "loss": 0.4846, + "step": 6268 + }, + { + "epoch": 0.51, + "grad_norm": 4.471053589326682, + "learning_rate": 5.042360425698718e-06, + "loss": 0.9808, + "step": 6269 + }, + { + "epoch": 0.51, + "grad_norm": 6.235702521038686, + "learning_rate": 5.041036692598944e-06, + "loss": 1.4737, + "step": 6270 + }, + { + "epoch": 0.51, + "grad_norm": 4.378368906345673, + "learning_rate": 5.039712956622678e-06, + "loss": 1.4435, + "step": 6271 + }, + { + "epoch": 0.51, + "grad_norm": 4.277791036639248, + "learning_rate": 5.038389217862705e-06, + "loss": 1.0241, + "step": 6272 + }, + { + "epoch": 0.51, + "grad_norm": 3.796578394912698, + "learning_rate": 5.037065476411816e-06, + "loss": 0.8263, + "step": 6273 + }, + { + "epoch": 0.51, + "grad_norm": 3.3092776679300604, + "learning_rate": 5.035741732362798e-06, + "loss": 0.8006, + "step": 6274 + }, + { + "epoch": 0.51, + "grad_norm": 4.584746533139775, + "learning_rate": 5.0344179858084395e-06, + "loss": 1.0898, + "step": 6275 + }, + { + "epoch": 0.51, + "grad_norm": 4.850175703684064, + "learning_rate": 5.033094236841531e-06, + "loss": 1.0502, + "step": 6276 + }, + { + "epoch": 0.51, + "grad_norm": 3.6000543964963483, + "learning_rate": 5.03177048555486e-06, + "loss": 0.5403, + "step": 6277 + }, + { + "epoch": 0.51, + "grad_norm": 1.8518070760604197, + "learning_rate": 5.030446732041216e-06, + "loss": 0.6082, + "step": 6278 + }, + { + "epoch": 0.51, + "grad_norm": 1.5225956717460523, + "learning_rate": 5.029122976393388e-06, + "loss": 0.2299, + "step": 6279 + }, + { + "epoch": 0.51, + "grad_norm": 3.0498073241758967, + "learning_rate": 5.027799218704168e-06, + "loss": 0.6301, + "step": 6280 + }, + { + "epoch": 0.51, + "grad_norm": 4.102132833210754, + "learning_rate": 5.026475459066342e-06, + "loss": 0.5903, + "step": 6281 + }, + { + "epoch": 0.51, + "grad_norm": 3.6469790492142407, + "learning_rate": 5.025151697572703e-06, + "loss": 0.8519, + "step": 6282 + }, + { + "epoch": 0.51, + "grad_norm": 4.220063325470204, + "learning_rate": 5.023827934316039e-06, + "loss": 0.9139, + "step": 6283 + }, + { + "epoch": 0.51, + "grad_norm": 1.1852192562946298, + "learning_rate": 5.02250416938914e-06, + "loss": 0.2314, + "step": 6284 + }, + { + "epoch": 0.51, + "grad_norm": 2.9635533768960713, + "learning_rate": 5.021180402884796e-06, + "loss": 0.386, + "step": 6285 + }, + { + "epoch": 0.51, + "grad_norm": 2.860632087202993, + "learning_rate": 5.0198566348958e-06, + "loss": 0.4964, + "step": 6286 + }, + { + "epoch": 0.51, + "grad_norm": 3.56821484008037, + "learning_rate": 5.018532865514938e-06, + "loss": 0.7766, + "step": 6287 + }, + { + "epoch": 0.51, + "grad_norm": 3.670637243869879, + "learning_rate": 5.0172090948350036e-06, + "loss": 0.7336, + "step": 6288 + }, + { + "epoch": 0.51, + "grad_norm": 4.17472118045534, + "learning_rate": 5.015885322948787e-06, + "loss": 1.0167, + "step": 6289 + }, + { + "epoch": 0.51, + "grad_norm": 2.121084537108153, + "learning_rate": 5.01456154994908e-06, + "loss": 0.4099, + "step": 6290 + }, + { + "epoch": 0.51, + "grad_norm": 4.323932905163277, + "learning_rate": 5.01323777592867e-06, + "loss": 0.7663, + "step": 6291 + }, + { + "epoch": 0.51, + "grad_norm": 3.36491687728099, + "learning_rate": 5.011914000980349e-06, + "loss": 0.6422, + "step": 6292 + }, + { + "epoch": 0.51, + "grad_norm": 3.666396995504404, + "learning_rate": 5.0105902251969084e-06, + "loss": 0.904, + "step": 6293 + }, + { + "epoch": 0.51, + "grad_norm": 3.3936856557765145, + "learning_rate": 5.00926644867114e-06, + "loss": 0.9992, + "step": 6294 + }, + { + "epoch": 0.51, + "grad_norm": 4.7203280943851595, + "learning_rate": 5.007942671495832e-06, + "loss": 1.2324, + "step": 6295 + }, + { + "epoch": 0.51, + "grad_norm": 3.3110721080028003, + "learning_rate": 5.006618893763779e-06, + "loss": 0.6198, + "step": 6296 + }, + { + "epoch": 0.51, + "grad_norm": 3.771613517384583, + "learning_rate": 5.005295115567771e-06, + "loss": 0.8101, + "step": 6297 + }, + { + "epoch": 0.51, + "grad_norm": 3.2345002368899296, + "learning_rate": 5.003971337000597e-06, + "loss": 0.4533, + "step": 6298 + }, + { + "epoch": 0.51, + "grad_norm": 4.523863241460051, + "learning_rate": 5.00264755815505e-06, + "loss": 0.8525, + "step": 6299 + }, + { + "epoch": 0.51, + "grad_norm": 2.7989059343538045, + "learning_rate": 5.001323779123921e-06, + "loss": 0.5445, + "step": 6300 + }, + { + "epoch": 0.52, + "grad_norm": 4.8013321983918775, + "learning_rate": 5e-06, + "loss": 0.8655, + "step": 6301 + }, + { + "epoch": 0.52, + "grad_norm": 4.150377577679675, + "learning_rate": 4.99867622087608e-06, + "loss": 0.8885, + "step": 6302 + }, + { + "epoch": 0.52, + "grad_norm": 4.902739896702521, + "learning_rate": 4.99735244184495e-06, + "loss": 0.8761, + "step": 6303 + }, + { + "epoch": 0.52, + "grad_norm": 3.774687840814019, + "learning_rate": 4.996028662999405e-06, + "loss": 0.5963, + "step": 6304 + }, + { + "epoch": 0.52, + "grad_norm": 4.410054218319989, + "learning_rate": 4.994704884432231e-06, + "loss": 1.0872, + "step": 6305 + }, + { + "epoch": 0.52, + "grad_norm": 1.0244463177855765, + "learning_rate": 4.9933811062362224e-06, + "loss": 0.1778, + "step": 6306 + }, + { + "epoch": 0.52, + "grad_norm": 4.6620882328925575, + "learning_rate": 4.992057328504169e-06, + "loss": 0.884, + "step": 6307 + }, + { + "epoch": 0.52, + "grad_norm": 3.403301419532233, + "learning_rate": 4.990733551328862e-06, + "loss": 0.5739, + "step": 6308 + }, + { + "epoch": 0.52, + "grad_norm": 3.9845782164457884, + "learning_rate": 4.989409774803092e-06, + "loss": 0.7148, + "step": 6309 + }, + { + "epoch": 0.52, + "grad_norm": 3.2284050123991626, + "learning_rate": 4.988085999019654e-06, + "loss": 0.5754, + "step": 6310 + }, + { + "epoch": 0.52, + "grad_norm": 2.799427569150806, + "learning_rate": 4.9867622240713325e-06, + "loss": 0.585, + "step": 6311 + }, + { + "epoch": 0.52, + "grad_norm": 4.501545703443735, + "learning_rate": 4.985438450050922e-06, + "loss": 0.901, + "step": 6312 + }, + { + "epoch": 0.52, + "grad_norm": 3.6819769592865965, + "learning_rate": 4.984114677051214e-06, + "loss": 0.7882, + "step": 6313 + }, + { + "epoch": 0.52, + "grad_norm": 3.440819132850262, + "learning_rate": 4.982790905164997e-06, + "loss": 0.7031, + "step": 6314 + }, + { + "epoch": 0.52, + "grad_norm": 3.6972404685211484, + "learning_rate": 4.981467134485062e-06, + "loss": 1.0036, + "step": 6315 + }, + { + "epoch": 0.52, + "grad_norm": 2.9079783686070577, + "learning_rate": 4.980143365104203e-06, + "loss": 0.5343, + "step": 6316 + }, + { + "epoch": 0.52, + "grad_norm": 3.982005751028777, + "learning_rate": 4.978819597115205e-06, + "loss": 0.4965, + "step": 6317 + }, + { + "epoch": 0.52, + "grad_norm": 2.893228389137094, + "learning_rate": 4.977495830610862e-06, + "loss": 0.4195, + "step": 6318 + }, + { + "epoch": 0.52, + "grad_norm": 2.552228796695302, + "learning_rate": 4.976172065683963e-06, + "loss": 0.7378, + "step": 6319 + }, + { + "epoch": 0.52, + "grad_norm": 3.264345436792313, + "learning_rate": 4.974848302427299e-06, + "loss": 0.8076, + "step": 6320 + }, + { + "epoch": 0.52, + "grad_norm": 2.7932063119244797, + "learning_rate": 4.9735245409336586e-06, + "loss": 0.5244, + "step": 6321 + }, + { + "epoch": 0.52, + "grad_norm": 5.227818897280464, + "learning_rate": 4.972200781295835e-06, + "loss": 1.6158, + "step": 6322 + }, + { + "epoch": 0.52, + "grad_norm": 3.2963786876135655, + "learning_rate": 4.970877023606613e-06, + "loss": 0.9132, + "step": 6323 + }, + { + "epoch": 0.52, + "grad_norm": 5.589089310372882, + "learning_rate": 4.969553267958785e-06, + "loss": 1.3736, + "step": 6324 + }, + { + "epoch": 0.52, + "grad_norm": 3.6672605443108566, + "learning_rate": 4.9682295144451415e-06, + "loss": 1.0031, + "step": 6325 + }, + { + "epoch": 0.52, + "grad_norm": 2.800499484512619, + "learning_rate": 4.96690576315847e-06, + "loss": 0.59, + "step": 6326 + }, + { + "epoch": 0.52, + "grad_norm": 3.3517919780931447, + "learning_rate": 4.965582014191562e-06, + "loss": 0.5405, + "step": 6327 + }, + { + "epoch": 0.52, + "grad_norm": 3.1593004177129544, + "learning_rate": 4.964258267637204e-06, + "loss": 0.4237, + "step": 6328 + }, + { + "epoch": 0.52, + "grad_norm": 2.1968301978355282, + "learning_rate": 4.962934523588187e-06, + "loss": 0.3499, + "step": 6329 + }, + { + "epoch": 0.52, + "grad_norm": 4.644271439604117, + "learning_rate": 4.961610782137297e-06, + "loss": 0.8678, + "step": 6330 + }, + { + "epoch": 0.52, + "grad_norm": 5.526329531410269, + "learning_rate": 4.960287043377324e-06, + "loss": 0.9463, + "step": 6331 + }, + { + "epoch": 0.52, + "grad_norm": 4.135163169342016, + "learning_rate": 4.958963307401056e-06, + "loss": 1.0386, + "step": 6332 + }, + { + "epoch": 0.52, + "grad_norm": 3.074959112540754, + "learning_rate": 4.957639574301285e-06, + "loss": 0.588, + "step": 6333 + }, + { + "epoch": 0.52, + "grad_norm": 3.475894793094254, + "learning_rate": 4.956315844170792e-06, + "loss": 0.4042, + "step": 6334 + }, + { + "epoch": 0.52, + "grad_norm": 3.1667108394781, + "learning_rate": 4.954992117102369e-06, + "loss": 0.6566, + "step": 6335 + }, + { + "epoch": 0.52, + "grad_norm": 2.6318038286029357, + "learning_rate": 4.953668393188803e-06, + "loss": 0.6653, + "step": 6336 + }, + { + "epoch": 0.52, + "grad_norm": 3.5598441681408564, + "learning_rate": 4.9523446725228805e-06, + "loss": 0.9093, + "step": 6337 + }, + { + "epoch": 0.52, + "grad_norm": 4.312639300371545, + "learning_rate": 4.95102095519739e-06, + "loss": 0.6447, + "step": 6338 + }, + { + "epoch": 0.52, + "grad_norm": 3.2869108321149354, + "learning_rate": 4.949697241305118e-06, + "loss": 0.5326, + "step": 6339 + }, + { + "epoch": 0.52, + "grad_norm": 3.2398638722023074, + "learning_rate": 4.948373530938849e-06, + "loss": 0.5487, + "step": 6340 + }, + { + "epoch": 0.52, + "grad_norm": 2.6746133544438657, + "learning_rate": 4.947049824191371e-06, + "loss": 0.3259, + "step": 6341 + }, + { + "epoch": 0.52, + "grad_norm": 5.023439343154598, + "learning_rate": 4.945726121155469e-06, + "loss": 0.7726, + "step": 6342 + }, + { + "epoch": 0.52, + "grad_norm": 3.423088694007279, + "learning_rate": 4.9444024219239315e-06, + "loss": 0.599, + "step": 6343 + }, + { + "epoch": 0.52, + "grad_norm": 4.657343174210334, + "learning_rate": 4.943078726589543e-06, + "loss": 0.8292, + "step": 6344 + }, + { + "epoch": 0.52, + "grad_norm": 4.069730396690454, + "learning_rate": 4.941755035245087e-06, + "loss": 0.7747, + "step": 6345 + }, + { + "epoch": 0.52, + "grad_norm": 3.907570960554669, + "learning_rate": 4.940431347983348e-06, + "loss": 0.5811, + "step": 6346 + }, + { + "epoch": 0.52, + "grad_norm": 3.9121204580647655, + "learning_rate": 4.9391076648971135e-06, + "loss": 0.8157, + "step": 6347 + }, + { + "epoch": 0.52, + "grad_norm": 4.920499319357153, + "learning_rate": 4.937783986079165e-06, + "loss": 0.8814, + "step": 6348 + }, + { + "epoch": 0.52, + "grad_norm": 4.867359978831976, + "learning_rate": 4.93646031162229e-06, + "loss": 1.0379, + "step": 6349 + }, + { + "epoch": 0.52, + "grad_norm": 4.988426299490411, + "learning_rate": 4.935136641619272e-06, + "loss": 1.4079, + "step": 6350 + }, + { + "epoch": 0.52, + "grad_norm": 2.295221225986864, + "learning_rate": 4.933812976162892e-06, + "loss": 0.4422, + "step": 6351 + }, + { + "epoch": 0.52, + "grad_norm": 2.7696669405353247, + "learning_rate": 4.932489315345933e-06, + "loss": 0.6909, + "step": 6352 + }, + { + "epoch": 0.52, + "grad_norm": 3.1369585460443123, + "learning_rate": 4.9311656592611804e-06, + "loss": 0.645, + "step": 6353 + }, + { + "epoch": 0.52, + "grad_norm": 2.3681224044111864, + "learning_rate": 4.929842008001415e-06, + "loss": 0.4803, + "step": 6354 + }, + { + "epoch": 0.52, + "grad_norm": 4.98677924723652, + "learning_rate": 4.92851836165942e-06, + "loss": 1.2134, + "step": 6355 + }, + { + "epoch": 0.52, + "grad_norm": 2.4940831075783816, + "learning_rate": 4.927194720327978e-06, + "loss": 0.6613, + "step": 6356 + }, + { + "epoch": 0.52, + "grad_norm": 2.3360524861854586, + "learning_rate": 4.925871084099867e-06, + "loss": 0.5912, + "step": 6357 + }, + { + "epoch": 0.52, + "grad_norm": 3.13829777093247, + "learning_rate": 4.924547453067871e-06, + "loss": 0.5976, + "step": 6358 + }, + { + "epoch": 0.52, + "grad_norm": 3.864907012819576, + "learning_rate": 4.923223827324769e-06, + "loss": 0.7684, + "step": 6359 + }, + { + "epoch": 0.52, + "grad_norm": 3.654007313074135, + "learning_rate": 4.921900206963345e-06, + "loss": 1.1836, + "step": 6360 + }, + { + "epoch": 0.52, + "grad_norm": 2.7205381939259325, + "learning_rate": 4.920576592076375e-06, + "loss": 0.4543, + "step": 6361 + }, + { + "epoch": 0.52, + "grad_norm": 4.207080349387025, + "learning_rate": 4.919252982756643e-06, + "loss": 1.1061, + "step": 6362 + }, + { + "epoch": 0.52, + "grad_norm": 4.497492926051889, + "learning_rate": 4.9179293790969225e-06, + "loss": 1.2731, + "step": 6363 + }, + { + "epoch": 0.52, + "grad_norm": 3.9592866021194957, + "learning_rate": 4.916605781189996e-06, + "loss": 0.7913, + "step": 6364 + }, + { + "epoch": 0.52, + "grad_norm": 1.789903155601039, + "learning_rate": 4.9152821891286404e-06, + "loss": 0.3982, + "step": 6365 + }, + { + "epoch": 0.52, + "grad_norm": 3.6060905109286803, + "learning_rate": 4.913958603005636e-06, + "loss": 0.6695, + "step": 6366 + }, + { + "epoch": 0.52, + "grad_norm": 2.7958090281529833, + "learning_rate": 4.91263502291376e-06, + "loss": 0.5765, + "step": 6367 + }, + { + "epoch": 0.52, + "grad_norm": 3.5029185894999735, + "learning_rate": 4.911311448945787e-06, + "loss": 0.9681, + "step": 6368 + }, + { + "epoch": 0.52, + "grad_norm": 4.238696692533975, + "learning_rate": 4.9099878811944965e-06, + "loss": 0.7806, + "step": 6369 + }, + { + "epoch": 0.52, + "grad_norm": 3.8919735811733625, + "learning_rate": 4.908664319752663e-06, + "loss": 0.9318, + "step": 6370 + }, + { + "epoch": 0.52, + "grad_norm": 3.736754873965216, + "learning_rate": 4.9073407647130625e-06, + "loss": 0.6905, + "step": 6371 + }, + { + "epoch": 0.52, + "grad_norm": 4.136277462289465, + "learning_rate": 4.906017216168471e-06, + "loss": 1.0667, + "step": 6372 + }, + { + "epoch": 0.52, + "grad_norm": 2.2902996833226683, + "learning_rate": 4.904693674211667e-06, + "loss": 0.6025, + "step": 6373 + }, + { + "epoch": 0.52, + "grad_norm": 3.8055175213525025, + "learning_rate": 4.9033701389354185e-06, + "loss": 0.7545, + "step": 6374 + }, + { + "epoch": 0.52, + "grad_norm": 1.9379557571515578, + "learning_rate": 4.902046610432504e-06, + "loss": 0.3936, + "step": 6375 + }, + { + "epoch": 0.52, + "grad_norm": 5.6127646401142055, + "learning_rate": 4.900723088795695e-06, + "loss": 0.9573, + "step": 6376 + }, + { + "epoch": 0.52, + "grad_norm": 3.634383651385539, + "learning_rate": 4.899399574117766e-06, + "loss": 0.928, + "step": 6377 + }, + { + "epoch": 0.52, + "grad_norm": 4.3841366044644765, + "learning_rate": 4.898076066491488e-06, + "loss": 0.8414, + "step": 6378 + }, + { + "epoch": 0.52, + "grad_norm": 2.8380715593656936, + "learning_rate": 4.896752566009637e-06, + "loss": 0.6047, + "step": 6379 + }, + { + "epoch": 0.52, + "grad_norm": 2.3478182534713485, + "learning_rate": 4.8954290727649785e-06, + "loss": 0.3525, + "step": 6380 + }, + { + "epoch": 0.52, + "grad_norm": 3.2704191305723413, + "learning_rate": 4.894105586850288e-06, + "loss": 0.5744, + "step": 6381 + }, + { + "epoch": 0.52, + "grad_norm": 5.296254206143929, + "learning_rate": 4.892782108358335e-06, + "loss": 0.9147, + "step": 6382 + }, + { + "epoch": 0.52, + "grad_norm": 3.577524099642258, + "learning_rate": 4.891458637381891e-06, + "loss": 0.6596, + "step": 6383 + }, + { + "epoch": 0.52, + "grad_norm": 3.5392351497191736, + "learning_rate": 4.8901351740137235e-06, + "loss": 0.7526, + "step": 6384 + }, + { + "epoch": 0.52, + "grad_norm": 3.0395114611346887, + "learning_rate": 4.888811718346602e-06, + "loss": 0.7119, + "step": 6385 + }, + { + "epoch": 0.52, + "grad_norm": 2.111179056014712, + "learning_rate": 4.887488270473294e-06, + "loss": 0.6011, + "step": 6386 + }, + { + "epoch": 0.52, + "grad_norm": 3.5648992088187637, + "learning_rate": 4.886164830486569e-06, + "loss": 0.5581, + "step": 6387 + }, + { + "epoch": 0.52, + "grad_norm": 4.3273150749747025, + "learning_rate": 4.8848413984791935e-06, + "loss": 0.8686, + "step": 6388 + }, + { + "epoch": 0.52, + "grad_norm": 4.942505465751809, + "learning_rate": 4.883517974543935e-06, + "loss": 0.9702, + "step": 6389 + }, + { + "epoch": 0.52, + "grad_norm": 3.8595179418584245, + "learning_rate": 4.882194558773562e-06, + "loss": 0.9561, + "step": 6390 + }, + { + "epoch": 0.52, + "grad_norm": 4.280663704527302, + "learning_rate": 4.8808711512608355e-06, + "loss": 0.8193, + "step": 6391 + }, + { + "epoch": 0.52, + "grad_norm": 3.2210982354831654, + "learning_rate": 4.879547752098524e-06, + "loss": 0.5106, + "step": 6392 + }, + { + "epoch": 0.52, + "grad_norm": 4.337642173676028, + "learning_rate": 4.878224361379389e-06, + "loss": 1.0615, + "step": 6393 + }, + { + "epoch": 0.52, + "grad_norm": 4.14583018619186, + "learning_rate": 4.8769009791961975e-06, + "loss": 0.7129, + "step": 6394 + }, + { + "epoch": 0.52, + "grad_norm": 2.1424088613650816, + "learning_rate": 4.875577605641711e-06, + "loss": 0.3761, + "step": 6395 + }, + { + "epoch": 0.52, + "grad_norm": 3.8310450197505426, + "learning_rate": 4.8742542408086955e-06, + "loss": 0.8652, + "step": 6396 + }, + { + "epoch": 0.52, + "grad_norm": 3.2018217096329407, + "learning_rate": 4.8729308847899075e-06, + "loss": 0.5413, + "step": 6397 + }, + { + "epoch": 0.52, + "grad_norm": 3.362855687172706, + "learning_rate": 4.8716075376781115e-06, + "loss": 0.5127, + "step": 6398 + }, + { + "epoch": 0.52, + "grad_norm": 2.967190852418591, + "learning_rate": 4.87028419956607e-06, + "loss": 0.5075, + "step": 6399 + }, + { + "epoch": 0.52, + "grad_norm": 3.1975202187908796, + "learning_rate": 4.86896087054654e-06, + "loss": 0.5307, + "step": 6400 + }, + { + "epoch": 0.52, + "grad_norm": 5.1581629182550195, + "learning_rate": 4.867637550712283e-06, + "loss": 0.9677, + "step": 6401 + }, + { + "epoch": 0.52, + "grad_norm": 3.6676487285144606, + "learning_rate": 4.86631424015606e-06, + "loss": 0.6706, + "step": 6402 + }, + { + "epoch": 0.52, + "grad_norm": 3.6524160507678043, + "learning_rate": 4.864990938970624e-06, + "loss": 0.6049, + "step": 6403 + }, + { + "epoch": 0.52, + "grad_norm": 4.650624751612126, + "learning_rate": 4.863667647248737e-06, + "loss": 1.1215, + "step": 6404 + }, + { + "epoch": 0.52, + "grad_norm": 4.530953242466552, + "learning_rate": 4.862344365083154e-06, + "loss": 0.9914, + "step": 6405 + }, + { + "epoch": 0.52, + "grad_norm": 3.1659566501334755, + "learning_rate": 4.861021092566633e-06, + "loss": 0.5622, + "step": 6406 + }, + { + "epoch": 0.52, + "grad_norm": 4.536544577037245, + "learning_rate": 4.859697829791927e-06, + "loss": 1.2066, + "step": 6407 + }, + { + "epoch": 0.52, + "grad_norm": 4.33958469940971, + "learning_rate": 4.858374576851795e-06, + "loss": 1.2222, + "step": 6408 + }, + { + "epoch": 0.52, + "grad_norm": 3.381614360695345, + "learning_rate": 4.857051333838987e-06, + "loss": 0.65, + "step": 6409 + }, + { + "epoch": 0.52, + "grad_norm": 3.2292333130820845, + "learning_rate": 4.855728100846258e-06, + "loss": 0.6844, + "step": 6410 + }, + { + "epoch": 0.52, + "grad_norm": 4.292140738399272, + "learning_rate": 4.854404877966361e-06, + "loss": 0.9661, + "step": 6411 + }, + { + "epoch": 0.52, + "grad_norm": 2.2999953285623556, + "learning_rate": 4.8530816652920485e-06, + "loss": 0.5868, + "step": 6412 + }, + { + "epoch": 0.52, + "grad_norm": 3.0015489970748885, + "learning_rate": 4.851758462916075e-06, + "loss": 0.7151, + "step": 6413 + }, + { + "epoch": 0.52, + "grad_norm": 0.9300620487820964, + "learning_rate": 4.850435270931184e-06, + "loss": 0.1667, + "step": 6414 + }, + { + "epoch": 0.52, + "grad_norm": 2.440839701309359, + "learning_rate": 4.849112089430133e-06, + "loss": 0.3568, + "step": 6415 + }, + { + "epoch": 0.52, + "grad_norm": 3.295708322856452, + "learning_rate": 4.847788918505665e-06, + "loss": 0.8336, + "step": 6416 + }, + { + "epoch": 0.52, + "grad_norm": 2.7894794538723233, + "learning_rate": 4.846465758250532e-06, + "loss": 0.8585, + "step": 6417 + }, + { + "epoch": 0.52, + "grad_norm": 1.0371011365815022, + "learning_rate": 4.845142608757481e-06, + "loss": 0.1409, + "step": 6418 + }, + { + "epoch": 0.52, + "grad_norm": 4.118848364575265, + "learning_rate": 4.843819470119262e-06, + "loss": 0.608, + "step": 6419 + }, + { + "epoch": 0.52, + "grad_norm": 2.2868040020579445, + "learning_rate": 4.842496342428616e-06, + "loss": 0.471, + "step": 6420 + }, + { + "epoch": 0.52, + "grad_norm": 4.479520859133651, + "learning_rate": 4.84117322577829e-06, + "loss": 0.8966, + "step": 6421 + }, + { + "epoch": 0.52, + "grad_norm": 4.594747610138365, + "learning_rate": 4.839850120261032e-06, + "loss": 1.0153, + "step": 6422 + }, + { + "epoch": 0.52, + "grad_norm": 3.33839452827434, + "learning_rate": 4.838527025969582e-06, + "loss": 0.8278, + "step": 6423 + }, + { + "epoch": 0.53, + "grad_norm": 3.867393160179802, + "learning_rate": 4.837203942996687e-06, + "loss": 0.7852, + "step": 6424 + }, + { + "epoch": 0.53, + "grad_norm": 3.9408893881153704, + "learning_rate": 4.8358808714350856e-06, + "loss": 0.3435, + "step": 6425 + }, + { + "epoch": 0.53, + "grad_norm": 2.157718397068329, + "learning_rate": 4.834557811377519e-06, + "loss": 0.3427, + "step": 6426 + }, + { + "epoch": 0.53, + "grad_norm": 3.9450794345286235, + "learning_rate": 4.833234762916731e-06, + "loss": 0.8995, + "step": 6427 + }, + { + "epoch": 0.53, + "grad_norm": 4.727524322944186, + "learning_rate": 4.831911726145461e-06, + "loss": 1.1968, + "step": 6428 + }, + { + "epoch": 0.53, + "grad_norm": 2.605545754356394, + "learning_rate": 4.830588701156448e-06, + "loss": 0.5252, + "step": 6429 + }, + { + "epoch": 0.53, + "grad_norm": 3.202165484465828, + "learning_rate": 4.829265688042429e-06, + "loss": 0.7731, + "step": 6430 + }, + { + "epoch": 0.53, + "grad_norm": 5.30692738832285, + "learning_rate": 4.827942686896143e-06, + "loss": 0.9813, + "step": 6431 + }, + { + "epoch": 0.53, + "grad_norm": 3.3485257582972343, + "learning_rate": 4.8266196978103245e-06, + "loss": 0.5746, + "step": 6432 + }, + { + "epoch": 0.53, + "grad_norm": 1.9948209222241482, + "learning_rate": 4.825296720877711e-06, + "loss": 0.4288, + "step": 6433 + }, + { + "epoch": 0.53, + "grad_norm": 3.359141276541377, + "learning_rate": 4.823973756191037e-06, + "loss": 0.4782, + "step": 6434 + }, + { + "epoch": 0.53, + "grad_norm": 3.3460161213266657, + "learning_rate": 4.822650803843037e-06, + "loss": 0.8516, + "step": 6435 + }, + { + "epoch": 0.53, + "grad_norm": 3.9047246603491295, + "learning_rate": 4.821327863926445e-06, + "loss": 0.8164, + "step": 6436 + }, + { + "epoch": 0.53, + "grad_norm": 4.389933865662566, + "learning_rate": 4.8200049365339905e-06, + "loss": 0.9688, + "step": 6437 + }, + { + "epoch": 0.53, + "grad_norm": 3.90959305816749, + "learning_rate": 4.818682021758407e-06, + "loss": 1.114, + "step": 6438 + }, + { + "epoch": 0.53, + "grad_norm": 4.746758804316483, + "learning_rate": 4.817359119692424e-06, + "loss": 1.0432, + "step": 6439 + }, + { + "epoch": 0.53, + "grad_norm": 2.4099016585770023, + "learning_rate": 4.816036230428773e-06, + "loss": 0.32, + "step": 6440 + }, + { + "epoch": 0.53, + "grad_norm": 4.881278816114833, + "learning_rate": 4.814713354060181e-06, + "loss": 1.0872, + "step": 6441 + }, + { + "epoch": 0.53, + "grad_norm": 4.622871480181095, + "learning_rate": 4.8133904906793776e-06, + "loss": 0.9624, + "step": 6442 + }, + { + "epoch": 0.53, + "grad_norm": 5.9472587561883765, + "learning_rate": 4.8120676403790875e-06, + "loss": 1.0216, + "step": 6443 + }, + { + "epoch": 0.53, + "grad_norm": 3.361458801690792, + "learning_rate": 4.8107448032520376e-06, + "loss": 0.9053, + "step": 6444 + }, + { + "epoch": 0.53, + "grad_norm": 4.2503964795396, + "learning_rate": 4.809421979390954e-06, + "loss": 0.943, + "step": 6445 + }, + { + "epoch": 0.53, + "grad_norm": 2.839225942265717, + "learning_rate": 4.8080991688885606e-06, + "loss": 0.6044, + "step": 6446 + }, + { + "epoch": 0.53, + "grad_norm": 2.701583715591647, + "learning_rate": 4.806776371837581e-06, + "loss": 0.4485, + "step": 6447 + }, + { + "epoch": 0.53, + "grad_norm": 2.4206292275300285, + "learning_rate": 4.805453588330735e-06, + "loss": 0.366, + "step": 6448 + }, + { + "epoch": 0.53, + "grad_norm": 3.9656245324891324, + "learning_rate": 4.804130818460746e-06, + "loss": 0.8929, + "step": 6449 + }, + { + "epoch": 0.53, + "grad_norm": 1.2925316239131028, + "learning_rate": 4.802808062320334e-06, + "loss": 0.181, + "step": 6450 + }, + { + "epoch": 0.53, + "grad_norm": 3.1121094496228907, + "learning_rate": 4.801485320002219e-06, + "loss": 0.9133, + "step": 6451 + }, + { + "epoch": 0.53, + "grad_norm": 4.748963455706853, + "learning_rate": 4.8001625915991205e-06, + "loss": 1.0718, + "step": 6452 + }, + { + "epoch": 0.53, + "grad_norm": 4.125724567974992, + "learning_rate": 4.798839877203754e-06, + "loss": 1.165, + "step": 6453 + }, + { + "epoch": 0.53, + "grad_norm": 3.5048961563875642, + "learning_rate": 4.7975171769088366e-06, + "loss": 0.3907, + "step": 6454 + }, + { + "epoch": 0.53, + "grad_norm": 5.241688967850177, + "learning_rate": 4.7961944908070835e-06, + "loss": 1.272, + "step": 6455 + }, + { + "epoch": 0.53, + "grad_norm": 3.530270446774279, + "learning_rate": 4.7948718189912095e-06, + "loss": 0.7915, + "step": 6456 + }, + { + "epoch": 0.53, + "grad_norm": 4.395649687464648, + "learning_rate": 4.793549161553927e-06, + "loss": 1.0097, + "step": 6457 + }, + { + "epoch": 0.53, + "grad_norm": 4.084798442453911, + "learning_rate": 4.792226518587952e-06, + "loss": 0.6873, + "step": 6458 + }, + { + "epoch": 0.53, + "grad_norm": 2.339275731224011, + "learning_rate": 4.7909038901859945e-06, + "loss": 0.4039, + "step": 6459 + }, + { + "epoch": 0.53, + "grad_norm": 3.936160088236716, + "learning_rate": 4.789581276440762e-06, + "loss": 0.7648, + "step": 6460 + }, + { + "epoch": 0.53, + "grad_norm": 5.014448484107769, + "learning_rate": 4.788258677444967e-06, + "loss": 0.9322, + "step": 6461 + }, + { + "epoch": 0.53, + "grad_norm": 2.740184648909575, + "learning_rate": 4.786936093291318e-06, + "loss": 0.7014, + "step": 6462 + }, + { + "epoch": 0.53, + "grad_norm": 2.564045541750739, + "learning_rate": 4.78561352407252e-06, + "loss": 0.4463, + "step": 6463 + }, + { + "epoch": 0.53, + "grad_norm": 3.101364495462342, + "learning_rate": 4.784290969881284e-06, + "loss": 0.5848, + "step": 6464 + }, + { + "epoch": 0.53, + "grad_norm": 4.295584779861668, + "learning_rate": 4.78296843081031e-06, + "loss": 0.9924, + "step": 6465 + }, + { + "epoch": 0.53, + "grad_norm": 3.119756801063344, + "learning_rate": 4.781645906952304e-06, + "loss": 0.4947, + "step": 6466 + }, + { + "epoch": 0.53, + "grad_norm": 3.2737077807255406, + "learning_rate": 4.78032339839997e-06, + "loss": 0.6644, + "step": 6467 + }, + { + "epoch": 0.53, + "grad_norm": 3.500344929627298, + "learning_rate": 4.779000905246009e-06, + "loss": 0.5359, + "step": 6468 + }, + { + "epoch": 0.53, + "grad_norm": 4.267609818219153, + "learning_rate": 4.777678427583124e-06, + "loss": 0.5198, + "step": 6469 + }, + { + "epoch": 0.53, + "grad_norm": 4.657578711382833, + "learning_rate": 4.776355965504015e-06, + "loss": 0.5331, + "step": 6470 + }, + { + "epoch": 0.53, + "grad_norm": 4.994108345839051, + "learning_rate": 4.775033519101378e-06, + "loss": 0.9264, + "step": 6471 + }, + { + "epoch": 0.53, + "grad_norm": 2.8699942361158346, + "learning_rate": 4.773711088467912e-06, + "loss": 0.7568, + "step": 6472 + }, + { + "epoch": 0.53, + "grad_norm": 5.175424319748729, + "learning_rate": 4.772388673696314e-06, + "loss": 1.2296, + "step": 6473 + }, + { + "epoch": 0.53, + "grad_norm": 4.502369361692485, + "learning_rate": 4.771066274879279e-06, + "loss": 0.9849, + "step": 6474 + }, + { + "epoch": 0.53, + "grad_norm": 3.651851635025855, + "learning_rate": 4.769743892109502e-06, + "loss": 0.5328, + "step": 6475 + }, + { + "epoch": 0.53, + "grad_norm": 4.544804100513222, + "learning_rate": 4.768421525479677e-06, + "loss": 1.0894, + "step": 6476 + }, + { + "epoch": 0.53, + "grad_norm": 3.1437570454179444, + "learning_rate": 4.767099175082495e-06, + "loss": 0.7833, + "step": 6477 + }, + { + "epoch": 0.53, + "grad_norm": 4.020674057653643, + "learning_rate": 4.765776841010647e-06, + "loss": 0.7353, + "step": 6478 + }, + { + "epoch": 0.53, + "grad_norm": 4.518597556006481, + "learning_rate": 4.764454523356823e-06, + "loss": 1.1217, + "step": 6479 + }, + { + "epoch": 0.53, + "grad_norm": 2.13953384114246, + "learning_rate": 4.763132222213711e-06, + "loss": 0.3948, + "step": 6480 + }, + { + "epoch": 0.53, + "grad_norm": 2.9619726314103363, + "learning_rate": 4.761809937673999e-06, + "loss": 0.6049, + "step": 6481 + }, + { + "epoch": 0.53, + "grad_norm": 3.2256903440489735, + "learning_rate": 4.760487669830377e-06, + "loss": 0.4989, + "step": 6482 + }, + { + "epoch": 0.53, + "grad_norm": 2.624195051629224, + "learning_rate": 4.7591654187755245e-06, + "loss": 0.4404, + "step": 6483 + }, + { + "epoch": 0.53, + "grad_norm": 3.8447451022651813, + "learning_rate": 4.757843184602128e-06, + "loss": 0.9409, + "step": 6484 + }, + { + "epoch": 0.53, + "grad_norm": 4.210561404766787, + "learning_rate": 4.756520967402871e-06, + "loss": 1.0253, + "step": 6485 + }, + { + "epoch": 0.53, + "grad_norm": 3.2958280305185474, + "learning_rate": 4.755198767270434e-06, + "loss": 0.6959, + "step": 6486 + }, + { + "epoch": 0.53, + "grad_norm": 3.8705923794882873, + "learning_rate": 4.7538765842975e-06, + "loss": 0.9916, + "step": 6487 + }, + { + "epoch": 0.53, + "grad_norm": 4.17973960051939, + "learning_rate": 4.752554418576744e-06, + "loss": 0.8638, + "step": 6488 + }, + { + "epoch": 0.53, + "grad_norm": 3.8058675230780707, + "learning_rate": 4.7512322702008475e-06, + "loss": 0.9882, + "step": 6489 + }, + { + "epoch": 0.53, + "grad_norm": 2.584219461622293, + "learning_rate": 4.749910139262485e-06, + "loss": 0.459, + "step": 6490 + }, + { + "epoch": 0.53, + "grad_norm": 3.5689914149607276, + "learning_rate": 4.748588025854334e-06, + "loss": 0.9858, + "step": 6491 + }, + { + "epoch": 0.53, + "grad_norm": 1.6919630395843155, + "learning_rate": 4.747265930069069e-06, + "loss": 0.2479, + "step": 6492 + }, + { + "epoch": 0.53, + "grad_norm": 3.534827490112639, + "learning_rate": 4.745943851999362e-06, + "loss": 0.8208, + "step": 6493 + }, + { + "epoch": 0.53, + "grad_norm": 5.060101817436625, + "learning_rate": 4.744621791737886e-06, + "loss": 1.1179, + "step": 6494 + }, + { + "epoch": 0.53, + "grad_norm": 4.358457354866757, + "learning_rate": 4.74329974937731e-06, + "loss": 1.2671, + "step": 6495 + }, + { + "epoch": 0.53, + "grad_norm": 3.4693194824047855, + "learning_rate": 4.741977725010304e-06, + "loss": 0.6998, + "step": 6496 + }, + { + "epoch": 0.53, + "grad_norm": 4.503786475228983, + "learning_rate": 4.740655718729537e-06, + "loss": 0.9596, + "step": 6497 + }, + { + "epoch": 0.53, + "grad_norm": 3.5345182678278477, + "learning_rate": 4.739333730627674e-06, + "loss": 0.966, + "step": 6498 + }, + { + "epoch": 0.53, + "grad_norm": 3.3117682019112844, + "learning_rate": 4.7380117607973855e-06, + "loss": 0.5517, + "step": 6499 + }, + { + "epoch": 0.53, + "grad_norm": 2.778259677896063, + "learning_rate": 4.73668980933133e-06, + "loss": 0.5979, + "step": 6500 + }, + { + "epoch": 0.53, + "grad_norm": 4.104106387260651, + "learning_rate": 4.735367876322174e-06, + "loss": 0.6847, + "step": 6501 + }, + { + "epoch": 0.53, + "grad_norm": 3.7767560467196026, + "learning_rate": 4.734045961862577e-06, + "loss": 1.1592, + "step": 6502 + }, + { + "epoch": 0.53, + "grad_norm": 3.879394886737245, + "learning_rate": 4.732724066045201e-06, + "loss": 1.032, + "step": 6503 + }, + { + "epoch": 0.53, + "grad_norm": 4.215580320281627, + "learning_rate": 4.731402188962706e-06, + "loss": 0.7044, + "step": 6504 + }, + { + "epoch": 0.53, + "grad_norm": 3.941829061345157, + "learning_rate": 4.730080330707748e-06, + "loss": 0.7978, + "step": 6505 + }, + { + "epoch": 0.53, + "grad_norm": 3.2538614465290667, + "learning_rate": 4.728758491372983e-06, + "loss": 0.5861, + "step": 6506 + }, + { + "epoch": 0.53, + "grad_norm": 4.061368353748383, + "learning_rate": 4.727436671051068e-06, + "loss": 0.9944, + "step": 6507 + }, + { + "epoch": 0.53, + "grad_norm": 3.395148902636811, + "learning_rate": 4.726114869834656e-06, + "loss": 0.4854, + "step": 6508 + }, + { + "epoch": 0.53, + "grad_norm": 4.547254005610118, + "learning_rate": 4.7247930878164e-06, + "loss": 0.892, + "step": 6509 + }, + { + "epoch": 0.53, + "grad_norm": 2.354189589986255, + "learning_rate": 4.723471325088953e-06, + "loss": 0.3938, + "step": 6510 + }, + { + "epoch": 0.53, + "grad_norm": 3.010307150721494, + "learning_rate": 4.722149581744959e-06, + "loss": 0.4741, + "step": 6511 + }, + { + "epoch": 0.53, + "grad_norm": 3.220441302810838, + "learning_rate": 4.720827857877071e-06, + "loss": 0.5945, + "step": 6512 + }, + { + "epoch": 0.53, + "grad_norm": 2.697154306835642, + "learning_rate": 4.719506153577935e-06, + "loss": 0.6007, + "step": 6513 + }, + { + "epoch": 0.53, + "grad_norm": 4.4702241418488216, + "learning_rate": 4.718184468940197e-06, + "loss": 0.9425, + "step": 6514 + }, + { + "epoch": 0.53, + "grad_norm": 3.4005861961412664, + "learning_rate": 4.716862804056503e-06, + "loss": 0.7388, + "step": 6515 + }, + { + "epoch": 0.53, + "grad_norm": 3.177697256568991, + "learning_rate": 4.7155411590194935e-06, + "loss": 0.6258, + "step": 6516 + }, + { + "epoch": 0.53, + "grad_norm": 2.5965537600315365, + "learning_rate": 4.714219533921811e-06, + "loss": 0.3597, + "step": 6517 + }, + { + "epoch": 0.53, + "grad_norm": 3.821519678293872, + "learning_rate": 4.712897928856095e-06, + "loss": 0.6991, + "step": 6518 + }, + { + "epoch": 0.53, + "grad_norm": 4.183071395135402, + "learning_rate": 4.711576343914984e-06, + "loss": 0.4475, + "step": 6519 + }, + { + "epoch": 0.53, + "grad_norm": 3.3883270732955357, + "learning_rate": 4.710254779191116e-06, + "loss": 0.4484, + "step": 6520 + }, + { + "epoch": 0.53, + "grad_norm": 3.2618431776639802, + "learning_rate": 4.7089332347771265e-06, + "loss": 0.4983, + "step": 6521 + }, + { + "epoch": 0.53, + "grad_norm": 1.8306121244153692, + "learning_rate": 4.707611710765654e-06, + "loss": 0.3587, + "step": 6522 + }, + { + "epoch": 0.53, + "grad_norm": 4.443170147328888, + "learning_rate": 4.706290207249325e-06, + "loss": 1.4299, + "step": 6523 + }, + { + "epoch": 0.53, + "grad_norm": 4.4569262766400835, + "learning_rate": 4.704968724320775e-06, + "loss": 0.7632, + "step": 6524 + }, + { + "epoch": 0.53, + "grad_norm": 2.247821939618158, + "learning_rate": 4.703647262072634e-06, + "loss": 0.353, + "step": 6525 + }, + { + "epoch": 0.53, + "grad_norm": 3.1100807442195255, + "learning_rate": 4.702325820597528e-06, + "loss": 0.5021, + "step": 6526 + }, + { + "epoch": 0.53, + "grad_norm": 4.355309216517904, + "learning_rate": 4.70100439998809e-06, + "loss": 0.8495, + "step": 6527 + }, + { + "epoch": 0.53, + "grad_norm": 3.0247338209320747, + "learning_rate": 4.699683000336941e-06, + "loss": 0.7517, + "step": 6528 + }, + { + "epoch": 0.53, + "grad_norm": 4.236116874762068, + "learning_rate": 4.698361621736705e-06, + "loss": 1.0296, + "step": 6529 + }, + { + "epoch": 0.53, + "grad_norm": 1.955894405304145, + "learning_rate": 4.6970402642800075e-06, + "loss": 0.3625, + "step": 6530 + }, + { + "epoch": 0.53, + "grad_norm": 2.878574051928466, + "learning_rate": 4.695718928059469e-06, + "loss": 0.506, + "step": 6531 + }, + { + "epoch": 0.53, + "grad_norm": 3.8906841416371174, + "learning_rate": 4.694397613167709e-06, + "loss": 0.6025, + "step": 6532 + }, + { + "epoch": 0.53, + "grad_norm": 3.6763792751475264, + "learning_rate": 4.69307631969735e-06, + "loss": 0.6466, + "step": 6533 + }, + { + "epoch": 0.53, + "grad_norm": 3.0301141217686656, + "learning_rate": 4.691755047741001e-06, + "loss": 0.6439, + "step": 6534 + }, + { + "epoch": 0.53, + "grad_norm": 4.391320284304078, + "learning_rate": 4.690433797391282e-06, + "loss": 0.8834, + "step": 6535 + }, + { + "epoch": 0.53, + "grad_norm": 3.0619941823911168, + "learning_rate": 4.689112568740807e-06, + "loss": 0.3854, + "step": 6536 + }, + { + "epoch": 0.53, + "grad_norm": 4.758029545952147, + "learning_rate": 4.687791361882188e-06, + "loss": 1.0864, + "step": 6537 + }, + { + "epoch": 0.53, + "grad_norm": 5.363849172380941, + "learning_rate": 4.6864701769080364e-06, + "loss": 1.0227, + "step": 6538 + }, + { + "epoch": 0.53, + "grad_norm": 2.9221309019995574, + "learning_rate": 4.685149013910962e-06, + "loss": 0.3992, + "step": 6539 + }, + { + "epoch": 0.53, + "grad_norm": 4.769912945729529, + "learning_rate": 4.683827872983571e-06, + "loss": 0.7576, + "step": 6540 + }, + { + "epoch": 0.53, + "grad_norm": 3.032375109346851, + "learning_rate": 4.682506754218469e-06, + "loss": 0.7517, + "step": 6541 + }, + { + "epoch": 0.53, + "grad_norm": 2.84124810640443, + "learning_rate": 4.681185657708261e-06, + "loss": 0.3858, + "step": 6542 + }, + { + "epoch": 0.53, + "grad_norm": 3.26168632843942, + "learning_rate": 4.679864583545552e-06, + "loss": 0.4775, + "step": 6543 + }, + { + "epoch": 0.53, + "grad_norm": 2.778346260856777, + "learning_rate": 4.678543531822944e-06, + "loss": 0.433, + "step": 6544 + }, + { + "epoch": 0.53, + "grad_norm": 3.001229869305055, + "learning_rate": 4.677222502633033e-06, + "loss": 0.6559, + "step": 6545 + }, + { + "epoch": 0.54, + "grad_norm": 4.434317474416247, + "learning_rate": 4.67590149606842e-06, + "loss": 0.5421, + "step": 6546 + }, + { + "epoch": 0.54, + "grad_norm": 4.331987535791356, + "learning_rate": 4.674580512221703e-06, + "loss": 0.6373, + "step": 6547 + }, + { + "epoch": 0.54, + "grad_norm": 3.1064710446924804, + "learning_rate": 4.673259551185475e-06, + "loss": 0.6903, + "step": 6548 + }, + { + "epoch": 0.54, + "grad_norm": 4.482780717530685, + "learning_rate": 4.67193861305233e-06, + "loss": 0.7344, + "step": 6549 + }, + { + "epoch": 0.54, + "grad_norm": 2.1185714991572953, + "learning_rate": 4.670617697914863e-06, + "loss": 0.3799, + "step": 6550 + }, + { + "epoch": 0.54, + "grad_norm": 2.986888769209969, + "learning_rate": 4.66929680586566e-06, + "loss": 0.7964, + "step": 6551 + }, + { + "epoch": 0.54, + "grad_norm": 2.1396347568967338, + "learning_rate": 4.667975936997311e-06, + "loss": 0.3424, + "step": 6552 + }, + { + "epoch": 0.54, + "grad_norm": 4.826732914643811, + "learning_rate": 4.666655091402404e-06, + "loss": 0.8784, + "step": 6553 + }, + { + "epoch": 0.54, + "grad_norm": 3.2725881096348632, + "learning_rate": 4.665334269173526e-06, + "loss": 0.8182, + "step": 6554 + }, + { + "epoch": 0.54, + "grad_norm": 5.082508256581759, + "learning_rate": 4.664013470403258e-06, + "loss": 1.0632, + "step": 6555 + }, + { + "epoch": 0.54, + "grad_norm": 2.5090485390867383, + "learning_rate": 4.662692695184184e-06, + "loss": 0.5078, + "step": 6556 + }, + { + "epoch": 0.54, + "grad_norm": 3.547058739510734, + "learning_rate": 4.661371943608884e-06, + "loss": 0.5732, + "step": 6557 + }, + { + "epoch": 0.54, + "grad_norm": 3.6117772081754747, + "learning_rate": 4.660051215769937e-06, + "loss": 0.5969, + "step": 6558 + }, + { + "epoch": 0.54, + "grad_norm": 5.578479510510918, + "learning_rate": 4.65873051175992e-06, + "loss": 1.3698, + "step": 6559 + }, + { + "epoch": 0.54, + "grad_norm": 5.660950646030756, + "learning_rate": 4.6574098316714086e-06, + "loss": 1.2019, + "step": 6560 + }, + { + "epoch": 0.54, + "grad_norm": 3.683994687913521, + "learning_rate": 4.656089175596978e-06, + "loss": 0.8219, + "step": 6561 + }, + { + "epoch": 0.54, + "grad_norm": 5.582453556871559, + "learning_rate": 4.6547685436292e-06, + "loss": 1.3325, + "step": 6562 + }, + { + "epoch": 0.54, + "grad_norm": 3.104896611457791, + "learning_rate": 4.653447935860642e-06, + "loss": 0.4239, + "step": 6563 + }, + { + "epoch": 0.54, + "grad_norm": 3.6478755011837922, + "learning_rate": 4.652127352383879e-06, + "loss": 0.3726, + "step": 6564 + }, + { + "epoch": 0.54, + "grad_norm": 3.3335035105942667, + "learning_rate": 4.650806793291472e-06, + "loss": 0.3934, + "step": 6565 + }, + { + "epoch": 0.54, + "grad_norm": 4.783339180629704, + "learning_rate": 4.649486258675989e-06, + "loss": 1.1521, + "step": 6566 + }, + { + "epoch": 0.54, + "grad_norm": 1.4381816934589031, + "learning_rate": 4.6481657486299965e-06, + "loss": 0.2135, + "step": 6567 + }, + { + "epoch": 0.54, + "grad_norm": 3.2725112225425272, + "learning_rate": 4.646845263246052e-06, + "loss": 0.8093, + "step": 6568 + }, + { + "epoch": 0.54, + "grad_norm": 2.744853814936621, + "learning_rate": 4.645524802616717e-06, + "loss": 0.3797, + "step": 6569 + }, + { + "epoch": 0.54, + "grad_norm": 3.3123920628629215, + "learning_rate": 4.644204366834551e-06, + "loss": 0.7096, + "step": 6570 + }, + { + "epoch": 0.54, + "grad_norm": 5.841646773780519, + "learning_rate": 4.6428839559921115e-06, + "loss": 0.9956, + "step": 6571 + }, + { + "epoch": 0.54, + "grad_norm": 3.0695846330423233, + "learning_rate": 4.6415635701819515e-06, + "loss": 0.5472, + "step": 6572 + }, + { + "epoch": 0.54, + "grad_norm": 3.7488795781565565, + "learning_rate": 4.640243209496627e-06, + "loss": 0.8055, + "step": 6573 + }, + { + "epoch": 0.54, + "grad_norm": 5.29002043121706, + "learning_rate": 4.638922874028686e-06, + "loss": 1.171, + "step": 6574 + }, + { + "epoch": 0.54, + "grad_norm": 4.652437636545116, + "learning_rate": 4.63760256387068e-06, + "loss": 0.746, + "step": 6575 + }, + { + "epoch": 0.54, + "grad_norm": 3.036073634733401, + "learning_rate": 4.636282279115157e-06, + "loss": 0.3928, + "step": 6576 + }, + { + "epoch": 0.54, + "grad_norm": 2.9691247579664775, + "learning_rate": 4.634962019854664e-06, + "loss": 0.64, + "step": 6577 + }, + { + "epoch": 0.54, + "grad_norm": 3.546770284448663, + "learning_rate": 4.633641786181746e-06, + "loss": 1.0569, + "step": 6578 + }, + { + "epoch": 0.54, + "grad_norm": 3.7128870863572634, + "learning_rate": 4.632321578188943e-06, + "loss": 0.5789, + "step": 6579 + }, + { + "epoch": 0.54, + "grad_norm": 2.9631752562069495, + "learning_rate": 4.6310013959687985e-06, + "loss": 0.7038, + "step": 6580 + }, + { + "epoch": 0.54, + "grad_norm": 3.112639351331203, + "learning_rate": 4.629681239613848e-06, + "loss": 0.499, + "step": 6581 + }, + { + "epoch": 0.54, + "grad_norm": 3.10163436923461, + "learning_rate": 4.628361109216633e-06, + "loss": 0.5696, + "step": 6582 + }, + { + "epoch": 0.54, + "grad_norm": 3.5160555089852474, + "learning_rate": 4.627041004869684e-06, + "loss": 0.7325, + "step": 6583 + }, + { + "epoch": 0.54, + "grad_norm": 4.279189696864294, + "learning_rate": 4.625720926665542e-06, + "loss": 0.8291, + "step": 6584 + }, + { + "epoch": 0.54, + "grad_norm": 3.2144240576872196, + "learning_rate": 4.624400874696731e-06, + "loss": 0.4094, + "step": 6585 + }, + { + "epoch": 0.54, + "grad_norm": 3.3151730118113907, + "learning_rate": 4.623080849055784e-06, + "loss": 0.4029, + "step": 6586 + }, + { + "epoch": 0.54, + "grad_norm": 4.492740953954248, + "learning_rate": 4.6217608498352305e-06, + "loss": 0.8886, + "step": 6587 + }, + { + "epoch": 0.54, + "grad_norm": 2.2091740236208612, + "learning_rate": 4.620440877127594e-06, + "loss": 0.5968, + "step": 6588 + }, + { + "epoch": 0.54, + "grad_norm": 5.130614751465802, + "learning_rate": 4.619120931025401e-06, + "loss": 0.9419, + "step": 6589 + }, + { + "epoch": 0.54, + "grad_norm": 4.9276537385180665, + "learning_rate": 4.617801011621175e-06, + "loss": 1.1146, + "step": 6590 + }, + { + "epoch": 0.54, + "grad_norm": 3.517852696399206, + "learning_rate": 4.6164811190074314e-06, + "loss": 0.7668, + "step": 6591 + }, + { + "epoch": 0.54, + "grad_norm": 3.9414529003498098, + "learning_rate": 4.615161253276693e-06, + "loss": 0.7907, + "step": 6592 + }, + { + "epoch": 0.54, + "grad_norm": 4.829034509793269, + "learning_rate": 4.613841414521477e-06, + "loss": 0.7181, + "step": 6593 + }, + { + "epoch": 0.54, + "grad_norm": 4.935798880433848, + "learning_rate": 4.612521602834297e-06, + "loss": 1.1686, + "step": 6594 + }, + { + "epoch": 0.54, + "grad_norm": 3.9536139118306277, + "learning_rate": 4.611201818307666e-06, + "loss": 1.3524, + "step": 6595 + }, + { + "epoch": 0.54, + "grad_norm": 5.098698829021364, + "learning_rate": 4.609882061034097e-06, + "loss": 0.9503, + "step": 6596 + }, + { + "epoch": 0.54, + "grad_norm": 4.207210196248606, + "learning_rate": 4.608562331106096e-06, + "loss": 0.9931, + "step": 6597 + }, + { + "epoch": 0.54, + "grad_norm": 3.2699870899783843, + "learning_rate": 4.607242628616171e-06, + "loss": 0.6659, + "step": 6598 + }, + { + "epoch": 0.54, + "grad_norm": 3.2458183261833398, + "learning_rate": 4.605922953656829e-06, + "loss": 0.8099, + "step": 6599 + }, + { + "epoch": 0.54, + "grad_norm": 2.6046779267216835, + "learning_rate": 4.604603306320574e-06, + "loss": 0.418, + "step": 6600 + }, + { + "epoch": 0.54, + "grad_norm": 2.6416739769033932, + "learning_rate": 4.603283686699907e-06, + "loss": 0.5294, + "step": 6601 + }, + { + "epoch": 0.54, + "grad_norm": 2.029470485869123, + "learning_rate": 4.601964094887327e-06, + "loss": 0.392, + "step": 6602 + }, + { + "epoch": 0.54, + "grad_norm": 2.069810495616436, + "learning_rate": 4.600644530975331e-06, + "loss": 0.3439, + "step": 6603 + }, + { + "epoch": 0.54, + "grad_norm": 2.1883788923176763, + "learning_rate": 4.599324995056415e-06, + "loss": 0.5165, + "step": 6604 + }, + { + "epoch": 0.54, + "grad_norm": 2.3009145317121376, + "learning_rate": 4.598005487223073e-06, + "loss": 0.3408, + "step": 6605 + }, + { + "epoch": 0.54, + "grad_norm": 4.1050672621062265, + "learning_rate": 4.596686007567797e-06, + "loss": 1.0344, + "step": 6606 + }, + { + "epoch": 0.54, + "grad_norm": 2.8337463877223708, + "learning_rate": 4.595366556183079e-06, + "loss": 0.6772, + "step": 6607 + }, + { + "epoch": 0.54, + "grad_norm": 3.48425966090482, + "learning_rate": 4.5940471331614014e-06, + "loss": 0.879, + "step": 6608 + }, + { + "epoch": 0.54, + "grad_norm": 3.4774955498265934, + "learning_rate": 4.592727738595254e-06, + "loss": 0.6558, + "step": 6609 + }, + { + "epoch": 0.54, + "grad_norm": 2.3091363448551796, + "learning_rate": 4.59140837257712e-06, + "loss": 0.668, + "step": 6610 + }, + { + "epoch": 0.54, + "grad_norm": 4.8952445582123945, + "learning_rate": 4.59008903519948e-06, + "loss": 0.9435, + "step": 6611 + }, + { + "epoch": 0.54, + "grad_norm": 4.7482706881409085, + "learning_rate": 4.588769726554814e-06, + "loss": 1.3747, + "step": 6612 + }, + { + "epoch": 0.54, + "grad_norm": 3.4056673960688952, + "learning_rate": 4.587450446735604e-06, + "loss": 0.5709, + "step": 6613 + }, + { + "epoch": 0.54, + "grad_norm": 3.7836948198696514, + "learning_rate": 4.586131195834319e-06, + "loss": 0.8782, + "step": 6614 + }, + { + "epoch": 0.54, + "grad_norm": 2.9011139557679586, + "learning_rate": 4.584811973943437e-06, + "loss": 0.4426, + "step": 6615 + }, + { + "epoch": 0.54, + "grad_norm": 3.6092322214967996, + "learning_rate": 4.583492781155428e-06, + "loss": 0.5468, + "step": 6616 + }, + { + "epoch": 0.54, + "grad_norm": 3.779247375258173, + "learning_rate": 4.582173617562764e-06, + "loss": 0.7156, + "step": 6617 + }, + { + "epoch": 0.54, + "grad_norm": 3.4377675092726236, + "learning_rate": 4.5808544832579095e-06, + "loss": 0.6088, + "step": 6618 + }, + { + "epoch": 0.54, + "grad_norm": 2.2807342570697653, + "learning_rate": 4.579535378333334e-06, + "loss": 0.31, + "step": 6619 + }, + { + "epoch": 0.54, + "grad_norm": 2.3071067500491274, + "learning_rate": 4.578216302881497e-06, + "loss": 0.3559, + "step": 6620 + }, + { + "epoch": 0.54, + "grad_norm": 3.017469989892803, + "learning_rate": 4.576897256994861e-06, + "loss": 0.5663, + "step": 6621 + }, + { + "epoch": 0.54, + "grad_norm": 4.223549494510722, + "learning_rate": 4.575578240765885e-06, + "loss": 0.5989, + "step": 6622 + }, + { + "epoch": 0.54, + "grad_norm": 6.180017186692998, + "learning_rate": 4.574259254287028e-06, + "loss": 1.4431, + "step": 6623 + }, + { + "epoch": 0.54, + "grad_norm": 4.595082314447604, + "learning_rate": 4.572940297650747e-06, + "loss": 1.1177, + "step": 6624 + }, + { + "epoch": 0.54, + "grad_norm": 1.3635989281444825, + "learning_rate": 4.57162137094949e-06, + "loss": 0.1789, + "step": 6625 + }, + { + "epoch": 0.54, + "grad_norm": 2.05576306019229, + "learning_rate": 4.570302474275712e-06, + "loss": 0.3513, + "step": 6626 + }, + { + "epoch": 0.54, + "grad_norm": 3.255732174985103, + "learning_rate": 4.568983607721859e-06, + "loss": 0.7319, + "step": 6627 + }, + { + "epoch": 0.54, + "grad_norm": 2.9994180654619003, + "learning_rate": 4.567664771380379e-06, + "loss": 0.5239, + "step": 6628 + }, + { + "epoch": 0.54, + "grad_norm": 3.9709190789507844, + "learning_rate": 4.566345965343718e-06, + "loss": 0.6596, + "step": 6629 + }, + { + "epoch": 0.54, + "grad_norm": 3.4005590334116964, + "learning_rate": 4.5650271897043195e-06, + "loss": 0.6441, + "step": 6630 + }, + { + "epoch": 0.54, + "grad_norm": 4.121247443116029, + "learning_rate": 4.563708444554619e-06, + "loss": 0.928, + "step": 6631 + }, + { + "epoch": 0.54, + "grad_norm": 3.1053500780565817, + "learning_rate": 4.562389729987059e-06, + "loss": 0.6048, + "step": 6632 + }, + { + "epoch": 0.54, + "grad_norm": 2.3267540639919053, + "learning_rate": 4.561071046094075e-06, + "loss": 0.3739, + "step": 6633 + }, + { + "epoch": 0.54, + "grad_norm": 4.113156196181085, + "learning_rate": 4.5597523929680986e-06, + "loss": 0.9381, + "step": 6634 + }, + { + "epoch": 0.54, + "grad_norm": 3.388165656572617, + "learning_rate": 4.558433770701565e-06, + "loss": 0.5184, + "step": 6635 + }, + { + "epoch": 0.54, + "grad_norm": 3.799676204479062, + "learning_rate": 4.557115179386903e-06, + "loss": 0.6542, + "step": 6636 + }, + { + "epoch": 0.54, + "grad_norm": 4.259239487215517, + "learning_rate": 4.555796619116538e-06, + "loss": 0.9032, + "step": 6637 + }, + { + "epoch": 0.54, + "grad_norm": 3.6370771361107526, + "learning_rate": 4.554478089982897e-06, + "loss": 1.0361, + "step": 6638 + }, + { + "epoch": 0.54, + "grad_norm": 3.756742079319636, + "learning_rate": 4.553159592078403e-06, + "loss": 0.6837, + "step": 6639 + }, + { + "epoch": 0.54, + "grad_norm": 4.149914278587901, + "learning_rate": 4.551841125495477e-06, + "loss": 0.8688, + "step": 6640 + }, + { + "epoch": 0.54, + "grad_norm": 3.347737195032477, + "learning_rate": 4.550522690326538e-06, + "loss": 0.7162, + "step": 6641 + }, + { + "epoch": 0.54, + "grad_norm": 3.1569733317515665, + "learning_rate": 4.5492042866640045e-06, + "loss": 0.6957, + "step": 6642 + }, + { + "epoch": 0.54, + "grad_norm": 4.681665935187307, + "learning_rate": 4.547885914600285e-06, + "loss": 1.0, + "step": 6643 + }, + { + "epoch": 0.54, + "grad_norm": 5.004368873239083, + "learning_rate": 4.546567574227796e-06, + "loss": 1.0405, + "step": 6644 + }, + { + "epoch": 0.54, + "grad_norm": 3.6669501577059718, + "learning_rate": 4.545249265638947e-06, + "loss": 0.4618, + "step": 6645 + }, + { + "epoch": 0.54, + "grad_norm": 3.91651972542203, + "learning_rate": 4.543930988926145e-06, + "loss": 0.9557, + "step": 6646 + }, + { + "epoch": 0.54, + "grad_norm": 3.1338220022417818, + "learning_rate": 4.542612744181799e-06, + "loss": 0.5671, + "step": 6647 + }, + { + "epoch": 0.54, + "grad_norm": 5.158874878095333, + "learning_rate": 4.541294531498306e-06, + "loss": 1.2718, + "step": 6648 + }, + { + "epoch": 0.54, + "grad_norm": 4.720407098990491, + "learning_rate": 4.539976350968071e-06, + "loss": 0.9709, + "step": 6649 + }, + { + "epoch": 0.54, + "grad_norm": 3.2245280801283176, + "learning_rate": 4.53865820268349e-06, + "loss": 0.4954, + "step": 6650 + }, + { + "epoch": 0.54, + "grad_norm": 2.147232656477744, + "learning_rate": 4.537340086736963e-06, + "loss": 0.4342, + "step": 6651 + }, + { + "epoch": 0.54, + "grad_norm": 3.721820243847922, + "learning_rate": 4.536022003220882e-06, + "loss": 0.9516, + "step": 6652 + }, + { + "epoch": 0.54, + "grad_norm": 6.140103242181031, + "learning_rate": 4.534703952227641e-06, + "loss": 1.5263, + "step": 6653 + }, + { + "epoch": 0.54, + "grad_norm": 1.8124082668299026, + "learning_rate": 4.533385933849626e-06, + "loss": 0.3272, + "step": 6654 + }, + { + "epoch": 0.54, + "grad_norm": 4.421852785391417, + "learning_rate": 4.532067948179227e-06, + "loss": 0.9541, + "step": 6655 + }, + { + "epoch": 0.54, + "grad_norm": 3.7558805640610675, + "learning_rate": 4.530749995308827e-06, + "loss": 0.8192, + "step": 6656 + }, + { + "epoch": 0.54, + "grad_norm": 3.297025578531439, + "learning_rate": 4.529432075330811e-06, + "loss": 0.7935, + "step": 6657 + }, + { + "epoch": 0.54, + "grad_norm": 5.023969964098497, + "learning_rate": 4.528114188337559e-06, + "loss": 1.112, + "step": 6658 + }, + { + "epoch": 0.54, + "grad_norm": 4.27069614561819, + "learning_rate": 4.52679633442145e-06, + "loss": 0.7922, + "step": 6659 + }, + { + "epoch": 0.54, + "grad_norm": 2.3307633982499767, + "learning_rate": 4.525478513674857e-06, + "loss": 0.3234, + "step": 6660 + }, + { + "epoch": 0.54, + "grad_norm": 4.15814670184417, + "learning_rate": 4.5241607261901545e-06, + "loss": 0.3978, + "step": 6661 + }, + { + "epoch": 0.54, + "grad_norm": 3.2839993498903466, + "learning_rate": 4.522842972059715e-06, + "loss": 0.46, + "step": 6662 + }, + { + "epoch": 0.54, + "grad_norm": 4.085813041441138, + "learning_rate": 4.5215252513759065e-06, + "loss": 0.9939, + "step": 6663 + }, + { + "epoch": 0.54, + "grad_norm": 4.624647901844173, + "learning_rate": 4.520207564231096e-06, + "loss": 0.7483, + "step": 6664 + }, + { + "epoch": 0.54, + "grad_norm": 4.423834373085487, + "learning_rate": 4.518889910717646e-06, + "loss": 0.892, + "step": 6665 + }, + { + "epoch": 0.54, + "grad_norm": 1.3592898624579992, + "learning_rate": 4.517572290927922e-06, + "loss": 0.1814, + "step": 6666 + }, + { + "epoch": 0.54, + "grad_norm": 4.094091936733789, + "learning_rate": 4.516254704954279e-06, + "loss": 0.9623, + "step": 6667 + }, + { + "epoch": 0.55, + "grad_norm": 4.380531451903595, + "learning_rate": 4.514937152889074e-06, + "loss": 0.9385, + "step": 6668 + }, + { + "epoch": 0.55, + "grad_norm": 1.548515060595646, + "learning_rate": 4.5136196348246655e-06, + "loss": 0.2365, + "step": 6669 + }, + { + "epoch": 0.55, + "grad_norm": 5.871283572351933, + "learning_rate": 4.5123021508534055e-06, + "loss": 1.5944, + "step": 6670 + }, + { + "epoch": 0.55, + "grad_norm": 4.586024416815253, + "learning_rate": 4.510984701067641e-06, + "loss": 0.6512, + "step": 6671 + }, + { + "epoch": 0.55, + "grad_norm": 4.684192797698732, + "learning_rate": 4.509667285559719e-06, + "loss": 0.8768, + "step": 6672 + }, + { + "epoch": 0.55, + "grad_norm": 4.4344216835852714, + "learning_rate": 4.5083499044219896e-06, + "loss": 0.9899, + "step": 6673 + }, + { + "epoch": 0.55, + "grad_norm": 3.5196756901261192, + "learning_rate": 4.50703255774679e-06, + "loss": 0.8292, + "step": 6674 + }, + { + "epoch": 0.55, + "grad_norm": 1.8425163445693746, + "learning_rate": 4.505715245626462e-06, + "loss": 0.2697, + "step": 6675 + }, + { + "epoch": 0.55, + "grad_norm": 2.7258897847745307, + "learning_rate": 4.5043979681533475e-06, + "loss": 0.5433, + "step": 6676 + }, + { + "epoch": 0.55, + "grad_norm": 3.688133066641052, + "learning_rate": 4.5030807254197755e-06, + "loss": 1.1534, + "step": 6677 + }, + { + "epoch": 0.55, + "grad_norm": 5.01387823574584, + "learning_rate": 4.501763517518082e-06, + "loss": 0.9374, + "step": 6678 + }, + { + "epoch": 0.55, + "grad_norm": 4.690055779675321, + "learning_rate": 4.500446344540598e-06, + "loss": 1.1436, + "step": 6679 + }, + { + "epoch": 0.55, + "grad_norm": 2.7781687517608864, + "learning_rate": 4.499129206579653e-06, + "loss": 0.5423, + "step": 6680 + }, + { + "epoch": 0.55, + "grad_norm": 3.301595233545369, + "learning_rate": 4.497812103727569e-06, + "loss": 0.8588, + "step": 6681 + }, + { + "epoch": 0.55, + "grad_norm": 2.7899519352888555, + "learning_rate": 4.496495036076673e-06, + "loss": 0.6174, + "step": 6682 + }, + { + "epoch": 0.55, + "grad_norm": 4.606542284391494, + "learning_rate": 4.4951780037192814e-06, + "loss": 1.2281, + "step": 6683 + }, + { + "epoch": 0.55, + "grad_norm": 2.4470797538474462, + "learning_rate": 4.4938610067477155e-06, + "loss": 0.5242, + "step": 6684 + }, + { + "epoch": 0.55, + "grad_norm": 3.6842774274565326, + "learning_rate": 4.4925440452542905e-06, + "loss": 0.7994, + "step": 6685 + }, + { + "epoch": 0.55, + "grad_norm": 2.313781227321232, + "learning_rate": 4.491227119331319e-06, + "loss": 0.7083, + "step": 6686 + }, + { + "epoch": 0.55, + "grad_norm": 3.5080033230773133, + "learning_rate": 4.489910229071113e-06, + "loss": 0.7683, + "step": 6687 + }, + { + "epoch": 0.55, + "grad_norm": 4.298710508526099, + "learning_rate": 4.488593374565979e-06, + "loss": 0.8617, + "step": 6688 + }, + { + "epoch": 0.55, + "grad_norm": 2.9292771100654753, + "learning_rate": 4.487276555908225e-06, + "loss": 0.342, + "step": 6689 + }, + { + "epoch": 0.55, + "grad_norm": 3.569969537758229, + "learning_rate": 4.4859597731901525e-06, + "loss": 0.8084, + "step": 6690 + }, + { + "epoch": 0.55, + "grad_norm": 3.8138563785328468, + "learning_rate": 4.484643026504063e-06, + "loss": 0.8283, + "step": 6691 + }, + { + "epoch": 0.55, + "grad_norm": 3.7839647659751714, + "learning_rate": 4.483326315942253e-06, + "loss": 0.5061, + "step": 6692 + }, + { + "epoch": 0.55, + "grad_norm": 2.8229391413619958, + "learning_rate": 4.482009641597023e-06, + "loss": 0.8583, + "step": 6693 + }, + { + "epoch": 0.55, + "grad_norm": 1.5440870104220314, + "learning_rate": 4.48069300356066e-06, + "loss": 0.1245, + "step": 6694 + }, + { + "epoch": 0.55, + "grad_norm": 2.9891717366161417, + "learning_rate": 4.479376401925457e-06, + "loss": 0.6948, + "step": 6695 + }, + { + "epoch": 0.55, + "grad_norm": 3.258479099635694, + "learning_rate": 4.4780598367837045e-06, + "loss": 1.1285, + "step": 6696 + }, + { + "epoch": 0.55, + "grad_norm": 3.046156263768596, + "learning_rate": 4.476743308227685e-06, + "loss": 0.3902, + "step": 6697 + }, + { + "epoch": 0.55, + "grad_norm": 2.5052250731095094, + "learning_rate": 4.475426816349682e-06, + "loss": 0.4513, + "step": 6698 + }, + { + "epoch": 0.55, + "grad_norm": 5.004491542181198, + "learning_rate": 4.4741103612419785e-06, + "loss": 1.2451, + "step": 6699 + }, + { + "epoch": 0.55, + "grad_norm": 4.030912891854148, + "learning_rate": 4.472793942996848e-06, + "loss": 0.7664, + "step": 6700 + }, + { + "epoch": 0.55, + "grad_norm": 4.0108380696713715, + "learning_rate": 4.471477561706567e-06, + "loss": 0.7946, + "step": 6701 + }, + { + "epoch": 0.55, + "grad_norm": 3.347382738183261, + "learning_rate": 4.470161217463409e-06, + "loss": 0.7192, + "step": 6702 + }, + { + "epoch": 0.55, + "grad_norm": 4.086513506690113, + "learning_rate": 4.468844910359645e-06, + "loss": 0.9626, + "step": 6703 + }, + { + "epoch": 0.55, + "grad_norm": 2.7659728831313295, + "learning_rate": 4.467528640487541e-06, + "loss": 0.6089, + "step": 6704 + }, + { + "epoch": 0.55, + "grad_norm": 3.9804538854072966, + "learning_rate": 4.466212407939362e-06, + "loss": 0.9291, + "step": 6705 + }, + { + "epoch": 0.55, + "grad_norm": 1.299522483564469, + "learning_rate": 4.464896212807369e-06, + "loss": 0.2137, + "step": 6706 + }, + { + "epoch": 0.55, + "grad_norm": 3.5323175506029703, + "learning_rate": 4.463580055183824e-06, + "loss": 0.8715, + "step": 6707 + }, + { + "epoch": 0.55, + "grad_norm": 1.7972727230387882, + "learning_rate": 4.462263935160982e-06, + "loss": 0.4094, + "step": 6708 + }, + { + "epoch": 0.55, + "grad_norm": 4.017408192743553, + "learning_rate": 4.460947852831097e-06, + "loss": 0.8983, + "step": 6709 + }, + { + "epoch": 0.55, + "grad_norm": 3.943276224413567, + "learning_rate": 4.459631808286424e-06, + "loss": 0.5783, + "step": 6710 + }, + { + "epoch": 0.55, + "grad_norm": 2.3353441614559265, + "learning_rate": 4.458315801619208e-06, + "loss": 0.37, + "step": 6711 + }, + { + "epoch": 0.55, + "grad_norm": 5.189435494692342, + "learning_rate": 4.456999832921697e-06, + "loss": 1.2434, + "step": 6712 + }, + { + "epoch": 0.55, + "grad_norm": 3.3544064433359244, + "learning_rate": 4.455683902286134e-06, + "loss": 0.9277, + "step": 6713 + }, + { + "epoch": 0.55, + "grad_norm": 4.212854139587158, + "learning_rate": 4.454368009804761e-06, + "loss": 0.913, + "step": 6714 + }, + { + "epoch": 0.55, + "grad_norm": 4.389668485127944, + "learning_rate": 4.453052155569816e-06, + "loss": 0.9254, + "step": 6715 + }, + { + "epoch": 0.55, + "grad_norm": 2.041303875057696, + "learning_rate": 4.451736339673536e-06, + "loss": 0.5614, + "step": 6716 + }, + { + "epoch": 0.55, + "grad_norm": 3.9140860955081225, + "learning_rate": 4.450420562208151e-06, + "loss": 0.62, + "step": 6717 + }, + { + "epoch": 0.55, + "grad_norm": 3.6971464338906084, + "learning_rate": 4.449104823265893e-06, + "loss": 0.8889, + "step": 6718 + }, + { + "epoch": 0.55, + "grad_norm": 2.4099851679890616, + "learning_rate": 4.4477891229389895e-06, + "loss": 0.6155, + "step": 6719 + }, + { + "epoch": 0.55, + "grad_norm": 4.341321347347397, + "learning_rate": 4.446473461319664e-06, + "loss": 1.0529, + "step": 6720 + }, + { + "epoch": 0.55, + "grad_norm": 2.3881489681389207, + "learning_rate": 4.445157838500141e-06, + "loss": 0.4215, + "step": 6721 + }, + { + "epoch": 0.55, + "grad_norm": 1.9211033591281976, + "learning_rate": 4.44384225457264e-06, + "loss": 0.4279, + "step": 6722 + }, + { + "epoch": 0.55, + "grad_norm": 2.70667426714046, + "learning_rate": 4.442526709629376e-06, + "loss": 0.4008, + "step": 6723 + }, + { + "epoch": 0.55, + "grad_norm": 3.1879485480740595, + "learning_rate": 4.441211203762562e-06, + "loss": 0.7089, + "step": 6724 + }, + { + "epoch": 0.55, + "grad_norm": 5.074765884183015, + "learning_rate": 4.439895737064411e-06, + "loss": 0.935, + "step": 6725 + }, + { + "epoch": 0.55, + "grad_norm": 4.171599425732302, + "learning_rate": 4.438580309627132e-06, + "loss": 1.009, + "step": 6726 + }, + { + "epoch": 0.55, + "grad_norm": 3.8459991702099563, + "learning_rate": 4.437264921542931e-06, + "loss": 0.7308, + "step": 6727 + }, + { + "epoch": 0.55, + "grad_norm": 3.9044927686931064, + "learning_rate": 4.4359495729040095e-06, + "loss": 0.8916, + "step": 6728 + }, + { + "epoch": 0.55, + "grad_norm": 3.7245129839287126, + "learning_rate": 4.434634263802567e-06, + "loss": 0.484, + "step": 6729 + }, + { + "epoch": 0.55, + "grad_norm": 4.745818385243437, + "learning_rate": 4.433318994330802e-06, + "loss": 1.1118, + "step": 6730 + }, + { + "epoch": 0.55, + "grad_norm": 3.9149526073559415, + "learning_rate": 4.43200376458091e-06, + "loss": 0.7711, + "step": 6731 + }, + { + "epoch": 0.55, + "grad_norm": 5.0550435934262605, + "learning_rate": 4.430688574645081e-06, + "loss": 0.9976, + "step": 6732 + }, + { + "epoch": 0.55, + "grad_norm": 4.442141227850007, + "learning_rate": 4.429373424615509e-06, + "loss": 1.1589, + "step": 6733 + }, + { + "epoch": 0.55, + "grad_norm": 3.4050159968489413, + "learning_rate": 4.428058314584373e-06, + "loss": 0.7095, + "step": 6734 + }, + { + "epoch": 0.55, + "grad_norm": 4.473274924192312, + "learning_rate": 4.426743244643862e-06, + "loss": 1.6246, + "step": 6735 + }, + { + "epoch": 0.55, + "grad_norm": 4.516636084525008, + "learning_rate": 4.425428214886153e-06, + "loss": 1.0359, + "step": 6736 + }, + { + "epoch": 0.55, + "grad_norm": 5.0959314160256275, + "learning_rate": 4.424113225403425e-06, + "loss": 1.5183, + "step": 6737 + }, + { + "epoch": 0.55, + "grad_norm": 3.9595000335141557, + "learning_rate": 4.422798276287855e-06, + "loss": 0.7305, + "step": 6738 + }, + { + "epoch": 0.55, + "grad_norm": 2.95466063717298, + "learning_rate": 4.421483367631616e-06, + "loss": 0.6199, + "step": 6739 + }, + { + "epoch": 0.55, + "grad_norm": 5.506402893224004, + "learning_rate": 4.420168499526872e-06, + "loss": 1.046, + "step": 6740 + }, + { + "epoch": 0.55, + "grad_norm": 4.537825050212767, + "learning_rate": 4.418853672065794e-06, + "loss": 1.3168, + "step": 6741 + }, + { + "epoch": 0.55, + "grad_norm": 3.8395774802404476, + "learning_rate": 4.4175388853405445e-06, + "loss": 0.8424, + "step": 6742 + }, + { + "epoch": 0.55, + "grad_norm": 5.11543314203391, + "learning_rate": 4.4162241394432834e-06, + "loss": 1.1579, + "step": 6743 + }, + { + "epoch": 0.55, + "grad_norm": 4.842197311328773, + "learning_rate": 4.414909434466172e-06, + "loss": 1.0507, + "step": 6744 + }, + { + "epoch": 0.55, + "grad_norm": 3.917293590730472, + "learning_rate": 4.41359477050136e-06, + "loss": 0.6057, + "step": 6745 + }, + { + "epoch": 0.55, + "grad_norm": 4.337147638873848, + "learning_rate": 4.412280147641003e-06, + "loss": 1.1647, + "step": 6746 + }, + { + "epoch": 0.55, + "grad_norm": 2.5815547564880315, + "learning_rate": 4.410965565977251e-06, + "loss": 0.5415, + "step": 6747 + }, + { + "epoch": 0.55, + "grad_norm": 3.4075359973084156, + "learning_rate": 4.409651025602248e-06, + "loss": 0.8486, + "step": 6748 + }, + { + "epoch": 0.55, + "grad_norm": 3.8845777983966663, + "learning_rate": 4.40833652660814e-06, + "loss": 1.0452, + "step": 6749 + }, + { + "epoch": 0.55, + "grad_norm": 4.680515425628574, + "learning_rate": 4.407022069087067e-06, + "loss": 0.7546, + "step": 6750 + }, + { + "epoch": 0.55, + "grad_norm": 2.7372695630352295, + "learning_rate": 4.405707653131166e-06, + "loss": 0.2813, + "step": 6751 + }, + { + "epoch": 0.55, + "grad_norm": 4.607317444290473, + "learning_rate": 4.404393278832572e-06, + "loss": 0.5272, + "step": 6752 + }, + { + "epoch": 0.55, + "grad_norm": 4.780700197590161, + "learning_rate": 4.403078946283416e-06, + "loss": 1.0148, + "step": 6753 + }, + { + "epoch": 0.55, + "grad_norm": 4.169287698761378, + "learning_rate": 4.401764655575828e-06, + "loss": 1.033, + "step": 6754 + }, + { + "epoch": 0.55, + "grad_norm": 3.5874236085275633, + "learning_rate": 4.400450406801935e-06, + "loss": 0.8101, + "step": 6755 + }, + { + "epoch": 0.55, + "grad_norm": 3.411895553826789, + "learning_rate": 4.39913620005386e-06, + "loss": 0.5623, + "step": 6756 + }, + { + "epoch": 0.55, + "grad_norm": 3.1675888394351133, + "learning_rate": 4.3978220354237215e-06, + "loss": 0.6919, + "step": 6757 + }, + { + "epoch": 0.55, + "grad_norm": 2.7486331496396943, + "learning_rate": 4.396507913003638e-06, + "loss": 0.3992, + "step": 6758 + }, + { + "epoch": 0.55, + "grad_norm": 4.34685623752115, + "learning_rate": 4.395193832885723e-06, + "loss": 0.7302, + "step": 6759 + }, + { + "epoch": 0.55, + "grad_norm": 4.252400234021796, + "learning_rate": 4.393879795162088e-06, + "loss": 0.9949, + "step": 6760 + }, + { + "epoch": 0.55, + "grad_norm": 3.978397095087008, + "learning_rate": 4.392565799924841e-06, + "loss": 0.7132, + "step": 6761 + }, + { + "epoch": 0.55, + "grad_norm": 4.466544139260184, + "learning_rate": 4.391251847266091e-06, + "loss": 0.7536, + "step": 6762 + }, + { + "epoch": 0.55, + "grad_norm": 1.6048107934800868, + "learning_rate": 4.389937937277934e-06, + "loss": 0.3515, + "step": 6763 + }, + { + "epoch": 0.55, + "grad_norm": 3.7738442378916877, + "learning_rate": 4.388624070052473e-06, + "loss": 0.7473, + "step": 6764 + }, + { + "epoch": 0.55, + "grad_norm": 4.139769946318573, + "learning_rate": 4.387310245681805e-06, + "loss": 0.6443, + "step": 6765 + }, + { + "epoch": 0.55, + "grad_norm": 4.117095300850483, + "learning_rate": 4.385996464258021e-06, + "loss": 0.7387, + "step": 6766 + }, + { + "epoch": 0.55, + "grad_norm": 4.228842653356213, + "learning_rate": 4.384682725873215e-06, + "loss": 1.046, + "step": 6767 + }, + { + "epoch": 0.55, + "grad_norm": 3.4775465019126184, + "learning_rate": 4.383369030619471e-06, + "loss": 0.7722, + "step": 6768 + }, + { + "epoch": 0.55, + "grad_norm": 3.1534807109644785, + "learning_rate": 4.382055378588873e-06, + "loss": 0.5986, + "step": 6769 + }, + { + "epoch": 0.55, + "grad_norm": 3.5132800394612183, + "learning_rate": 4.380741769873504e-06, + "loss": 0.5259, + "step": 6770 + }, + { + "epoch": 0.55, + "grad_norm": 5.468051046678788, + "learning_rate": 4.379428204565442e-06, + "loss": 1.0535, + "step": 6771 + }, + { + "epoch": 0.55, + "grad_norm": 4.447498264682462, + "learning_rate": 4.378114682756764e-06, + "loss": 0.6449, + "step": 6772 + }, + { + "epoch": 0.55, + "grad_norm": 2.113338299264036, + "learning_rate": 4.3768012045395395e-06, + "loss": 0.4076, + "step": 6773 + }, + { + "epoch": 0.55, + "grad_norm": 3.2080865823927436, + "learning_rate": 4.375487770005837e-06, + "loss": 0.5992, + "step": 6774 + }, + { + "epoch": 0.55, + "grad_norm": 2.74180423498849, + "learning_rate": 4.374174379247726e-06, + "loss": 0.5277, + "step": 6775 + }, + { + "epoch": 0.55, + "grad_norm": 2.7026056758885635, + "learning_rate": 4.372861032357265e-06, + "loss": 0.5905, + "step": 6776 + }, + { + "epoch": 0.55, + "grad_norm": 4.380073082270201, + "learning_rate": 4.371547729426517e-06, + "loss": 1.1251, + "step": 6777 + }, + { + "epoch": 0.55, + "grad_norm": 4.17091378817613, + "learning_rate": 4.370234470547538e-06, + "loss": 0.9241, + "step": 6778 + }, + { + "epoch": 0.55, + "grad_norm": 3.807829620202627, + "learning_rate": 4.3689212558123846e-06, + "loss": 0.9373, + "step": 6779 + }, + { + "epoch": 0.55, + "grad_norm": 4.167161769805908, + "learning_rate": 4.367608085313102e-06, + "loss": 0.9693, + "step": 6780 + }, + { + "epoch": 0.55, + "grad_norm": 3.0216638364730692, + "learning_rate": 4.36629495914174e-06, + "loss": 0.467, + "step": 6781 + }, + { + "epoch": 0.55, + "grad_norm": 4.565712444915998, + "learning_rate": 4.364981877390345e-06, + "loss": 0.8293, + "step": 6782 + }, + { + "epoch": 0.55, + "grad_norm": 4.587781430703773, + "learning_rate": 4.363668840150956e-06, + "loss": 0.8427, + "step": 6783 + }, + { + "epoch": 0.55, + "grad_norm": 4.102385554796526, + "learning_rate": 4.362355847515614e-06, + "loss": 0.6292, + "step": 6784 + }, + { + "epoch": 0.55, + "grad_norm": 2.8173956924667825, + "learning_rate": 4.36104289957635e-06, + "loss": 0.5476, + "step": 6785 + }, + { + "epoch": 0.55, + "grad_norm": 3.8232940754466638, + "learning_rate": 4.359729996425198e-06, + "loss": 0.9364, + "step": 6786 + }, + { + "epoch": 0.55, + "grad_norm": 2.1896318124601213, + "learning_rate": 4.358417138154186e-06, + "loss": 0.274, + "step": 6787 + }, + { + "epoch": 0.55, + "grad_norm": 2.882635089458406, + "learning_rate": 4.357104324855342e-06, + "loss": 0.6152, + "step": 6788 + }, + { + "epoch": 0.55, + "grad_norm": 3.9231177114410576, + "learning_rate": 4.355791556620686e-06, + "loss": 0.6004, + "step": 6789 + }, + { + "epoch": 0.55, + "grad_norm": 3.074891731812101, + "learning_rate": 4.35447883354224e-06, + "loss": 0.306, + "step": 6790 + }, + { + "epoch": 0.56, + "grad_norm": 4.845996157750704, + "learning_rate": 4.353166155712018e-06, + "loss": 1.2494, + "step": 6791 + }, + { + "epoch": 0.56, + "grad_norm": 4.174181340098687, + "learning_rate": 4.351853523222032e-06, + "loss": 0.7239, + "step": 6792 + }, + { + "epoch": 0.56, + "grad_norm": 4.133578831771037, + "learning_rate": 4.350540936164293e-06, + "loss": 1.085, + "step": 6793 + }, + { + "epoch": 0.56, + "grad_norm": 4.31131228805281, + "learning_rate": 4.349228394630808e-06, + "loss": 0.84, + "step": 6794 + }, + { + "epoch": 0.56, + "grad_norm": 3.599116948148214, + "learning_rate": 4.347915898713581e-06, + "loss": 0.6953, + "step": 6795 + }, + { + "epoch": 0.56, + "grad_norm": 4.989679839429888, + "learning_rate": 4.346603448504614e-06, + "loss": 1.3605, + "step": 6796 + }, + { + "epoch": 0.56, + "grad_norm": 3.9755423751338297, + "learning_rate": 4.345291044095898e-06, + "loss": 0.581, + "step": 6797 + }, + { + "epoch": 0.56, + "grad_norm": 4.652500888585743, + "learning_rate": 4.343978685579433e-06, + "loss": 1.1582, + "step": 6798 + }, + { + "epoch": 0.56, + "grad_norm": 3.579899946824376, + "learning_rate": 4.342666373047207e-06, + "loss": 0.8965, + "step": 6799 + }, + { + "epoch": 0.56, + "grad_norm": 4.180070198560346, + "learning_rate": 4.341354106591205e-06, + "loss": 1.0277, + "step": 6800 + }, + { + "epoch": 0.56, + "grad_norm": 3.1973548176785074, + "learning_rate": 4.340041886303415e-06, + "loss": 0.7563, + "step": 6801 + }, + { + "epoch": 0.56, + "grad_norm": 2.585822185199269, + "learning_rate": 4.338729712275818e-06, + "loss": 0.4064, + "step": 6802 + }, + { + "epoch": 0.56, + "grad_norm": 2.914939939545324, + "learning_rate": 4.337417584600389e-06, + "loss": 0.586, + "step": 6803 + }, + { + "epoch": 0.56, + "grad_norm": 3.983914984009011, + "learning_rate": 4.336105503369104e-06, + "loss": 1.0038, + "step": 6804 + }, + { + "epoch": 0.56, + "grad_norm": 4.850325783736151, + "learning_rate": 4.334793468673935e-06, + "loss": 0.913, + "step": 6805 + }, + { + "epoch": 0.56, + "grad_norm": 2.1456873238432825, + "learning_rate": 4.333481480606847e-06, + "loss": 0.3679, + "step": 6806 + }, + { + "epoch": 0.56, + "grad_norm": 3.485251348069992, + "learning_rate": 4.332169539259809e-06, + "loss": 0.8325, + "step": 6807 + }, + { + "epoch": 0.56, + "grad_norm": 2.295788439384921, + "learning_rate": 4.330857644724778e-06, + "loss": 0.2682, + "step": 6808 + }, + { + "epoch": 0.56, + "grad_norm": 2.598856570533403, + "learning_rate": 4.329545797093713e-06, + "loss": 0.287, + "step": 6809 + }, + { + "epoch": 0.56, + "grad_norm": 3.7782431799103495, + "learning_rate": 4.3282339964585705e-06, + "loss": 0.3696, + "step": 6810 + }, + { + "epoch": 0.56, + "grad_norm": 5.723726663255735, + "learning_rate": 4.326922242911302e-06, + "loss": 0.9142, + "step": 6811 + }, + { + "epoch": 0.56, + "grad_norm": 5.014153303089965, + "learning_rate": 4.325610536543855e-06, + "loss": 1.1812, + "step": 6812 + }, + { + "epoch": 0.56, + "grad_norm": 4.004017551772608, + "learning_rate": 4.324298877448176e-06, + "loss": 0.98, + "step": 6813 + }, + { + "epoch": 0.56, + "grad_norm": 2.3388195160299645, + "learning_rate": 4.3229872657162034e-06, + "loss": 0.3867, + "step": 6814 + }, + { + "epoch": 0.56, + "grad_norm": 4.417142475877144, + "learning_rate": 4.3216757014398755e-06, + "loss": 0.9917, + "step": 6815 + }, + { + "epoch": 0.56, + "grad_norm": 2.5876620978645044, + "learning_rate": 4.320364184711131e-06, + "loss": 0.3205, + "step": 6816 + }, + { + "epoch": 0.56, + "grad_norm": 4.843499879562751, + "learning_rate": 4.319052715621898e-06, + "loss": 0.7618, + "step": 6817 + }, + { + "epoch": 0.56, + "grad_norm": 4.722391630062092, + "learning_rate": 4.317741294264106e-06, + "loss": 0.8201, + "step": 6818 + }, + { + "epoch": 0.56, + "grad_norm": 3.7302836216449453, + "learning_rate": 4.3164299207296824e-06, + "loss": 0.6738, + "step": 6819 + }, + { + "epoch": 0.56, + "grad_norm": 2.365487702840651, + "learning_rate": 4.315118595110545e-06, + "loss": 0.4978, + "step": 6820 + }, + { + "epoch": 0.56, + "grad_norm": 3.5141520687520944, + "learning_rate": 4.313807317498614e-06, + "loss": 0.608, + "step": 6821 + }, + { + "epoch": 0.56, + "grad_norm": 5.628472427927134, + "learning_rate": 4.312496087985802e-06, + "loss": 1.4771, + "step": 6822 + }, + { + "epoch": 0.56, + "grad_norm": 3.272710188604787, + "learning_rate": 4.3111849066640234e-06, + "loss": 0.6231, + "step": 6823 + }, + { + "epoch": 0.56, + "grad_norm": 3.9297442148002624, + "learning_rate": 4.309873773625187e-06, + "loss": 0.8096, + "step": 6824 + }, + { + "epoch": 0.56, + "grad_norm": 2.0858669492068054, + "learning_rate": 4.308562688961193e-06, + "loss": 0.448, + "step": 6825 + }, + { + "epoch": 0.56, + "grad_norm": 3.9865323854928296, + "learning_rate": 4.3072516527639456e-06, + "loss": 0.874, + "step": 6826 + }, + { + "epoch": 0.56, + "grad_norm": 2.3713817084126427, + "learning_rate": 4.305940665125342e-06, + "loss": 0.5426, + "step": 6827 + }, + { + "epoch": 0.56, + "grad_norm": 3.9105183931123766, + "learning_rate": 4.304629726137279e-06, + "loss": 0.7191, + "step": 6828 + }, + { + "epoch": 0.56, + "grad_norm": 2.5341255524071977, + "learning_rate": 4.303318835891645e-06, + "loss": 0.7492, + "step": 6829 + }, + { + "epoch": 0.56, + "grad_norm": 1.3118491259015854, + "learning_rate": 4.302007994480331e-06, + "loss": 0.1992, + "step": 6830 + }, + { + "epoch": 0.56, + "grad_norm": 3.6402227805381098, + "learning_rate": 4.300697201995216e-06, + "loss": 0.8198, + "step": 6831 + }, + { + "epoch": 0.56, + "grad_norm": 2.861804706465785, + "learning_rate": 4.299386458528184e-06, + "loss": 0.5432, + "step": 6832 + }, + { + "epoch": 0.56, + "grad_norm": 3.6059084259740115, + "learning_rate": 4.298075764171112e-06, + "loss": 0.581, + "step": 6833 + }, + { + "epoch": 0.56, + "grad_norm": 2.4516925600421895, + "learning_rate": 4.2967651190158745e-06, + "loss": 0.5308, + "step": 6834 + }, + { + "epoch": 0.56, + "grad_norm": 4.462719347637248, + "learning_rate": 4.295454523154342e-06, + "loss": 0.7217, + "step": 6835 + }, + { + "epoch": 0.56, + "grad_norm": 2.8513362667586333, + "learning_rate": 4.294143976678382e-06, + "loss": 0.4031, + "step": 6836 + }, + { + "epoch": 0.56, + "grad_norm": 3.175222583089542, + "learning_rate": 4.292833479679857e-06, + "loss": 1.0811, + "step": 6837 + }, + { + "epoch": 0.56, + "grad_norm": 5.269094356348749, + "learning_rate": 4.291523032250627e-06, + "loss": 1.0881, + "step": 6838 + }, + { + "epoch": 0.56, + "grad_norm": 2.5117461525024996, + "learning_rate": 4.290212634482549e-06, + "loss": 0.3385, + "step": 6839 + }, + { + "epoch": 0.56, + "grad_norm": 5.76109821823398, + "learning_rate": 4.2889022864674755e-06, + "loss": 1.2529, + "step": 6840 + }, + { + "epoch": 0.56, + "grad_norm": 4.260502714074846, + "learning_rate": 4.287591988297257e-06, + "loss": 0.9448, + "step": 6841 + }, + { + "epoch": 0.56, + "grad_norm": 4.6874940118687105, + "learning_rate": 4.286281740063743e-06, + "loss": 0.8363, + "step": 6842 + }, + { + "epoch": 0.56, + "grad_norm": 3.32013293811396, + "learning_rate": 4.28497154185877e-06, + "loss": 0.3973, + "step": 6843 + }, + { + "epoch": 0.56, + "grad_norm": 3.7190494425481817, + "learning_rate": 4.283661393774181e-06, + "loss": 0.8758, + "step": 6844 + }, + { + "epoch": 0.56, + "grad_norm": 3.4261964662966773, + "learning_rate": 4.28235129590181e-06, + "loss": 1.0139, + "step": 6845 + }, + { + "epoch": 0.56, + "grad_norm": 5.030158882763015, + "learning_rate": 4.28104124833349e-06, + "loss": 0.8224, + "step": 6846 + }, + { + "epoch": 0.56, + "grad_norm": 2.473704631683547, + "learning_rate": 4.279731251161051e-06, + "loss": 0.5438, + "step": 6847 + }, + { + "epoch": 0.56, + "grad_norm": 3.1995239724376523, + "learning_rate": 4.278421304476316e-06, + "loss": 0.574, + "step": 6848 + }, + { + "epoch": 0.56, + "grad_norm": 4.054270936042657, + "learning_rate": 4.277111408371106e-06, + "loss": 0.704, + "step": 6849 + }, + { + "epoch": 0.56, + "grad_norm": 3.0846166069226713, + "learning_rate": 4.27580156293724e-06, + "loss": 1.0241, + "step": 6850 + }, + { + "epoch": 0.56, + "grad_norm": 4.395305755295111, + "learning_rate": 4.274491768266535e-06, + "loss": 1.1476, + "step": 6851 + }, + { + "epoch": 0.56, + "grad_norm": 2.7818834248217987, + "learning_rate": 4.273182024450799e-06, + "loss": 0.856, + "step": 6852 + }, + { + "epoch": 0.56, + "grad_norm": 2.8201990121495544, + "learning_rate": 4.271872331581841e-06, + "loss": 0.4176, + "step": 6853 + }, + { + "epoch": 0.56, + "grad_norm": 3.316887402577849, + "learning_rate": 4.270562689751461e-06, + "loss": 0.7332, + "step": 6854 + }, + { + "epoch": 0.56, + "grad_norm": 3.173018717309746, + "learning_rate": 4.2692530990514625e-06, + "loss": 0.5993, + "step": 6855 + }, + { + "epoch": 0.56, + "grad_norm": 3.8334914810271368, + "learning_rate": 4.267943559573642e-06, + "loss": 0.7014, + "step": 6856 + }, + { + "epoch": 0.56, + "grad_norm": 3.8593540784128573, + "learning_rate": 4.2666340714097915e-06, + "loss": 0.6737, + "step": 6857 + }, + { + "epoch": 0.56, + "grad_norm": 3.774865952838886, + "learning_rate": 4.265324634651703e-06, + "loss": 0.4685, + "step": 6858 + }, + { + "epoch": 0.56, + "grad_norm": 3.985799646092007, + "learning_rate": 4.264015249391159e-06, + "loss": 0.4988, + "step": 6859 + }, + { + "epoch": 0.56, + "grad_norm": 4.204955750493154, + "learning_rate": 4.2627059157199435e-06, + "loss": 0.6423, + "step": 6860 + }, + { + "epoch": 0.56, + "grad_norm": 4.36506220921179, + "learning_rate": 4.261396633729834e-06, + "loss": 1.126, + "step": 6861 + }, + { + "epoch": 0.56, + "grad_norm": 3.383543259218011, + "learning_rate": 4.260087403512605e-06, + "loss": 0.7018, + "step": 6862 + }, + { + "epoch": 0.56, + "grad_norm": 2.8830217325549654, + "learning_rate": 4.25877822516003e-06, + "loss": 0.6265, + "step": 6863 + }, + { + "epoch": 0.56, + "grad_norm": 4.628668987731295, + "learning_rate": 4.2574690987638745e-06, + "loss": 0.8785, + "step": 6864 + }, + { + "epoch": 0.56, + "grad_norm": 3.219833557255795, + "learning_rate": 4.2561600244159066e-06, + "loss": 0.7595, + "step": 6865 + }, + { + "epoch": 0.56, + "grad_norm": 2.6218901336310694, + "learning_rate": 4.254851002207882e-06, + "loss": 0.5658, + "step": 6866 + }, + { + "epoch": 0.56, + "grad_norm": 4.121018907344332, + "learning_rate": 4.253542032231559e-06, + "loss": 0.8594, + "step": 6867 + }, + { + "epoch": 0.56, + "grad_norm": 4.638940263029625, + "learning_rate": 4.252233114578691e-06, + "loss": 0.5599, + "step": 6868 + }, + { + "epoch": 0.56, + "grad_norm": 3.9511356064965457, + "learning_rate": 4.250924249341028e-06, + "loss": 0.7547, + "step": 6869 + }, + { + "epoch": 0.56, + "grad_norm": 5.170116867479131, + "learning_rate": 4.249615436610316e-06, + "loss": 1.0425, + "step": 6870 + }, + { + "epoch": 0.56, + "grad_norm": 3.177039311445076, + "learning_rate": 4.248306676478295e-06, + "loss": 0.7487, + "step": 6871 + }, + { + "epoch": 0.56, + "grad_norm": 2.817298145174552, + "learning_rate": 4.246997969036703e-06, + "loss": 0.3305, + "step": 6872 + }, + { + "epoch": 0.56, + "grad_norm": 4.12940330283581, + "learning_rate": 4.245689314377277e-06, + "loss": 0.6051, + "step": 6873 + }, + { + "epoch": 0.56, + "grad_norm": 3.131631486801754, + "learning_rate": 4.244380712591749e-06, + "loss": 0.4731, + "step": 6874 + }, + { + "epoch": 0.56, + "grad_norm": 2.4150405961163743, + "learning_rate": 4.243072163771843e-06, + "loss": 0.3221, + "step": 6875 + }, + { + "epoch": 0.56, + "grad_norm": 4.999646529642497, + "learning_rate": 4.241763668009286e-06, + "loss": 0.9084, + "step": 6876 + }, + { + "epoch": 0.56, + "grad_norm": 3.358170987405386, + "learning_rate": 4.240455225395796e-06, + "loss": 0.7714, + "step": 6877 + }, + { + "epoch": 0.56, + "grad_norm": 2.5208093951344193, + "learning_rate": 4.239146836023087e-06, + "loss": 0.3809, + "step": 6878 + }, + { + "epoch": 0.56, + "grad_norm": 4.927345876190294, + "learning_rate": 4.237838499982874e-06, + "loss": 1.2175, + "step": 6879 + }, + { + "epoch": 0.56, + "grad_norm": 5.817855918603794, + "learning_rate": 4.236530217366865e-06, + "loss": 1.1805, + "step": 6880 + }, + { + "epoch": 0.56, + "grad_norm": 3.6204962988593254, + "learning_rate": 4.235221988266766e-06, + "loss": 0.547, + "step": 6881 + }, + { + "epoch": 0.56, + "grad_norm": 4.289319688627827, + "learning_rate": 4.233913812774278e-06, + "loss": 0.899, + "step": 6882 + }, + { + "epoch": 0.56, + "grad_norm": 3.9054539055753694, + "learning_rate": 4.232605690981096e-06, + "loss": 0.9904, + "step": 6883 + }, + { + "epoch": 0.56, + "grad_norm": 4.786545958744005, + "learning_rate": 4.231297622978917e-06, + "loss": 0.9344, + "step": 6884 + }, + { + "epoch": 0.56, + "grad_norm": 4.10221020007079, + "learning_rate": 4.229989608859428e-06, + "loss": 1.1117, + "step": 6885 + }, + { + "epoch": 0.56, + "grad_norm": 3.2718030494194648, + "learning_rate": 4.228681648714317e-06, + "loss": 0.4801, + "step": 6886 + }, + { + "epoch": 0.56, + "grad_norm": 3.260089357399006, + "learning_rate": 4.2273737426352665e-06, + "loss": 0.851, + "step": 6887 + }, + { + "epoch": 0.56, + "grad_norm": 3.5932974222685434, + "learning_rate": 4.226065890713953e-06, + "loss": 0.781, + "step": 6888 + }, + { + "epoch": 0.56, + "grad_norm": 3.915015621717631, + "learning_rate": 4.224758093042052e-06, + "loss": 0.6147, + "step": 6889 + }, + { + "epoch": 0.56, + "grad_norm": 2.5451163006412014, + "learning_rate": 4.223450349711235e-06, + "loss": 0.732, + "step": 6890 + }, + { + "epoch": 0.56, + "grad_norm": 4.196982720790917, + "learning_rate": 4.222142660813169e-06, + "loss": 1.0102, + "step": 6891 + }, + { + "epoch": 0.56, + "grad_norm": 3.8062168968693655, + "learning_rate": 4.220835026439517e-06, + "loss": 0.7792, + "step": 6892 + }, + { + "epoch": 0.56, + "grad_norm": 4.01107608177263, + "learning_rate": 4.219527446681941e-06, + "loss": 0.9339, + "step": 6893 + }, + { + "epoch": 0.56, + "grad_norm": 4.502385615477792, + "learning_rate": 4.218219921632093e-06, + "loss": 0.9728, + "step": 6894 + }, + { + "epoch": 0.56, + "grad_norm": 4.103095125130039, + "learning_rate": 4.2169124513816245e-06, + "loss": 0.9507, + "step": 6895 + }, + { + "epoch": 0.56, + "grad_norm": 4.0188942558864795, + "learning_rate": 4.2156050360221855e-06, + "loss": 0.9009, + "step": 6896 + }, + { + "epoch": 0.56, + "grad_norm": 4.223842439731416, + "learning_rate": 4.21429767564542e-06, + "loss": 0.9399, + "step": 6897 + }, + { + "epoch": 0.56, + "grad_norm": 4.502148157216099, + "learning_rate": 4.21299037034297e-06, + "loss": 0.6637, + "step": 6898 + }, + { + "epoch": 0.56, + "grad_norm": 5.084211832079616, + "learning_rate": 4.211683120206469e-06, + "loss": 1.052, + "step": 6899 + }, + { + "epoch": 0.56, + "grad_norm": 3.043921168635646, + "learning_rate": 4.21037592532755e-06, + "loss": 0.3334, + "step": 6900 + }, + { + "epoch": 0.56, + "grad_norm": 3.333588962768206, + "learning_rate": 4.209068785797842e-06, + "loss": 0.6464, + "step": 6901 + }, + { + "epoch": 0.56, + "grad_norm": 4.1643242914476115, + "learning_rate": 4.20776170170897e-06, + "loss": 1.1004, + "step": 6902 + }, + { + "epoch": 0.56, + "grad_norm": 3.581833199513512, + "learning_rate": 4.2064546731525545e-06, + "loss": 0.766, + "step": 6903 + }, + { + "epoch": 0.56, + "grad_norm": 3.16260367601079, + "learning_rate": 4.205147700220214e-06, + "loss": 0.6797, + "step": 6904 + }, + { + "epoch": 0.56, + "grad_norm": 3.0079859962392805, + "learning_rate": 4.203840783003561e-06, + "loss": 0.8811, + "step": 6905 + }, + { + "epoch": 0.56, + "grad_norm": 3.7685315304775404, + "learning_rate": 4.202533921594203e-06, + "loss": 0.5685, + "step": 6906 + }, + { + "epoch": 0.56, + "grad_norm": 3.8447103263989066, + "learning_rate": 4.201227116083747e-06, + "loss": 0.8439, + "step": 6907 + }, + { + "epoch": 0.56, + "grad_norm": 1.3527188800025902, + "learning_rate": 4.199920366563793e-06, + "loss": 0.1809, + "step": 6908 + }, + { + "epoch": 0.56, + "grad_norm": 2.6407532064114183, + "learning_rate": 4.19861367312594e-06, + "loss": 0.4898, + "step": 6909 + }, + { + "epoch": 0.56, + "grad_norm": 4.9041728475041895, + "learning_rate": 4.197307035861783e-06, + "loss": 0.7451, + "step": 6910 + }, + { + "epoch": 0.56, + "grad_norm": 3.247228990935387, + "learning_rate": 4.196000454862907e-06, + "loss": 0.598, + "step": 6911 + }, + { + "epoch": 0.56, + "grad_norm": 1.7470822297571256, + "learning_rate": 4.194693930220899e-06, + "loss": 0.3764, + "step": 6912 + }, + { + "epoch": 0.57, + "grad_norm": 3.8398047777086575, + "learning_rate": 4.193387462027343e-06, + "loss": 0.8945, + "step": 6913 + }, + { + "epoch": 0.57, + "grad_norm": 3.47561128374342, + "learning_rate": 4.1920810503738165e-06, + "loss": 0.6011, + "step": 6914 + }, + { + "epoch": 0.57, + "grad_norm": 3.356979311294287, + "learning_rate": 4.190774695351891e-06, + "loss": 0.6791, + "step": 6915 + }, + { + "epoch": 0.57, + "grad_norm": 1.6033166663574987, + "learning_rate": 4.18946839705314e-06, + "loss": 0.3343, + "step": 6916 + }, + { + "epoch": 0.57, + "grad_norm": 4.180106896231907, + "learning_rate": 4.188162155569124e-06, + "loss": 0.9223, + "step": 6917 + }, + { + "epoch": 0.57, + "grad_norm": 3.3692075779863893, + "learning_rate": 4.186855970991409e-06, + "loss": 0.5588, + "step": 6918 + }, + { + "epoch": 0.57, + "grad_norm": 2.250783680882565, + "learning_rate": 4.18554984341155e-06, + "loss": 0.2383, + "step": 6919 + }, + { + "epoch": 0.57, + "grad_norm": 4.012464368258494, + "learning_rate": 4.184243772921104e-06, + "loss": 0.8274, + "step": 6920 + }, + { + "epoch": 0.57, + "grad_norm": 4.615573314560767, + "learning_rate": 4.18293775961162e-06, + "loss": 1.0733, + "step": 6921 + }, + { + "epoch": 0.57, + "grad_norm": 4.063651310014804, + "learning_rate": 4.181631803574643e-06, + "loss": 0.7932, + "step": 6922 + }, + { + "epoch": 0.57, + "grad_norm": 4.69790970871073, + "learning_rate": 4.180325904901715e-06, + "loss": 0.9528, + "step": 6923 + }, + { + "epoch": 0.57, + "grad_norm": 2.9197981238712365, + "learning_rate": 4.179020063684373e-06, + "loss": 0.6375, + "step": 6924 + }, + { + "epoch": 0.57, + "grad_norm": 3.274650253765275, + "learning_rate": 4.177714280014151e-06, + "loss": 0.8576, + "step": 6925 + }, + { + "epoch": 0.57, + "grad_norm": 5.550335842713816, + "learning_rate": 4.176408553982581e-06, + "loss": 1.2013, + "step": 6926 + }, + { + "epoch": 0.57, + "grad_norm": 1.934264354807582, + "learning_rate": 4.175102885681187e-06, + "loss": 0.4646, + "step": 6927 + }, + { + "epoch": 0.57, + "grad_norm": 3.6260526181564816, + "learning_rate": 4.17379727520149e-06, + "loss": 0.834, + "step": 6928 + }, + { + "epoch": 0.57, + "grad_norm": 5.768560143083979, + "learning_rate": 4.1724917226350084e-06, + "loss": 1.3616, + "step": 6929 + }, + { + "epoch": 0.57, + "grad_norm": 3.8626606591188324, + "learning_rate": 4.171186228073256e-06, + "loss": 0.8391, + "step": 6930 + }, + { + "epoch": 0.57, + "grad_norm": 5.188468437834785, + "learning_rate": 4.169880791607741e-06, + "loss": 1.2928, + "step": 6931 + }, + { + "epoch": 0.57, + "grad_norm": 3.109587675048897, + "learning_rate": 4.168575413329971e-06, + "loss": 0.6591, + "step": 6932 + }, + { + "epoch": 0.57, + "grad_norm": 3.8197203035624305, + "learning_rate": 4.167270093331447e-06, + "loss": 0.6995, + "step": 6933 + }, + { + "epoch": 0.57, + "grad_norm": 2.8125429356691902, + "learning_rate": 4.165964831703663e-06, + "loss": 0.4956, + "step": 6934 + }, + { + "epoch": 0.57, + "grad_norm": 5.289835103679579, + "learning_rate": 4.164659628538116e-06, + "loss": 1.5974, + "step": 6935 + }, + { + "epoch": 0.57, + "grad_norm": 3.8978445321359225, + "learning_rate": 4.163354483926292e-06, + "loss": 0.5473, + "step": 6936 + }, + { + "epoch": 0.57, + "grad_norm": 2.442305017248432, + "learning_rate": 4.1620493979596795e-06, + "loss": 0.3806, + "step": 6937 + }, + { + "epoch": 0.57, + "grad_norm": 4.3640488602564815, + "learning_rate": 4.160744370729757e-06, + "loss": 0.9416, + "step": 6938 + }, + { + "epoch": 0.57, + "grad_norm": 4.333629385556718, + "learning_rate": 4.159439402328003e-06, + "loss": 0.8327, + "step": 6939 + }, + { + "epoch": 0.57, + "grad_norm": 2.066121830157949, + "learning_rate": 4.158134492845886e-06, + "loss": 0.4121, + "step": 6940 + }, + { + "epoch": 0.57, + "grad_norm": 3.269603193669817, + "learning_rate": 4.1568296423748785e-06, + "loss": 0.6631, + "step": 6941 + }, + { + "epoch": 0.57, + "grad_norm": 1.0426816193321784, + "learning_rate": 4.155524851006444e-06, + "loss": 0.1427, + "step": 6942 + }, + { + "epoch": 0.57, + "grad_norm": 3.587387024359568, + "learning_rate": 4.154220118832041e-06, + "loss": 0.9754, + "step": 6943 + }, + { + "epoch": 0.57, + "grad_norm": 1.2798933960380874, + "learning_rate": 4.1529154459431285e-06, + "loss": 0.2721, + "step": 6944 + }, + { + "epoch": 0.57, + "grad_norm": 4.342283592432994, + "learning_rate": 4.151610832431156e-06, + "loss": 1.3131, + "step": 6945 + }, + { + "epoch": 0.57, + "grad_norm": 5.365002485112433, + "learning_rate": 4.150306278387573e-06, + "loss": 1.095, + "step": 6946 + }, + { + "epoch": 0.57, + "grad_norm": 2.6816522520739703, + "learning_rate": 4.14900178390382e-06, + "loss": 0.4537, + "step": 6947 + }, + { + "epoch": 0.57, + "grad_norm": 2.7082743424961486, + "learning_rate": 4.147697349071339e-06, + "loss": 0.6203, + "step": 6948 + }, + { + "epoch": 0.57, + "grad_norm": 3.7347242164312022, + "learning_rate": 4.146392973981564e-06, + "loss": 0.6242, + "step": 6949 + }, + { + "epoch": 0.57, + "grad_norm": 3.870684993167908, + "learning_rate": 4.14508865872593e-06, + "loss": 0.9103, + "step": 6950 + }, + { + "epoch": 0.57, + "grad_norm": 3.7617659166446287, + "learning_rate": 4.143784403395858e-06, + "loss": 0.5403, + "step": 6951 + }, + { + "epoch": 0.57, + "grad_norm": 4.47068227471356, + "learning_rate": 4.142480208082771e-06, + "loss": 0.9692, + "step": 6952 + }, + { + "epoch": 0.57, + "grad_norm": 4.2552461800628345, + "learning_rate": 4.141176072878093e-06, + "loss": 1.0206, + "step": 6953 + }, + { + "epoch": 0.57, + "grad_norm": 4.822356209708549, + "learning_rate": 4.1398719978732324e-06, + "loss": 1.2444, + "step": 6954 + }, + { + "epoch": 0.57, + "grad_norm": 4.07159222227165, + "learning_rate": 4.138567983159601e-06, + "loss": 0.7202, + "step": 6955 + }, + { + "epoch": 0.57, + "grad_norm": 3.20873396021626, + "learning_rate": 4.137264028828609e-06, + "loss": 0.6758, + "step": 6956 + }, + { + "epoch": 0.57, + "grad_norm": 4.56375038944988, + "learning_rate": 4.1359601349716504e-06, + "loss": 1.035, + "step": 6957 + }, + { + "epoch": 0.57, + "grad_norm": 3.0266586675579226, + "learning_rate": 4.134656301680126e-06, + "loss": 0.8107, + "step": 6958 + }, + { + "epoch": 0.57, + "grad_norm": 3.6120283958518833, + "learning_rate": 4.133352529045429e-06, + "loss": 0.6101, + "step": 6959 + }, + { + "epoch": 0.57, + "grad_norm": 3.9478032953611444, + "learning_rate": 4.13204881715895e-06, + "loss": 0.8561, + "step": 6960 + }, + { + "epoch": 0.57, + "grad_norm": 4.541264148590845, + "learning_rate": 4.130745166112069e-06, + "loss": 0.9062, + "step": 6961 + }, + { + "epoch": 0.57, + "grad_norm": 3.446684652833886, + "learning_rate": 4.129441575996172e-06, + "loss": 0.7517, + "step": 6962 + }, + { + "epoch": 0.57, + "grad_norm": 5.053587922886659, + "learning_rate": 4.128138046902629e-06, + "loss": 0.9202, + "step": 6963 + }, + { + "epoch": 0.57, + "grad_norm": 3.1761630302225887, + "learning_rate": 4.126834578922816e-06, + "loss": 0.836, + "step": 6964 + }, + { + "epoch": 0.57, + "grad_norm": 3.8479761947356645, + "learning_rate": 4.1255311721480975e-06, + "loss": 1.0355, + "step": 6965 + }, + { + "epoch": 0.57, + "grad_norm": 2.74405953449905, + "learning_rate": 4.124227826669839e-06, + "loss": 0.6668, + "step": 6966 + }, + { + "epoch": 0.57, + "grad_norm": 3.427789690531807, + "learning_rate": 4.1229245425794004e-06, + "loss": 0.4646, + "step": 6967 + }, + { + "epoch": 0.57, + "grad_norm": 5.196806288674009, + "learning_rate": 4.121621319968131e-06, + "loss": 1.1004, + "step": 6968 + }, + { + "epoch": 0.57, + "grad_norm": 3.8513313981058475, + "learning_rate": 4.120318158927387e-06, + "loss": 0.7007, + "step": 6969 + }, + { + "epoch": 0.57, + "grad_norm": 3.613814344524273, + "learning_rate": 4.11901505954851e-06, + "loss": 0.6972, + "step": 6970 + }, + { + "epoch": 0.57, + "grad_norm": 3.802800757327425, + "learning_rate": 4.117712021922843e-06, + "loss": 0.502, + "step": 6971 + }, + { + "epoch": 0.57, + "grad_norm": 3.7788402929949156, + "learning_rate": 4.116409046141725e-06, + "loss": 0.8423, + "step": 6972 + }, + { + "epoch": 0.57, + "grad_norm": 3.9727719345173997, + "learning_rate": 4.115106132296488e-06, + "loss": 1.042, + "step": 6973 + }, + { + "epoch": 0.57, + "grad_norm": 2.137717977015083, + "learning_rate": 4.113803280478458e-06, + "loss": 0.3862, + "step": 6974 + }, + { + "epoch": 0.57, + "grad_norm": 4.174766112087384, + "learning_rate": 4.112500490778962e-06, + "loss": 0.6982, + "step": 6975 + }, + { + "epoch": 0.57, + "grad_norm": 4.699326857290857, + "learning_rate": 4.1111977632893195e-06, + "loss": 0.847, + "step": 6976 + }, + { + "epoch": 0.57, + "grad_norm": 5.016945658519239, + "learning_rate": 4.109895098100845e-06, + "loss": 0.8596, + "step": 6977 + }, + { + "epoch": 0.57, + "grad_norm": 4.494855177875274, + "learning_rate": 4.108592495304851e-06, + "loss": 1.012, + "step": 6978 + }, + { + "epoch": 0.57, + "grad_norm": 2.9046747025960418, + "learning_rate": 4.107289954992646e-06, + "loss": 0.5896, + "step": 6979 + }, + { + "epoch": 0.57, + "grad_norm": 4.042097661435592, + "learning_rate": 4.1059874772555265e-06, + "loss": 0.8972, + "step": 6980 + }, + { + "epoch": 0.57, + "grad_norm": 3.9732746412026763, + "learning_rate": 4.104685062184795e-06, + "loss": 0.8474, + "step": 6981 + }, + { + "epoch": 0.57, + "grad_norm": 4.6544356724749, + "learning_rate": 4.103382709871744e-06, + "loss": 1.3651, + "step": 6982 + }, + { + "epoch": 0.57, + "grad_norm": 3.6851193126074655, + "learning_rate": 4.102080420407662e-06, + "loss": 0.6084, + "step": 6983 + }, + { + "epoch": 0.57, + "grad_norm": 4.122859640156609, + "learning_rate": 4.100778193883838e-06, + "loss": 1.1105, + "step": 6984 + }, + { + "epoch": 0.57, + "grad_norm": 3.0783988319102744, + "learning_rate": 4.099476030391548e-06, + "loss": 0.4396, + "step": 6985 + }, + { + "epoch": 0.57, + "grad_norm": 4.685476561129729, + "learning_rate": 4.098173930022069e-06, + "loss": 1.0255, + "step": 6986 + }, + { + "epoch": 0.57, + "grad_norm": 5.006861256635884, + "learning_rate": 4.096871892866672e-06, + "loss": 1.0051, + "step": 6987 + }, + { + "epoch": 0.57, + "grad_norm": 3.209919906489276, + "learning_rate": 4.095569919016624e-06, + "loss": 0.5489, + "step": 6988 + }, + { + "epoch": 0.57, + "grad_norm": 4.427039188500597, + "learning_rate": 4.0942680085631896e-06, + "loss": 0.844, + "step": 6989 + }, + { + "epoch": 0.57, + "grad_norm": 4.688646828015336, + "learning_rate": 4.092966161597628e-06, + "loss": 1.3874, + "step": 6990 + }, + { + "epoch": 0.57, + "grad_norm": 3.9209706639075392, + "learning_rate": 4.0916643782111885e-06, + "loss": 0.7576, + "step": 6991 + }, + { + "epoch": 0.57, + "grad_norm": 3.602547173452972, + "learning_rate": 4.0903626584951235e-06, + "loss": 0.7423, + "step": 6992 + }, + { + "epoch": 0.57, + "grad_norm": 1.218153447306228, + "learning_rate": 4.089061002540678e-06, + "loss": 0.1575, + "step": 6993 + }, + { + "epoch": 0.57, + "grad_norm": 3.162180261822791, + "learning_rate": 4.087759410439091e-06, + "loss": 0.8636, + "step": 6994 + }, + { + "epoch": 0.57, + "grad_norm": 4.211893565610061, + "learning_rate": 4.0864578822815996e-06, + "loss": 0.5571, + "step": 6995 + }, + { + "epoch": 0.57, + "grad_norm": 1.93994923532354, + "learning_rate": 4.085156418159436e-06, + "loss": 0.255, + "step": 6996 + }, + { + "epoch": 0.57, + "grad_norm": 4.078531851812658, + "learning_rate": 4.083855018163825e-06, + "loss": 0.7768, + "step": 6997 + }, + { + "epoch": 0.57, + "grad_norm": 3.6106329231752055, + "learning_rate": 4.0825536823859895e-06, + "loss": 0.8278, + "step": 6998 + }, + { + "epoch": 0.57, + "grad_norm": 3.9039369701823583, + "learning_rate": 4.081252410917148e-06, + "loss": 0.705, + "step": 6999 + }, + { + "epoch": 0.57, + "grad_norm": 3.937544893436525, + "learning_rate": 4.079951203848515e-06, + "loss": 0.8428, + "step": 7000 + }, + { + "epoch": 0.57, + "grad_norm": 5.265860819313912, + "learning_rate": 4.078650061271298e-06, + "loss": 1.3106, + "step": 7001 + }, + { + "epoch": 0.57, + "grad_norm": 1.076213834629379, + "learning_rate": 4.077348983276705e-06, + "loss": 0.1628, + "step": 7002 + }, + { + "epoch": 0.57, + "grad_norm": 5.9747004961131625, + "learning_rate": 4.0760479699559295e-06, + "loss": 1.2271, + "step": 7003 + }, + { + "epoch": 0.57, + "grad_norm": 3.229486804362292, + "learning_rate": 4.074747021400171e-06, + "loss": 0.5791, + "step": 7004 + }, + { + "epoch": 0.57, + "grad_norm": 3.696071073445764, + "learning_rate": 4.073446137700619e-06, + "loss": 0.9656, + "step": 7005 + }, + { + "epoch": 0.57, + "grad_norm": 4.023258765103961, + "learning_rate": 4.072145318948461e-06, + "loss": 0.9494, + "step": 7006 + }, + { + "epoch": 0.57, + "grad_norm": 2.3544085903462877, + "learning_rate": 4.0708445652348795e-06, + "loss": 0.3698, + "step": 7007 + }, + { + "epoch": 0.57, + "grad_norm": 2.5439805146070493, + "learning_rate": 4.069543876651048e-06, + "loss": 0.4269, + "step": 7008 + }, + { + "epoch": 0.57, + "grad_norm": 3.0477291576943464, + "learning_rate": 4.068243253288143e-06, + "loss": 0.491, + "step": 7009 + }, + { + "epoch": 0.57, + "grad_norm": 5.424843315953494, + "learning_rate": 4.0669426952373305e-06, + "loss": 0.8975, + "step": 7010 + }, + { + "epoch": 0.57, + "grad_norm": 4.902101146987203, + "learning_rate": 4.065642202589774e-06, + "loss": 1.0385, + "step": 7011 + }, + { + "epoch": 0.57, + "grad_norm": 3.566245633850951, + "learning_rate": 4.064341775436632e-06, + "loss": 0.6051, + "step": 7012 + }, + { + "epoch": 0.57, + "grad_norm": 4.341015013701762, + "learning_rate": 4.063041413869062e-06, + "loss": 0.9417, + "step": 7013 + }, + { + "epoch": 0.57, + "grad_norm": 4.06159040114982, + "learning_rate": 4.061741117978209e-06, + "loss": 0.7986, + "step": 7014 + }, + { + "epoch": 0.57, + "grad_norm": 4.947224703436263, + "learning_rate": 4.06044088785522e-06, + "loss": 1.0728, + "step": 7015 + }, + { + "epoch": 0.57, + "grad_norm": 3.263300477571825, + "learning_rate": 4.059140723591238e-06, + "loss": 0.6382, + "step": 7016 + }, + { + "epoch": 0.57, + "grad_norm": 4.3658893841201705, + "learning_rate": 4.057840625277395e-06, + "loss": 0.8812, + "step": 7017 + }, + { + "epoch": 0.57, + "grad_norm": 4.202865379750988, + "learning_rate": 4.056540593004823e-06, + "loss": 0.484, + "step": 7018 + }, + { + "epoch": 0.57, + "grad_norm": 3.352692183555134, + "learning_rate": 4.0552406268646524e-06, + "loss": 0.6808, + "step": 7019 + }, + { + "epoch": 0.57, + "grad_norm": 3.1367246178274346, + "learning_rate": 4.053940726948001e-06, + "loss": 0.6093, + "step": 7020 + }, + { + "epoch": 0.57, + "grad_norm": 2.8307141323573917, + "learning_rate": 4.052640893345986e-06, + "loss": 0.3585, + "step": 7021 + }, + { + "epoch": 0.57, + "grad_norm": 2.9444468720987205, + "learning_rate": 4.051341126149722e-06, + "loss": 0.4466, + "step": 7022 + }, + { + "epoch": 0.57, + "grad_norm": 1.089568245849912, + "learning_rate": 4.0500414254503174e-06, + "loss": 0.1585, + "step": 7023 + }, + { + "epoch": 0.57, + "grad_norm": 3.5099561653851397, + "learning_rate": 4.048741791338874e-06, + "loss": 0.7526, + "step": 7024 + }, + { + "epoch": 0.57, + "grad_norm": 3.843930801874921, + "learning_rate": 4.047442223906493e-06, + "loss": 0.7592, + "step": 7025 + }, + { + "epoch": 0.57, + "grad_norm": 3.210986763199321, + "learning_rate": 4.046142723244264e-06, + "loss": 0.5776, + "step": 7026 + }, + { + "epoch": 0.57, + "grad_norm": 4.040105628903596, + "learning_rate": 4.044843289443279e-06, + "loss": 1.2542, + "step": 7027 + }, + { + "epoch": 0.57, + "grad_norm": 4.132099269451299, + "learning_rate": 4.043543922594623e-06, + "loss": 0.8952, + "step": 7028 + }, + { + "epoch": 0.57, + "grad_norm": 6.111833355656927, + "learning_rate": 4.042244622789376e-06, + "loss": 1.1106, + "step": 7029 + }, + { + "epoch": 0.57, + "grad_norm": 3.988197594451673, + "learning_rate": 4.040945390118614e-06, + "loss": 0.6804, + "step": 7030 + }, + { + "epoch": 0.57, + "grad_norm": 4.509084378635519, + "learning_rate": 4.039646224673404e-06, + "loss": 1.1252, + "step": 7031 + }, + { + "epoch": 0.57, + "grad_norm": 3.7563337982585474, + "learning_rate": 4.038347126544816e-06, + "loss": 0.9299, + "step": 7032 + }, + { + "epoch": 0.57, + "grad_norm": 3.396042004521007, + "learning_rate": 4.037048095823907e-06, + "loss": 0.8917, + "step": 7033 + }, + { + "epoch": 0.57, + "grad_norm": 3.6091353439783886, + "learning_rate": 4.035749132601738e-06, + "loss": 0.8339, + "step": 7034 + }, + { + "epoch": 0.58, + "grad_norm": 3.324665985027785, + "learning_rate": 4.034450236969357e-06, + "loss": 0.8793, + "step": 7035 + }, + { + "epoch": 0.58, + "grad_norm": 3.889280898988449, + "learning_rate": 4.033151409017814e-06, + "loss": 0.6727, + "step": 7036 + }, + { + "epoch": 0.58, + "grad_norm": 4.705194395612646, + "learning_rate": 4.031852648838148e-06, + "loss": 1.1622, + "step": 7037 + }, + { + "epoch": 0.58, + "grad_norm": 4.899128719302082, + "learning_rate": 4.030553956521397e-06, + "loss": 0.9026, + "step": 7038 + }, + { + "epoch": 0.58, + "grad_norm": 3.4238661464589204, + "learning_rate": 4.029255332158597e-06, + "loss": 0.641, + "step": 7039 + }, + { + "epoch": 0.58, + "grad_norm": 3.179772961300419, + "learning_rate": 4.0279567758407715e-06, + "loss": 0.738, + "step": 7040 + }, + { + "epoch": 0.58, + "grad_norm": 4.810268008598644, + "learning_rate": 4.026658287658947e-06, + "loss": 0.7484, + "step": 7041 + }, + { + "epoch": 0.58, + "grad_norm": 4.924214179817267, + "learning_rate": 4.025359867704141e-06, + "loss": 0.8885, + "step": 7042 + }, + { + "epoch": 0.58, + "grad_norm": 4.578335547143393, + "learning_rate": 4.024061516067365e-06, + "loss": 1.2875, + "step": 7043 + }, + { + "epoch": 0.58, + "grad_norm": 2.0325952968801584, + "learning_rate": 4.02276323283963e-06, + "loss": 0.4974, + "step": 7044 + }, + { + "epoch": 0.58, + "grad_norm": 6.395851544449063, + "learning_rate": 4.021465018111939e-06, + "loss": 1.2998, + "step": 7045 + }, + { + "epoch": 0.58, + "grad_norm": 3.706246330303951, + "learning_rate": 4.020166871975293e-06, + "loss": 0.8945, + "step": 7046 + }, + { + "epoch": 0.58, + "grad_norm": 4.104683337642188, + "learning_rate": 4.0188687945206846e-06, + "loss": 1.0408, + "step": 7047 + }, + { + "epoch": 0.58, + "grad_norm": 1.8628401219992465, + "learning_rate": 4.0175707858391035e-06, + "loss": 0.4056, + "step": 7048 + }, + { + "epoch": 0.58, + "grad_norm": 4.183498230905607, + "learning_rate": 4.016272846021534e-06, + "loss": 0.9974, + "step": 7049 + }, + { + "epoch": 0.58, + "grad_norm": 3.068505142474466, + "learning_rate": 4.014974975158958e-06, + "loss": 0.8056, + "step": 7050 + }, + { + "epoch": 0.58, + "grad_norm": 3.3480882349045986, + "learning_rate": 4.013677173342348e-06, + "loss": 0.6373, + "step": 7051 + }, + { + "epoch": 0.58, + "grad_norm": 3.9158743043304876, + "learning_rate": 4.012379440662676e-06, + "loss": 0.7279, + "step": 7052 + }, + { + "epoch": 0.58, + "grad_norm": 5.9453187149906, + "learning_rate": 4.011081777210909e-06, + "loss": 1.505, + "step": 7053 + }, + { + "epoch": 0.58, + "grad_norm": 1.6622951104295558, + "learning_rate": 4.009784183078004e-06, + "loss": 0.3531, + "step": 7054 + }, + { + "epoch": 0.58, + "grad_norm": 2.9462489165130714, + "learning_rate": 4.008486658354919e-06, + "loss": 0.4783, + "step": 7055 + }, + { + "epoch": 0.58, + "grad_norm": 4.435618489485107, + "learning_rate": 4.007189203132603e-06, + "loss": 0.9431, + "step": 7056 + }, + { + "epoch": 0.58, + "grad_norm": 5.505739082053664, + "learning_rate": 4.005891817502004e-06, + "loss": 1.2068, + "step": 7057 + }, + { + "epoch": 0.58, + "grad_norm": 4.567645285380558, + "learning_rate": 4.004594501554061e-06, + "loss": 0.8364, + "step": 7058 + }, + { + "epoch": 0.58, + "grad_norm": 3.8855141545565215, + "learning_rate": 4.003297255379715e-06, + "loss": 0.6913, + "step": 7059 + }, + { + "epoch": 0.58, + "grad_norm": 3.4254509751515205, + "learning_rate": 4.0020000790698895e-06, + "loss": 0.5555, + "step": 7060 + }, + { + "epoch": 0.58, + "grad_norm": 3.424551143787788, + "learning_rate": 4.0007029727155165e-06, + "loss": 0.5029, + "step": 7061 + }, + { + "epoch": 0.58, + "grad_norm": 1.8812165633556992, + "learning_rate": 3.999405936407517e-06, + "loss": 0.3657, + "step": 7062 + }, + { + "epoch": 0.58, + "grad_norm": 3.6522064217633807, + "learning_rate": 3.998108970236807e-06, + "loss": 0.6894, + "step": 7063 + }, + { + "epoch": 0.58, + "grad_norm": 3.6574313938311116, + "learning_rate": 3.9968120742942965e-06, + "loss": 1.0746, + "step": 7064 + }, + { + "epoch": 0.58, + "grad_norm": 3.8574976031253674, + "learning_rate": 3.995515248670896e-06, + "loss": 0.8057, + "step": 7065 + }, + { + "epoch": 0.58, + "grad_norm": 4.902393834271197, + "learning_rate": 3.994218493457503e-06, + "loss": 0.8585, + "step": 7066 + }, + { + "epoch": 0.58, + "grad_norm": 1.7013551577078005, + "learning_rate": 3.992921808745016e-06, + "loss": 0.3055, + "step": 7067 + }, + { + "epoch": 0.58, + "grad_norm": 3.1942799397653476, + "learning_rate": 3.991625194624328e-06, + "loss": 0.7487, + "step": 7068 + }, + { + "epoch": 0.58, + "grad_norm": 2.478872092128973, + "learning_rate": 3.990328651186326e-06, + "loss": 0.2751, + "step": 7069 + }, + { + "epoch": 0.58, + "grad_norm": 5.540871815467057, + "learning_rate": 3.989032178521892e-06, + "loss": 0.8078, + "step": 7070 + }, + { + "epoch": 0.58, + "grad_norm": 4.353383629970022, + "learning_rate": 3.987735776721902e-06, + "loss": 1.0104, + "step": 7071 + }, + { + "epoch": 0.58, + "grad_norm": 4.733842174247616, + "learning_rate": 3.9864394458772275e-06, + "loss": 0.9171, + "step": 7072 + }, + { + "epoch": 0.58, + "grad_norm": 4.91998825679797, + "learning_rate": 3.9851431860787376e-06, + "loss": 0.9968, + "step": 7073 + }, + { + "epoch": 0.58, + "grad_norm": 4.401450605560654, + "learning_rate": 3.983846997417293e-06, + "loss": 0.5211, + "step": 7074 + }, + { + "epoch": 0.58, + "grad_norm": 3.6189428504676515, + "learning_rate": 3.982550879983752e-06, + "loss": 0.9904, + "step": 7075 + }, + { + "epoch": 0.58, + "grad_norm": 2.908054204358741, + "learning_rate": 3.981254833868968e-06, + "loss": 0.5373, + "step": 7076 + }, + { + "epoch": 0.58, + "grad_norm": 5.439929750668236, + "learning_rate": 3.979958859163785e-06, + "loss": 1.0696, + "step": 7077 + }, + { + "epoch": 0.58, + "grad_norm": 2.936184099302287, + "learning_rate": 3.978662955959047e-06, + "loss": 0.5715, + "step": 7078 + }, + { + "epoch": 0.58, + "grad_norm": 3.8749727121931197, + "learning_rate": 3.977367124345591e-06, + "loss": 0.9434, + "step": 7079 + }, + { + "epoch": 0.58, + "grad_norm": 3.5969238585770107, + "learning_rate": 3.976071364414248e-06, + "loss": 0.661, + "step": 7080 + }, + { + "epoch": 0.58, + "grad_norm": 3.761432159512601, + "learning_rate": 3.974775676255847e-06, + "loss": 0.8004, + "step": 7081 + }, + { + "epoch": 0.58, + "grad_norm": 5.105157516199045, + "learning_rate": 3.973480059961211e-06, + "loss": 1.0297, + "step": 7082 + }, + { + "epoch": 0.58, + "grad_norm": 3.2195043443638456, + "learning_rate": 3.9721845156211535e-06, + "loss": 0.6143, + "step": 7083 + }, + { + "epoch": 0.58, + "grad_norm": 3.3895344869403745, + "learning_rate": 3.970889043326488e-06, + "loss": 0.9823, + "step": 7084 + }, + { + "epoch": 0.58, + "grad_norm": 3.4684250527361247, + "learning_rate": 3.969593643168022e-06, + "loss": 0.4505, + "step": 7085 + }, + { + "epoch": 0.58, + "grad_norm": 3.388774080285407, + "learning_rate": 3.968298315236558e-06, + "loss": 0.7791, + "step": 7086 + }, + { + "epoch": 0.58, + "grad_norm": 3.0887367584929875, + "learning_rate": 3.967003059622893e-06, + "loss": 0.6912, + "step": 7087 + }, + { + "epoch": 0.58, + "grad_norm": 4.66271295613892, + "learning_rate": 3.965707876417818e-06, + "loss": 1.1753, + "step": 7088 + }, + { + "epoch": 0.58, + "grad_norm": 4.455896539629951, + "learning_rate": 3.964412765712118e-06, + "loss": 0.811, + "step": 7089 + }, + { + "epoch": 0.58, + "grad_norm": 3.664757526270815, + "learning_rate": 3.963117727596576e-06, + "loss": 0.674, + "step": 7090 + }, + { + "epoch": 0.58, + "grad_norm": 2.662817213759074, + "learning_rate": 3.961822762161969e-06, + "loss": 0.6596, + "step": 7091 + }, + { + "epoch": 0.58, + "grad_norm": 3.843816071724626, + "learning_rate": 3.960527869499068e-06, + "loss": 0.868, + "step": 7092 + }, + { + "epoch": 0.58, + "grad_norm": 3.971949880221828, + "learning_rate": 3.959233049698642e-06, + "loss": 0.5862, + "step": 7093 + }, + { + "epoch": 0.58, + "grad_norm": 4.403184894041416, + "learning_rate": 3.957938302851447e-06, + "loss": 0.8985, + "step": 7094 + }, + { + "epoch": 0.58, + "grad_norm": 2.151657750427979, + "learning_rate": 3.956643629048244e-06, + "loss": 0.3323, + "step": 7095 + }, + { + "epoch": 0.58, + "grad_norm": 4.326153968617495, + "learning_rate": 3.95534902837978e-06, + "loss": 0.893, + "step": 7096 + }, + { + "epoch": 0.58, + "grad_norm": 4.0435757373843595, + "learning_rate": 3.954054500936803e-06, + "loss": 0.8832, + "step": 7097 + }, + { + "epoch": 0.58, + "grad_norm": 2.846159514245381, + "learning_rate": 3.952760046810054e-06, + "loss": 0.4438, + "step": 7098 + }, + { + "epoch": 0.58, + "grad_norm": 5.182475428073442, + "learning_rate": 3.951465666090269e-06, + "loss": 0.975, + "step": 7099 + }, + { + "epoch": 0.58, + "grad_norm": 5.230426826177786, + "learning_rate": 3.950171358868177e-06, + "loss": 1.4132, + "step": 7100 + }, + { + "epoch": 0.58, + "grad_norm": 3.8291254257521725, + "learning_rate": 3.948877125234502e-06, + "loss": 0.6377, + "step": 7101 + }, + { + "epoch": 0.58, + "grad_norm": 3.216084276924933, + "learning_rate": 3.947582965279969e-06, + "loss": 0.6111, + "step": 7102 + }, + { + "epoch": 0.58, + "grad_norm": 1.6706962900628781, + "learning_rate": 3.9462888790952885e-06, + "loss": 0.2765, + "step": 7103 + }, + { + "epoch": 0.58, + "grad_norm": 3.138040175328101, + "learning_rate": 3.944994866771171e-06, + "loss": 0.339, + "step": 7104 + }, + { + "epoch": 0.58, + "grad_norm": 3.604511535772637, + "learning_rate": 3.943700928398325e-06, + "loss": 0.7229, + "step": 7105 + }, + { + "epoch": 0.58, + "grad_norm": 3.5932734228415804, + "learning_rate": 3.942407064067444e-06, + "loss": 0.5461, + "step": 7106 + }, + { + "epoch": 0.58, + "grad_norm": 3.790858424347929, + "learning_rate": 3.941113273869226e-06, + "loss": 0.7984, + "step": 7107 + }, + { + "epoch": 0.58, + "grad_norm": 3.991797473632268, + "learning_rate": 3.939819557894358e-06, + "loss": 0.7425, + "step": 7108 + }, + { + "epoch": 0.58, + "grad_norm": 4.2829158230152045, + "learning_rate": 3.938525916233527e-06, + "loss": 1.1413, + "step": 7109 + }, + { + "epoch": 0.58, + "grad_norm": 2.27835612607791, + "learning_rate": 3.93723234897741e-06, + "loss": 0.3811, + "step": 7110 + }, + { + "epoch": 0.58, + "grad_norm": 4.0617761846025, + "learning_rate": 3.93593885621668e-06, + "loss": 0.6642, + "step": 7111 + }, + { + "epoch": 0.58, + "grad_norm": 4.211114673808272, + "learning_rate": 3.934645438042004e-06, + "loss": 1.2995, + "step": 7112 + }, + { + "epoch": 0.58, + "grad_norm": 3.1915693087058945, + "learning_rate": 3.933352094544045e-06, + "loss": 0.7407, + "step": 7113 + }, + { + "epoch": 0.58, + "grad_norm": 4.701250053167851, + "learning_rate": 3.932058825813464e-06, + "loss": 1.1612, + "step": 7114 + }, + { + "epoch": 0.58, + "grad_norm": 4.970892171852544, + "learning_rate": 3.930765631940911e-06, + "loss": 0.8714, + "step": 7115 + }, + { + "epoch": 0.58, + "grad_norm": 3.527673185339682, + "learning_rate": 3.929472513017036e-06, + "loss": 0.653, + "step": 7116 + }, + { + "epoch": 0.58, + "grad_norm": 3.623928334585889, + "learning_rate": 3.928179469132477e-06, + "loss": 0.8409, + "step": 7117 + }, + { + "epoch": 0.58, + "grad_norm": 4.8302572266053065, + "learning_rate": 3.926886500377874e-06, + "loss": 1.0464, + "step": 7118 + }, + { + "epoch": 0.58, + "grad_norm": 3.6459442820124863, + "learning_rate": 3.925593606843856e-06, + "loss": 0.8235, + "step": 7119 + }, + { + "epoch": 0.58, + "grad_norm": 3.4826241592611815, + "learning_rate": 3.924300788621049e-06, + "loss": 0.6283, + "step": 7120 + }, + { + "epoch": 0.58, + "grad_norm": 2.9437138488386823, + "learning_rate": 3.923008045800077e-06, + "loss": 0.461, + "step": 7121 + }, + { + "epoch": 0.58, + "grad_norm": 3.932708836589702, + "learning_rate": 3.921715378471555e-06, + "loss": 0.9496, + "step": 7122 + }, + { + "epoch": 0.58, + "grad_norm": 3.194910499503078, + "learning_rate": 3.92042278672609e-06, + "loss": 0.6686, + "step": 7123 + }, + { + "epoch": 0.58, + "grad_norm": 3.1726502179347773, + "learning_rate": 3.91913027065429e-06, + "loss": 0.5938, + "step": 7124 + }, + { + "epoch": 0.58, + "grad_norm": 1.8286749940870537, + "learning_rate": 3.917837830346754e-06, + "loss": 0.4451, + "step": 7125 + }, + { + "epoch": 0.58, + "grad_norm": 3.945794551030212, + "learning_rate": 3.916545465894077e-06, + "loss": 1.0091, + "step": 7126 + }, + { + "epoch": 0.58, + "grad_norm": 2.53195597074014, + "learning_rate": 3.915253177386849e-06, + "loss": 0.6858, + "step": 7127 + }, + { + "epoch": 0.58, + "grad_norm": 2.440713321478638, + "learning_rate": 3.91396096491565e-06, + "loss": 0.4462, + "step": 7128 + }, + { + "epoch": 0.58, + "grad_norm": 3.8823848511405292, + "learning_rate": 3.912668828571061e-06, + "loss": 0.9204, + "step": 7129 + }, + { + "epoch": 0.58, + "grad_norm": 3.9367897793515216, + "learning_rate": 3.9113767684436555e-06, + "loss": 0.73, + "step": 7130 + }, + { + "epoch": 0.58, + "grad_norm": 4.190586554285153, + "learning_rate": 3.910084784624001e-06, + "loss": 0.9722, + "step": 7131 + }, + { + "epoch": 0.58, + "grad_norm": 4.9779115794191515, + "learning_rate": 3.90879287720266e-06, + "loss": 1.2202, + "step": 7132 + }, + { + "epoch": 0.58, + "grad_norm": 2.5050948355733933, + "learning_rate": 3.907501046270189e-06, + "loss": 0.4069, + "step": 7133 + }, + { + "epoch": 0.58, + "grad_norm": 3.415859284537811, + "learning_rate": 3.906209291917141e-06, + "loss": 0.8027, + "step": 7134 + }, + { + "epoch": 0.58, + "grad_norm": 4.9358216022893835, + "learning_rate": 3.904917614234061e-06, + "loss": 1.2471, + "step": 7135 + }, + { + "epoch": 0.58, + "grad_norm": 3.9747677120767912, + "learning_rate": 3.903626013311489e-06, + "loss": 0.7411, + "step": 7136 + }, + { + "epoch": 0.58, + "grad_norm": 3.988415913963125, + "learning_rate": 3.902334489239963e-06, + "loss": 0.7372, + "step": 7137 + }, + { + "epoch": 0.58, + "grad_norm": 5.738282110028436, + "learning_rate": 3.901043042110012e-06, + "loss": 0.8475, + "step": 7138 + }, + { + "epoch": 0.58, + "grad_norm": 4.100754157933643, + "learning_rate": 3.899751672012163e-06, + "loss": 0.8092, + "step": 7139 + }, + { + "epoch": 0.58, + "grad_norm": 3.5068835161335983, + "learning_rate": 3.898460379036931e-06, + "loss": 0.7991, + "step": 7140 + }, + { + "epoch": 0.58, + "grad_norm": 3.933771498849496, + "learning_rate": 3.897169163274835e-06, + "loss": 0.8676, + "step": 7141 + }, + { + "epoch": 0.58, + "grad_norm": 4.602783614402582, + "learning_rate": 3.895878024816378e-06, + "loss": 0.7007, + "step": 7142 + }, + { + "epoch": 0.58, + "grad_norm": 2.797454968338988, + "learning_rate": 3.894586963752068e-06, + "loss": 0.5681, + "step": 7143 + }, + { + "epoch": 0.58, + "grad_norm": 3.1546536385429227, + "learning_rate": 3.893295980172401e-06, + "loss": 0.7844, + "step": 7144 + }, + { + "epoch": 0.58, + "grad_norm": 3.454972069711611, + "learning_rate": 3.892005074167871e-06, + "loss": 0.5962, + "step": 7145 + }, + { + "epoch": 0.58, + "grad_norm": 4.4058536007061235, + "learning_rate": 3.890714245828961e-06, + "loss": 0.7945, + "step": 7146 + }, + { + "epoch": 0.58, + "grad_norm": 4.3281395348638245, + "learning_rate": 3.889423495246155e-06, + "loss": 0.8408, + "step": 7147 + }, + { + "epoch": 0.58, + "grad_norm": 2.1678889532437804, + "learning_rate": 3.88813282250993e-06, + "loss": 0.2202, + "step": 7148 + }, + { + "epoch": 0.58, + "grad_norm": 3.6827832514151853, + "learning_rate": 3.8868422277107536e-06, + "loss": 0.7201, + "step": 7149 + }, + { + "epoch": 0.58, + "grad_norm": 2.427866915998655, + "learning_rate": 3.885551710939095e-06, + "loss": 0.5189, + "step": 7150 + }, + { + "epoch": 0.58, + "grad_norm": 3.738846069484021, + "learning_rate": 3.884261272285409e-06, + "loss": 0.8858, + "step": 7151 + }, + { + "epoch": 0.58, + "grad_norm": 2.843459236720858, + "learning_rate": 3.8829709118401525e-06, + "loss": 0.4664, + "step": 7152 + }, + { + "epoch": 0.58, + "grad_norm": 3.153689597283749, + "learning_rate": 3.881680629693774e-06, + "loss": 0.4918, + "step": 7153 + }, + { + "epoch": 0.58, + "grad_norm": 4.088250182020076, + "learning_rate": 3.8803904259367156e-06, + "loss": 0.6217, + "step": 7154 + }, + { + "epoch": 0.58, + "grad_norm": 5.021547163364894, + "learning_rate": 3.879100300659417e-06, + "loss": 1.1175, + "step": 7155 + }, + { + "epoch": 0.58, + "grad_norm": 4.582998666770339, + "learning_rate": 3.87781025395231e-06, + "loss": 1.0289, + "step": 7156 + }, + { + "epoch": 0.58, + "grad_norm": 4.112295157664457, + "learning_rate": 3.87652028590582e-06, + "loss": 0.6874, + "step": 7157 + }, + { + "epoch": 0.59, + "grad_norm": 2.8179712349560764, + "learning_rate": 3.875230396610367e-06, + "loss": 0.3803, + "step": 7158 + }, + { + "epoch": 0.59, + "grad_norm": 0.9175547172248338, + "learning_rate": 3.873940586156368e-06, + "loss": 0.1297, + "step": 7159 + }, + { + "epoch": 0.59, + "grad_norm": 2.2277994379466444, + "learning_rate": 3.8726508546342346e-06, + "loss": 0.2989, + "step": 7160 + }, + { + "epoch": 0.59, + "grad_norm": 4.338092565175346, + "learning_rate": 3.8713612021343695e-06, + "loss": 0.7526, + "step": 7161 + }, + { + "epoch": 0.59, + "grad_norm": 3.7247563648076056, + "learning_rate": 3.870071628747174e-06, + "loss": 0.7655, + "step": 7162 + }, + { + "epoch": 0.59, + "grad_norm": 5.352743472567859, + "learning_rate": 3.868782134563038e-06, + "loss": 1.3943, + "step": 7163 + }, + { + "epoch": 0.59, + "grad_norm": 3.3405860367231086, + "learning_rate": 3.867492719672352e-06, + "loss": 0.4234, + "step": 7164 + }, + { + "epoch": 0.59, + "grad_norm": 5.123619589734145, + "learning_rate": 3.866203384165497e-06, + "loss": 0.9548, + "step": 7165 + }, + { + "epoch": 0.59, + "grad_norm": 1.3149482566165072, + "learning_rate": 3.86491412813285e-06, + "loss": 0.1749, + "step": 7166 + }, + { + "epoch": 0.59, + "grad_norm": 2.0442256070497082, + "learning_rate": 3.863624951664785e-06, + "loss": 0.3793, + "step": 7167 + }, + { + "epoch": 0.59, + "grad_norm": 3.1812285417600368, + "learning_rate": 3.862335854851664e-06, + "loss": 0.7994, + "step": 7168 + }, + { + "epoch": 0.59, + "grad_norm": 3.998063794562161, + "learning_rate": 3.861046837783847e-06, + "loss": 0.5162, + "step": 7169 + }, + { + "epoch": 0.59, + "grad_norm": 1.6757687591265147, + "learning_rate": 3.859757900551691e-06, + "loss": 0.2664, + "step": 7170 + }, + { + "epoch": 0.59, + "grad_norm": 4.706957271055265, + "learning_rate": 3.8584690432455456e-06, + "loss": 1.1055, + "step": 7171 + }, + { + "epoch": 0.59, + "grad_norm": 3.327806797529627, + "learning_rate": 3.85718026595575e-06, + "loss": 0.6231, + "step": 7172 + }, + { + "epoch": 0.59, + "grad_norm": 3.6424505231979754, + "learning_rate": 3.855891568772646e-06, + "loss": 0.8499, + "step": 7173 + }, + { + "epoch": 0.59, + "grad_norm": 0.8250076554024681, + "learning_rate": 3.854602951786562e-06, + "loss": 0.1412, + "step": 7174 + }, + { + "epoch": 0.59, + "grad_norm": 2.9142664387470116, + "learning_rate": 3.8533144150878275e-06, + "loss": 0.6215, + "step": 7175 + }, + { + "epoch": 0.59, + "grad_norm": 5.347449923340581, + "learning_rate": 3.8520259587667605e-06, + "loss": 1.1855, + "step": 7176 + }, + { + "epoch": 0.59, + "grad_norm": 3.15536023408874, + "learning_rate": 3.850737582913679e-06, + "loss": 0.7286, + "step": 7177 + }, + { + "epoch": 0.59, + "grad_norm": 3.8471299159732375, + "learning_rate": 3.849449287618892e-06, + "loss": 0.9961, + "step": 7178 + }, + { + "epoch": 0.59, + "grad_norm": 4.839480131416237, + "learning_rate": 3.848161072972702e-06, + "loss": 1.4947, + "step": 7179 + }, + { + "epoch": 0.59, + "grad_norm": 3.7682019393758934, + "learning_rate": 3.846872939065409e-06, + "loss": 0.669, + "step": 7180 + }, + { + "epoch": 0.59, + "grad_norm": 3.5419341999668967, + "learning_rate": 3.8455848859873035e-06, + "loss": 0.7145, + "step": 7181 + }, + { + "epoch": 0.59, + "grad_norm": 4.1367184258575165, + "learning_rate": 3.8442969138286726e-06, + "loss": 0.6236, + "step": 7182 + }, + { + "epoch": 0.59, + "grad_norm": 3.862127518357182, + "learning_rate": 3.843009022679799e-06, + "loss": 1.1014, + "step": 7183 + }, + { + "epoch": 0.59, + "grad_norm": 2.6911367705490776, + "learning_rate": 3.841721212630958e-06, + "loss": 0.5209, + "step": 7184 + }, + { + "epoch": 0.59, + "grad_norm": 3.800166453494574, + "learning_rate": 3.8404334837724205e-06, + "loss": 0.5094, + "step": 7185 + }, + { + "epoch": 0.59, + "grad_norm": 2.127442610276738, + "learning_rate": 3.8391458361944475e-06, + "loss": 0.2214, + "step": 7186 + }, + { + "epoch": 0.59, + "grad_norm": 3.3877133824762238, + "learning_rate": 3.837858269987299e-06, + "loss": 0.8476, + "step": 7187 + }, + { + "epoch": 0.59, + "grad_norm": 4.600332999607474, + "learning_rate": 3.836570785241231e-06, + "loss": 0.7541, + "step": 7188 + }, + { + "epoch": 0.59, + "grad_norm": 3.847101954500791, + "learning_rate": 3.835283382046484e-06, + "loss": 0.6709, + "step": 7189 + }, + { + "epoch": 0.59, + "grad_norm": 4.822372588945832, + "learning_rate": 3.833996060493307e-06, + "loss": 1.0825, + "step": 7190 + }, + { + "epoch": 0.59, + "grad_norm": 5.36848710279312, + "learning_rate": 3.832708820671928e-06, + "loss": 1.3174, + "step": 7191 + }, + { + "epoch": 0.59, + "grad_norm": 4.109358838896932, + "learning_rate": 3.831421662672582e-06, + "loss": 1.0806, + "step": 7192 + }, + { + "epoch": 0.59, + "grad_norm": 5.408353703345709, + "learning_rate": 3.830134586585491e-06, + "loss": 0.8491, + "step": 7193 + }, + { + "epoch": 0.59, + "grad_norm": 4.424293970670651, + "learning_rate": 3.828847592500875e-06, + "loss": 1.0811, + "step": 7194 + }, + { + "epoch": 0.59, + "grad_norm": 3.882556551822584, + "learning_rate": 3.827560680508946e-06, + "loss": 0.8556, + "step": 7195 + }, + { + "epoch": 0.59, + "grad_norm": 5.2613405765315, + "learning_rate": 3.826273850699912e-06, + "loss": 0.8493, + "step": 7196 + }, + { + "epoch": 0.59, + "grad_norm": 4.55056975151563, + "learning_rate": 3.824987103163972e-06, + "loss": 0.5399, + "step": 7197 + }, + { + "epoch": 0.59, + "grad_norm": 5.130506068996539, + "learning_rate": 3.823700437991321e-06, + "loss": 1.0217, + "step": 7198 + }, + { + "epoch": 0.59, + "grad_norm": 3.646202870650748, + "learning_rate": 3.822413855272151e-06, + "loss": 0.7791, + "step": 7199 + }, + { + "epoch": 0.59, + "grad_norm": 3.460344371701214, + "learning_rate": 3.821127355096645e-06, + "loss": 0.6711, + "step": 7200 + }, + { + "epoch": 0.59, + "grad_norm": 2.8134014500209763, + "learning_rate": 3.81984093755498e-06, + "loss": 0.518, + "step": 7201 + }, + { + "epoch": 0.59, + "grad_norm": 4.253356241796577, + "learning_rate": 3.8185546027373325e-06, + "loss": 0.807, + "step": 7202 + }, + { + "epoch": 0.59, + "grad_norm": 3.7265383373452403, + "learning_rate": 3.817268350733862e-06, + "loss": 0.677, + "step": 7203 + }, + { + "epoch": 0.59, + "grad_norm": 3.600616746223601, + "learning_rate": 3.815982181634735e-06, + "loss": 0.6427, + "step": 7204 + }, + { + "epoch": 0.59, + "grad_norm": 2.1665978297552964, + "learning_rate": 3.814696095530103e-06, + "loss": 0.3392, + "step": 7205 + }, + { + "epoch": 0.59, + "grad_norm": 4.16134808595628, + "learning_rate": 3.813410092510116e-06, + "loss": 1.0075, + "step": 7206 + }, + { + "epoch": 0.59, + "grad_norm": 1.1975198351294647, + "learning_rate": 3.8121241726649195e-06, + "loss": 0.2097, + "step": 7207 + }, + { + "epoch": 0.59, + "grad_norm": 3.6734953521052582, + "learning_rate": 3.8108383360846467e-06, + "loss": 0.6527, + "step": 7208 + }, + { + "epoch": 0.59, + "grad_norm": 4.394152793853723, + "learning_rate": 3.809552582859432e-06, + "loss": 0.9742, + "step": 7209 + }, + { + "epoch": 0.59, + "grad_norm": 3.63757007236222, + "learning_rate": 3.8082669130793998e-06, + "loss": 0.4478, + "step": 7210 + }, + { + "epoch": 0.59, + "grad_norm": 1.644899905912618, + "learning_rate": 3.8069813268346717e-06, + "loss": 0.3406, + "step": 7211 + }, + { + "epoch": 0.59, + "grad_norm": 3.91484487437852, + "learning_rate": 3.8056958242153598e-06, + "loss": 0.7422, + "step": 7212 + }, + { + "epoch": 0.59, + "grad_norm": 4.109537964323625, + "learning_rate": 3.804410405311575e-06, + "loss": 0.6795, + "step": 7213 + }, + { + "epoch": 0.59, + "grad_norm": 4.625631627386881, + "learning_rate": 3.8031250702134148e-06, + "loss": 0.9754, + "step": 7214 + }, + { + "epoch": 0.59, + "grad_norm": 3.138406520861325, + "learning_rate": 3.801839819010979e-06, + "loss": 0.575, + "step": 7215 + }, + { + "epoch": 0.59, + "grad_norm": 3.9259807505491606, + "learning_rate": 3.800554651794357e-06, + "loss": 0.9691, + "step": 7216 + }, + { + "epoch": 0.59, + "grad_norm": 5.172195251924218, + "learning_rate": 3.7992695686536345e-06, + "loss": 1.3919, + "step": 7217 + }, + { + "epoch": 0.59, + "grad_norm": 2.575012156658763, + "learning_rate": 3.7979845696788903e-06, + "loss": 0.3482, + "step": 7218 + }, + { + "epoch": 0.59, + "grad_norm": 3.898379099253242, + "learning_rate": 3.7966996549601968e-06, + "loss": 0.6983, + "step": 7219 + }, + { + "epoch": 0.59, + "grad_norm": 3.230484488211065, + "learning_rate": 3.795414824587621e-06, + "loss": 0.7996, + "step": 7220 + }, + { + "epoch": 0.59, + "grad_norm": 3.50312455766705, + "learning_rate": 3.794130078651222e-06, + "loss": 0.8794, + "step": 7221 + }, + { + "epoch": 0.59, + "grad_norm": 1.9917695078674407, + "learning_rate": 3.7928454172410565e-06, + "loss": 0.397, + "step": 7222 + }, + { + "epoch": 0.59, + "grad_norm": 3.7089917338476215, + "learning_rate": 3.7915608404471738e-06, + "loss": 0.5728, + "step": 7223 + }, + { + "epoch": 0.59, + "grad_norm": 3.7570602077823247, + "learning_rate": 3.7902763483596173e-06, + "loss": 0.6975, + "step": 7224 + }, + { + "epoch": 0.59, + "grad_norm": 4.485543040083705, + "learning_rate": 3.7889919410684262e-06, + "loss": 0.6952, + "step": 7225 + }, + { + "epoch": 0.59, + "grad_norm": 2.7751991440072854, + "learning_rate": 3.7877076186636275e-06, + "loss": 0.5311, + "step": 7226 + }, + { + "epoch": 0.59, + "grad_norm": 2.946904427500743, + "learning_rate": 3.7864233812352497e-06, + "loss": 0.5584, + "step": 7227 + }, + { + "epoch": 0.59, + "grad_norm": 3.3960138117057532, + "learning_rate": 3.78513922887331e-06, + "loss": 0.7777, + "step": 7228 + }, + { + "epoch": 0.59, + "grad_norm": 3.8388110904642248, + "learning_rate": 3.783855161667824e-06, + "loss": 0.9353, + "step": 7229 + }, + { + "epoch": 0.59, + "grad_norm": 4.208385056053049, + "learning_rate": 3.7825711797088e-06, + "loss": 0.7865, + "step": 7230 + }, + { + "epoch": 0.59, + "grad_norm": 4.654728205483348, + "learning_rate": 3.7812872830862363e-06, + "loss": 1.0619, + "step": 7231 + }, + { + "epoch": 0.59, + "grad_norm": 4.943147912524849, + "learning_rate": 3.78000347189013e-06, + "loss": 0.9678, + "step": 7232 + }, + { + "epoch": 0.59, + "grad_norm": 1.6386007331503682, + "learning_rate": 3.778719746210471e-06, + "loss": 0.2088, + "step": 7233 + }, + { + "epoch": 0.59, + "grad_norm": 4.974326653202862, + "learning_rate": 3.777436106137244e-06, + "loss": 0.8962, + "step": 7234 + }, + { + "epoch": 0.59, + "grad_norm": 4.247202635701191, + "learning_rate": 3.7761525517604237e-06, + "loss": 0.918, + "step": 7235 + }, + { + "epoch": 0.59, + "grad_norm": 3.920840009975349, + "learning_rate": 3.7748690831699858e-06, + "loss": 0.6603, + "step": 7236 + }, + { + "epoch": 0.59, + "grad_norm": 2.024972776872732, + "learning_rate": 3.7735857004558913e-06, + "loss": 0.3891, + "step": 7237 + }, + { + "epoch": 0.59, + "grad_norm": 1.5436469189015576, + "learning_rate": 3.772302403708102e-06, + "loss": 0.2604, + "step": 7238 + }, + { + "epoch": 0.59, + "grad_norm": 4.024412906807474, + "learning_rate": 3.7710191930165705e-06, + "loss": 0.8262, + "step": 7239 + }, + { + "epoch": 0.59, + "grad_norm": 6.155739632668728, + "learning_rate": 3.769736068471246e-06, + "loss": 1.5707, + "step": 7240 + }, + { + "epoch": 0.59, + "grad_norm": 2.7645413009827906, + "learning_rate": 3.7684530301620693e-06, + "loss": 0.3664, + "step": 7241 + }, + { + "epoch": 0.59, + "grad_norm": 3.8908487487689944, + "learning_rate": 3.7671700781789753e-06, + "loss": 0.7639, + "step": 7242 + }, + { + "epoch": 0.59, + "grad_norm": 3.865174935670953, + "learning_rate": 3.7658872126118945e-06, + "loss": 0.6395, + "step": 7243 + }, + { + "epoch": 0.59, + "grad_norm": 5.682703256402187, + "learning_rate": 3.7646044335507474e-06, + "loss": 1.2663, + "step": 7244 + }, + { + "epoch": 0.59, + "grad_norm": 2.220821504408669, + "learning_rate": 3.7633217410854534e-06, + "loss": 0.4474, + "step": 7245 + }, + { + "epoch": 0.59, + "grad_norm": 4.811613429874075, + "learning_rate": 3.7620391353059232e-06, + "loss": 0.9988, + "step": 7246 + }, + { + "epoch": 0.59, + "grad_norm": 4.060604468939529, + "learning_rate": 3.760756616302064e-06, + "loss": 0.7735, + "step": 7247 + }, + { + "epoch": 0.59, + "grad_norm": 5.596254542379706, + "learning_rate": 3.759474184163771e-06, + "loss": 0.7325, + "step": 7248 + }, + { + "epoch": 0.59, + "grad_norm": 4.334206059149519, + "learning_rate": 3.7581918389809384e-06, + "loss": 0.7668, + "step": 7249 + }, + { + "epoch": 0.59, + "grad_norm": 3.1112879445181103, + "learning_rate": 3.756909580843455e-06, + "loss": 0.6829, + "step": 7250 + }, + { + "epoch": 0.59, + "grad_norm": 5.016626681173837, + "learning_rate": 3.7556274098411993e-06, + "loss": 0.9484, + "step": 7251 + }, + { + "epoch": 0.59, + "grad_norm": 3.9305174867410395, + "learning_rate": 3.754345326064046e-06, + "loss": 0.8028, + "step": 7252 + }, + { + "epoch": 0.59, + "grad_norm": 4.047174790150774, + "learning_rate": 3.7530633296018664e-06, + "loss": 0.8824, + "step": 7253 + }, + { + "epoch": 0.59, + "grad_norm": 2.7846759776594423, + "learning_rate": 3.7517814205445187e-06, + "loss": 0.5059, + "step": 7254 + }, + { + "epoch": 0.59, + "grad_norm": 4.173612013099389, + "learning_rate": 3.7504995989818615e-06, + "loss": 1.1138, + "step": 7255 + }, + { + "epoch": 0.59, + "grad_norm": 1.5987499473447744, + "learning_rate": 3.749217865003744e-06, + "loss": 0.179, + "step": 7256 + }, + { + "epoch": 0.59, + "grad_norm": 4.470430144097223, + "learning_rate": 3.747936218700012e-06, + "loss": 0.9139, + "step": 7257 + }, + { + "epoch": 0.59, + "grad_norm": 3.7070475368116615, + "learning_rate": 3.7466546601605012e-06, + "loss": 0.6481, + "step": 7258 + }, + { + "epoch": 0.59, + "grad_norm": 4.589366742179254, + "learning_rate": 3.745373189475046e-06, + "loss": 1.0235, + "step": 7259 + }, + { + "epoch": 0.59, + "grad_norm": 3.8524322868345173, + "learning_rate": 3.744091806733468e-06, + "loss": 0.8042, + "step": 7260 + }, + { + "epoch": 0.59, + "grad_norm": 5.538043867165417, + "learning_rate": 3.742810512025589e-06, + "loss": 1.122, + "step": 7261 + }, + { + "epoch": 0.59, + "grad_norm": 4.42242285248518, + "learning_rate": 3.7415293054412216e-06, + "loss": 1.1899, + "step": 7262 + }, + { + "epoch": 0.59, + "grad_norm": 5.037165970787136, + "learning_rate": 3.7402481870701722e-06, + "loss": 1.1023, + "step": 7263 + }, + { + "epoch": 0.59, + "grad_norm": 3.7586712690069137, + "learning_rate": 3.7389671570022445e-06, + "loss": 0.6374, + "step": 7264 + }, + { + "epoch": 0.59, + "grad_norm": 3.649014060677766, + "learning_rate": 3.7376862153272307e-06, + "loss": 0.6938, + "step": 7265 + }, + { + "epoch": 0.59, + "grad_norm": 3.3655819352546277, + "learning_rate": 3.7364053621349193e-06, + "loss": 0.8945, + "step": 7266 + }, + { + "epoch": 0.59, + "grad_norm": 2.0661749900771826, + "learning_rate": 3.7351245975150924e-06, + "loss": 0.3209, + "step": 7267 + }, + { + "epoch": 0.59, + "grad_norm": 3.109721130348778, + "learning_rate": 3.733843921557526e-06, + "loss": 0.8675, + "step": 7268 + }, + { + "epoch": 0.59, + "grad_norm": 3.4110127821534957, + "learning_rate": 3.7325633343519907e-06, + "loss": 0.4387, + "step": 7269 + }, + { + "epoch": 0.59, + "grad_norm": 1.3270126397093516, + "learning_rate": 3.731282835988252e-06, + "loss": 0.1857, + "step": 7270 + }, + { + "epoch": 0.59, + "grad_norm": 3.1982952028614506, + "learning_rate": 3.7300024265560623e-06, + "loss": 0.6263, + "step": 7271 + }, + { + "epoch": 0.59, + "grad_norm": 4.054983253419086, + "learning_rate": 3.7287221061451763e-06, + "loss": 0.6723, + "step": 7272 + }, + { + "epoch": 0.59, + "grad_norm": 3.006387213108985, + "learning_rate": 3.7274418748453378e-06, + "loss": 0.5593, + "step": 7273 + }, + { + "epoch": 0.59, + "grad_norm": 3.177843696204133, + "learning_rate": 3.7261617327462857e-06, + "loss": 0.4144, + "step": 7274 + }, + { + "epoch": 0.59, + "grad_norm": 2.154411830595848, + "learning_rate": 3.7248816799377517e-06, + "loss": 0.2859, + "step": 7275 + }, + { + "epoch": 0.59, + "grad_norm": 3.7085682586083197, + "learning_rate": 3.723601716509465e-06, + "loss": 0.7336, + "step": 7276 + }, + { + "epoch": 0.59, + "grad_norm": 4.532344507880409, + "learning_rate": 3.7223218425511416e-06, + "loss": 0.743, + "step": 7277 + }, + { + "epoch": 0.59, + "grad_norm": 4.1145090200213295, + "learning_rate": 3.721042058152496e-06, + "loss": 1.0092, + "step": 7278 + }, + { + "epoch": 0.59, + "grad_norm": 2.9824707968840727, + "learning_rate": 3.719762363403236e-06, + "loss": 0.6897, + "step": 7279 + }, + { + "epoch": 0.6, + "grad_norm": 4.93318616633958, + "learning_rate": 3.718482758393064e-06, + "loss": 1.0741, + "step": 7280 + }, + { + "epoch": 0.6, + "grad_norm": 3.873862285417129, + "learning_rate": 3.7172032432116724e-06, + "loss": 0.6303, + "step": 7281 + }, + { + "epoch": 0.6, + "grad_norm": 4.028763878606627, + "learning_rate": 3.715923817948752e-06, + "loss": 0.952, + "step": 7282 + }, + { + "epoch": 0.6, + "grad_norm": 4.664191244431255, + "learning_rate": 3.7146444826939828e-06, + "loss": 0.8097, + "step": 7283 + }, + { + "epoch": 0.6, + "grad_norm": 5.243476704374192, + "learning_rate": 3.7133652375370404e-06, + "loss": 0.9209, + "step": 7284 + }, + { + "epoch": 0.6, + "grad_norm": 3.81678470925151, + "learning_rate": 3.7120860825675965e-06, + "loss": 0.5091, + "step": 7285 + }, + { + "epoch": 0.6, + "grad_norm": 4.085855589423921, + "learning_rate": 3.710807017875312e-06, + "loss": 0.775, + "step": 7286 + }, + { + "epoch": 0.6, + "grad_norm": 3.6442545721216475, + "learning_rate": 3.7095280435498476e-06, + "loss": 1.1102, + "step": 7287 + }, + { + "epoch": 0.6, + "grad_norm": 3.050918337247399, + "learning_rate": 3.708249159680849e-06, + "loss": 0.4903, + "step": 7288 + }, + { + "epoch": 0.6, + "grad_norm": 2.5453549381261493, + "learning_rate": 3.7069703663579626e-06, + "loss": 0.5086, + "step": 7289 + }, + { + "epoch": 0.6, + "grad_norm": 2.55975029920748, + "learning_rate": 3.7056916636708275e-06, + "loss": 0.4368, + "step": 7290 + }, + { + "epoch": 0.6, + "grad_norm": 3.9211923351279503, + "learning_rate": 3.7044130517090725e-06, + "loss": 0.7221, + "step": 7291 + }, + { + "epoch": 0.6, + "grad_norm": 3.8695486692822603, + "learning_rate": 3.7031345305623247e-06, + "loss": 0.5645, + "step": 7292 + }, + { + "epoch": 0.6, + "grad_norm": 5.0322526274600685, + "learning_rate": 3.701856100320205e-06, + "loss": 0.993, + "step": 7293 + }, + { + "epoch": 0.6, + "grad_norm": 3.397881077392223, + "learning_rate": 3.700577761072319e-06, + "loss": 0.9325, + "step": 7294 + }, + { + "epoch": 0.6, + "grad_norm": 5.109022269602481, + "learning_rate": 3.6992995129082787e-06, + "loss": 0.9902, + "step": 7295 + }, + { + "epoch": 0.6, + "grad_norm": 3.0430714662205114, + "learning_rate": 3.6980213559176806e-06, + "loss": 0.5342, + "step": 7296 + }, + { + "epoch": 0.6, + "grad_norm": 2.979238248294013, + "learning_rate": 3.6967432901901214e-06, + "loss": 0.8024, + "step": 7297 + }, + { + "epoch": 0.6, + "grad_norm": 3.357106852408541, + "learning_rate": 3.695465315815184e-06, + "loss": 0.6754, + "step": 7298 + }, + { + "epoch": 0.6, + "grad_norm": 6.536203245639044, + "learning_rate": 3.6941874328824528e-06, + "loss": 1.4261, + "step": 7299 + }, + { + "epoch": 0.6, + "grad_norm": 4.114188179870397, + "learning_rate": 3.692909641481498e-06, + "loss": 1.0592, + "step": 7300 + }, + { + "epoch": 0.6, + "grad_norm": 3.7910836878874328, + "learning_rate": 3.691631941701889e-06, + "loss": 0.7355, + "step": 7301 + }, + { + "epoch": 0.6, + "grad_norm": 3.512324231847214, + "learning_rate": 3.690354333633186e-06, + "loss": 0.7389, + "step": 7302 + }, + { + "epoch": 0.6, + "grad_norm": 4.08733216422255, + "learning_rate": 3.689076817364945e-06, + "loss": 0.6672, + "step": 7303 + }, + { + "epoch": 0.6, + "grad_norm": 2.5066420531243563, + "learning_rate": 3.6877993929867146e-06, + "loss": 0.5613, + "step": 7304 + }, + { + "epoch": 0.6, + "grad_norm": 4.030634115273332, + "learning_rate": 3.6865220605880363e-06, + "loss": 1.0986, + "step": 7305 + }, + { + "epoch": 0.6, + "grad_norm": 5.683643009920432, + "learning_rate": 3.6852448202584457e-06, + "loss": 0.9423, + "step": 7306 + }, + { + "epoch": 0.6, + "grad_norm": 2.59612249188272, + "learning_rate": 3.6839676720874695e-06, + "loss": 0.4942, + "step": 7307 + }, + { + "epoch": 0.6, + "grad_norm": 4.3689628530978375, + "learning_rate": 3.6826906161646325e-06, + "loss": 0.9352, + "step": 7308 + }, + { + "epoch": 0.6, + "grad_norm": 3.9925971887147798, + "learning_rate": 3.681413652579451e-06, + "loss": 0.4234, + "step": 7309 + }, + { + "epoch": 0.6, + "grad_norm": 2.259614527439117, + "learning_rate": 3.680136781421435e-06, + "loss": 0.3901, + "step": 7310 + }, + { + "epoch": 0.6, + "grad_norm": 3.3481061105666785, + "learning_rate": 3.6788600027800847e-06, + "loss": 0.8297, + "step": 7311 + }, + { + "epoch": 0.6, + "grad_norm": 4.272554803335286, + "learning_rate": 3.677583316744899e-06, + "loss": 1.3087, + "step": 7312 + }, + { + "epoch": 0.6, + "grad_norm": 2.2133791927491147, + "learning_rate": 3.6763067234053686e-06, + "loss": 0.3896, + "step": 7313 + }, + { + "epoch": 0.6, + "grad_norm": 3.4051697569394843, + "learning_rate": 3.6750302228509747e-06, + "loss": 0.6691, + "step": 7314 + }, + { + "epoch": 0.6, + "grad_norm": 6.793644442129009, + "learning_rate": 3.6737538151711965e-06, + "loss": 1.5565, + "step": 7315 + }, + { + "epoch": 0.6, + "grad_norm": 2.1024584202356036, + "learning_rate": 3.6724775004555056e-06, + "loss": 0.4786, + "step": 7316 + }, + { + "epoch": 0.6, + "grad_norm": 3.2672856545005318, + "learning_rate": 3.6712012787933627e-06, + "loss": 0.8188, + "step": 7317 + }, + { + "epoch": 0.6, + "grad_norm": 3.534730237228595, + "learning_rate": 3.669925150274227e-06, + "loss": 0.7064, + "step": 7318 + }, + { + "epoch": 0.6, + "grad_norm": 4.176559579252146, + "learning_rate": 3.66864911498755e-06, + "loss": 1.1786, + "step": 7319 + }, + { + "epoch": 0.6, + "grad_norm": 3.01451385356136, + "learning_rate": 3.667373173022777e-06, + "loss": 0.6224, + "step": 7320 + }, + { + "epoch": 0.6, + "grad_norm": 2.702301279569575, + "learning_rate": 3.6660973244693443e-06, + "loss": 0.4707, + "step": 7321 + }, + { + "epoch": 0.6, + "grad_norm": 2.6518809111910415, + "learning_rate": 3.6648215694166854e-06, + "loss": 0.6508, + "step": 7322 + }, + { + "epoch": 0.6, + "grad_norm": 4.837301252009652, + "learning_rate": 3.663545907954222e-06, + "loss": 1.0315, + "step": 7323 + }, + { + "epoch": 0.6, + "grad_norm": 3.467377058353138, + "learning_rate": 3.662270340171374e-06, + "loss": 0.9296, + "step": 7324 + }, + { + "epoch": 0.6, + "grad_norm": 2.037556790617754, + "learning_rate": 3.660994866157553e-06, + "loss": 0.4212, + "step": 7325 + }, + { + "epoch": 0.6, + "grad_norm": 2.7411227792089052, + "learning_rate": 3.659719486002165e-06, + "loss": 0.5611, + "step": 7326 + }, + { + "epoch": 0.6, + "grad_norm": 4.029110782388751, + "learning_rate": 3.65844419979461e-06, + "loss": 0.7441, + "step": 7327 + }, + { + "epoch": 0.6, + "grad_norm": 3.598556112905251, + "learning_rate": 3.6571690076242762e-06, + "loss": 0.5019, + "step": 7328 + }, + { + "epoch": 0.6, + "grad_norm": 3.103631475632426, + "learning_rate": 3.6558939095805524e-06, + "loss": 0.669, + "step": 7329 + }, + { + "epoch": 0.6, + "grad_norm": 4.278943860233236, + "learning_rate": 3.654618905752814e-06, + "loss": 0.792, + "step": 7330 + }, + { + "epoch": 0.6, + "grad_norm": 3.379718370226008, + "learning_rate": 3.6533439962304363e-06, + "loss": 0.6466, + "step": 7331 + }, + { + "epoch": 0.6, + "grad_norm": 3.2511593392125406, + "learning_rate": 3.6520691811027833e-06, + "loss": 0.6463, + "step": 7332 + }, + { + "epoch": 0.6, + "grad_norm": 3.9418092034160632, + "learning_rate": 3.650794460459216e-06, + "loss": 0.7541, + "step": 7333 + }, + { + "epoch": 0.6, + "grad_norm": 3.3988686548257045, + "learning_rate": 3.6495198343890834e-06, + "loss": 0.5703, + "step": 7334 + }, + { + "epoch": 0.6, + "grad_norm": 2.596962690304631, + "learning_rate": 3.6482453029817335e-06, + "loss": 0.3518, + "step": 7335 + }, + { + "epoch": 0.6, + "grad_norm": 3.3513090857832393, + "learning_rate": 3.6469708663265058e-06, + "loss": 0.3199, + "step": 7336 + }, + { + "epoch": 0.6, + "grad_norm": 4.101769394719204, + "learning_rate": 3.645696524512731e-06, + "loss": 0.713, + "step": 7337 + }, + { + "epoch": 0.6, + "grad_norm": 3.2251311010046115, + "learning_rate": 3.6444222776297356e-06, + "loss": 0.7979, + "step": 7338 + }, + { + "epoch": 0.6, + "grad_norm": 1.6622082215963914, + "learning_rate": 3.6431481257668417e-06, + "loss": 0.2782, + "step": 7339 + }, + { + "epoch": 0.6, + "grad_norm": 2.8688816543762545, + "learning_rate": 3.641874069013357e-06, + "loss": 0.4553, + "step": 7340 + }, + { + "epoch": 0.6, + "grad_norm": 3.884857490036489, + "learning_rate": 3.640600107458589e-06, + "loss": 0.6496, + "step": 7341 + }, + { + "epoch": 0.6, + "grad_norm": 2.5034422972171426, + "learning_rate": 3.639326241191837e-06, + "loss": 0.4517, + "step": 7342 + }, + { + "epoch": 0.6, + "grad_norm": 4.427570284980379, + "learning_rate": 3.6380524703023955e-06, + "loss": 1.0919, + "step": 7343 + }, + { + "epoch": 0.6, + "grad_norm": 3.414248128192576, + "learning_rate": 3.636778794879548e-06, + "loss": 0.6897, + "step": 7344 + }, + { + "epoch": 0.6, + "grad_norm": 4.009681478728431, + "learning_rate": 3.6355052150125756e-06, + "loss": 0.8974, + "step": 7345 + }, + { + "epoch": 0.6, + "grad_norm": 3.60090679232242, + "learning_rate": 3.6342317307907476e-06, + "loss": 0.7755, + "step": 7346 + }, + { + "epoch": 0.6, + "grad_norm": 5.080563646163504, + "learning_rate": 3.632958342303331e-06, + "loss": 1.1131, + "step": 7347 + }, + { + "epoch": 0.6, + "grad_norm": 2.6892240634984996, + "learning_rate": 3.6316850496395863e-06, + "loss": 0.7426, + "step": 7348 + }, + { + "epoch": 0.6, + "grad_norm": 3.709703790317675, + "learning_rate": 3.630411852888763e-06, + "loss": 0.776, + "step": 7349 + }, + { + "epoch": 0.6, + "grad_norm": 2.7744602064414847, + "learning_rate": 3.6291387521401116e-06, + "loss": 0.3538, + "step": 7350 + }, + { + "epoch": 0.6, + "grad_norm": 4.682406043099396, + "learning_rate": 3.6278657474828655e-06, + "loss": 1.0515, + "step": 7351 + }, + { + "epoch": 0.6, + "grad_norm": 3.959454819170579, + "learning_rate": 3.62659283900626e-06, + "loss": 0.477, + "step": 7352 + }, + { + "epoch": 0.6, + "grad_norm": 3.224783213547431, + "learning_rate": 3.625320026799518e-06, + "loss": 0.7098, + "step": 7353 + }, + { + "epoch": 0.6, + "grad_norm": 2.6252261600463815, + "learning_rate": 3.6240473109518595e-06, + "loss": 0.3263, + "step": 7354 + }, + { + "epoch": 0.6, + "grad_norm": 4.377159100802485, + "learning_rate": 3.6227746915524964e-06, + "loss": 1.1255, + "step": 7355 + }, + { + "epoch": 0.6, + "grad_norm": 4.271968038367136, + "learning_rate": 3.621502168690636e-06, + "loss": 0.6547, + "step": 7356 + }, + { + "epoch": 0.6, + "grad_norm": 3.449237596352372, + "learning_rate": 3.6202297424554723e-06, + "loss": 0.7299, + "step": 7357 + }, + { + "epoch": 0.6, + "grad_norm": 1.6632260784943322, + "learning_rate": 3.618957412936199e-06, + "loss": 0.23, + "step": 7358 + }, + { + "epoch": 0.6, + "grad_norm": 2.7017260069150733, + "learning_rate": 3.6176851802220015e-06, + "loss": 0.4136, + "step": 7359 + }, + { + "epoch": 0.6, + "grad_norm": 3.1140772619513952, + "learning_rate": 3.6164130444020557e-06, + "loss": 0.6248, + "step": 7360 + }, + { + "epoch": 0.6, + "grad_norm": 4.261273199477408, + "learning_rate": 3.6151410055655346e-06, + "loss": 0.9102, + "step": 7361 + }, + { + "epoch": 0.6, + "grad_norm": 3.254035443186992, + "learning_rate": 3.613869063801604e-06, + "loss": 0.5402, + "step": 7362 + }, + { + "epoch": 0.6, + "grad_norm": 3.826738840401367, + "learning_rate": 3.6125972191994167e-06, + "loss": 0.8809, + "step": 7363 + }, + { + "epoch": 0.6, + "grad_norm": 4.627710868115313, + "learning_rate": 3.611325471848127e-06, + "loss": 1.0343, + "step": 7364 + }, + { + "epoch": 0.6, + "grad_norm": 4.942724274888949, + "learning_rate": 3.6100538218368788e-06, + "loss": 1.0607, + "step": 7365 + }, + { + "epoch": 0.6, + "grad_norm": 2.6652416919731916, + "learning_rate": 3.608782269254809e-06, + "loss": 0.7414, + "step": 7366 + }, + { + "epoch": 0.6, + "grad_norm": 6.567687807888815, + "learning_rate": 3.6075108141910477e-06, + "loss": 1.2659, + "step": 7367 + }, + { + "epoch": 0.6, + "grad_norm": 5.565997631667151, + "learning_rate": 3.606239456734718e-06, + "loss": 1.0212, + "step": 7368 + }, + { + "epoch": 0.6, + "grad_norm": 2.7521879553116104, + "learning_rate": 3.604968196974936e-06, + "loss": 0.4801, + "step": 7369 + }, + { + "epoch": 0.6, + "grad_norm": 4.227513652989928, + "learning_rate": 3.6036970350008117e-06, + "loss": 0.8487, + "step": 7370 + }, + { + "epoch": 0.6, + "grad_norm": 3.8028013106733445, + "learning_rate": 3.6024259709014485e-06, + "loss": 0.7161, + "step": 7371 + }, + { + "epoch": 0.6, + "grad_norm": 4.561632017753492, + "learning_rate": 3.601155004765943e-06, + "loss": 0.9349, + "step": 7372 + }, + { + "epoch": 0.6, + "grad_norm": 3.8234288647297845, + "learning_rate": 3.599884136683386e-06, + "loss": 0.6112, + "step": 7373 + }, + { + "epoch": 0.6, + "grad_norm": 2.1618931501398477, + "learning_rate": 3.5986133667428552e-06, + "loss": 0.29, + "step": 7374 + }, + { + "epoch": 0.6, + "grad_norm": 4.341504592456218, + "learning_rate": 3.59734269503343e-06, + "loss": 0.9025, + "step": 7375 + }, + { + "epoch": 0.6, + "grad_norm": 5.567997868746615, + "learning_rate": 3.596072121644176e-06, + "loss": 1.6976, + "step": 7376 + }, + { + "epoch": 0.6, + "grad_norm": 3.8884371462644887, + "learning_rate": 3.5948016466641565e-06, + "loss": 0.9424, + "step": 7377 + }, + { + "epoch": 0.6, + "grad_norm": 3.3051617998588774, + "learning_rate": 3.593531270182426e-06, + "loss": 0.6257, + "step": 7378 + }, + { + "epoch": 0.6, + "grad_norm": 4.1393146329806285, + "learning_rate": 3.5922609922880347e-06, + "loss": 0.5962, + "step": 7379 + }, + { + "epoch": 0.6, + "grad_norm": 4.380352244126183, + "learning_rate": 3.5909908130700196e-06, + "loss": 0.8863, + "step": 7380 + }, + { + "epoch": 0.6, + "grad_norm": 3.7033105237332506, + "learning_rate": 3.5897207326174162e-06, + "loss": 0.7133, + "step": 7381 + }, + { + "epoch": 0.6, + "grad_norm": 4.520638027193085, + "learning_rate": 3.5884507510192524e-06, + "loss": 0.9772, + "step": 7382 + }, + { + "epoch": 0.6, + "grad_norm": 6.268886224791158, + "learning_rate": 3.5871808683645475e-06, + "loss": 1.332, + "step": 7383 + }, + { + "epoch": 0.6, + "grad_norm": 5.086688366684082, + "learning_rate": 3.585911084742315e-06, + "loss": 0.7312, + "step": 7384 + }, + { + "epoch": 0.6, + "grad_norm": 5.756162826036348, + "learning_rate": 3.584641400241563e-06, + "loss": 1.1717, + "step": 7385 + }, + { + "epoch": 0.6, + "grad_norm": 5.053332697897676, + "learning_rate": 3.5833718149512874e-06, + "loss": 0.8567, + "step": 7386 + }, + { + "epoch": 0.6, + "grad_norm": 3.551526636895038, + "learning_rate": 3.582102328960483e-06, + "loss": 0.7164, + "step": 7387 + }, + { + "epoch": 0.6, + "grad_norm": 4.299671804905078, + "learning_rate": 3.580832942358134e-06, + "loss": 0.7368, + "step": 7388 + }, + { + "epoch": 0.6, + "grad_norm": 1.5397836578283757, + "learning_rate": 3.5795636552332203e-06, + "loss": 0.1627, + "step": 7389 + }, + { + "epoch": 0.6, + "grad_norm": 2.319864688208959, + "learning_rate": 3.5782944676747135e-06, + "loss": 0.3598, + "step": 7390 + }, + { + "epoch": 0.6, + "grad_norm": 3.855430350446534, + "learning_rate": 3.5770253797715747e-06, + "loss": 0.8059, + "step": 7391 + }, + { + "epoch": 0.6, + "grad_norm": 3.4382552861938604, + "learning_rate": 3.5757563916127665e-06, + "loss": 0.848, + "step": 7392 + }, + { + "epoch": 0.6, + "grad_norm": 5.002761732304879, + "learning_rate": 3.574487503287235e-06, + "loss": 0.9404, + "step": 7393 + }, + { + "epoch": 0.6, + "grad_norm": 3.8270837023387534, + "learning_rate": 3.5732187148839257e-06, + "loss": 0.5003, + "step": 7394 + }, + { + "epoch": 0.6, + "grad_norm": 4.440678145686728, + "learning_rate": 3.571950026491776e-06, + "loss": 1.0051, + "step": 7395 + }, + { + "epoch": 0.6, + "grad_norm": 3.452605562559225, + "learning_rate": 3.5706814381997157e-06, + "loss": 0.6493, + "step": 7396 + }, + { + "epoch": 0.6, + "grad_norm": 5.001457635681139, + "learning_rate": 3.5694129500966645e-06, + "loss": 0.7764, + "step": 7397 + }, + { + "epoch": 0.6, + "grad_norm": 2.664373470123157, + "learning_rate": 3.5681445622715396e-06, + "loss": 0.5855, + "step": 7398 + }, + { + "epoch": 0.6, + "grad_norm": 3.8412498830805792, + "learning_rate": 3.56687627481325e-06, + "loss": 0.6159, + "step": 7399 + }, + { + "epoch": 0.6, + "grad_norm": 3.8788036431099524, + "learning_rate": 3.5656080878106957e-06, + "loss": 0.6772, + "step": 7400 + }, + { + "epoch": 0.6, + "grad_norm": 5.203657324782098, + "learning_rate": 3.5643400013527723e-06, + "loss": 1.1349, + "step": 7401 + }, + { + "epoch": 0.61, + "grad_norm": 3.3686955244781855, + "learning_rate": 3.5630720155283686e-06, + "loss": 0.5233, + "step": 7402 + }, + { + "epoch": 0.61, + "grad_norm": 1.3251001828269917, + "learning_rate": 3.561804130426361e-06, + "loss": 0.2024, + "step": 7403 + }, + { + "epoch": 0.61, + "grad_norm": 3.5517867637136455, + "learning_rate": 3.560536346135625e-06, + "loss": 0.4885, + "step": 7404 + }, + { + "epoch": 0.61, + "grad_norm": 3.65419707229043, + "learning_rate": 3.559268662745027e-06, + "loss": 0.8909, + "step": 7405 + }, + { + "epoch": 0.61, + "grad_norm": 3.0702596670441102, + "learning_rate": 3.5580010803434254e-06, + "loss": 0.6835, + "step": 7406 + }, + { + "epoch": 0.61, + "grad_norm": 2.9999027181662297, + "learning_rate": 3.5567335990196725e-06, + "loss": 0.4085, + "step": 7407 + }, + { + "epoch": 0.61, + "grad_norm": 2.6525097086960625, + "learning_rate": 3.5554662188626147e-06, + "loss": 0.5371, + "step": 7408 + }, + { + "epoch": 0.61, + "grad_norm": 1.987470859525362, + "learning_rate": 3.5541989399610866e-06, + "loss": 0.1876, + "step": 7409 + }, + { + "epoch": 0.61, + "grad_norm": 3.367384021483217, + "learning_rate": 3.5529317624039205e-06, + "loss": 0.4064, + "step": 7410 + }, + { + "epoch": 0.61, + "grad_norm": 2.4214064440204153, + "learning_rate": 3.5516646862799404e-06, + "loss": 0.414, + "step": 7411 + }, + { + "epoch": 0.61, + "grad_norm": 1.4276191576511317, + "learning_rate": 3.5503977116779624e-06, + "loss": 0.2154, + "step": 7412 + }, + { + "epoch": 0.61, + "grad_norm": 4.0645046994455445, + "learning_rate": 3.5491308386867983e-06, + "loss": 0.9574, + "step": 7413 + }, + { + "epoch": 0.61, + "grad_norm": 2.187906423787895, + "learning_rate": 3.5478640673952456e-06, + "loss": 0.3924, + "step": 7414 + }, + { + "epoch": 0.61, + "grad_norm": 3.386770954240138, + "learning_rate": 3.5465973978921042e-06, + "loss": 0.9238, + "step": 7415 + }, + { + "epoch": 0.61, + "grad_norm": 3.7501762987264438, + "learning_rate": 3.545330830266158e-06, + "loss": 0.5472, + "step": 7416 + }, + { + "epoch": 0.61, + "grad_norm": 3.476586699802735, + "learning_rate": 3.54406436460619e-06, + "loss": 0.3458, + "step": 7417 + }, + { + "epoch": 0.61, + "grad_norm": 3.781993328385959, + "learning_rate": 3.5427980010009746e-06, + "loss": 0.713, + "step": 7418 + }, + { + "epoch": 0.61, + "grad_norm": 4.432373455583828, + "learning_rate": 3.541531739539279e-06, + "loss": 0.9409, + "step": 7419 + }, + { + "epoch": 0.61, + "grad_norm": 3.6775218937185987, + "learning_rate": 3.540265580309859e-06, + "loss": 0.8305, + "step": 7420 + }, + { + "epoch": 0.61, + "grad_norm": 3.84216649186372, + "learning_rate": 3.538999523401469e-06, + "loss": 0.723, + "step": 7421 + }, + { + "epoch": 0.61, + "grad_norm": 3.6949795452225156, + "learning_rate": 3.5377335689028556e-06, + "loss": 0.5788, + "step": 7422 + }, + { + "epoch": 0.61, + "grad_norm": 3.958017014079015, + "learning_rate": 3.536467716902754e-06, + "loss": 1.0746, + "step": 7423 + }, + { + "epoch": 0.61, + "grad_norm": 1.173675695552111, + "learning_rate": 3.5352019674898956e-06, + "loss": 0.1809, + "step": 7424 + }, + { + "epoch": 0.61, + "grad_norm": 3.5554370486237157, + "learning_rate": 3.533936320753007e-06, + "loss": 0.786, + "step": 7425 + }, + { + "epoch": 0.61, + "grad_norm": 3.5327408285560624, + "learning_rate": 3.5326707767808e-06, + "loss": 0.5137, + "step": 7426 + }, + { + "epoch": 0.61, + "grad_norm": 5.520860779424227, + "learning_rate": 3.5314053356619852e-06, + "loss": 1.2974, + "step": 7427 + }, + { + "epoch": 0.61, + "grad_norm": 4.882673289960211, + "learning_rate": 3.5301399974852656e-06, + "loss": 0.9882, + "step": 7428 + }, + { + "epoch": 0.61, + "grad_norm": 4.430372009046639, + "learning_rate": 3.528874762339336e-06, + "loss": 0.6721, + "step": 7429 + }, + { + "epoch": 0.61, + "grad_norm": 2.8215136183775615, + "learning_rate": 3.5276096303128837e-06, + "loss": 0.5899, + "step": 7430 + }, + { + "epoch": 0.61, + "grad_norm": 3.9066179246724344, + "learning_rate": 3.526344601494588e-06, + "loss": 0.8211, + "step": 7431 + }, + { + "epoch": 0.61, + "grad_norm": 2.62528976844446, + "learning_rate": 3.525079675973121e-06, + "loss": 0.6035, + "step": 7432 + }, + { + "epoch": 0.61, + "grad_norm": 4.377924604021585, + "learning_rate": 3.5238148538371506e-06, + "loss": 0.9099, + "step": 7433 + }, + { + "epoch": 0.61, + "grad_norm": 3.741153913875198, + "learning_rate": 3.5225501351753346e-06, + "loss": 0.5172, + "step": 7434 + }, + { + "epoch": 0.61, + "grad_norm": 3.934678384231139, + "learning_rate": 3.5212855200763237e-06, + "loss": 0.9718, + "step": 7435 + }, + { + "epoch": 0.61, + "grad_norm": 4.491095188918565, + "learning_rate": 3.5200210086287646e-06, + "loss": 1.1595, + "step": 7436 + }, + { + "epoch": 0.61, + "grad_norm": 5.229394049045571, + "learning_rate": 3.51875660092129e-06, + "loss": 1.2175, + "step": 7437 + }, + { + "epoch": 0.61, + "grad_norm": 1.1439429695257528, + "learning_rate": 3.5174922970425317e-06, + "loss": 0.1642, + "step": 7438 + }, + { + "epoch": 0.61, + "grad_norm": 2.755070223779588, + "learning_rate": 3.5162280970811115e-06, + "loss": 0.3445, + "step": 7439 + }, + { + "epoch": 0.61, + "grad_norm": 2.578523707958233, + "learning_rate": 3.5149640011256438e-06, + "loss": 0.5381, + "step": 7440 + }, + { + "epoch": 0.61, + "grad_norm": 4.715438769454317, + "learning_rate": 3.5137000092647366e-06, + "loss": 0.8441, + "step": 7441 + }, + { + "epoch": 0.61, + "grad_norm": 2.880341497534685, + "learning_rate": 3.512436121586993e-06, + "loss": 0.5466, + "step": 7442 + }, + { + "epoch": 0.61, + "grad_norm": 4.493807636256478, + "learning_rate": 3.5111723381810005e-06, + "loss": 0.9816, + "step": 7443 + }, + { + "epoch": 0.61, + "grad_norm": 1.8737191790534322, + "learning_rate": 3.509908659135348e-06, + "loss": 0.4204, + "step": 7444 + }, + { + "epoch": 0.61, + "grad_norm": 2.901209827546255, + "learning_rate": 3.5086450845386145e-06, + "loss": 0.5692, + "step": 7445 + }, + { + "epoch": 0.61, + "grad_norm": 4.1984061838214535, + "learning_rate": 3.5073816144793695e-06, + "loss": 0.6392, + "step": 7446 + }, + { + "epoch": 0.61, + "grad_norm": 6.097471089521399, + "learning_rate": 3.5061182490461775e-06, + "loss": 1.7763, + "step": 7447 + }, + { + "epoch": 0.61, + "grad_norm": 4.848732075469702, + "learning_rate": 3.5048549883275962e-06, + "loss": 0.867, + "step": 7448 + }, + { + "epoch": 0.61, + "grad_norm": 4.370455763998571, + "learning_rate": 3.503591832412172e-06, + "loss": 1.0785, + "step": 7449 + }, + { + "epoch": 0.61, + "grad_norm": 4.194037593480006, + "learning_rate": 3.5023287813884476e-06, + "loss": 1.0598, + "step": 7450 + }, + { + "epoch": 0.61, + "grad_norm": 3.4061901521566673, + "learning_rate": 3.5010658353449576e-06, + "loss": 0.9436, + "step": 7451 + }, + { + "epoch": 0.61, + "grad_norm": 3.807685581479717, + "learning_rate": 3.4998029943702305e-06, + "loss": 0.7093, + "step": 7452 + }, + { + "epoch": 0.61, + "grad_norm": 4.000582686325802, + "learning_rate": 3.498540258552785e-06, + "loss": 0.7585, + "step": 7453 + }, + { + "epoch": 0.61, + "grad_norm": 3.807383909205339, + "learning_rate": 3.497277627981132e-06, + "loss": 0.7692, + "step": 7454 + }, + { + "epoch": 0.61, + "grad_norm": 4.908451982034939, + "learning_rate": 3.496015102743777e-06, + "loss": 0.7361, + "step": 7455 + }, + { + "epoch": 0.61, + "grad_norm": 4.320412268232292, + "learning_rate": 3.4947526829292177e-06, + "loss": 0.9214, + "step": 7456 + }, + { + "epoch": 0.61, + "grad_norm": 3.731564301514826, + "learning_rate": 3.4934903686259445e-06, + "loss": 0.8081, + "step": 7457 + }, + { + "epoch": 0.61, + "grad_norm": 2.433002279457299, + "learning_rate": 3.4922281599224404e-06, + "loss": 0.515, + "step": 7458 + }, + { + "epoch": 0.61, + "grad_norm": 4.935834415095173, + "learning_rate": 3.4909660569071823e-06, + "loss": 1.3236, + "step": 7459 + }, + { + "epoch": 0.61, + "grad_norm": 3.0732773788320507, + "learning_rate": 3.4897040596686345e-06, + "loss": 0.7249, + "step": 7460 + }, + { + "epoch": 0.61, + "grad_norm": 2.9806807216526545, + "learning_rate": 3.4884421682952596e-06, + "loss": 0.5422, + "step": 7461 + }, + { + "epoch": 0.61, + "grad_norm": 3.475308889726864, + "learning_rate": 3.4871803828755102e-06, + "loss": 0.8781, + "step": 7462 + }, + { + "epoch": 0.61, + "grad_norm": 4.0605076079031805, + "learning_rate": 3.4859187034978315e-06, + "loss": 0.8476, + "step": 7463 + }, + { + "epoch": 0.61, + "grad_norm": 3.7484248994060683, + "learning_rate": 3.4846571302506624e-06, + "loss": 0.5632, + "step": 7464 + }, + { + "epoch": 0.61, + "grad_norm": 4.288806473145816, + "learning_rate": 3.4833956632224364e-06, + "loss": 0.6753, + "step": 7465 + }, + { + "epoch": 0.61, + "grad_norm": 1.7944051302550061, + "learning_rate": 3.482134302501572e-06, + "loss": 0.3302, + "step": 7466 + }, + { + "epoch": 0.61, + "grad_norm": 4.089755641139934, + "learning_rate": 3.480873048176486e-06, + "loss": 0.8287, + "step": 7467 + }, + { + "epoch": 0.61, + "grad_norm": 3.201852703903197, + "learning_rate": 3.47961190033559e-06, + "loss": 0.5108, + "step": 7468 + }, + { + "epoch": 0.61, + "grad_norm": 5.298715241648617, + "learning_rate": 3.478350859067282e-06, + "loss": 1.2063, + "step": 7469 + }, + { + "epoch": 0.61, + "grad_norm": 3.5977723557337526, + "learning_rate": 3.477089924459959e-06, + "loss": 0.5909, + "step": 7470 + }, + { + "epoch": 0.61, + "grad_norm": 4.519710106347817, + "learning_rate": 3.475829096602002e-06, + "loss": 0.8826, + "step": 7471 + }, + { + "epoch": 0.61, + "grad_norm": 3.543261817142673, + "learning_rate": 3.4745683755817917e-06, + "loss": 0.6256, + "step": 7472 + }, + { + "epoch": 0.61, + "grad_norm": 3.2491847333109622, + "learning_rate": 3.4733077614877003e-06, + "loss": 0.7993, + "step": 7473 + }, + { + "epoch": 0.61, + "grad_norm": 3.2529523946365866, + "learning_rate": 3.472047254408091e-06, + "loss": 0.9358, + "step": 7474 + }, + { + "epoch": 0.61, + "grad_norm": 4.239751609942161, + "learning_rate": 3.4707868544313196e-06, + "loss": 0.9912, + "step": 7475 + }, + { + "epoch": 0.61, + "grad_norm": 3.688500912592552, + "learning_rate": 3.469526561645735e-06, + "loss": 0.8995, + "step": 7476 + }, + { + "epoch": 0.61, + "grad_norm": 4.623117656250236, + "learning_rate": 3.4682663761396773e-06, + "loss": 0.8724, + "step": 7477 + }, + { + "epoch": 0.61, + "grad_norm": 3.9838805874911793, + "learning_rate": 3.4670062980014795e-06, + "loss": 0.9372, + "step": 7478 + }, + { + "epoch": 0.61, + "grad_norm": 4.245121124665357, + "learning_rate": 3.465746327319469e-06, + "loss": 1.4008, + "step": 7479 + }, + { + "epoch": 0.61, + "grad_norm": 4.453068472780212, + "learning_rate": 3.4644864641819635e-06, + "loss": 1.1487, + "step": 7480 + }, + { + "epoch": 0.61, + "grad_norm": 1.5552816233293498, + "learning_rate": 3.463226708677275e-06, + "loss": 0.3086, + "step": 7481 + }, + { + "epoch": 0.61, + "grad_norm": 5.190192010096277, + "learning_rate": 3.4619670608937074e-06, + "loss": 1.0783, + "step": 7482 + }, + { + "epoch": 0.61, + "grad_norm": 3.978473378064943, + "learning_rate": 3.460707520919554e-06, + "loss": 1.2116, + "step": 7483 + }, + { + "epoch": 0.61, + "grad_norm": 4.974293050204034, + "learning_rate": 3.4594480888431046e-06, + "loss": 0.9243, + "step": 7484 + }, + { + "epoch": 0.61, + "grad_norm": 4.470372717599689, + "learning_rate": 3.4581887647526393e-06, + "loss": 0.7718, + "step": 7485 + }, + { + "epoch": 0.61, + "grad_norm": 3.1744644582526265, + "learning_rate": 3.456929548736431e-06, + "loss": 0.8462, + "step": 7486 + }, + { + "epoch": 0.61, + "grad_norm": 3.8233489871206396, + "learning_rate": 3.455670440882746e-06, + "loss": 0.5939, + "step": 7487 + }, + { + "epoch": 0.61, + "grad_norm": 3.1547175216729215, + "learning_rate": 3.4544114412798447e-06, + "loss": 0.5356, + "step": 7488 + }, + { + "epoch": 0.61, + "grad_norm": 3.0007252413208176, + "learning_rate": 3.4531525500159724e-06, + "loss": 0.6602, + "step": 7489 + }, + { + "epoch": 0.61, + "grad_norm": 2.97396173912392, + "learning_rate": 3.451893767179375e-06, + "loss": 0.4509, + "step": 7490 + }, + { + "epoch": 0.61, + "grad_norm": 4.641610811802933, + "learning_rate": 3.4506350928582878e-06, + "loss": 1.0176, + "step": 7491 + }, + { + "epoch": 0.61, + "grad_norm": 4.148555565788872, + "learning_rate": 3.449376527140936e-06, + "loss": 0.8331, + "step": 7492 + }, + { + "epoch": 0.61, + "grad_norm": 4.6461205122890075, + "learning_rate": 3.4481180701155435e-06, + "loss": 0.7634, + "step": 7493 + }, + { + "epoch": 0.61, + "grad_norm": 3.602022670249473, + "learning_rate": 3.4468597218703203e-06, + "loss": 0.6074, + "step": 7494 + }, + { + "epoch": 0.61, + "grad_norm": 4.776633567319659, + "learning_rate": 3.44560148249347e-06, + "loss": 1.1803, + "step": 7495 + }, + { + "epoch": 0.61, + "grad_norm": 3.1504778031021816, + "learning_rate": 3.4443433520731908e-06, + "loss": 0.5468, + "step": 7496 + }, + { + "epoch": 0.61, + "grad_norm": 3.802423189114528, + "learning_rate": 3.443085330697673e-06, + "loss": 0.7454, + "step": 7497 + }, + { + "epoch": 0.61, + "grad_norm": 3.1758347245807057, + "learning_rate": 3.441827418455098e-06, + "loss": 0.7281, + "step": 7498 + }, + { + "epoch": 0.61, + "grad_norm": 2.841314650957273, + "learning_rate": 3.44056961543364e-06, + "loss": 0.6456, + "step": 7499 + }, + { + "epoch": 0.61, + "grad_norm": 5.301548555979058, + "learning_rate": 3.4393119217214643e-06, + "loss": 1.1869, + "step": 7500 + }, + { + "epoch": 0.61, + "grad_norm": 3.4204896042815256, + "learning_rate": 3.438054337406732e-06, + "loss": 0.551, + "step": 7501 + }, + { + "epoch": 0.61, + "grad_norm": 3.2855473071915275, + "learning_rate": 3.4367968625775923e-06, + "loss": 0.793, + "step": 7502 + }, + { + "epoch": 0.61, + "grad_norm": 4.264205516013644, + "learning_rate": 3.435539497322189e-06, + "loss": 0.9784, + "step": 7503 + }, + { + "epoch": 0.61, + "grad_norm": 5.189963158184375, + "learning_rate": 3.4342822417286586e-06, + "loss": 0.9427, + "step": 7504 + }, + { + "epoch": 0.61, + "grad_norm": 4.31453330800363, + "learning_rate": 3.433025095885131e-06, + "loss": 0.922, + "step": 7505 + }, + { + "epoch": 0.61, + "grad_norm": 3.0832892899562063, + "learning_rate": 3.4317680598797227e-06, + "loss": 0.4771, + "step": 7506 + }, + { + "epoch": 0.61, + "grad_norm": 3.9636349208458657, + "learning_rate": 3.4305111338005483e-06, + "loss": 0.931, + "step": 7507 + }, + { + "epoch": 0.61, + "grad_norm": 2.7181152910442608, + "learning_rate": 3.429254317735714e-06, + "loss": 0.4166, + "step": 7508 + }, + { + "epoch": 0.61, + "grad_norm": 5.991948585753611, + "learning_rate": 3.4279976117733148e-06, + "loss": 1.1894, + "step": 7509 + }, + { + "epoch": 0.61, + "grad_norm": 2.5817551031001433, + "learning_rate": 3.426741016001444e-06, + "loss": 0.5615, + "step": 7510 + }, + { + "epoch": 0.61, + "grad_norm": 2.5481343706597617, + "learning_rate": 3.4254845305081796e-06, + "loss": 0.5636, + "step": 7511 + }, + { + "epoch": 0.61, + "grad_norm": 1.2740628976561783, + "learning_rate": 3.4242281553815963e-06, + "loss": 0.2066, + "step": 7512 + }, + { + "epoch": 0.61, + "grad_norm": 3.854818236466965, + "learning_rate": 3.422971890709762e-06, + "loss": 1.035, + "step": 7513 + }, + { + "epoch": 0.61, + "grad_norm": 1.9314099983526627, + "learning_rate": 3.4217157365807352e-06, + "loss": 0.435, + "step": 7514 + }, + { + "epoch": 0.61, + "grad_norm": 3.1837886110940596, + "learning_rate": 3.4204596930825674e-06, + "loss": 0.547, + "step": 7515 + }, + { + "epoch": 0.61, + "grad_norm": 3.7329810664237026, + "learning_rate": 3.419203760303301e-06, + "loss": 0.7076, + "step": 7516 + }, + { + "epoch": 0.61, + "grad_norm": 3.96816950610144, + "learning_rate": 3.417947938330971e-06, + "loss": 0.6511, + "step": 7517 + }, + { + "epoch": 0.61, + "grad_norm": 3.106187483351225, + "learning_rate": 3.416692227253604e-06, + "loss": 0.6246, + "step": 7518 + }, + { + "epoch": 0.61, + "grad_norm": 2.4304170799997538, + "learning_rate": 3.4154366271592222e-06, + "loss": 0.2782, + "step": 7519 + }, + { + "epoch": 0.61, + "grad_norm": 4.7879286041422535, + "learning_rate": 3.4141811381358364e-06, + "loss": 1.486, + "step": 7520 + }, + { + "epoch": 0.61, + "grad_norm": 3.3751720368744564, + "learning_rate": 3.4129257602714514e-06, + "loss": 0.6315, + "step": 7521 + }, + { + "epoch": 0.61, + "grad_norm": 4.683938628475828, + "learning_rate": 3.4116704936540656e-06, + "loss": 0.7881, + "step": 7522 + }, + { + "epoch": 0.61, + "grad_norm": 3.1457861012787807, + "learning_rate": 3.4104153383716644e-06, + "loss": 0.5855, + "step": 7523 + }, + { + "epoch": 0.61, + "grad_norm": 3.8336870542509214, + "learning_rate": 3.4091602945122305e-06, + "loss": 0.5423, + "step": 7524 + }, + { + "epoch": 0.62, + "grad_norm": 3.984032714223586, + "learning_rate": 3.4079053621637346e-06, + "loss": 0.7277, + "step": 7525 + }, + { + "epoch": 0.62, + "grad_norm": 4.337185833290463, + "learning_rate": 3.4066505414141453e-06, + "loss": 1.0906, + "step": 7526 + }, + { + "epoch": 0.62, + "grad_norm": 2.3899786036667607, + "learning_rate": 3.4053958323514185e-06, + "loss": 0.3782, + "step": 7527 + }, + { + "epoch": 0.62, + "grad_norm": 2.017552575570357, + "learning_rate": 3.404141235063506e-06, + "loss": 0.377, + "step": 7528 + }, + { + "epoch": 0.62, + "grad_norm": 4.178231000840538, + "learning_rate": 3.4028867496383454e-06, + "loss": 0.6578, + "step": 7529 + }, + { + "epoch": 0.62, + "grad_norm": 4.787115596496799, + "learning_rate": 3.4016323761638737e-06, + "loss": 0.8246, + "step": 7530 + }, + { + "epoch": 0.62, + "grad_norm": 3.1835348520912077, + "learning_rate": 3.400378114728017e-06, + "loss": 0.4493, + "step": 7531 + }, + { + "epoch": 0.62, + "grad_norm": 3.0554416692959383, + "learning_rate": 3.399123965418692e-06, + "loss": 0.7489, + "step": 7532 + }, + { + "epoch": 0.62, + "grad_norm": 4.752411187164596, + "learning_rate": 3.3978699283238117e-06, + "loss": 0.7197, + "step": 7533 + }, + { + "epoch": 0.62, + "grad_norm": 3.4033966339527217, + "learning_rate": 3.396616003531275e-06, + "loss": 0.4122, + "step": 7534 + }, + { + "epoch": 0.62, + "grad_norm": 1.9913020044422074, + "learning_rate": 3.3953621911289784e-06, + "loss": 0.3394, + "step": 7535 + }, + { + "epoch": 0.62, + "grad_norm": 4.767458762737945, + "learning_rate": 3.3941084912048094e-06, + "loss": 1.2963, + "step": 7536 + }, + { + "epoch": 0.62, + "grad_norm": 3.405917623453279, + "learning_rate": 3.392854903846645e-06, + "loss": 0.8285, + "step": 7537 + }, + { + "epoch": 0.62, + "grad_norm": 4.751814868947692, + "learning_rate": 3.39160142914236e-06, + "loss": 0.8188, + "step": 7538 + }, + { + "epoch": 0.62, + "grad_norm": 5.227358384881683, + "learning_rate": 3.3903480671798145e-06, + "loss": 1.3352, + "step": 7539 + }, + { + "epoch": 0.62, + "grad_norm": 3.904301672916866, + "learning_rate": 3.389094818046864e-06, + "loss": 0.5046, + "step": 7540 + }, + { + "epoch": 0.62, + "grad_norm": 4.168266207804141, + "learning_rate": 3.3878416818313555e-06, + "loss": 0.9672, + "step": 7541 + }, + { + "epoch": 0.62, + "grad_norm": 2.752994067863077, + "learning_rate": 3.3865886586211285e-06, + "loss": 0.6885, + "step": 7542 + }, + { + "epoch": 0.62, + "grad_norm": 4.105697614699745, + "learning_rate": 3.385335748504015e-06, + "loss": 0.9677, + "step": 7543 + }, + { + "epoch": 0.62, + "grad_norm": 2.653123686297841, + "learning_rate": 3.3840829515678386e-06, + "loss": 0.7451, + "step": 7544 + }, + { + "epoch": 0.62, + "grad_norm": 1.8318854848639077, + "learning_rate": 3.382830267900417e-06, + "loss": 0.3442, + "step": 7545 + }, + { + "epoch": 0.62, + "grad_norm": 5.125126939314708, + "learning_rate": 3.381577697589554e-06, + "loss": 1.1589, + "step": 7546 + }, + { + "epoch": 0.62, + "grad_norm": 3.436915972131372, + "learning_rate": 3.380325240723051e-06, + "loss": 0.5953, + "step": 7547 + }, + { + "epoch": 0.62, + "grad_norm": 4.305223721072696, + "learning_rate": 3.3790728973886994e-06, + "loss": 0.8719, + "step": 7548 + }, + { + "epoch": 0.62, + "grad_norm": 4.119188080328333, + "learning_rate": 3.377820667674283e-06, + "loss": 0.8093, + "step": 7549 + }, + { + "epoch": 0.62, + "grad_norm": 3.495245249192985, + "learning_rate": 3.3765685516675805e-06, + "loss": 0.7286, + "step": 7550 + }, + { + "epoch": 0.62, + "grad_norm": 3.3455194062619977, + "learning_rate": 3.3753165494563554e-06, + "loss": 0.7161, + "step": 7551 + }, + { + "epoch": 0.62, + "grad_norm": 4.926704792998524, + "learning_rate": 3.3740646611283687e-06, + "loss": 0.9703, + "step": 7552 + }, + { + "epoch": 0.62, + "grad_norm": 4.408141112156821, + "learning_rate": 3.3728128867713743e-06, + "loss": 0.8305, + "step": 7553 + }, + { + "epoch": 0.62, + "grad_norm": 4.8160753357356905, + "learning_rate": 3.3715612264731155e-06, + "loss": 1.3461, + "step": 7554 + }, + { + "epoch": 0.62, + "grad_norm": 4.103611211003639, + "learning_rate": 3.3703096803213263e-06, + "loss": 1.0364, + "step": 7555 + }, + { + "epoch": 0.62, + "grad_norm": 4.048244769376617, + "learning_rate": 3.3690582484037393e-06, + "loss": 0.8223, + "step": 7556 + }, + { + "epoch": 0.62, + "grad_norm": 4.33130685895782, + "learning_rate": 3.367806930808068e-06, + "loss": 0.9185, + "step": 7557 + }, + { + "epoch": 0.62, + "grad_norm": 3.541873494179832, + "learning_rate": 3.366555727622028e-06, + "loss": 0.6951, + "step": 7558 + }, + { + "epoch": 0.62, + "grad_norm": 2.521856016581727, + "learning_rate": 3.365304638933322e-06, + "loss": 0.2948, + "step": 7559 + }, + { + "epoch": 0.62, + "grad_norm": 3.633589064668164, + "learning_rate": 3.3640536648296473e-06, + "loss": 1.1136, + "step": 7560 + }, + { + "epoch": 0.62, + "grad_norm": 2.977653755333384, + "learning_rate": 3.362802805398692e-06, + "loss": 0.4834, + "step": 7561 + }, + { + "epoch": 0.62, + "grad_norm": 3.62936734920231, + "learning_rate": 3.361552060728135e-06, + "loss": 0.9729, + "step": 7562 + }, + { + "epoch": 0.62, + "grad_norm": 4.7147232100280645, + "learning_rate": 3.360301430905648e-06, + "loss": 1.0255, + "step": 7563 + }, + { + "epoch": 0.62, + "grad_norm": 4.422361900806105, + "learning_rate": 3.3590509160188935e-06, + "loss": 0.7291, + "step": 7564 + }, + { + "epoch": 0.62, + "grad_norm": 3.136258418736888, + "learning_rate": 3.3578005161555284e-06, + "loss": 0.6473, + "step": 7565 + }, + { + "epoch": 0.62, + "grad_norm": 5.260504074934148, + "learning_rate": 3.3565502314032006e-06, + "loss": 1.5827, + "step": 7566 + }, + { + "epoch": 0.62, + "grad_norm": 3.064556639812398, + "learning_rate": 3.355300061849549e-06, + "loss": 0.7217, + "step": 7567 + }, + { + "epoch": 0.62, + "grad_norm": 3.6113118562312545, + "learning_rate": 3.354050007582207e-06, + "loss": 0.5611, + "step": 7568 + }, + { + "epoch": 0.62, + "grad_norm": 3.3387791111899765, + "learning_rate": 3.3528000686887946e-06, + "loss": 0.6625, + "step": 7569 + }, + { + "epoch": 0.62, + "grad_norm": 3.446240931359145, + "learning_rate": 3.3515502452569293e-06, + "loss": 0.7795, + "step": 7570 + }, + { + "epoch": 0.62, + "grad_norm": 1.9553336893169155, + "learning_rate": 3.350300537374217e-06, + "loss": 0.3551, + "step": 7571 + }, + { + "epoch": 0.62, + "grad_norm": 3.8888905259600195, + "learning_rate": 3.3490509451282582e-06, + "loss": 0.6538, + "step": 7572 + }, + { + "epoch": 0.62, + "grad_norm": 4.248790654389525, + "learning_rate": 3.3478014686066448e-06, + "loss": 0.8886, + "step": 7573 + }, + { + "epoch": 0.62, + "grad_norm": 4.075163711164652, + "learning_rate": 3.3465521078969552e-06, + "loss": 0.4495, + "step": 7574 + }, + { + "epoch": 0.62, + "grad_norm": 2.875334794198349, + "learning_rate": 3.345302863086768e-06, + "loss": 0.6683, + "step": 7575 + }, + { + "epoch": 0.62, + "grad_norm": 3.0236885132985227, + "learning_rate": 3.3440537342636483e-06, + "loss": 0.4801, + "step": 7576 + }, + { + "epoch": 0.62, + "grad_norm": 5.222717550677892, + "learning_rate": 3.3428047215151566e-06, + "loss": 1.1999, + "step": 7577 + }, + { + "epoch": 0.62, + "grad_norm": 4.616336755588772, + "learning_rate": 3.3415558249288404e-06, + "loss": 0.9043, + "step": 7578 + }, + { + "epoch": 0.62, + "grad_norm": 4.039906669102835, + "learning_rate": 3.340307044592245e-06, + "loss": 1.0051, + "step": 7579 + }, + { + "epoch": 0.62, + "grad_norm": 2.1416453078958284, + "learning_rate": 3.3390583805929016e-06, + "loss": 0.5973, + "step": 7580 + }, + { + "epoch": 0.62, + "grad_norm": 3.480569071267736, + "learning_rate": 3.3378098330183366e-06, + "loss": 0.6455, + "step": 7581 + }, + { + "epoch": 0.62, + "grad_norm": 2.8108546519330186, + "learning_rate": 3.336561401956069e-06, + "loss": 0.568, + "step": 7582 + }, + { + "epoch": 0.62, + "grad_norm": 3.7071940548697038, + "learning_rate": 3.3353130874936074e-06, + "loss": 1.0497, + "step": 7583 + }, + { + "epoch": 0.62, + "grad_norm": 4.649638239315057, + "learning_rate": 3.3340648897184546e-06, + "loss": 0.6986, + "step": 7584 + }, + { + "epoch": 0.62, + "grad_norm": 4.0271069934779264, + "learning_rate": 3.3328168087181036e-06, + "loss": 0.744, + "step": 7585 + }, + { + "epoch": 0.62, + "grad_norm": 3.646900193708152, + "learning_rate": 3.3315688445800376e-06, + "loss": 0.9999, + "step": 7586 + }, + { + "epoch": 0.62, + "grad_norm": 4.4062328965541, + "learning_rate": 3.330320997391734e-06, + "loss": 1.0649, + "step": 7587 + }, + { + "epoch": 0.62, + "grad_norm": 1.968267573998116, + "learning_rate": 3.329073267240662e-06, + "loss": 0.3551, + "step": 7588 + }, + { + "epoch": 0.62, + "grad_norm": 3.2727032722386675, + "learning_rate": 3.3278256542142818e-06, + "loss": 0.5636, + "step": 7589 + }, + { + "epoch": 0.62, + "grad_norm": 2.485911056813762, + "learning_rate": 3.326578158400049e-06, + "loss": 0.3504, + "step": 7590 + }, + { + "epoch": 0.62, + "grad_norm": 4.810087278594246, + "learning_rate": 3.325330779885401e-06, + "loss": 0.8325, + "step": 7591 + }, + { + "epoch": 0.62, + "grad_norm": 4.5155577878663395, + "learning_rate": 3.324083518757778e-06, + "loss": 0.7222, + "step": 7592 + }, + { + "epoch": 0.62, + "grad_norm": 4.226243750137132, + "learning_rate": 3.322836375104608e-06, + "loss": 0.8349, + "step": 7593 + }, + { + "epoch": 0.62, + "grad_norm": 4.579907909260929, + "learning_rate": 3.3215893490133076e-06, + "loss": 0.8511, + "step": 7594 + }, + { + "epoch": 0.62, + "grad_norm": 2.839432439727437, + "learning_rate": 3.320342440571289e-06, + "loss": 0.4744, + "step": 7595 + }, + { + "epoch": 0.62, + "grad_norm": 5.348776624156341, + "learning_rate": 3.319095649865958e-06, + "loss": 1.0481, + "step": 7596 + }, + { + "epoch": 0.62, + "grad_norm": 2.9393705216616834, + "learning_rate": 3.3178489769847046e-06, + "loss": 0.7586, + "step": 7597 + }, + { + "epoch": 0.62, + "grad_norm": 3.455380150704934, + "learning_rate": 3.3166024220149173e-06, + "loss": 0.5484, + "step": 7598 + }, + { + "epoch": 0.62, + "grad_norm": 5.055728788130167, + "learning_rate": 3.3153559850439737e-06, + "loss": 1.0842, + "step": 7599 + }, + { + "epoch": 0.62, + "grad_norm": 2.7015896855427592, + "learning_rate": 3.3141096661592455e-06, + "loss": 0.5129, + "step": 7600 + }, + { + "epoch": 0.62, + "grad_norm": 5.337951165458357, + "learning_rate": 3.3128634654480906e-06, + "loss": 0.9046, + "step": 7601 + }, + { + "epoch": 0.62, + "grad_norm": 4.379378514832218, + "learning_rate": 3.3116173829978666e-06, + "loss": 0.8637, + "step": 7602 + }, + { + "epoch": 0.62, + "grad_norm": 3.7526183219145404, + "learning_rate": 3.3103714188959156e-06, + "loss": 0.6631, + "step": 7603 + }, + { + "epoch": 0.62, + "grad_norm": 3.788574484025864, + "learning_rate": 3.3091255732295736e-06, + "loss": 0.9459, + "step": 7604 + }, + { + "epoch": 0.62, + "grad_norm": 3.6683996965418135, + "learning_rate": 3.3078798460861704e-06, + "loss": 0.5804, + "step": 7605 + }, + { + "epoch": 0.62, + "grad_norm": 3.916929575285865, + "learning_rate": 3.306634237553026e-06, + "loss": 0.9159, + "step": 7606 + }, + { + "epoch": 0.62, + "grad_norm": 4.091661616270836, + "learning_rate": 3.305388747717453e-06, + "loss": 0.7485, + "step": 7607 + }, + { + "epoch": 0.62, + "grad_norm": 2.253125457820861, + "learning_rate": 3.3041433766667535e-06, + "loss": 0.5195, + "step": 7608 + }, + { + "epoch": 0.62, + "grad_norm": 3.9477457676776226, + "learning_rate": 3.302898124488222e-06, + "loss": 0.8162, + "step": 7609 + }, + { + "epoch": 0.62, + "grad_norm": 3.8355738202745426, + "learning_rate": 3.3016529912691476e-06, + "loss": 0.9991, + "step": 7610 + }, + { + "epoch": 0.62, + "grad_norm": 4.310163980369584, + "learning_rate": 3.3004079770968055e-06, + "loss": 0.6773, + "step": 7611 + }, + { + "epoch": 0.62, + "grad_norm": 2.6459081353394605, + "learning_rate": 3.299163082058468e-06, + "loss": 0.5445, + "step": 7612 + }, + { + "epoch": 0.62, + "grad_norm": 3.2563150817337876, + "learning_rate": 3.297918306241399e-06, + "loss": 0.745, + "step": 7613 + }, + { + "epoch": 0.62, + "grad_norm": 3.496579311994659, + "learning_rate": 3.2966736497328463e-06, + "loss": 0.3865, + "step": 7614 + }, + { + "epoch": 0.62, + "grad_norm": 4.477069154674302, + "learning_rate": 3.2954291126200577e-06, + "loss": 0.8421, + "step": 7615 + }, + { + "epoch": 0.62, + "grad_norm": 4.45013467679716, + "learning_rate": 3.294184694990271e-06, + "loss": 1.1175, + "step": 7616 + }, + { + "epoch": 0.62, + "grad_norm": 3.9947369308971163, + "learning_rate": 3.2929403969307137e-06, + "loss": 0.7213, + "step": 7617 + }, + { + "epoch": 0.62, + "grad_norm": 4.387494139290766, + "learning_rate": 3.291696218528605e-06, + "loss": 1.2012, + "step": 7618 + }, + { + "epoch": 0.62, + "grad_norm": 4.169223623551574, + "learning_rate": 3.290452159871158e-06, + "loss": 1.0328, + "step": 7619 + }, + { + "epoch": 0.62, + "grad_norm": 2.866520823535412, + "learning_rate": 3.289208221045573e-06, + "loss": 0.5849, + "step": 7620 + }, + { + "epoch": 0.62, + "grad_norm": 1.9425109571369379, + "learning_rate": 3.2879644021390468e-06, + "loss": 0.3425, + "step": 7621 + }, + { + "epoch": 0.62, + "grad_norm": 3.468745940756694, + "learning_rate": 3.286720703238765e-06, + "loss": 0.7145, + "step": 7622 + }, + { + "epoch": 0.62, + "grad_norm": 3.913128123597565, + "learning_rate": 3.2854771244319052e-06, + "loss": 0.7324, + "step": 7623 + }, + { + "epoch": 0.62, + "grad_norm": 4.211279066072754, + "learning_rate": 3.2842336658056383e-06, + "loss": 0.6421, + "step": 7624 + }, + { + "epoch": 0.62, + "grad_norm": 3.2310589376655137, + "learning_rate": 3.2829903274471253e-06, + "loss": 0.5612, + "step": 7625 + }, + { + "epoch": 0.62, + "grad_norm": 5.2109699398316724, + "learning_rate": 3.281747109443517e-06, + "loss": 1.3728, + "step": 7626 + }, + { + "epoch": 0.62, + "grad_norm": 4.4741011651583875, + "learning_rate": 3.2805040118819574e-06, + "loss": 0.9257, + "step": 7627 + }, + { + "epoch": 0.62, + "grad_norm": 2.96943882155435, + "learning_rate": 3.279261034849584e-06, + "loss": 0.7778, + "step": 7628 + }, + { + "epoch": 0.62, + "grad_norm": 2.230742837335636, + "learning_rate": 3.278018178433523e-06, + "loss": 0.5448, + "step": 7629 + }, + { + "epoch": 0.62, + "grad_norm": 3.6382394352891763, + "learning_rate": 3.276775442720896e-06, + "loss": 0.7383, + "step": 7630 + }, + { + "epoch": 0.62, + "grad_norm": 5.943406264757444, + "learning_rate": 3.2755328277988084e-06, + "loss": 1.0892, + "step": 7631 + }, + { + "epoch": 0.62, + "grad_norm": 4.364702175304437, + "learning_rate": 3.274290333754365e-06, + "loss": 1.0101, + "step": 7632 + }, + { + "epoch": 0.62, + "grad_norm": 3.5276837531138274, + "learning_rate": 3.2730479606746594e-06, + "loss": 0.5491, + "step": 7633 + }, + { + "epoch": 0.62, + "grad_norm": 3.3067665969011113, + "learning_rate": 3.271805708646776e-06, + "loss": 0.7095, + "step": 7634 + }, + { + "epoch": 0.62, + "grad_norm": 3.819460150118888, + "learning_rate": 3.2705635777577904e-06, + "loss": 0.5795, + "step": 7635 + }, + { + "epoch": 0.62, + "grad_norm": 4.61967776942314, + "learning_rate": 3.2693215680947737e-06, + "loss": 0.6914, + "step": 7636 + }, + { + "epoch": 0.62, + "grad_norm": 2.6878796081754066, + "learning_rate": 3.268079679744781e-06, + "loss": 0.5596, + "step": 7637 + }, + { + "epoch": 0.62, + "grad_norm": 2.0924427201373974, + "learning_rate": 3.2668379127948656e-06, + "loss": 0.3269, + "step": 7638 + }, + { + "epoch": 0.62, + "grad_norm": 4.884712377878976, + "learning_rate": 3.26559626733207e-06, + "loss": 0.8669, + "step": 7639 + }, + { + "epoch": 0.62, + "grad_norm": 4.37203318965418, + "learning_rate": 3.264354743443429e-06, + "loss": 0.8481, + "step": 7640 + }, + { + "epoch": 0.62, + "grad_norm": 3.399144750533389, + "learning_rate": 3.2631133412159656e-06, + "loss": 0.8149, + "step": 7641 + }, + { + "epoch": 0.62, + "grad_norm": 4.265972866984133, + "learning_rate": 3.2618720607367e-06, + "loss": 0.6379, + "step": 7642 + }, + { + "epoch": 0.62, + "grad_norm": 5.488346454035597, + "learning_rate": 3.2606309020926364e-06, + "loss": 1.2761, + "step": 7643 + }, + { + "epoch": 0.62, + "grad_norm": 3.0438213532347667, + "learning_rate": 3.2593898653707773e-06, + "loss": 0.6676, + "step": 7644 + }, + { + "epoch": 0.62, + "grad_norm": 5.061186580438532, + "learning_rate": 3.2581489506581134e-06, + "loss": 1.1957, + "step": 7645 + }, + { + "epoch": 0.62, + "grad_norm": 3.092072730761351, + "learning_rate": 3.2569081580416273e-06, + "loss": 0.562, + "step": 7646 + }, + { + "epoch": 0.63, + "grad_norm": 2.5801424147558554, + "learning_rate": 3.2556674876082937e-06, + "loss": 0.527, + "step": 7647 + }, + { + "epoch": 0.63, + "grad_norm": 3.061391757672997, + "learning_rate": 3.254426939445079e-06, + "loss": 0.7016, + "step": 7648 + }, + { + "epoch": 0.63, + "grad_norm": 3.898385610961387, + "learning_rate": 3.2531865136389383e-06, + "loss": 0.7654, + "step": 7649 + }, + { + "epoch": 0.63, + "grad_norm": 4.1875028295410495, + "learning_rate": 3.251946210276821e-06, + "loss": 1.0348, + "step": 7650 + }, + { + "epoch": 0.63, + "grad_norm": 3.034755909633507, + "learning_rate": 3.2507060294456653e-06, + "loss": 0.5311, + "step": 7651 + }, + { + "epoch": 0.63, + "grad_norm": 2.705049010180583, + "learning_rate": 3.249465971232405e-06, + "loss": 0.5876, + "step": 7652 + }, + { + "epoch": 0.63, + "grad_norm": 2.3703704410665343, + "learning_rate": 3.248226035723963e-06, + "loss": 0.4805, + "step": 7653 + }, + { + "epoch": 0.63, + "grad_norm": 1.7354395150446371, + "learning_rate": 3.2469862230072507e-06, + "loss": 0.2601, + "step": 7654 + }, + { + "epoch": 0.63, + "grad_norm": 4.281087596216457, + "learning_rate": 3.245746533169175e-06, + "loss": 0.8449, + "step": 7655 + }, + { + "epoch": 0.63, + "grad_norm": 3.8102663809612016, + "learning_rate": 3.244506966296633e-06, + "loss": 0.9758, + "step": 7656 + }, + { + "epoch": 0.63, + "grad_norm": 2.0974875755818427, + "learning_rate": 3.2432675224765133e-06, + "loss": 0.2948, + "step": 7657 + }, + { + "epoch": 0.63, + "grad_norm": 2.934298887973439, + "learning_rate": 3.242028201795694e-06, + "loss": 0.8, + "step": 7658 + }, + { + "epoch": 0.63, + "grad_norm": 3.9409185012799575, + "learning_rate": 3.240789004341049e-06, + "loss": 0.846, + "step": 7659 + }, + { + "epoch": 0.63, + "grad_norm": 2.956801842865033, + "learning_rate": 3.2395499301994366e-06, + "loss": 0.272, + "step": 7660 + }, + { + "epoch": 0.63, + "grad_norm": 3.100245111197015, + "learning_rate": 3.238310979457713e-06, + "loss": 0.5009, + "step": 7661 + }, + { + "epoch": 0.63, + "grad_norm": 2.908941130615876, + "learning_rate": 3.2370721522027226e-06, + "loss": 0.8223, + "step": 7662 + }, + { + "epoch": 0.63, + "grad_norm": 3.062481040532317, + "learning_rate": 3.235833448521303e-06, + "loss": 0.5634, + "step": 7663 + }, + { + "epoch": 0.63, + "grad_norm": 4.42252314474069, + "learning_rate": 3.2345948685002796e-06, + "loss": 1.076, + "step": 7664 + }, + { + "epoch": 0.63, + "grad_norm": 3.611775133421519, + "learning_rate": 3.2333564122264755e-06, + "loss": 0.5999, + "step": 7665 + }, + { + "epoch": 0.63, + "grad_norm": 3.5563105544668465, + "learning_rate": 3.2321180797866962e-06, + "loss": 0.7487, + "step": 7666 + }, + { + "epoch": 0.63, + "grad_norm": 6.936191030571098, + "learning_rate": 3.2308798712677456e-06, + "loss": 1.237, + "step": 7667 + }, + { + "epoch": 0.63, + "grad_norm": 4.145405830437945, + "learning_rate": 3.2296417867564166e-06, + "loss": 0.5232, + "step": 7668 + }, + { + "epoch": 0.63, + "grad_norm": 3.453801351461035, + "learning_rate": 3.2284038263394946e-06, + "loss": 0.4514, + "step": 7669 + }, + { + "epoch": 0.63, + "grad_norm": 3.517791485006161, + "learning_rate": 3.2271659901037555e-06, + "loss": 0.6854, + "step": 7670 + }, + { + "epoch": 0.63, + "grad_norm": 3.6261401874406745, + "learning_rate": 3.2259282781359634e-06, + "loss": 0.763, + "step": 7671 + }, + { + "epoch": 0.63, + "grad_norm": 4.000540569718012, + "learning_rate": 3.224690690522879e-06, + "loss": 0.693, + "step": 7672 + }, + { + "epoch": 0.63, + "grad_norm": 4.120716139586882, + "learning_rate": 3.22345322735125e-06, + "loss": 1.1604, + "step": 7673 + }, + { + "epoch": 0.63, + "grad_norm": 1.383118903590275, + "learning_rate": 3.2222158887078187e-06, + "loss": 0.2229, + "step": 7674 + }, + { + "epoch": 0.63, + "grad_norm": 4.976290591456961, + "learning_rate": 3.2209786746793163e-06, + "loss": 0.9455, + "step": 7675 + }, + { + "epoch": 0.63, + "grad_norm": 1.5314161645389535, + "learning_rate": 3.219741585352469e-06, + "loss": 0.2351, + "step": 7676 + }, + { + "epoch": 0.63, + "grad_norm": 2.191337605130976, + "learning_rate": 3.218504620813986e-06, + "loss": 0.442, + "step": 7677 + }, + { + "epoch": 0.63, + "grad_norm": 3.561383212432562, + "learning_rate": 3.2172677811505766e-06, + "loss": 0.5061, + "step": 7678 + }, + { + "epoch": 0.63, + "grad_norm": 3.391575981241691, + "learning_rate": 3.216031066448938e-06, + "loss": 0.6082, + "step": 7679 + }, + { + "epoch": 0.63, + "grad_norm": 5.234858555107159, + "learning_rate": 3.2147944767957565e-06, + "loss": 1.1577, + "step": 7680 + }, + { + "epoch": 0.63, + "grad_norm": 1.6846154268784963, + "learning_rate": 3.213558012277713e-06, + "loss": 0.3, + "step": 7681 + }, + { + "epoch": 0.63, + "grad_norm": 5.810587396979411, + "learning_rate": 3.212321672981481e-06, + "loss": 1.0758, + "step": 7682 + }, + { + "epoch": 0.63, + "grad_norm": 2.22152061579591, + "learning_rate": 3.2110854589937166e-06, + "loss": 0.259, + "step": 7683 + }, + { + "epoch": 0.63, + "grad_norm": 4.48217190544856, + "learning_rate": 3.2098493704010768e-06, + "loss": 0.741, + "step": 7684 + }, + { + "epoch": 0.63, + "grad_norm": 4.013228565717424, + "learning_rate": 3.208613407290206e-06, + "loss": 0.9747, + "step": 7685 + }, + { + "epoch": 0.63, + "grad_norm": 3.6159848966723724, + "learning_rate": 3.2073775697477393e-06, + "loss": 0.8999, + "step": 7686 + }, + { + "epoch": 0.63, + "grad_norm": 2.7380397280342086, + "learning_rate": 3.2061418578603028e-06, + "loss": 0.5477, + "step": 7687 + }, + { + "epoch": 0.63, + "grad_norm": 2.539704720378294, + "learning_rate": 3.2049062717145168e-06, + "loss": 0.4176, + "step": 7688 + }, + { + "epoch": 0.63, + "grad_norm": 4.274538445434723, + "learning_rate": 3.203670811396987e-06, + "loss": 0.975, + "step": 7689 + }, + { + "epoch": 0.63, + "grad_norm": 4.67172399250756, + "learning_rate": 3.2024354769943163e-06, + "loss": 0.9177, + "step": 7690 + }, + { + "epoch": 0.63, + "grad_norm": 3.475114485121228, + "learning_rate": 3.2012002685930947e-06, + "loss": 0.8625, + "step": 7691 + }, + { + "epoch": 0.63, + "grad_norm": 3.3914588561677315, + "learning_rate": 3.1999651862799063e-06, + "loss": 0.5683, + "step": 7692 + }, + { + "epoch": 0.63, + "grad_norm": 2.081062531847189, + "learning_rate": 3.198730230141327e-06, + "loss": 0.4778, + "step": 7693 + }, + { + "epoch": 0.63, + "grad_norm": 5.118738109466712, + "learning_rate": 3.197495400263917e-06, + "loss": 0.9559, + "step": 7694 + }, + { + "epoch": 0.63, + "grad_norm": 2.8691710480218955, + "learning_rate": 3.1962606967342356e-06, + "loss": 0.7172, + "step": 7695 + }, + { + "epoch": 0.63, + "grad_norm": 4.794101928588226, + "learning_rate": 3.1950261196388287e-06, + "loss": 0.974, + "step": 7696 + }, + { + "epoch": 0.63, + "grad_norm": 4.493952052033231, + "learning_rate": 3.1937916690642356e-06, + "loss": 0.9769, + "step": 7697 + }, + { + "epoch": 0.63, + "grad_norm": 3.172623602492804, + "learning_rate": 3.192557345096986e-06, + "loss": 0.4682, + "step": 7698 + }, + { + "epoch": 0.63, + "grad_norm": 2.5391023927997174, + "learning_rate": 3.191323147823602e-06, + "loss": 0.3686, + "step": 7699 + }, + { + "epoch": 0.63, + "grad_norm": 3.2908197921338616, + "learning_rate": 3.1900890773305926e-06, + "loss": 0.613, + "step": 7700 + }, + { + "epoch": 0.63, + "grad_norm": 0.957444384242329, + "learning_rate": 3.1888551337044615e-06, + "loss": 0.1528, + "step": 7701 + }, + { + "epoch": 0.63, + "grad_norm": 1.736174740514147, + "learning_rate": 3.1876213170317048e-06, + "loss": 0.2971, + "step": 7702 + }, + { + "epoch": 0.63, + "grad_norm": 3.9588233427147275, + "learning_rate": 3.186387627398805e-06, + "loss": 0.7388, + "step": 7703 + }, + { + "epoch": 0.63, + "grad_norm": 2.5151779126495444, + "learning_rate": 3.1851540648922398e-06, + "loss": 0.5993, + "step": 7704 + }, + { + "epoch": 0.63, + "grad_norm": 2.3530189237922574, + "learning_rate": 3.1839206295984786e-06, + "loss": 0.3692, + "step": 7705 + }, + { + "epoch": 0.63, + "grad_norm": 3.095440207688822, + "learning_rate": 3.1826873216039757e-06, + "loss": 0.3849, + "step": 7706 + }, + { + "epoch": 0.63, + "grad_norm": 3.292342872790543, + "learning_rate": 3.181454140995182e-06, + "loss": 0.6714, + "step": 7707 + }, + { + "epoch": 0.63, + "grad_norm": 3.1720375945195083, + "learning_rate": 3.1802210878585395e-06, + "loss": 0.5537, + "step": 7708 + }, + { + "epoch": 0.63, + "grad_norm": 4.183990698007337, + "learning_rate": 3.1789881622804797e-06, + "loss": 0.8163, + "step": 7709 + }, + { + "epoch": 0.63, + "grad_norm": 3.5311093273622, + "learning_rate": 3.1777553643474247e-06, + "loss": 0.6283, + "step": 7710 + }, + { + "epoch": 0.63, + "grad_norm": 3.442566058851378, + "learning_rate": 3.1765226941457866e-06, + "loss": 0.8416, + "step": 7711 + }, + { + "epoch": 0.63, + "grad_norm": 4.1627473864282765, + "learning_rate": 3.1752901517619733e-06, + "loss": 0.6516, + "step": 7712 + }, + { + "epoch": 0.63, + "grad_norm": 3.5284453351576794, + "learning_rate": 3.1740577372823785e-06, + "loss": 0.6561, + "step": 7713 + }, + { + "epoch": 0.63, + "grad_norm": 2.3797594442551833, + "learning_rate": 3.1728254507933892e-06, + "loss": 0.5099, + "step": 7714 + }, + { + "epoch": 0.63, + "grad_norm": 4.576113825725778, + "learning_rate": 3.1715932923813843e-06, + "loss": 1.1334, + "step": 7715 + }, + { + "epoch": 0.63, + "grad_norm": 4.286629317778869, + "learning_rate": 3.170361262132734e-06, + "loss": 0.845, + "step": 7716 + }, + { + "epoch": 0.63, + "grad_norm": 4.009390554053943, + "learning_rate": 3.1691293601337953e-06, + "loss": 0.8019, + "step": 7717 + }, + { + "epoch": 0.63, + "grad_norm": 3.370703724590661, + "learning_rate": 3.16789758647092e-06, + "loss": 0.7708, + "step": 7718 + }, + { + "epoch": 0.63, + "grad_norm": 5.200191529868398, + "learning_rate": 3.166665941230451e-06, + "loss": 1.1286, + "step": 7719 + }, + { + "epoch": 0.63, + "grad_norm": 4.135015759148786, + "learning_rate": 3.1654344244987213e-06, + "loss": 1.1156, + "step": 7720 + }, + { + "epoch": 0.63, + "grad_norm": 2.4909081881591635, + "learning_rate": 3.1642030363620534e-06, + "loss": 0.5092, + "step": 7721 + }, + { + "epoch": 0.63, + "grad_norm": 5.492886760434505, + "learning_rate": 3.1629717769067654e-06, + "loss": 1.2737, + "step": 7722 + }, + { + "epoch": 0.63, + "grad_norm": 4.733333203752356, + "learning_rate": 3.161740646219159e-06, + "loss": 0.7632, + "step": 7723 + }, + { + "epoch": 0.63, + "grad_norm": 3.695872923314846, + "learning_rate": 3.1605096443855333e-06, + "loss": 0.853, + "step": 7724 + }, + { + "epoch": 0.63, + "grad_norm": 3.2332939949985526, + "learning_rate": 3.159278771492176e-06, + "loss": 0.4684, + "step": 7725 + }, + { + "epoch": 0.63, + "grad_norm": 4.055708716202285, + "learning_rate": 3.1580480276253665e-06, + "loss": 1.2795, + "step": 7726 + }, + { + "epoch": 0.63, + "grad_norm": 2.895087699370973, + "learning_rate": 3.1568174128713738e-06, + "loss": 0.5558, + "step": 7727 + }, + { + "epoch": 0.63, + "grad_norm": 4.45024502271313, + "learning_rate": 3.15558692731646e-06, + "loss": 0.7325, + "step": 7728 + }, + { + "epoch": 0.63, + "grad_norm": 2.3016898771741525, + "learning_rate": 3.1543565710468743e-06, + "loss": 0.4324, + "step": 7729 + }, + { + "epoch": 0.63, + "grad_norm": 3.6489388052277896, + "learning_rate": 3.1531263441488607e-06, + "loss": 0.7163, + "step": 7730 + }, + { + "epoch": 0.63, + "grad_norm": 5.193545400934648, + "learning_rate": 3.1518962467086527e-06, + "loss": 1.1451, + "step": 7731 + }, + { + "epoch": 0.63, + "grad_norm": 4.479125338088091, + "learning_rate": 3.150666278812475e-06, + "loss": 1.0333, + "step": 7732 + }, + { + "epoch": 0.63, + "grad_norm": 3.0994094237191834, + "learning_rate": 3.149436440546545e-06, + "loss": 0.7642, + "step": 7733 + }, + { + "epoch": 0.63, + "grad_norm": 5.9580754736867, + "learning_rate": 3.1482067319970642e-06, + "loss": 1.2643, + "step": 7734 + }, + { + "epoch": 0.63, + "grad_norm": 4.753015333630123, + "learning_rate": 3.1469771532502336e-06, + "loss": 0.8311, + "step": 7735 + }, + { + "epoch": 0.63, + "grad_norm": 3.4290167032293177, + "learning_rate": 3.145747704392239e-06, + "loss": 0.8285, + "step": 7736 + }, + { + "epoch": 0.63, + "grad_norm": 3.3129908290943595, + "learning_rate": 3.144518385509261e-06, + "loss": 0.5139, + "step": 7737 + }, + { + "epoch": 0.63, + "grad_norm": 4.672563867413507, + "learning_rate": 3.143289196687469e-06, + "loss": 1.1612, + "step": 7738 + }, + { + "epoch": 0.63, + "grad_norm": 6.207846649305042, + "learning_rate": 3.142060138013026e-06, + "loss": 1.1907, + "step": 7739 + }, + { + "epoch": 0.63, + "grad_norm": 2.780734791010286, + "learning_rate": 3.1408312095720794e-06, + "loss": 0.508, + "step": 7740 + }, + { + "epoch": 0.63, + "grad_norm": 3.380561111288435, + "learning_rate": 3.139602411450774e-06, + "loss": 0.6048, + "step": 7741 + }, + { + "epoch": 0.63, + "grad_norm": 2.586262330486288, + "learning_rate": 3.138373743735244e-06, + "loss": 0.4451, + "step": 7742 + }, + { + "epoch": 0.63, + "grad_norm": 2.8144155997734766, + "learning_rate": 3.1371452065116116e-06, + "loss": 0.476, + "step": 7743 + }, + { + "epoch": 0.63, + "grad_norm": 4.15381259807162, + "learning_rate": 3.1359167998659933e-06, + "loss": 0.752, + "step": 7744 + }, + { + "epoch": 0.63, + "grad_norm": 4.023697205289873, + "learning_rate": 3.134688523884497e-06, + "loss": 0.9533, + "step": 7745 + }, + { + "epoch": 0.63, + "grad_norm": 3.9939570834213014, + "learning_rate": 3.1334603786532147e-06, + "loss": 1.0225, + "step": 7746 + }, + { + "epoch": 0.63, + "grad_norm": 1.9617810999492078, + "learning_rate": 3.1322323642582374e-06, + "loss": 0.4235, + "step": 7747 + }, + { + "epoch": 0.63, + "grad_norm": 4.096482581655145, + "learning_rate": 3.131004480785642e-06, + "loss": 0.7987, + "step": 7748 + }, + { + "epoch": 0.63, + "grad_norm": 3.7038563822744615, + "learning_rate": 3.1297767283214998e-06, + "loss": 0.9277, + "step": 7749 + }, + { + "epoch": 0.63, + "grad_norm": 3.025907679321576, + "learning_rate": 3.1285491069518705e-06, + "loss": 0.5833, + "step": 7750 + }, + { + "epoch": 0.63, + "grad_norm": 3.7108010699875966, + "learning_rate": 3.127321616762803e-06, + "loss": 0.6736, + "step": 7751 + }, + { + "epoch": 0.63, + "grad_norm": 4.533908261684587, + "learning_rate": 3.1260942578403395e-06, + "loss": 0.9325, + "step": 7752 + }, + { + "epoch": 0.63, + "grad_norm": 3.5851016859848137, + "learning_rate": 3.1248670302705143e-06, + "loss": 0.614, + "step": 7753 + }, + { + "epoch": 0.63, + "grad_norm": 4.291879126802277, + "learning_rate": 3.1236399341393486e-06, + "loss": 0.8511, + "step": 7754 + }, + { + "epoch": 0.63, + "grad_norm": 3.166298720756765, + "learning_rate": 3.122412969532858e-06, + "loss": 0.413, + "step": 7755 + }, + { + "epoch": 0.63, + "grad_norm": 2.7107254456828316, + "learning_rate": 3.121186136537049e-06, + "loss": 0.2174, + "step": 7756 + }, + { + "epoch": 0.63, + "grad_norm": 2.405980873573781, + "learning_rate": 3.119959435237913e-06, + "loss": 0.4214, + "step": 7757 + }, + { + "epoch": 0.63, + "grad_norm": 5.297894197921948, + "learning_rate": 3.11873286572144e-06, + "loss": 1.178, + "step": 7758 + }, + { + "epoch": 0.63, + "grad_norm": 1.4532413310352328, + "learning_rate": 3.1175064280736044e-06, + "loss": 0.2372, + "step": 7759 + }, + { + "epoch": 0.63, + "grad_norm": 4.050043333659418, + "learning_rate": 3.1162801223803756e-06, + "loss": 0.8248, + "step": 7760 + }, + { + "epoch": 0.63, + "grad_norm": 4.781108835281411, + "learning_rate": 3.1150539487277125e-06, + "loss": 1.0726, + "step": 7761 + }, + { + "epoch": 0.63, + "grad_norm": 3.1836495197151167, + "learning_rate": 3.1138279072015666e-06, + "loss": 0.8683, + "step": 7762 + }, + { + "epoch": 0.63, + "grad_norm": 4.032521770289908, + "learning_rate": 3.112601997887873e-06, + "loss": 0.7148, + "step": 7763 + }, + { + "epoch": 0.63, + "grad_norm": 3.2801631855651623, + "learning_rate": 3.111376220872565e-06, + "loss": 0.567, + "step": 7764 + }, + { + "epoch": 0.63, + "grad_norm": 1.9504549299211302, + "learning_rate": 3.1101505762415668e-06, + "loss": 0.3778, + "step": 7765 + }, + { + "epoch": 0.63, + "grad_norm": 4.444505613029491, + "learning_rate": 3.1089250640807865e-06, + "loss": 1.1581, + "step": 7766 + }, + { + "epoch": 0.63, + "grad_norm": 4.876983373389087, + "learning_rate": 3.10769968447613e-06, + "loss": 0.9147, + "step": 7767 + }, + { + "epoch": 0.63, + "grad_norm": 3.9787989385327642, + "learning_rate": 3.106474437513492e-06, + "loss": 0.5907, + "step": 7768 + }, + { + "epoch": 0.64, + "grad_norm": 4.775997969258128, + "learning_rate": 3.1052493232787533e-06, + "loss": 0.9217, + "step": 7769 + }, + { + "epoch": 0.64, + "grad_norm": 4.468930199595321, + "learning_rate": 3.104024341857791e-06, + "loss": 0.9899, + "step": 7770 + }, + { + "epoch": 0.64, + "grad_norm": 4.359483541220007, + "learning_rate": 3.1027994933364715e-06, + "loss": 0.9143, + "step": 7771 + }, + { + "epoch": 0.64, + "grad_norm": 3.138079374083297, + "learning_rate": 3.101574777800651e-06, + "loss": 0.5309, + "step": 7772 + }, + { + "epoch": 0.64, + "grad_norm": 4.473590000027721, + "learning_rate": 3.100350195336177e-06, + "loss": 0.7457, + "step": 7773 + }, + { + "epoch": 0.64, + "grad_norm": 2.1993030318420637, + "learning_rate": 3.099125746028887e-06, + "loss": 0.3841, + "step": 7774 + }, + { + "epoch": 0.64, + "grad_norm": 2.940398928712938, + "learning_rate": 3.0979014299646088e-06, + "loss": 0.4528, + "step": 7775 + }, + { + "epoch": 0.64, + "grad_norm": 1.1397946602839084, + "learning_rate": 3.0966772472291623e-06, + "loss": 0.1403, + "step": 7776 + }, + { + "epoch": 0.64, + "grad_norm": 3.985916223616124, + "learning_rate": 3.0954531979083575e-06, + "loss": 0.7692, + "step": 7777 + }, + { + "epoch": 0.64, + "grad_norm": 4.298616697163275, + "learning_rate": 3.094229282087995e-06, + "loss": 1.0578, + "step": 7778 + }, + { + "epoch": 0.64, + "grad_norm": 4.393406375595787, + "learning_rate": 3.0930054998538672e-06, + "loss": 0.7686, + "step": 7779 + }, + { + "epoch": 0.64, + "grad_norm": 2.8192625380118126, + "learning_rate": 3.091781851291753e-06, + "loss": 0.5322, + "step": 7780 + }, + { + "epoch": 0.64, + "grad_norm": 5.437817787193546, + "learning_rate": 3.0905583364874282e-06, + "loss": 1.3211, + "step": 7781 + }, + { + "epoch": 0.64, + "grad_norm": 4.235151842126615, + "learning_rate": 3.0893349555266517e-06, + "loss": 1.1217, + "step": 7782 + }, + { + "epoch": 0.64, + "grad_norm": 3.0124361892118876, + "learning_rate": 3.088111708495181e-06, + "loss": 0.5699, + "step": 7783 + }, + { + "epoch": 0.64, + "grad_norm": 3.278913011180968, + "learning_rate": 3.0868885954787577e-06, + "loss": 0.6601, + "step": 7784 + }, + { + "epoch": 0.64, + "grad_norm": 2.3234343380067917, + "learning_rate": 3.0856656165631204e-06, + "loss": 0.4015, + "step": 7785 + }, + { + "epoch": 0.64, + "grad_norm": 3.225939320324209, + "learning_rate": 3.084442771833991e-06, + "loss": 0.4539, + "step": 7786 + }, + { + "epoch": 0.64, + "grad_norm": 3.555604538407607, + "learning_rate": 3.0832200613770857e-06, + "loss": 0.5867, + "step": 7787 + }, + { + "epoch": 0.64, + "grad_norm": 2.8838307134062102, + "learning_rate": 3.081997485278113e-06, + "loss": 0.3765, + "step": 7788 + }, + { + "epoch": 0.64, + "grad_norm": 3.6069289209766198, + "learning_rate": 3.0807750436227695e-06, + "loss": 0.7507, + "step": 7789 + }, + { + "epoch": 0.64, + "grad_norm": 2.254856263935714, + "learning_rate": 3.079552736496745e-06, + "loss": 0.3731, + "step": 7790 + }, + { + "epoch": 0.64, + "grad_norm": 3.8397696175368514, + "learning_rate": 3.0783305639857132e-06, + "loss": 1.0205, + "step": 7791 + }, + { + "epoch": 0.64, + "grad_norm": 6.492414526493002, + "learning_rate": 3.077108526175345e-06, + "loss": 1.1767, + "step": 7792 + }, + { + "epoch": 0.64, + "grad_norm": 4.120289953683984, + "learning_rate": 3.075886623151302e-06, + "loss": 0.856, + "step": 7793 + }, + { + "epoch": 0.64, + "grad_norm": 3.1113371636214087, + "learning_rate": 3.074664854999232e-06, + "loss": 0.6099, + "step": 7794 + }, + { + "epoch": 0.64, + "grad_norm": 3.380585449987341, + "learning_rate": 3.0734432218047783e-06, + "loss": 0.6947, + "step": 7795 + }, + { + "epoch": 0.64, + "grad_norm": 3.683271562483727, + "learning_rate": 3.07222172365357e-06, + "loss": 0.6046, + "step": 7796 + }, + { + "epoch": 0.64, + "grad_norm": 3.127171353450437, + "learning_rate": 3.0710003606312292e-06, + "loss": 0.6197, + "step": 7797 + }, + { + "epoch": 0.64, + "grad_norm": 5.421346023231678, + "learning_rate": 3.069779132823367e-06, + "loss": 0.9477, + "step": 7798 + }, + { + "epoch": 0.64, + "grad_norm": 5.286625588838192, + "learning_rate": 3.068558040315588e-06, + "loss": 1.1859, + "step": 7799 + }, + { + "epoch": 0.64, + "grad_norm": 2.930364597159306, + "learning_rate": 3.0673370831934833e-06, + "loss": 0.6226, + "step": 7800 + }, + { + "epoch": 0.64, + "grad_norm": 2.8869195032304655, + "learning_rate": 3.066116261542639e-06, + "loss": 0.5016, + "step": 7801 + }, + { + "epoch": 0.64, + "grad_norm": 4.3190297058512295, + "learning_rate": 3.064895575448631e-06, + "loss": 0.9508, + "step": 7802 + }, + { + "epoch": 0.64, + "grad_norm": 4.201891244728968, + "learning_rate": 3.0636750249970184e-06, + "loss": 0.6445, + "step": 7803 + }, + { + "epoch": 0.64, + "grad_norm": 1.9073656928455973, + "learning_rate": 3.062454610273361e-06, + "loss": 0.3428, + "step": 7804 + }, + { + "epoch": 0.64, + "grad_norm": 3.1274350126950825, + "learning_rate": 3.061234331363203e-06, + "loss": 0.5776, + "step": 7805 + }, + { + "epoch": 0.64, + "grad_norm": 3.757643213513995, + "learning_rate": 3.0600141883520796e-06, + "loss": 0.8018, + "step": 7806 + }, + { + "epoch": 0.64, + "grad_norm": 3.816896954660318, + "learning_rate": 3.0587941813255196e-06, + "loss": 0.6786, + "step": 7807 + }, + { + "epoch": 0.64, + "grad_norm": 4.991835587302752, + "learning_rate": 3.0575743103690408e-06, + "loss": 1.0708, + "step": 7808 + }, + { + "epoch": 0.64, + "grad_norm": 4.723297776792715, + "learning_rate": 3.056354575568148e-06, + "loss": 1.0296, + "step": 7809 + }, + { + "epoch": 0.64, + "grad_norm": 6.138433420633675, + "learning_rate": 3.05513497700834e-06, + "loss": 1.1589, + "step": 7810 + }, + { + "epoch": 0.64, + "grad_norm": 3.7835659165003084, + "learning_rate": 3.0539155147751074e-06, + "loss": 0.3927, + "step": 7811 + }, + { + "epoch": 0.64, + "grad_norm": 4.000530651589436, + "learning_rate": 3.0526961889539265e-06, + "loss": 0.642, + "step": 7812 + }, + { + "epoch": 0.64, + "grad_norm": 5.289083728058848, + "learning_rate": 3.0514769996302696e-06, + "loss": 1.0834, + "step": 7813 + }, + { + "epoch": 0.64, + "grad_norm": 4.179387690922487, + "learning_rate": 3.050257946889594e-06, + "loss": 0.7185, + "step": 7814 + }, + { + "epoch": 0.64, + "grad_norm": 3.614143591546221, + "learning_rate": 3.049039030817351e-06, + "loss": 0.636, + "step": 7815 + }, + { + "epoch": 0.64, + "grad_norm": 2.0175919934837028, + "learning_rate": 3.0478202514989813e-06, + "loss": 0.3539, + "step": 7816 + }, + { + "epoch": 0.64, + "grad_norm": 5.425152565734742, + "learning_rate": 3.046601609019916e-06, + "loss": 1.0657, + "step": 7817 + }, + { + "epoch": 0.64, + "grad_norm": 1.7070207030060107, + "learning_rate": 3.0453831034655766e-06, + "loss": 0.2859, + "step": 7818 + }, + { + "epoch": 0.64, + "grad_norm": 5.551122283558936, + "learning_rate": 3.0441647349213764e-06, + "loss": 1.3125, + "step": 7819 + }, + { + "epoch": 0.64, + "grad_norm": 4.221502422957849, + "learning_rate": 3.042946503472716e-06, + "loss": 0.9485, + "step": 7820 + }, + { + "epoch": 0.64, + "grad_norm": 2.348367201920117, + "learning_rate": 3.041728409204988e-06, + "loss": 0.3886, + "step": 7821 + }, + { + "epoch": 0.64, + "grad_norm": 3.970306040011278, + "learning_rate": 3.040510452203576e-06, + "loss": 1.1752, + "step": 7822 + }, + { + "epoch": 0.64, + "grad_norm": 4.290881887789476, + "learning_rate": 3.039292632553853e-06, + "loss": 0.9586, + "step": 7823 + }, + { + "epoch": 0.64, + "grad_norm": 5.038877725597535, + "learning_rate": 3.038074950341184e-06, + "loss": 0.799, + "step": 7824 + }, + { + "epoch": 0.64, + "grad_norm": 1.033598891945941, + "learning_rate": 3.036857405650925e-06, + "loss": 0.1455, + "step": 7825 + }, + { + "epoch": 0.64, + "grad_norm": 3.3779552426693624, + "learning_rate": 3.0356399985684153e-06, + "loss": 0.7625, + "step": 7826 + }, + { + "epoch": 0.64, + "grad_norm": 4.673095220192702, + "learning_rate": 3.0344227291789928e-06, + "loss": 0.9734, + "step": 7827 + }, + { + "epoch": 0.64, + "grad_norm": 3.6096029345475955, + "learning_rate": 3.033205597567984e-06, + "loss": 0.4377, + "step": 7828 + }, + { + "epoch": 0.64, + "grad_norm": 4.324901287473498, + "learning_rate": 3.0319886038207023e-06, + "loss": 0.9661, + "step": 7829 + }, + { + "epoch": 0.64, + "grad_norm": 2.727965111416931, + "learning_rate": 3.0307717480224572e-06, + "loss": 0.3719, + "step": 7830 + }, + { + "epoch": 0.64, + "grad_norm": 4.456816251346994, + "learning_rate": 3.02955503025854e-06, + "loss": 0.8164, + "step": 7831 + }, + { + "epoch": 0.64, + "grad_norm": 3.2486302261587734, + "learning_rate": 3.0283384506142397e-06, + "loss": 0.572, + "step": 7832 + }, + { + "epoch": 0.64, + "grad_norm": 1.0676076427367587, + "learning_rate": 3.027122009174834e-06, + "loss": 0.1451, + "step": 7833 + }, + { + "epoch": 0.64, + "grad_norm": 4.957496653333703, + "learning_rate": 3.0259057060255887e-06, + "loss": 0.7304, + "step": 7834 + }, + { + "epoch": 0.64, + "grad_norm": 4.362065522951962, + "learning_rate": 3.024689541251763e-06, + "loss": 0.9303, + "step": 7835 + }, + { + "epoch": 0.64, + "grad_norm": 4.788726112158268, + "learning_rate": 3.023473514938604e-06, + "loss": 1.4238, + "step": 7836 + }, + { + "epoch": 0.64, + "grad_norm": 3.3604292834068765, + "learning_rate": 3.02225762717135e-06, + "loss": 0.5941, + "step": 7837 + }, + { + "epoch": 0.64, + "grad_norm": 4.26210210267571, + "learning_rate": 3.021041878035228e-06, + "loss": 0.8583, + "step": 7838 + }, + { + "epoch": 0.64, + "grad_norm": 3.0775307873462916, + "learning_rate": 3.0198262676154583e-06, + "loss": 0.573, + "step": 7839 + }, + { + "epoch": 0.64, + "grad_norm": 2.0254216863292878, + "learning_rate": 3.018610795997249e-06, + "loss": 0.4853, + "step": 7840 + }, + { + "epoch": 0.64, + "grad_norm": 2.7059526815793205, + "learning_rate": 3.0173954632657996e-06, + "loss": 0.3955, + "step": 7841 + }, + { + "epoch": 0.64, + "grad_norm": 5.240168978264894, + "learning_rate": 3.0161802695063024e-06, + "loss": 0.8435, + "step": 7842 + }, + { + "epoch": 0.64, + "grad_norm": 5.0858055095332695, + "learning_rate": 3.014965214803933e-06, + "loss": 1.332, + "step": 7843 + }, + { + "epoch": 0.64, + "grad_norm": 3.2462707758145446, + "learning_rate": 3.013750299243864e-06, + "loss": 0.5558, + "step": 7844 + }, + { + "epoch": 0.64, + "grad_norm": 3.650193179594442, + "learning_rate": 3.0125355229112536e-06, + "loss": 1.0065, + "step": 7845 + }, + { + "epoch": 0.64, + "grad_norm": 3.7676518700399604, + "learning_rate": 3.0113208858912533e-06, + "loss": 1.1567, + "step": 7846 + }, + { + "epoch": 0.64, + "grad_norm": 1.9609440709283663, + "learning_rate": 3.0101063882690046e-06, + "loss": 0.3932, + "step": 7847 + }, + { + "epoch": 0.64, + "grad_norm": 4.373511093110997, + "learning_rate": 3.00889203012964e-06, + "loss": 0.6438, + "step": 7848 + }, + { + "epoch": 0.64, + "grad_norm": 2.3333735327867577, + "learning_rate": 3.007677811558276e-06, + "loss": 0.3598, + "step": 7849 + }, + { + "epoch": 0.64, + "grad_norm": 4.5025719551068635, + "learning_rate": 3.006463732640028e-06, + "loss": 0.8174, + "step": 7850 + }, + { + "epoch": 0.64, + "grad_norm": 3.132660480178307, + "learning_rate": 3.0052497934599966e-06, + "loss": 0.771, + "step": 7851 + }, + { + "epoch": 0.64, + "grad_norm": 5.51776375842111, + "learning_rate": 3.0040359941032727e-06, + "loss": 1.3559, + "step": 7852 + }, + { + "epoch": 0.64, + "grad_norm": 2.977461470056396, + "learning_rate": 3.0028223346549413e-06, + "loss": 0.6852, + "step": 7853 + }, + { + "epoch": 0.64, + "grad_norm": 3.09231298367694, + "learning_rate": 3.00160881520007e-06, + "loss": 0.5279, + "step": 7854 + }, + { + "epoch": 0.64, + "grad_norm": 2.180758071905827, + "learning_rate": 3.000395435823724e-06, + "loss": 0.3719, + "step": 7855 + }, + { + "epoch": 0.64, + "grad_norm": 4.916943911528527, + "learning_rate": 2.9991821966109558e-06, + "loss": 0.4142, + "step": 7856 + }, + { + "epoch": 0.64, + "grad_norm": 5.163191128066598, + "learning_rate": 2.9979690976468083e-06, + "loss": 1.2684, + "step": 7857 + }, + { + "epoch": 0.64, + "grad_norm": 5.612341928275986, + "learning_rate": 2.9967561390163148e-06, + "loss": 1.0136, + "step": 7858 + }, + { + "epoch": 0.64, + "grad_norm": 1.9913312384897335, + "learning_rate": 2.9955433208044983e-06, + "loss": 0.2951, + "step": 7859 + }, + { + "epoch": 0.64, + "grad_norm": 4.2750060420121265, + "learning_rate": 2.994330643096371e-06, + "loss": 0.6136, + "step": 7860 + }, + { + "epoch": 0.64, + "grad_norm": 2.1301120622750145, + "learning_rate": 2.993118105976936e-06, + "loss": 0.3387, + "step": 7861 + }, + { + "epoch": 0.64, + "grad_norm": 3.1105291461179583, + "learning_rate": 2.9919057095311874e-06, + "loss": 0.6402, + "step": 7862 + }, + { + "epoch": 0.64, + "grad_norm": 3.6144575613625025, + "learning_rate": 2.99069345384411e-06, + "loss": 0.6512, + "step": 7863 + }, + { + "epoch": 0.64, + "grad_norm": 4.065422619060882, + "learning_rate": 2.9894813390006773e-06, + "loss": 0.6632, + "step": 7864 + }, + { + "epoch": 0.64, + "grad_norm": 2.9735292436274023, + "learning_rate": 2.988269365085854e-06, + "loss": 0.7177, + "step": 7865 + }, + { + "epoch": 0.64, + "grad_norm": 3.2401111070258053, + "learning_rate": 2.9870575321845916e-06, + "loss": 0.5627, + "step": 7866 + }, + { + "epoch": 0.64, + "grad_norm": 4.697499884583697, + "learning_rate": 2.985845840381837e-06, + "loss": 1.6977, + "step": 7867 + }, + { + "epoch": 0.64, + "grad_norm": 5.468404310057439, + "learning_rate": 2.9846342897625215e-06, + "loss": 1.0907, + "step": 7868 + }, + { + "epoch": 0.64, + "grad_norm": 3.3973242492973816, + "learning_rate": 2.983422880411572e-06, + "loss": 0.5834, + "step": 7869 + }, + { + "epoch": 0.64, + "grad_norm": 4.437769930198844, + "learning_rate": 2.9822116124139045e-06, + "loss": 0.9437, + "step": 7870 + }, + { + "epoch": 0.64, + "grad_norm": 6.109339166692898, + "learning_rate": 2.9810004858544194e-06, + "loss": 1.3969, + "step": 7871 + }, + { + "epoch": 0.64, + "grad_norm": 2.323342271677376, + "learning_rate": 2.9797895008180135e-06, + "loss": 0.3465, + "step": 7872 + }, + { + "epoch": 0.64, + "grad_norm": 5.320943319032637, + "learning_rate": 2.9785786573895713e-06, + "loss": 0.9443, + "step": 7873 + }, + { + "epoch": 0.64, + "grad_norm": 4.368512254444603, + "learning_rate": 2.9773679556539696e-06, + "loss": 1.0702, + "step": 7874 + }, + { + "epoch": 0.64, + "grad_norm": 4.100965499627617, + "learning_rate": 2.9761573956960706e-06, + "loss": 0.424, + "step": 7875 + }, + { + "epoch": 0.64, + "grad_norm": 4.743011989969749, + "learning_rate": 2.9749469776007324e-06, + "loss": 0.9294, + "step": 7876 + }, + { + "epoch": 0.64, + "grad_norm": 5.097132767859566, + "learning_rate": 2.973736701452795e-06, + "loss": 1.146, + "step": 7877 + }, + { + "epoch": 0.64, + "grad_norm": 3.7628372451622654, + "learning_rate": 2.9725265673370973e-06, + "loss": 0.8206, + "step": 7878 + }, + { + "epoch": 0.64, + "grad_norm": 5.387763699050225, + "learning_rate": 2.971316575338464e-06, + "loss": 1.0001, + "step": 7879 + }, + { + "epoch": 0.64, + "grad_norm": 4.202384180316912, + "learning_rate": 2.9701067255417092e-06, + "loss": 0.9383, + "step": 7880 + }, + { + "epoch": 0.64, + "grad_norm": 2.36968601017581, + "learning_rate": 2.96889701803164e-06, + "loss": 0.4805, + "step": 7881 + }, + { + "epoch": 0.64, + "grad_norm": 3.9970162535064464, + "learning_rate": 2.967687452893051e-06, + "loss": 0.4226, + "step": 7882 + }, + { + "epoch": 0.64, + "grad_norm": 3.5169857613363527, + "learning_rate": 2.9664780302107266e-06, + "loss": 0.9481, + "step": 7883 + }, + { + "epoch": 0.64, + "grad_norm": 2.560675619392749, + "learning_rate": 2.965268750069441e-06, + "loss": 0.5625, + "step": 7884 + }, + { + "epoch": 0.64, + "grad_norm": 3.6906020056209887, + "learning_rate": 2.964059612553961e-06, + "loss": 0.5511, + "step": 7885 + }, + { + "epoch": 0.64, + "grad_norm": 5.3707827503539995, + "learning_rate": 2.962850617749042e-06, + "loss": 0.9854, + "step": 7886 + }, + { + "epoch": 0.64, + "grad_norm": 3.169952648374127, + "learning_rate": 2.961641765739429e-06, + "loss": 0.6199, + "step": 7887 + }, + { + "epoch": 0.64, + "grad_norm": 4.658972696363917, + "learning_rate": 2.9604330566098588e-06, + "loss": 1.0271, + "step": 7888 + }, + { + "epoch": 0.64, + "grad_norm": 2.5783860166921713, + "learning_rate": 2.9592244904450536e-06, + "loss": 0.6164, + "step": 7889 + }, + { + "epoch": 0.64, + "grad_norm": 2.6210062336360935, + "learning_rate": 2.9580160673297307e-06, + "loss": 0.4776, + "step": 7890 + }, + { + "epoch": 0.64, + "grad_norm": 5.656576655258052, + "learning_rate": 2.956807787348594e-06, + "loss": 1.2903, + "step": 7891 + }, + { + "epoch": 0.65, + "grad_norm": 4.450910711609922, + "learning_rate": 2.9555996505863394e-06, + "loss": 0.779, + "step": 7892 + }, + { + "epoch": 0.65, + "grad_norm": 4.602170961498176, + "learning_rate": 2.954391657127654e-06, + "loss": 0.9086, + "step": 7893 + }, + { + "epoch": 0.65, + "grad_norm": 4.2815459596395264, + "learning_rate": 2.9531838070572084e-06, + "loss": 1.129, + "step": 7894 + }, + { + "epoch": 0.65, + "grad_norm": 3.8505282976510635, + "learning_rate": 2.9519761004596708e-06, + "loss": 0.8004, + "step": 7895 + }, + { + "epoch": 0.65, + "grad_norm": 3.8476698443942707, + "learning_rate": 2.9507685374196954e-06, + "loss": 0.7421, + "step": 7896 + }, + { + "epoch": 0.65, + "grad_norm": 3.2051223760366647, + "learning_rate": 2.9495611180219287e-06, + "loss": 0.4676, + "step": 7897 + }, + { + "epoch": 0.65, + "grad_norm": 4.059341674196565, + "learning_rate": 2.948353842351002e-06, + "loss": 0.7832, + "step": 7898 + }, + { + "epoch": 0.65, + "grad_norm": 4.376156592708591, + "learning_rate": 2.947146710491545e-06, + "loss": 0.9234, + "step": 7899 + }, + { + "epoch": 0.65, + "grad_norm": 4.525761659169275, + "learning_rate": 2.9459397225281673e-06, + "loss": 0.8849, + "step": 7900 + }, + { + "epoch": 0.65, + "grad_norm": 4.880761815644048, + "learning_rate": 2.9447328785454752e-06, + "loss": 0.9877, + "step": 7901 + }, + { + "epoch": 0.65, + "grad_norm": 2.1428597004178456, + "learning_rate": 2.9435261786280645e-06, + "loss": 0.509, + "step": 7902 + }, + { + "epoch": 0.65, + "grad_norm": 3.394815004925326, + "learning_rate": 2.942319622860519e-06, + "loss": 0.363, + "step": 7903 + }, + { + "epoch": 0.65, + "grad_norm": 4.4582482783216015, + "learning_rate": 2.9411132113274132e-06, + "loss": 0.8826, + "step": 7904 + }, + { + "epoch": 0.65, + "grad_norm": 3.753784692097969, + "learning_rate": 2.9399069441133116e-06, + "loss": 0.5, + "step": 7905 + }, + { + "epoch": 0.65, + "grad_norm": 3.0731841609712225, + "learning_rate": 2.9387008213027675e-06, + "loss": 0.6792, + "step": 7906 + }, + { + "epoch": 0.65, + "grad_norm": 3.9450796550415745, + "learning_rate": 2.937494842980324e-06, + "loss": 0.7015, + "step": 7907 + }, + { + "epoch": 0.65, + "grad_norm": 1.1214717249721415, + "learning_rate": 2.9362890092305158e-06, + "loss": 0.1444, + "step": 7908 + }, + { + "epoch": 0.65, + "grad_norm": 3.938371891994917, + "learning_rate": 2.935083320137867e-06, + "loss": 0.9034, + "step": 7909 + }, + { + "epoch": 0.65, + "grad_norm": 3.9877577235309896, + "learning_rate": 2.9338777757868923e-06, + "loss": 0.8414, + "step": 7910 + }, + { + "epoch": 0.65, + "grad_norm": 2.6328141567571284, + "learning_rate": 2.9326723762620924e-06, + "loss": 0.4078, + "step": 7911 + }, + { + "epoch": 0.65, + "grad_norm": 3.990145345446904, + "learning_rate": 2.931467121647962e-06, + "loss": 0.7359, + "step": 7912 + }, + { + "epoch": 0.65, + "grad_norm": 5.807798979886192, + "learning_rate": 2.930262012028984e-06, + "loss": 0.8907, + "step": 7913 + }, + { + "epoch": 0.65, + "grad_norm": 3.8619260709390737, + "learning_rate": 2.929057047489632e-06, + "loss": 0.7976, + "step": 7914 + }, + { + "epoch": 0.65, + "grad_norm": 4.245285559043202, + "learning_rate": 2.9278522281143667e-06, + "loss": 0.681, + "step": 7915 + }, + { + "epoch": 0.65, + "grad_norm": 4.452101166075296, + "learning_rate": 2.9266475539876447e-06, + "loss": 0.5975, + "step": 7916 + }, + { + "epoch": 0.65, + "grad_norm": 5.729503388721803, + "learning_rate": 2.9254430251939046e-06, + "loss": 1.127, + "step": 7917 + }, + { + "epoch": 0.65, + "grad_norm": 4.718684875541518, + "learning_rate": 2.9242386418175793e-06, + "loss": 0.9964, + "step": 7918 + }, + { + "epoch": 0.65, + "grad_norm": 3.6566710096133983, + "learning_rate": 2.9230344039430913e-06, + "loss": 0.8345, + "step": 7919 + }, + { + "epoch": 0.65, + "grad_norm": 2.0860241466730605, + "learning_rate": 2.921830311654853e-06, + "loss": 0.332, + "step": 7920 + }, + { + "epoch": 0.65, + "grad_norm": 3.050894560130508, + "learning_rate": 2.9206263650372668e-06, + "loss": 0.5202, + "step": 7921 + }, + { + "epoch": 0.65, + "grad_norm": 3.6740888860709515, + "learning_rate": 2.919422564174722e-06, + "loss": 0.7684, + "step": 7922 + }, + { + "epoch": 0.65, + "grad_norm": 5.539719103044569, + "learning_rate": 2.9182189091516017e-06, + "loss": 1.2597, + "step": 7923 + }, + { + "epoch": 0.65, + "grad_norm": 2.534927915504683, + "learning_rate": 2.9170154000522744e-06, + "loss": 0.3663, + "step": 7924 + }, + { + "epoch": 0.65, + "grad_norm": 3.5698272821477635, + "learning_rate": 2.915812036961103e-06, + "loss": 1.0274, + "step": 7925 + }, + { + "epoch": 0.65, + "grad_norm": 3.478769023476825, + "learning_rate": 2.914608819962437e-06, + "loss": 0.6595, + "step": 7926 + }, + { + "epoch": 0.65, + "grad_norm": 4.068271121802564, + "learning_rate": 2.9134057491406163e-06, + "loss": 0.662, + "step": 7927 + }, + { + "epoch": 0.65, + "grad_norm": 6.19661409649374, + "learning_rate": 2.912202824579975e-06, + "loss": 1.6517, + "step": 7928 + }, + { + "epoch": 0.65, + "grad_norm": 3.4816319057866587, + "learning_rate": 2.911000046364827e-06, + "loss": 0.4724, + "step": 7929 + }, + { + "epoch": 0.65, + "grad_norm": 5.149261359307092, + "learning_rate": 2.9097974145794843e-06, + "loss": 0.6429, + "step": 7930 + }, + { + "epoch": 0.65, + "grad_norm": 2.424747973768166, + "learning_rate": 2.908594929308246e-06, + "loss": 0.45, + "step": 7931 + }, + { + "epoch": 0.65, + "grad_norm": 2.8112624800692414, + "learning_rate": 2.907392590635404e-06, + "loss": 0.3496, + "step": 7932 + }, + { + "epoch": 0.65, + "grad_norm": 3.0302301469921433, + "learning_rate": 2.9061903986452323e-06, + "loss": 0.6052, + "step": 7933 + }, + { + "epoch": 0.65, + "grad_norm": 2.9909120510204357, + "learning_rate": 2.904988353422003e-06, + "loss": 0.6163, + "step": 7934 + }, + { + "epoch": 0.65, + "grad_norm": 4.088865606963298, + "learning_rate": 2.9037864550499704e-06, + "loss": 0.9871, + "step": 7935 + }, + { + "epoch": 0.65, + "grad_norm": 4.7197420214941594, + "learning_rate": 2.902584703613385e-06, + "loss": 1.0631, + "step": 7936 + }, + { + "epoch": 0.65, + "grad_norm": 3.4037252525132544, + "learning_rate": 2.9013830991964838e-06, + "loss": 0.5726, + "step": 7937 + }, + { + "epoch": 0.65, + "grad_norm": 4.86857257087212, + "learning_rate": 2.900181641883494e-06, + "loss": 1.2704, + "step": 7938 + }, + { + "epoch": 0.65, + "grad_norm": 2.9548905996573103, + "learning_rate": 2.8989803317586353e-06, + "loss": 0.4708, + "step": 7939 + }, + { + "epoch": 0.65, + "grad_norm": 3.606607452653022, + "learning_rate": 2.8977791689061087e-06, + "loss": 0.7239, + "step": 7940 + }, + { + "epoch": 0.65, + "grad_norm": 5.19923764561386, + "learning_rate": 2.8965781534101132e-06, + "loss": 1.0878, + "step": 7941 + }, + { + "epoch": 0.65, + "grad_norm": 3.4083004770293286, + "learning_rate": 2.895377285354836e-06, + "loss": 0.5902, + "step": 7942 + }, + { + "epoch": 0.65, + "grad_norm": 3.7862425947863994, + "learning_rate": 2.8941765648244513e-06, + "loss": 0.7949, + "step": 7943 + }, + { + "epoch": 0.65, + "grad_norm": 5.819961564630001, + "learning_rate": 2.892975991903125e-06, + "loss": 1.3285, + "step": 7944 + }, + { + "epoch": 0.65, + "grad_norm": 5.505179163497323, + "learning_rate": 2.891775566675014e-06, + "loss": 1.2131, + "step": 7945 + }, + { + "epoch": 0.65, + "grad_norm": 3.800564826454519, + "learning_rate": 2.8905752892242587e-06, + "loss": 1.0754, + "step": 7946 + }, + { + "epoch": 0.65, + "grad_norm": 2.6659245553942985, + "learning_rate": 2.889375159634995e-06, + "loss": 0.2595, + "step": 7947 + }, + { + "epoch": 0.65, + "grad_norm": 4.867679753509104, + "learning_rate": 2.8881751779913498e-06, + "loss": 0.8299, + "step": 7948 + }, + { + "epoch": 0.65, + "grad_norm": 3.1805636058524747, + "learning_rate": 2.886975344377432e-06, + "loss": 0.7716, + "step": 7949 + }, + { + "epoch": 0.65, + "grad_norm": 2.9457803081893306, + "learning_rate": 2.8857756588773457e-06, + "loss": 0.673, + "step": 7950 + }, + { + "epoch": 0.65, + "grad_norm": 3.854688609728728, + "learning_rate": 2.884576121575187e-06, + "loss": 0.8473, + "step": 7951 + }, + { + "epoch": 0.65, + "grad_norm": 3.55813344986194, + "learning_rate": 2.8833767325550345e-06, + "loss": 0.8929, + "step": 7952 + }, + { + "epoch": 0.65, + "grad_norm": 4.062536876817694, + "learning_rate": 2.8821774919009605e-06, + "loss": 1.2235, + "step": 7953 + }, + { + "epoch": 0.65, + "grad_norm": 4.91308594382761, + "learning_rate": 2.8809783996970274e-06, + "loss": 0.9671, + "step": 7954 + }, + { + "epoch": 0.65, + "grad_norm": 4.705628943410485, + "learning_rate": 2.8797794560272875e-06, + "loss": 1.1877, + "step": 7955 + }, + { + "epoch": 0.65, + "grad_norm": 4.095020489564123, + "learning_rate": 2.8785806609757815e-06, + "loss": 0.8418, + "step": 7956 + }, + { + "epoch": 0.65, + "grad_norm": 4.12676106657002, + "learning_rate": 2.8773820146265375e-06, + "loss": 1.25, + "step": 7957 + }, + { + "epoch": 0.65, + "grad_norm": 4.389999860397088, + "learning_rate": 2.8761835170635765e-06, + "loss": 0.8575, + "step": 7958 + }, + { + "epoch": 0.65, + "grad_norm": 3.4720159253229315, + "learning_rate": 2.8749851683709072e-06, + "loss": 0.6071, + "step": 7959 + }, + { + "epoch": 0.65, + "grad_norm": 2.980189054467754, + "learning_rate": 2.8737869686325304e-06, + "loss": 0.4888, + "step": 7960 + }, + { + "epoch": 0.65, + "grad_norm": 3.750422703747019, + "learning_rate": 2.872588917932434e-06, + "loss": 0.8012, + "step": 7961 + }, + { + "epoch": 0.65, + "grad_norm": 4.50027378894247, + "learning_rate": 2.871391016354597e-06, + "loss": 0.9064, + "step": 7962 + }, + { + "epoch": 0.65, + "grad_norm": 3.299368076828362, + "learning_rate": 2.8701932639829846e-06, + "loss": 0.5323, + "step": 7963 + }, + { + "epoch": 0.65, + "grad_norm": 3.9160283537957787, + "learning_rate": 2.868995660901557e-06, + "loss": 1.0933, + "step": 7964 + }, + { + "epoch": 0.65, + "grad_norm": 2.6159973575585713, + "learning_rate": 2.867798207194258e-06, + "loss": 0.4785, + "step": 7965 + }, + { + "epoch": 0.65, + "grad_norm": 2.854649565232945, + "learning_rate": 2.866600902945025e-06, + "loss": 0.2629, + "step": 7966 + }, + { + "epoch": 0.65, + "grad_norm": 3.442187897459728, + "learning_rate": 2.865403748237784e-06, + "loss": 0.6322, + "step": 7967 + }, + { + "epoch": 0.65, + "grad_norm": 5.0672308602635106, + "learning_rate": 2.864206743156453e-06, + "loss": 0.5079, + "step": 7968 + }, + { + "epoch": 0.65, + "grad_norm": 4.447223811256291, + "learning_rate": 2.8630098877849322e-06, + "loss": 0.6896, + "step": 7969 + }, + { + "epoch": 0.65, + "grad_norm": 3.772683191649797, + "learning_rate": 2.861813182207117e-06, + "loss": 1.1045, + "step": 7970 + }, + { + "epoch": 0.65, + "grad_norm": 4.90869334797886, + "learning_rate": 2.8606166265068935e-06, + "loss": 1.2193, + "step": 7971 + }, + { + "epoch": 0.65, + "grad_norm": 3.66447128756878, + "learning_rate": 2.8594202207681333e-06, + "loss": 0.9918, + "step": 7972 + }, + { + "epoch": 0.65, + "grad_norm": 2.1709142790837173, + "learning_rate": 2.8582239650747024e-06, + "loss": 0.3579, + "step": 7973 + }, + { + "epoch": 0.65, + "grad_norm": 3.542718894700696, + "learning_rate": 2.8570278595104478e-06, + "loss": 0.4313, + "step": 7974 + }, + { + "epoch": 0.65, + "grad_norm": 5.470565752435574, + "learning_rate": 2.855831904159214e-06, + "loss": 1.3507, + "step": 7975 + }, + { + "epoch": 0.65, + "grad_norm": 3.017187077630708, + "learning_rate": 2.8546360991048325e-06, + "loss": 0.4235, + "step": 7976 + }, + { + "epoch": 0.65, + "grad_norm": 3.411440865234151, + "learning_rate": 2.8534404444311235e-06, + "loss": 0.6309, + "step": 7977 + }, + { + "epoch": 0.65, + "grad_norm": 5.200528672997894, + "learning_rate": 2.8522449402218984e-06, + "loss": 1.0034, + "step": 7978 + }, + { + "epoch": 0.65, + "grad_norm": 2.373192243595398, + "learning_rate": 2.8510495865609573e-06, + "loss": 0.6623, + "step": 7979 + }, + { + "epoch": 0.65, + "grad_norm": 4.968360904913381, + "learning_rate": 2.8498543835320856e-06, + "loss": 1.2938, + "step": 7980 + }, + { + "epoch": 0.65, + "grad_norm": 3.224467989703306, + "learning_rate": 2.8486593312190668e-06, + "loss": 0.5887, + "step": 7981 + }, + { + "epoch": 0.65, + "grad_norm": 2.654242604119693, + "learning_rate": 2.8474644297056643e-06, + "loss": 0.4886, + "step": 7982 + }, + { + "epoch": 0.65, + "grad_norm": 3.7602370743626894, + "learning_rate": 2.8462696790756362e-06, + "loss": 0.6524, + "step": 7983 + }, + { + "epoch": 0.65, + "grad_norm": 4.902133042146245, + "learning_rate": 2.845075079412731e-06, + "loss": 0.9172, + "step": 7984 + }, + { + "epoch": 0.65, + "grad_norm": 5.455886719214672, + "learning_rate": 2.8438806308006874e-06, + "loss": 1.2014, + "step": 7985 + }, + { + "epoch": 0.65, + "grad_norm": 2.8729653038084955, + "learning_rate": 2.842686333323226e-06, + "loss": 0.5934, + "step": 7986 + }, + { + "epoch": 0.65, + "grad_norm": 2.189609924684086, + "learning_rate": 2.841492187064063e-06, + "loss": 0.3625, + "step": 7987 + }, + { + "epoch": 0.65, + "grad_norm": 3.3526536316346363, + "learning_rate": 2.8402981921069044e-06, + "loss": 0.4967, + "step": 7988 + }, + { + "epoch": 0.65, + "grad_norm": 2.623895560107847, + "learning_rate": 2.8391043485354436e-06, + "loss": 0.5459, + "step": 7989 + }, + { + "epoch": 0.65, + "grad_norm": 3.154512611660718, + "learning_rate": 2.8379106564333637e-06, + "loss": 0.4984, + "step": 7990 + }, + { + "epoch": 0.65, + "grad_norm": 1.224795514043236, + "learning_rate": 2.8367171158843386e-06, + "loss": 0.1345, + "step": 7991 + }, + { + "epoch": 0.65, + "grad_norm": 3.1610715773663984, + "learning_rate": 2.835523726972028e-06, + "loss": 0.6905, + "step": 7992 + }, + { + "epoch": 0.65, + "grad_norm": 3.3696818259978287, + "learning_rate": 2.834330489780084e-06, + "loss": 0.7145, + "step": 7993 + }, + { + "epoch": 0.65, + "grad_norm": 3.2641499570271626, + "learning_rate": 2.8331374043921472e-06, + "loss": 0.7458, + "step": 7994 + }, + { + "epoch": 0.65, + "grad_norm": 3.941652968343014, + "learning_rate": 2.831944470891851e-06, + "loss": 1.0364, + "step": 7995 + }, + { + "epoch": 0.65, + "grad_norm": 4.5782653321947455, + "learning_rate": 2.8307516893628097e-06, + "loss": 1.2017, + "step": 7996 + }, + { + "epoch": 0.65, + "grad_norm": 3.768539344559609, + "learning_rate": 2.8295590598886356e-06, + "loss": 0.9792, + "step": 7997 + }, + { + "epoch": 0.65, + "grad_norm": 3.882226709861375, + "learning_rate": 2.828366582552924e-06, + "loss": 0.7185, + "step": 7998 + }, + { + "epoch": 0.65, + "grad_norm": 5.30969061938397, + "learning_rate": 2.827174257439265e-06, + "loss": 0.8603, + "step": 7999 + }, + { + "epoch": 0.65, + "grad_norm": 3.865148421794005, + "learning_rate": 2.8259820846312326e-06, + "loss": 0.6394, + "step": 8000 + }, + { + "epoch": 0.65, + "grad_norm": 3.216494413050757, + "learning_rate": 2.824790064212396e-06, + "loss": 0.4633, + "step": 8001 + }, + { + "epoch": 0.65, + "grad_norm": 5.186243312675972, + "learning_rate": 2.8235981962663107e-06, + "loss": 0.5483, + "step": 8002 + }, + { + "epoch": 0.65, + "grad_norm": 5.440202878375771, + "learning_rate": 2.8224064808765182e-06, + "loss": 0.9243, + "step": 8003 + }, + { + "epoch": 0.65, + "grad_norm": 4.79307107805714, + "learning_rate": 2.8212149181265547e-06, + "loss": 1.046, + "step": 8004 + }, + { + "epoch": 0.65, + "grad_norm": 3.234343446088011, + "learning_rate": 2.820023508099944e-06, + "loss": 0.5725, + "step": 8005 + }, + { + "epoch": 0.65, + "grad_norm": 3.7712916691735443, + "learning_rate": 2.8188322508801967e-06, + "loss": 0.9586, + "step": 8006 + }, + { + "epoch": 0.65, + "grad_norm": 5.110195433030602, + "learning_rate": 2.817641146550817e-06, + "loss": 0.8975, + "step": 8007 + }, + { + "epoch": 0.65, + "grad_norm": 3.564456190651159, + "learning_rate": 2.8164501951952973e-06, + "loss": 0.7762, + "step": 8008 + }, + { + "epoch": 0.65, + "grad_norm": 3.222844540856964, + "learning_rate": 2.8152593968971143e-06, + "loss": 0.7783, + "step": 8009 + }, + { + "epoch": 0.65, + "grad_norm": 3.2829279961044495, + "learning_rate": 2.814068751739739e-06, + "loss": 0.8104, + "step": 8010 + }, + { + "epoch": 0.65, + "grad_norm": 2.6984117927456697, + "learning_rate": 2.8128782598066327e-06, + "loss": 0.4785, + "step": 8011 + }, + { + "epoch": 0.65, + "grad_norm": 3.0633281365883747, + "learning_rate": 2.8116879211812407e-06, + "loss": 0.7631, + "step": 8012 + }, + { + "epoch": 0.65, + "grad_norm": 4.632535475645752, + "learning_rate": 2.810497735947003e-06, + "loss": 0.5679, + "step": 8013 + }, + { + "epoch": 0.66, + "grad_norm": 3.8235228300560817, + "learning_rate": 2.809307704187344e-06, + "loss": 0.3668, + "step": 8014 + }, + { + "epoch": 0.66, + "grad_norm": 5.161518638795988, + "learning_rate": 2.8081178259856813e-06, + "loss": 1.2164, + "step": 8015 + }, + { + "epoch": 0.66, + "grad_norm": 3.788168760095346, + "learning_rate": 2.806928101425419e-06, + "loss": 0.748, + "step": 8016 + }, + { + "epoch": 0.66, + "grad_norm": 3.595830211979231, + "learning_rate": 2.8057385305899533e-06, + "loss": 0.7062, + "step": 8017 + }, + { + "epoch": 0.66, + "grad_norm": 3.2491092651746767, + "learning_rate": 2.804549113562667e-06, + "loss": 0.7872, + "step": 8018 + }, + { + "epoch": 0.66, + "grad_norm": 2.167759757082825, + "learning_rate": 2.803359850426935e-06, + "loss": 0.296, + "step": 8019 + }, + { + "epoch": 0.66, + "grad_norm": 3.6383704732156836, + "learning_rate": 2.802170741266116e-06, + "loss": 0.8412, + "step": 8020 + }, + { + "epoch": 0.66, + "grad_norm": 2.4452165564431327, + "learning_rate": 2.8009817861635622e-06, + "loss": 0.4422, + "step": 8021 + }, + { + "epoch": 0.66, + "grad_norm": 4.406342192462903, + "learning_rate": 2.7997929852026164e-06, + "loss": 1.0347, + "step": 8022 + }, + { + "epoch": 0.66, + "grad_norm": 2.505017668203887, + "learning_rate": 2.7986043384666055e-06, + "loss": 0.4232, + "step": 8023 + }, + { + "epoch": 0.66, + "grad_norm": 2.8697517932358667, + "learning_rate": 2.797415846038851e-06, + "loss": 0.4334, + "step": 8024 + }, + { + "epoch": 0.66, + "grad_norm": 3.7275342691111857, + "learning_rate": 2.7962275080026612e-06, + "loss": 0.6212, + "step": 8025 + }, + { + "epoch": 0.66, + "grad_norm": 4.908361652822283, + "learning_rate": 2.795039324441331e-06, + "loss": 0.8908, + "step": 8026 + }, + { + "epoch": 0.66, + "grad_norm": 4.727079111363858, + "learning_rate": 2.7938512954381503e-06, + "loss": 1.0258, + "step": 8027 + }, + { + "epoch": 0.66, + "grad_norm": 3.144923134552206, + "learning_rate": 2.79266342107639e-06, + "loss": 0.6403, + "step": 8028 + }, + { + "epoch": 0.66, + "grad_norm": 2.8843594757838744, + "learning_rate": 2.791475701439317e-06, + "loss": 0.3594, + "step": 8029 + }, + { + "epoch": 0.66, + "grad_norm": 4.49582032249646, + "learning_rate": 2.790288136610187e-06, + "loss": 1.0168, + "step": 8030 + }, + { + "epoch": 0.66, + "grad_norm": 3.0150535405758636, + "learning_rate": 2.7891007266722435e-06, + "loss": 0.7466, + "step": 8031 + }, + { + "epoch": 0.66, + "grad_norm": 5.821039224362037, + "learning_rate": 2.787913471708715e-06, + "loss": 0.7878, + "step": 8032 + }, + { + "epoch": 0.66, + "grad_norm": 3.5964597099929008, + "learning_rate": 2.7867263718028246e-06, + "loss": 0.8349, + "step": 8033 + }, + { + "epoch": 0.66, + "grad_norm": 5.215248501926799, + "learning_rate": 2.7855394270377843e-06, + "loss": 0.7862, + "step": 8034 + }, + { + "epoch": 0.66, + "grad_norm": 5.023413160171227, + "learning_rate": 2.784352637496792e-06, + "loss": 1.2161, + "step": 8035 + }, + { + "epoch": 0.66, + "grad_norm": 3.616951739827295, + "learning_rate": 2.7831660032630405e-06, + "loss": 0.7639, + "step": 8036 + }, + { + "epoch": 0.66, + "grad_norm": 4.343166757027107, + "learning_rate": 2.781979524419701e-06, + "loss": 0.9321, + "step": 8037 + }, + { + "epoch": 0.66, + "grad_norm": 4.542165060427963, + "learning_rate": 2.780793201049945e-06, + "loss": 0.6174, + "step": 8038 + }, + { + "epoch": 0.66, + "grad_norm": 5.00732678304251, + "learning_rate": 2.7796070332369274e-06, + "loss": 1.2496, + "step": 8039 + }, + { + "epoch": 0.66, + "grad_norm": 6.386606258896781, + "learning_rate": 2.7784210210637937e-06, + "loss": 1.5798, + "step": 8040 + }, + { + "epoch": 0.66, + "grad_norm": 4.456299499160534, + "learning_rate": 2.7772351646136795e-06, + "loss": 0.6512, + "step": 8041 + }, + { + "epoch": 0.66, + "grad_norm": 4.750388118818402, + "learning_rate": 2.776049463969705e-06, + "loss": 1.2437, + "step": 8042 + }, + { + "epoch": 0.66, + "grad_norm": 2.1606977279478032, + "learning_rate": 2.7748639192149863e-06, + "loss": 0.3467, + "step": 8043 + }, + { + "epoch": 0.66, + "grad_norm": 1.8466243448013244, + "learning_rate": 2.7736785304326217e-06, + "loss": 0.3452, + "step": 8044 + }, + { + "epoch": 0.66, + "grad_norm": 3.14618056656971, + "learning_rate": 2.772493297705703e-06, + "loss": 0.7901, + "step": 8045 + }, + { + "epoch": 0.66, + "grad_norm": 4.886642021842415, + "learning_rate": 2.771308221117309e-06, + "loss": 1.0205, + "step": 8046 + }, + { + "epoch": 0.66, + "grad_norm": 2.977361372003828, + "learning_rate": 2.7701233007505104e-06, + "loss": 0.6876, + "step": 8047 + }, + { + "epoch": 0.66, + "grad_norm": 2.668384948430321, + "learning_rate": 2.7689385366883654e-06, + "loss": 0.3277, + "step": 8048 + }, + { + "epoch": 0.66, + "grad_norm": 4.426996210231428, + "learning_rate": 2.7677539290139177e-06, + "loss": 1.0532, + "step": 8049 + }, + { + "epoch": 0.66, + "grad_norm": 4.458992091406994, + "learning_rate": 2.766569477810205e-06, + "loss": 0.9114, + "step": 8050 + }, + { + "epoch": 0.66, + "grad_norm": 4.174900594278151, + "learning_rate": 2.7653851831602514e-06, + "loss": 0.7051, + "step": 8051 + }, + { + "epoch": 0.66, + "grad_norm": 3.8040453815644266, + "learning_rate": 2.764201045147071e-06, + "loss": 0.8327, + "step": 8052 + }, + { + "epoch": 0.66, + "grad_norm": 4.293419797368659, + "learning_rate": 2.76301706385367e-06, + "loss": 1.0003, + "step": 8053 + }, + { + "epoch": 0.66, + "grad_norm": 3.393621564583542, + "learning_rate": 2.7618332393630353e-06, + "loss": 0.6693, + "step": 8054 + }, + { + "epoch": 0.66, + "grad_norm": 4.725986573141455, + "learning_rate": 2.7606495717581498e-06, + "loss": 0.7114, + "step": 8055 + }, + { + "epoch": 0.66, + "grad_norm": 4.669401274710586, + "learning_rate": 2.7594660611219838e-06, + "loss": 1.4499, + "step": 8056 + }, + { + "epoch": 0.66, + "grad_norm": 4.61047399172766, + "learning_rate": 2.7582827075374987e-06, + "loss": 0.9271, + "step": 8057 + }, + { + "epoch": 0.66, + "grad_norm": 5.410563440010257, + "learning_rate": 2.7570995110876364e-06, + "loss": 1.7803, + "step": 8058 + }, + { + "epoch": 0.66, + "grad_norm": 3.106155063712242, + "learning_rate": 2.75591647185534e-06, + "loss": 0.3749, + "step": 8059 + }, + { + "epoch": 0.66, + "grad_norm": 3.6487211232728236, + "learning_rate": 2.7547335899235304e-06, + "loss": 0.6993, + "step": 8060 + }, + { + "epoch": 0.66, + "grad_norm": 3.1968521220751, + "learning_rate": 2.7535508653751252e-06, + "loss": 0.5003, + "step": 8061 + }, + { + "epoch": 0.66, + "grad_norm": 2.1849582457043737, + "learning_rate": 2.7523682982930278e-06, + "loss": 0.4475, + "step": 8062 + }, + { + "epoch": 0.66, + "grad_norm": 5.046657932174976, + "learning_rate": 2.7511858887601304e-06, + "loss": 0.7241, + "step": 8063 + }, + { + "epoch": 0.66, + "grad_norm": 5.054131823721945, + "learning_rate": 2.7500036368593153e-06, + "loss": 1.1251, + "step": 8064 + }, + { + "epoch": 0.66, + "grad_norm": 3.0832769446657418, + "learning_rate": 2.7488215426734554e-06, + "loss": 0.6398, + "step": 8065 + }, + { + "epoch": 0.66, + "grad_norm": 3.3105219144880476, + "learning_rate": 2.7476396062854065e-06, + "loss": 0.5005, + "step": 8066 + }, + { + "epoch": 0.66, + "grad_norm": 2.8711836856018884, + "learning_rate": 2.7464578277780187e-06, + "loss": 0.5241, + "step": 8067 + }, + { + "epoch": 0.66, + "grad_norm": 4.941614254848235, + "learning_rate": 2.74527620723413e-06, + "loss": 0.7623, + "step": 8068 + }, + { + "epoch": 0.66, + "grad_norm": 5.47330668948666, + "learning_rate": 2.7440947447365664e-06, + "loss": 0.9966, + "step": 8069 + }, + { + "epoch": 0.66, + "grad_norm": 3.6291212023548796, + "learning_rate": 2.7429134403681435e-06, + "loss": 0.5474, + "step": 8070 + }, + { + "epoch": 0.66, + "grad_norm": 4.157885256218511, + "learning_rate": 2.741732294211667e-06, + "loss": 1.1754, + "step": 8071 + }, + { + "epoch": 0.66, + "grad_norm": 3.6000847904713003, + "learning_rate": 2.740551306349927e-06, + "loss": 0.9091, + "step": 8072 + }, + { + "epoch": 0.66, + "grad_norm": 0.8847891185750434, + "learning_rate": 2.739370476865707e-06, + "loss": 0.1354, + "step": 8073 + }, + { + "epoch": 0.66, + "grad_norm": 2.179574267260189, + "learning_rate": 2.738189805841781e-06, + "loss": 0.4367, + "step": 8074 + }, + { + "epoch": 0.66, + "grad_norm": 4.54789867852281, + "learning_rate": 2.7370092933609037e-06, + "loss": 1.2261, + "step": 8075 + }, + { + "epoch": 0.66, + "grad_norm": 3.0687699554112644, + "learning_rate": 2.7358289395058284e-06, + "loss": 0.5557, + "step": 8076 + }, + { + "epoch": 0.66, + "grad_norm": 3.7615195258812313, + "learning_rate": 2.7346487443592888e-06, + "loss": 0.8794, + "step": 8077 + }, + { + "epoch": 0.66, + "grad_norm": 3.876920388057226, + "learning_rate": 2.7334687080040134e-06, + "loss": 1.0369, + "step": 8078 + }, + { + "epoch": 0.66, + "grad_norm": 4.344281397286893, + "learning_rate": 2.732288830522718e-06, + "loss": 0.9742, + "step": 8079 + }, + { + "epoch": 0.66, + "grad_norm": 3.4912933701112414, + "learning_rate": 2.731109111998106e-06, + "loss": 0.6292, + "step": 8080 + }, + { + "epoch": 0.66, + "grad_norm": 5.110955242715626, + "learning_rate": 2.729929552512871e-06, + "loss": 1.0262, + "step": 8081 + }, + { + "epoch": 0.66, + "grad_norm": 3.7977212752673126, + "learning_rate": 2.7287501521496966e-06, + "loss": 0.7897, + "step": 8082 + }, + { + "epoch": 0.66, + "grad_norm": 3.5688064240040456, + "learning_rate": 2.7275709109912506e-06, + "loss": 0.8668, + "step": 8083 + }, + { + "epoch": 0.66, + "grad_norm": 3.3072778372473155, + "learning_rate": 2.726391829120194e-06, + "loss": 0.579, + "step": 8084 + }, + { + "epoch": 0.66, + "grad_norm": 4.019471690935791, + "learning_rate": 2.7252129066191758e-06, + "loss": 0.6334, + "step": 8085 + }, + { + "epoch": 0.66, + "grad_norm": 1.1563780617412596, + "learning_rate": 2.7240341435708316e-06, + "loss": 0.1597, + "step": 8086 + }, + { + "epoch": 0.66, + "grad_norm": 3.7132075006676404, + "learning_rate": 2.7228555400577904e-06, + "loss": 0.5548, + "step": 8087 + }, + { + "epoch": 0.66, + "grad_norm": 4.412152437555208, + "learning_rate": 2.7216770961626672e-06, + "loss": 0.8515, + "step": 8088 + }, + { + "epoch": 0.66, + "grad_norm": 3.37663916419196, + "learning_rate": 2.720498811968062e-06, + "loss": 0.6956, + "step": 8089 + }, + { + "epoch": 0.66, + "grad_norm": 2.9745511389014134, + "learning_rate": 2.7193206875565715e-06, + "loss": 0.764, + "step": 8090 + }, + { + "epoch": 0.66, + "grad_norm": 4.1630440192612665, + "learning_rate": 2.7181427230107738e-06, + "loss": 0.8398, + "step": 8091 + }, + { + "epoch": 0.66, + "grad_norm": 1.285493124951717, + "learning_rate": 2.7169649184132403e-06, + "loss": 0.1758, + "step": 8092 + }, + { + "epoch": 0.66, + "grad_norm": 2.303442121768274, + "learning_rate": 2.7157872738465317e-06, + "loss": 0.4663, + "step": 8093 + }, + { + "epoch": 0.66, + "grad_norm": 2.4441901971587376, + "learning_rate": 2.714609789393193e-06, + "loss": 0.5931, + "step": 8094 + }, + { + "epoch": 0.66, + "grad_norm": 4.705057991518503, + "learning_rate": 2.7134324651357625e-06, + "loss": 0.9862, + "step": 8095 + }, + { + "epoch": 0.66, + "grad_norm": 4.168949543449048, + "learning_rate": 2.7122553011567636e-06, + "loss": 0.5788, + "step": 8096 + }, + { + "epoch": 0.66, + "grad_norm": 4.597665183472927, + "learning_rate": 2.711078297538713e-06, + "loss": 0.8572, + "step": 8097 + }, + { + "epoch": 0.66, + "grad_norm": 3.510929055765163, + "learning_rate": 2.7099014543641116e-06, + "loss": 0.8463, + "step": 8098 + }, + { + "epoch": 0.66, + "grad_norm": 2.4589615554959883, + "learning_rate": 2.708724771715454e-06, + "loss": 0.3028, + "step": 8099 + }, + { + "epoch": 0.66, + "grad_norm": 1.1060126440401574, + "learning_rate": 2.707548249675216e-06, + "loss": 0.155, + "step": 8100 + }, + { + "epoch": 0.66, + "grad_norm": 2.7680275308789666, + "learning_rate": 2.7063718883258694e-06, + "loss": 0.4032, + "step": 8101 + }, + { + "epoch": 0.66, + "grad_norm": 2.683328697382099, + "learning_rate": 2.7051956877498707e-06, + "loss": 0.2433, + "step": 8102 + }, + { + "epoch": 0.66, + "grad_norm": 3.528927015598015, + "learning_rate": 2.7040196480296677e-06, + "loss": 0.6996, + "step": 8103 + }, + { + "epoch": 0.66, + "grad_norm": 1.9414316471303987, + "learning_rate": 2.702843769247698e-06, + "loss": 0.2005, + "step": 8104 + }, + { + "epoch": 0.66, + "grad_norm": 2.1785227594638092, + "learning_rate": 2.7016680514863796e-06, + "loss": 0.3247, + "step": 8105 + }, + { + "epoch": 0.66, + "grad_norm": 4.559538548866543, + "learning_rate": 2.700492494828131e-06, + "loss": 0.8933, + "step": 8106 + }, + { + "epoch": 0.66, + "grad_norm": 2.3675280676526933, + "learning_rate": 2.699317099355349e-06, + "loss": 0.5503, + "step": 8107 + }, + { + "epoch": 0.66, + "grad_norm": 3.46769595659413, + "learning_rate": 2.6981418651504256e-06, + "loss": 0.6654, + "step": 8108 + }, + { + "epoch": 0.66, + "grad_norm": 1.3525915329662892, + "learning_rate": 2.69696679229574e-06, + "loss": 0.1927, + "step": 8109 + }, + { + "epoch": 0.66, + "grad_norm": 3.8039875762312847, + "learning_rate": 2.6957918808736593e-06, + "loss": 0.7026, + "step": 8110 + }, + { + "epoch": 0.66, + "grad_norm": 3.980857406008913, + "learning_rate": 2.6946171309665413e-06, + "loss": 0.7007, + "step": 8111 + }, + { + "epoch": 0.66, + "grad_norm": 2.9225626645846536, + "learning_rate": 2.693442542656728e-06, + "loss": 0.6474, + "step": 8112 + }, + { + "epoch": 0.66, + "grad_norm": 6.054818429499179, + "learning_rate": 2.692268116026554e-06, + "loss": 1.148, + "step": 8113 + }, + { + "epoch": 0.66, + "grad_norm": 3.626438590520175, + "learning_rate": 2.6910938511583424e-06, + "loss": 0.7244, + "step": 8114 + }, + { + "epoch": 0.66, + "grad_norm": 4.309158002175827, + "learning_rate": 2.689919748134403e-06, + "loss": 1.201, + "step": 8115 + }, + { + "epoch": 0.66, + "grad_norm": 3.8702321853862456, + "learning_rate": 2.6887458070370374e-06, + "loss": 0.726, + "step": 8116 + }, + { + "epoch": 0.66, + "grad_norm": 3.6861780001976747, + "learning_rate": 2.6875720279485305e-06, + "loss": 0.8203, + "step": 8117 + }, + { + "epoch": 0.66, + "grad_norm": 4.616881851475864, + "learning_rate": 2.686398410951161e-06, + "loss": 0.5582, + "step": 8118 + }, + { + "epoch": 0.66, + "grad_norm": 2.726228925977059, + "learning_rate": 2.685224956127194e-06, + "loss": 0.4601, + "step": 8119 + }, + { + "epoch": 0.66, + "grad_norm": 3.5189700331903744, + "learning_rate": 2.684051663558884e-06, + "loss": 0.7785, + "step": 8120 + }, + { + "epoch": 0.66, + "grad_norm": 3.1994821182522797, + "learning_rate": 2.6828785333284736e-06, + "loss": 0.5161, + "step": 8121 + }, + { + "epoch": 0.66, + "grad_norm": 1.233334262619315, + "learning_rate": 2.6817055655181947e-06, + "loss": 0.1662, + "step": 8122 + }, + { + "epoch": 0.66, + "grad_norm": 4.166189713882294, + "learning_rate": 2.6805327602102647e-06, + "loss": 1.0081, + "step": 8123 + }, + { + "epoch": 0.66, + "grad_norm": 4.427378529153368, + "learning_rate": 2.6793601174868934e-06, + "loss": 0.8678, + "step": 8124 + }, + { + "epoch": 0.66, + "grad_norm": 3.995741956743334, + "learning_rate": 2.678187637430279e-06, + "loss": 0.6496, + "step": 8125 + }, + { + "epoch": 0.66, + "grad_norm": 2.462789535181405, + "learning_rate": 2.677015320122607e-06, + "loss": 0.4996, + "step": 8126 + }, + { + "epoch": 0.66, + "grad_norm": 4.286132345182877, + "learning_rate": 2.6758431656460503e-06, + "loss": 1.2132, + "step": 8127 + }, + { + "epoch": 0.66, + "grad_norm": 4.582607649074299, + "learning_rate": 2.6746711740827757e-06, + "loss": 0.9266, + "step": 8128 + }, + { + "epoch": 0.66, + "grad_norm": 2.279407521043276, + "learning_rate": 2.6734993455149295e-06, + "loss": 0.2849, + "step": 8129 + }, + { + "epoch": 0.66, + "grad_norm": 3.293848497282886, + "learning_rate": 2.6723276800246544e-06, + "loss": 0.7391, + "step": 8130 + }, + { + "epoch": 0.66, + "grad_norm": 4.5846162274642515, + "learning_rate": 2.671156177694079e-06, + "loss": 0.8377, + "step": 8131 + }, + { + "epoch": 0.66, + "grad_norm": 4.437207797111822, + "learning_rate": 2.6699848386053208e-06, + "loss": 1.0667, + "step": 8132 + }, + { + "epoch": 0.66, + "grad_norm": 3.6627445024534078, + "learning_rate": 2.6688136628404858e-06, + "loss": 0.9905, + "step": 8133 + }, + { + "epoch": 0.66, + "grad_norm": 3.202182827673672, + "learning_rate": 2.6676426504816666e-06, + "loss": 0.5172, + "step": 8134 + }, + { + "epoch": 0.66, + "grad_norm": 4.655701150778445, + "learning_rate": 2.666471801610947e-06, + "loss": 0.7972, + "step": 8135 + }, + { + "epoch": 0.67, + "grad_norm": 5.265227474862339, + "learning_rate": 2.665301116310401e-06, + "loss": 1.0035, + "step": 8136 + }, + { + "epoch": 0.67, + "grad_norm": 2.2479087704532916, + "learning_rate": 2.664130594662083e-06, + "loss": 0.3387, + "step": 8137 + }, + { + "epoch": 0.67, + "grad_norm": 4.905996753323291, + "learning_rate": 2.6629602367480456e-06, + "loss": 1.3322, + "step": 8138 + }, + { + "epoch": 0.67, + "grad_norm": 2.489019225321688, + "learning_rate": 2.6617900426503267e-06, + "loss": 0.4995, + "step": 8139 + }, + { + "epoch": 0.67, + "grad_norm": 3.9753243013472632, + "learning_rate": 2.6606200124509474e-06, + "loss": 0.9357, + "step": 8140 + }, + { + "epoch": 0.67, + "grad_norm": 3.2508496008821526, + "learning_rate": 2.6594501462319243e-06, + "loss": 0.4631, + "step": 8141 + }, + { + "epoch": 0.67, + "grad_norm": 3.6037522115894522, + "learning_rate": 2.658280444075261e-06, + "loss": 0.8874, + "step": 8142 + }, + { + "epoch": 0.67, + "grad_norm": 2.9317164559359346, + "learning_rate": 2.657110906062946e-06, + "loss": 0.6426, + "step": 8143 + }, + { + "epoch": 0.67, + "grad_norm": 6.017729186475728, + "learning_rate": 2.6559415322769604e-06, + "loss": 1.4262, + "step": 8144 + }, + { + "epoch": 0.67, + "grad_norm": 2.8748166235299153, + "learning_rate": 2.654772322799274e-06, + "loss": 0.509, + "step": 8145 + }, + { + "epoch": 0.67, + "grad_norm": 3.122497071149187, + "learning_rate": 2.65360327771184e-06, + "loss": 0.7123, + "step": 8146 + }, + { + "epoch": 0.67, + "grad_norm": 1.6791406594043676, + "learning_rate": 2.6524343970966036e-06, + "loss": 0.1717, + "step": 8147 + }, + { + "epoch": 0.67, + "grad_norm": 1.7615678659571652, + "learning_rate": 2.6512656810355e-06, + "loss": 0.3311, + "step": 8148 + }, + { + "epoch": 0.67, + "grad_norm": 3.697669974612375, + "learning_rate": 2.6500971296104506e-06, + "loss": 0.84, + "step": 8149 + }, + { + "epoch": 0.67, + "grad_norm": 2.7332577255496306, + "learning_rate": 2.648928742903367e-06, + "loss": 0.5106, + "step": 8150 + }, + { + "epoch": 0.67, + "grad_norm": 4.330105445552753, + "learning_rate": 2.6477605209961453e-06, + "loss": 0.7977, + "step": 8151 + }, + { + "epoch": 0.67, + "grad_norm": 4.589037364480364, + "learning_rate": 2.6465924639706753e-06, + "loss": 1.2426, + "step": 8152 + }, + { + "epoch": 0.67, + "grad_norm": 1.6558531337873237, + "learning_rate": 2.64542457190883e-06, + "loss": 0.1925, + "step": 8153 + }, + { + "epoch": 0.67, + "grad_norm": 3.077278142721715, + "learning_rate": 2.6442568448924754e-06, + "loss": 0.4792, + "step": 8154 + }, + { + "epoch": 0.67, + "grad_norm": 5.43354957949788, + "learning_rate": 2.6430892830034634e-06, + "loss": 0.8561, + "step": 8155 + }, + { + "epoch": 0.67, + "grad_norm": 3.8153529256250165, + "learning_rate": 2.6419218863236374e-06, + "loss": 0.4848, + "step": 8156 + }, + { + "epoch": 0.67, + "grad_norm": 2.555375178166355, + "learning_rate": 2.640754654934823e-06, + "loss": 0.4334, + "step": 8157 + }, + { + "epoch": 0.67, + "grad_norm": 5.515588811786006, + "learning_rate": 2.6395875889188393e-06, + "loss": 1.1924, + "step": 8158 + }, + { + "epoch": 0.67, + "grad_norm": 3.011717132887081, + "learning_rate": 2.638420688357493e-06, + "loss": 0.5709, + "step": 8159 + }, + { + "epoch": 0.67, + "grad_norm": 2.4878893153708304, + "learning_rate": 2.6372539533325793e-06, + "loss": 0.546, + "step": 8160 + }, + { + "epoch": 0.67, + "grad_norm": 4.902388199732581, + "learning_rate": 2.6360873839258804e-06, + "loss": 1.0235, + "step": 8161 + }, + { + "epoch": 0.67, + "grad_norm": 4.194007613848852, + "learning_rate": 2.63492098021917e-06, + "loss": 0.7716, + "step": 8162 + }, + { + "epoch": 0.67, + "grad_norm": 3.464355787218569, + "learning_rate": 2.6337547422942046e-06, + "loss": 0.4862, + "step": 8163 + }, + { + "epoch": 0.67, + "grad_norm": 4.964077826087806, + "learning_rate": 2.6325886702327335e-06, + "loss": 1.1743, + "step": 8164 + }, + { + "epoch": 0.67, + "grad_norm": 4.992992442416339, + "learning_rate": 2.6314227641164936e-06, + "loss": 1.2173, + "step": 8165 + }, + { + "epoch": 0.67, + "grad_norm": 4.2534889852906685, + "learning_rate": 2.6302570240272118e-06, + "loss": 1.0937, + "step": 8166 + }, + { + "epoch": 0.67, + "grad_norm": 2.036733498750401, + "learning_rate": 2.629091450046598e-06, + "loss": 0.3887, + "step": 8167 + }, + { + "epoch": 0.67, + "grad_norm": 3.7368829772271814, + "learning_rate": 2.6279260422563567e-06, + "loss": 0.7874, + "step": 8168 + }, + { + "epoch": 0.67, + "grad_norm": 2.411251795911712, + "learning_rate": 2.6267608007381745e-06, + "loss": 0.4679, + "step": 8169 + }, + { + "epoch": 0.67, + "grad_norm": 3.112337241361299, + "learning_rate": 2.625595725573732e-06, + "loss": 0.5023, + "step": 8170 + }, + { + "epoch": 0.67, + "grad_norm": 2.904542380357021, + "learning_rate": 2.6244308168446958e-06, + "loss": 0.6493, + "step": 8171 + }, + { + "epoch": 0.67, + "grad_norm": 2.8234626865452395, + "learning_rate": 2.623266074632721e-06, + "loss": 0.5458, + "step": 8172 + }, + { + "epoch": 0.67, + "grad_norm": 5.07468524187268, + "learning_rate": 2.622101499019453e-06, + "loss": 1.0779, + "step": 8173 + }, + { + "epoch": 0.67, + "grad_norm": 3.548773180308887, + "learning_rate": 2.6209370900865183e-06, + "loss": 0.4463, + "step": 8174 + }, + { + "epoch": 0.67, + "grad_norm": 3.957265224808206, + "learning_rate": 2.6197728479155403e-06, + "loss": 0.79, + "step": 8175 + }, + { + "epoch": 0.67, + "grad_norm": 1.9613969150310653, + "learning_rate": 2.618608772588127e-06, + "loss": 0.3682, + "step": 8176 + }, + { + "epoch": 0.67, + "grad_norm": 4.452334592025262, + "learning_rate": 2.6174448641858744e-06, + "loss": 0.4594, + "step": 8177 + }, + { + "epoch": 0.67, + "grad_norm": 5.301706376374276, + "learning_rate": 2.6162811227903683e-06, + "loss": 0.8597, + "step": 8178 + }, + { + "epoch": 0.67, + "grad_norm": 5.402394813245053, + "learning_rate": 2.6151175484831835e-06, + "loss": 1.2955, + "step": 8179 + }, + { + "epoch": 0.67, + "grad_norm": 4.030558375461062, + "learning_rate": 2.613954141345878e-06, + "loss": 0.63, + "step": 8180 + }, + { + "epoch": 0.67, + "grad_norm": 4.090141052480777, + "learning_rate": 2.612790901460003e-06, + "loss": 0.8791, + "step": 8181 + }, + { + "epoch": 0.67, + "grad_norm": 2.8795763058173005, + "learning_rate": 2.6116278289070963e-06, + "loss": 0.6026, + "step": 8182 + }, + { + "epoch": 0.67, + "grad_norm": 5.105086730112901, + "learning_rate": 2.6104649237686864e-06, + "loss": 1.0118, + "step": 8183 + }, + { + "epoch": 0.67, + "grad_norm": 2.711704806278075, + "learning_rate": 2.609302186126284e-06, + "loss": 0.324, + "step": 8184 + }, + { + "epoch": 0.67, + "grad_norm": 5.544883390181114, + "learning_rate": 2.6081396160613957e-06, + "loss": 1.4639, + "step": 8185 + }, + { + "epoch": 0.67, + "grad_norm": 3.220550620883011, + "learning_rate": 2.60697721365551e-06, + "loss": 0.5899, + "step": 8186 + }, + { + "epoch": 0.67, + "grad_norm": 2.0355312131320495, + "learning_rate": 2.6058149789901066e-06, + "loss": 0.2735, + "step": 8187 + }, + { + "epoch": 0.67, + "grad_norm": 3.8453055950699087, + "learning_rate": 2.6046529121466537e-06, + "loss": 0.6375, + "step": 8188 + }, + { + "epoch": 0.67, + "grad_norm": 4.551515254155337, + "learning_rate": 2.6034910132066066e-06, + "loss": 0.675, + "step": 8189 + }, + { + "epoch": 0.67, + "grad_norm": 3.613874741705259, + "learning_rate": 2.60232928225141e-06, + "loss": 0.5582, + "step": 8190 + }, + { + "epoch": 0.67, + "grad_norm": 4.874853243116013, + "learning_rate": 2.6011677193624984e-06, + "loss": 1.1525, + "step": 8191 + }, + { + "epoch": 0.67, + "grad_norm": 4.102358534071514, + "learning_rate": 2.6000063246212882e-06, + "loss": 0.68, + "step": 8192 + }, + { + "epoch": 0.67, + "grad_norm": 4.009976735808661, + "learning_rate": 2.598845098109189e-06, + "loss": 1.2542, + "step": 8193 + }, + { + "epoch": 0.67, + "grad_norm": 3.4149214680031434, + "learning_rate": 2.5976840399075987e-06, + "loss": 0.8226, + "step": 8194 + }, + { + "epoch": 0.67, + "grad_norm": 2.9917850748077006, + "learning_rate": 2.5965231500979026e-06, + "loss": 0.5335, + "step": 8195 + }, + { + "epoch": 0.67, + "grad_norm": 4.042737043474195, + "learning_rate": 2.595362428761476e-06, + "loss": 0.8692, + "step": 8196 + }, + { + "epoch": 0.67, + "grad_norm": 4.079629908302849, + "learning_rate": 2.5942018759796756e-06, + "loss": 1.0781, + "step": 8197 + }, + { + "epoch": 0.67, + "grad_norm": 6.623417473132643, + "learning_rate": 2.5930414918338542e-06, + "loss": 1.107, + "step": 8198 + }, + { + "epoch": 0.67, + "grad_norm": 4.6628269852169915, + "learning_rate": 2.59188127640535e-06, + "loss": 1.1345, + "step": 8199 + }, + { + "epoch": 0.67, + "grad_norm": 3.9365790682542925, + "learning_rate": 2.590721229775487e-06, + "loss": 0.7339, + "step": 8200 + }, + { + "epoch": 0.67, + "grad_norm": 3.549101199921481, + "learning_rate": 2.589561352025581e-06, + "loss": 0.5565, + "step": 8201 + }, + { + "epoch": 0.67, + "grad_norm": 3.8907557400194692, + "learning_rate": 2.5884016432369352e-06, + "loss": 0.827, + "step": 8202 + }, + { + "epoch": 0.67, + "grad_norm": 5.923777107347025, + "learning_rate": 2.587242103490837e-06, + "loss": 0.958, + "step": 8203 + }, + { + "epoch": 0.67, + "grad_norm": 3.0709012492772443, + "learning_rate": 2.586082732868567e-06, + "loss": 0.5223, + "step": 8204 + }, + { + "epoch": 0.67, + "grad_norm": 4.845501551249635, + "learning_rate": 2.5849235314513923e-06, + "loss": 0.9612, + "step": 8205 + }, + { + "epoch": 0.67, + "grad_norm": 3.085875781413251, + "learning_rate": 2.583764499320567e-06, + "loss": 0.64, + "step": 8206 + }, + { + "epoch": 0.67, + "grad_norm": 1.994995047525389, + "learning_rate": 2.5826056365573356e-06, + "loss": 0.421, + "step": 8207 + }, + { + "epoch": 0.67, + "grad_norm": 3.005993934062253, + "learning_rate": 2.58144694324293e-06, + "loss": 0.7486, + "step": 8208 + }, + { + "epoch": 0.67, + "grad_norm": 3.7070171019910383, + "learning_rate": 2.5802884194585664e-06, + "loss": 0.4059, + "step": 8209 + }, + { + "epoch": 0.67, + "grad_norm": 3.6311262154020905, + "learning_rate": 2.5791300652854536e-06, + "loss": 0.421, + "step": 8210 + }, + { + "epoch": 0.67, + "grad_norm": 3.126244190997835, + "learning_rate": 2.5779718808047882e-06, + "loss": 0.8678, + "step": 8211 + }, + { + "epoch": 0.67, + "grad_norm": 4.054898585744836, + "learning_rate": 2.576813866097753e-06, + "loss": 0.7063, + "step": 8212 + }, + { + "epoch": 0.67, + "grad_norm": 5.269210773538143, + "learning_rate": 2.5756560212455216e-06, + "loss": 1.0215, + "step": 8213 + }, + { + "epoch": 0.67, + "grad_norm": 5.403454994734814, + "learning_rate": 2.5744983463292504e-06, + "loss": 0.9039, + "step": 8214 + }, + { + "epoch": 0.67, + "grad_norm": 2.943293988287031, + "learning_rate": 2.5733408414300914e-06, + "loss": 0.5114, + "step": 8215 + }, + { + "epoch": 0.67, + "grad_norm": 2.0435797640128968, + "learning_rate": 2.5721835066291767e-06, + "loss": 0.2742, + "step": 8216 + }, + { + "epoch": 0.67, + "grad_norm": 3.8488265573426546, + "learning_rate": 2.571026342007632e-06, + "loss": 0.9684, + "step": 8217 + }, + { + "epoch": 0.67, + "grad_norm": 2.298377465252303, + "learning_rate": 2.5698693476465704e-06, + "loss": 0.3515, + "step": 8218 + }, + { + "epoch": 0.67, + "grad_norm": 3.391490206326064, + "learning_rate": 2.568712523627093e-06, + "loss": 0.6765, + "step": 8219 + }, + { + "epoch": 0.67, + "grad_norm": 4.595083739065445, + "learning_rate": 2.567555870030285e-06, + "loss": 1.0814, + "step": 8220 + }, + { + "epoch": 0.67, + "grad_norm": 5.22699420619472, + "learning_rate": 2.5663993869372244e-06, + "loss": 0.8324, + "step": 8221 + }, + { + "epoch": 0.67, + "grad_norm": 3.8398058751542443, + "learning_rate": 2.565243074428976e-06, + "loss": 0.7015, + "step": 8222 + }, + { + "epoch": 0.67, + "grad_norm": 2.710784195102657, + "learning_rate": 2.5640869325865912e-06, + "loss": 0.4557, + "step": 8223 + }, + { + "epoch": 0.67, + "grad_norm": 3.9712039669774226, + "learning_rate": 2.5629309614911123e-06, + "loss": 0.6181, + "step": 8224 + }, + { + "epoch": 0.67, + "grad_norm": 4.573667204632561, + "learning_rate": 2.561775161223568e-06, + "loss": 1.0692, + "step": 8225 + }, + { + "epoch": 0.67, + "grad_norm": 3.3745376434268177, + "learning_rate": 2.560619531864972e-06, + "loss": 0.6221, + "step": 8226 + }, + { + "epoch": 0.67, + "grad_norm": 3.8847605733345594, + "learning_rate": 2.5594640734963306e-06, + "loss": 0.7908, + "step": 8227 + }, + { + "epoch": 0.67, + "grad_norm": 4.16579864794284, + "learning_rate": 2.5583087861986365e-06, + "loss": 0.7152, + "step": 8228 + }, + { + "epoch": 0.67, + "grad_norm": 1.332205382754697, + "learning_rate": 2.557153670052872e-06, + "loss": 0.1777, + "step": 8229 + }, + { + "epoch": 0.67, + "grad_norm": 3.4438633903948213, + "learning_rate": 2.5559987251400024e-06, + "loss": 0.5551, + "step": 8230 + }, + { + "epoch": 0.67, + "grad_norm": 1.765037699508793, + "learning_rate": 2.554843951540987e-06, + "loss": 0.1958, + "step": 8231 + }, + { + "epoch": 0.67, + "grad_norm": 4.59795873464682, + "learning_rate": 2.553689349336769e-06, + "loss": 0.8692, + "step": 8232 + }, + { + "epoch": 0.67, + "grad_norm": 4.002247911399786, + "learning_rate": 2.5525349186082793e-06, + "loss": 0.8691, + "step": 8233 + }, + { + "epoch": 0.67, + "grad_norm": 3.9778972950174603, + "learning_rate": 2.551380659436441e-06, + "loss": 0.5852, + "step": 8234 + }, + { + "epoch": 0.67, + "grad_norm": 3.141822415205915, + "learning_rate": 2.550226571902162e-06, + "loss": 0.4145, + "step": 8235 + }, + { + "epoch": 0.67, + "grad_norm": 3.8667126450551237, + "learning_rate": 2.549072656086341e-06, + "loss": 0.6026, + "step": 8236 + }, + { + "epoch": 0.67, + "grad_norm": 4.996300879810245, + "learning_rate": 2.547918912069859e-06, + "loss": 0.9623, + "step": 8237 + }, + { + "epoch": 0.67, + "grad_norm": 3.6904477810105543, + "learning_rate": 2.546765339933589e-06, + "loss": 0.7617, + "step": 8238 + }, + { + "epoch": 0.67, + "grad_norm": 4.581868726425581, + "learning_rate": 2.5456119397583923e-06, + "loss": 0.853, + "step": 8239 + }, + { + "epoch": 0.67, + "grad_norm": 6.0197529089066615, + "learning_rate": 2.544458711625117e-06, + "loss": 1.4534, + "step": 8240 + }, + { + "epoch": 0.67, + "grad_norm": 3.83866500070881, + "learning_rate": 2.5433056556145996e-06, + "loss": 0.9129, + "step": 8241 + }, + { + "epoch": 0.67, + "grad_norm": 3.852122294853424, + "learning_rate": 2.5421527718076657e-06, + "loss": 0.7918, + "step": 8242 + }, + { + "epoch": 0.67, + "grad_norm": 4.895027520673276, + "learning_rate": 2.541000060285125e-06, + "loss": 0.7276, + "step": 8243 + }, + { + "epoch": 0.67, + "grad_norm": 3.2427021641886995, + "learning_rate": 2.539847521127777e-06, + "loss": 0.5534, + "step": 8244 + }, + { + "epoch": 0.67, + "grad_norm": 1.890394714776982, + "learning_rate": 2.538695154416414e-06, + "loss": 0.2905, + "step": 8245 + }, + { + "epoch": 0.67, + "grad_norm": 5.208652062064113, + "learning_rate": 2.537542960231807e-06, + "loss": 1.1838, + "step": 8246 + }, + { + "epoch": 0.67, + "grad_norm": 5.599313100308787, + "learning_rate": 2.536390938654722e-06, + "loss": 1.379, + "step": 8247 + }, + { + "epoch": 0.67, + "grad_norm": 2.5905100343439336, + "learning_rate": 2.535239089765912e-06, + "loss": 0.6203, + "step": 8248 + }, + { + "epoch": 0.67, + "grad_norm": 3.955544128632737, + "learning_rate": 2.5340874136461138e-06, + "loss": 0.8615, + "step": 8249 + }, + { + "epoch": 0.67, + "grad_norm": 2.1298616750007944, + "learning_rate": 2.5329359103760555e-06, + "loss": 0.5261, + "step": 8250 + }, + { + "epoch": 0.67, + "grad_norm": 4.328799199804836, + "learning_rate": 2.5317845800364538e-06, + "loss": 1.198, + "step": 8251 + }, + { + "epoch": 0.67, + "grad_norm": 1.3281557786608587, + "learning_rate": 2.530633422708011e-06, + "loss": 0.1665, + "step": 8252 + }, + { + "epoch": 0.67, + "grad_norm": 3.7688107112634257, + "learning_rate": 2.529482438471421e-06, + "loss": 0.6611, + "step": 8253 + }, + { + "epoch": 0.67, + "grad_norm": 6.398786157877805, + "learning_rate": 2.5283316274073577e-06, + "loss": 1.1927, + "step": 8254 + }, + { + "epoch": 0.67, + "grad_norm": 4.653815342421929, + "learning_rate": 2.527180989596491e-06, + "loss": 1.0186, + "step": 8255 + }, + { + "epoch": 0.67, + "grad_norm": 4.075590632175973, + "learning_rate": 2.526030525119475e-06, + "loss": 0.6921, + "step": 8256 + }, + { + "epoch": 0.67, + "grad_norm": 4.855568441119522, + "learning_rate": 2.524880234056952e-06, + "loss": 1.2602, + "step": 8257 + }, + { + "epoch": 0.67, + "grad_norm": 4.687147851686595, + "learning_rate": 2.5237301164895538e-06, + "loss": 0.6533, + "step": 8258 + }, + { + "epoch": 0.68, + "grad_norm": 4.554934045264841, + "learning_rate": 2.5225801724978994e-06, + "loss": 0.7608, + "step": 8259 + }, + { + "epoch": 0.68, + "grad_norm": 5.047030105038087, + "learning_rate": 2.5214304021625906e-06, + "loss": 0.8476, + "step": 8260 + }, + { + "epoch": 0.68, + "grad_norm": 2.319276269283469, + "learning_rate": 2.5202808055642264e-06, + "loss": 0.2347, + "step": 8261 + }, + { + "epoch": 0.68, + "grad_norm": 4.030062028737353, + "learning_rate": 2.5191313827833834e-06, + "loss": 0.6413, + "step": 8262 + }, + { + "epoch": 0.68, + "grad_norm": 4.86433153661269, + "learning_rate": 2.517982133900634e-06, + "loss": 0.6122, + "step": 8263 + }, + { + "epoch": 0.68, + "grad_norm": 4.360572337027347, + "learning_rate": 2.5168330589965356e-06, + "loss": 0.8149, + "step": 8264 + }, + { + "epoch": 0.68, + "grad_norm": 4.517862354534247, + "learning_rate": 2.5156841581516344e-06, + "loss": 0.7741, + "step": 8265 + }, + { + "epoch": 0.68, + "grad_norm": 5.048209300032961, + "learning_rate": 2.5145354314464606e-06, + "loss": 1.0705, + "step": 8266 + }, + { + "epoch": 0.68, + "grad_norm": 4.9624911465341786, + "learning_rate": 2.5133868789615357e-06, + "loss": 1.0833, + "step": 8267 + }, + { + "epoch": 0.68, + "grad_norm": 2.4665013356509604, + "learning_rate": 2.5122385007773685e-06, + "loss": 0.337, + "step": 8268 + }, + { + "epoch": 0.68, + "grad_norm": 3.2481453941790934, + "learning_rate": 2.5110902969744567e-06, + "loss": 0.4088, + "step": 8269 + }, + { + "epoch": 0.68, + "grad_norm": 2.853603954957759, + "learning_rate": 2.5099422676332825e-06, + "loss": 0.3283, + "step": 8270 + }, + { + "epoch": 0.68, + "grad_norm": 5.159783771639473, + "learning_rate": 2.508794412834321e-06, + "loss": 1.1678, + "step": 8271 + }, + { + "epoch": 0.68, + "grad_norm": 5.484698844000725, + "learning_rate": 2.507646732658027e-06, + "loss": 0.9776, + "step": 8272 + }, + { + "epoch": 0.68, + "grad_norm": 4.201518136585807, + "learning_rate": 2.5064992271848504e-06, + "loss": 0.5837, + "step": 8273 + }, + { + "epoch": 0.68, + "grad_norm": 2.9111991079769775, + "learning_rate": 2.505351896495226e-06, + "loss": 0.7384, + "step": 8274 + }, + { + "epoch": 0.68, + "grad_norm": 4.718743486607333, + "learning_rate": 2.504204740669579e-06, + "loss": 0.7954, + "step": 8275 + }, + { + "epoch": 0.68, + "grad_norm": 3.008261239270777, + "learning_rate": 2.5030577597883166e-06, + "loss": 0.6152, + "step": 8276 + }, + { + "epoch": 0.68, + "grad_norm": 7.10932136638636, + "learning_rate": 2.5019109539318374e-06, + "loss": 1.3207, + "step": 8277 + }, + { + "epoch": 0.68, + "grad_norm": 3.8281334738196775, + "learning_rate": 2.5007643231805316e-06, + "loss": 0.5473, + "step": 8278 + }, + { + "epoch": 0.68, + "grad_norm": 2.88466449570412, + "learning_rate": 2.499617867614768e-06, + "loss": 0.5475, + "step": 8279 + }, + { + "epoch": 0.68, + "grad_norm": 4.86738131822349, + "learning_rate": 2.4984715873149097e-06, + "loss": 0.9485, + "step": 8280 + }, + { + "epoch": 0.68, + "grad_norm": 4.210014976007869, + "learning_rate": 2.497325482361307e-06, + "loss": 0.8218, + "step": 8281 + }, + { + "epoch": 0.68, + "grad_norm": 3.485578773064971, + "learning_rate": 2.4961795528342977e-06, + "loss": 0.6658, + "step": 8282 + }, + { + "epoch": 0.68, + "grad_norm": 5.424818209340035, + "learning_rate": 2.495033798814203e-06, + "loss": 1.1699, + "step": 8283 + }, + { + "epoch": 0.68, + "grad_norm": 2.1328923324244364, + "learning_rate": 2.493888220381338e-06, + "loss": 0.3901, + "step": 8284 + }, + { + "epoch": 0.68, + "grad_norm": 4.107847943479869, + "learning_rate": 2.492742817616002e-06, + "loss": 1.0158, + "step": 8285 + }, + { + "epoch": 0.68, + "grad_norm": 3.08409785382572, + "learning_rate": 2.4915975905984825e-06, + "loss": 0.5241, + "step": 8286 + }, + { + "epoch": 0.68, + "grad_norm": 2.605708211617868, + "learning_rate": 2.490452539409055e-06, + "loss": 0.6845, + "step": 8287 + }, + { + "epoch": 0.68, + "grad_norm": 3.7541933540948054, + "learning_rate": 2.4893076641279857e-06, + "loss": 0.7459, + "step": 8288 + }, + { + "epoch": 0.68, + "grad_norm": 4.325334689993187, + "learning_rate": 2.4881629648355197e-06, + "loss": 0.7375, + "step": 8289 + }, + { + "epoch": 0.68, + "grad_norm": 4.571401693590738, + "learning_rate": 2.487018441611899e-06, + "loss": 1.1225, + "step": 8290 + }, + { + "epoch": 0.68, + "grad_norm": 4.972920543433598, + "learning_rate": 2.485874094537349e-06, + "loss": 1.2411, + "step": 8291 + }, + { + "epoch": 0.68, + "grad_norm": 2.8754292288507934, + "learning_rate": 2.484729923692085e-06, + "loss": 0.6286, + "step": 8292 + }, + { + "epoch": 0.68, + "grad_norm": 1.3322231779253642, + "learning_rate": 2.4835859291563054e-06, + "loss": 0.2827, + "step": 8293 + }, + { + "epoch": 0.68, + "grad_norm": 4.1227954724856115, + "learning_rate": 2.4824421110102022e-06, + "loss": 0.7501, + "step": 8294 + }, + { + "epoch": 0.68, + "grad_norm": 3.5513355488999294, + "learning_rate": 2.481298469333949e-06, + "loss": 0.5861, + "step": 8295 + }, + { + "epoch": 0.68, + "grad_norm": 3.4934041611030273, + "learning_rate": 2.4801550042077118e-06, + "loss": 0.8104, + "step": 8296 + }, + { + "epoch": 0.68, + "grad_norm": 4.914640469913183, + "learning_rate": 2.4790117157116417e-06, + "loss": 1.4283, + "step": 8297 + }, + { + "epoch": 0.68, + "grad_norm": 5.321212069371395, + "learning_rate": 2.477868603925879e-06, + "loss": 0.7995, + "step": 8298 + }, + { + "epoch": 0.68, + "grad_norm": 2.9954868489166917, + "learning_rate": 2.4767256689305537e-06, + "loss": 0.8571, + "step": 8299 + }, + { + "epoch": 0.68, + "grad_norm": 2.999989814980641, + "learning_rate": 2.475582910805775e-06, + "loss": 0.6888, + "step": 8300 + }, + { + "epoch": 0.68, + "grad_norm": 3.4585531621945518, + "learning_rate": 2.474440329631648e-06, + "loss": 0.7443, + "step": 8301 + }, + { + "epoch": 0.68, + "grad_norm": 3.4130655659528393, + "learning_rate": 2.473297925488263e-06, + "loss": 0.8947, + "step": 8302 + }, + { + "epoch": 0.68, + "grad_norm": 7.689007018264186, + "learning_rate": 2.4721556984556968e-06, + "loss": 1.1999, + "step": 8303 + }, + { + "epoch": 0.68, + "grad_norm": 4.470032069770875, + "learning_rate": 2.471013648614015e-06, + "loss": 1.0237, + "step": 8304 + }, + { + "epoch": 0.68, + "grad_norm": 2.3840771568384413, + "learning_rate": 2.4698717760432723e-06, + "loss": 0.3854, + "step": 8305 + }, + { + "epoch": 0.68, + "grad_norm": 3.5984289964252936, + "learning_rate": 2.468730080823505e-06, + "loss": 0.5884, + "step": 8306 + }, + { + "epoch": 0.68, + "grad_norm": 2.580650107432294, + "learning_rate": 2.4675885630347423e-06, + "loss": 0.4019, + "step": 8307 + }, + { + "epoch": 0.68, + "grad_norm": 3.814392672951054, + "learning_rate": 2.466447222757003e-06, + "loss": 1.1511, + "step": 8308 + }, + { + "epoch": 0.68, + "grad_norm": 3.8923529634070535, + "learning_rate": 2.465306060070285e-06, + "loss": 0.7966, + "step": 8309 + }, + { + "epoch": 0.68, + "grad_norm": 2.927392623000533, + "learning_rate": 2.4641650750545816e-06, + "loss": 0.4876, + "step": 8310 + }, + { + "epoch": 0.68, + "grad_norm": 2.713209401249808, + "learning_rate": 2.4630242677898718e-06, + "loss": 0.5281, + "step": 8311 + }, + { + "epoch": 0.68, + "grad_norm": 4.069931845338178, + "learning_rate": 2.461883638356118e-06, + "loss": 1.0768, + "step": 8312 + }, + { + "epoch": 0.68, + "grad_norm": 3.043007353870255, + "learning_rate": 2.4607431868332756e-06, + "loss": 0.7361, + "step": 8313 + }, + { + "epoch": 0.68, + "grad_norm": 4.543555884846014, + "learning_rate": 2.4596029133012845e-06, + "loss": 0.5821, + "step": 8314 + }, + { + "epoch": 0.68, + "grad_norm": 3.6930194005161634, + "learning_rate": 2.4584628178400737e-06, + "loss": 0.4279, + "step": 8315 + }, + { + "epoch": 0.68, + "grad_norm": 3.3378103611275263, + "learning_rate": 2.45732290052956e-06, + "loss": 0.591, + "step": 8316 + }, + { + "epoch": 0.68, + "grad_norm": 3.6515986206695095, + "learning_rate": 2.456183161449644e-06, + "loss": 0.696, + "step": 8317 + }, + { + "epoch": 0.68, + "grad_norm": 2.368809991491645, + "learning_rate": 2.455043600680217e-06, + "loss": 0.237, + "step": 8318 + }, + { + "epoch": 0.68, + "grad_norm": 4.278257713890669, + "learning_rate": 2.4539042183011585e-06, + "loss": 1.0608, + "step": 8319 + }, + { + "epoch": 0.68, + "grad_norm": 3.390456938320225, + "learning_rate": 2.4527650143923334e-06, + "loss": 0.5158, + "step": 8320 + }, + { + "epoch": 0.68, + "grad_norm": 2.320303803659349, + "learning_rate": 2.4516259890335947e-06, + "loss": 0.322, + "step": 8321 + }, + { + "epoch": 0.68, + "grad_norm": 3.9903038975532277, + "learning_rate": 2.450487142304786e-06, + "loss": 0.8123, + "step": 8322 + }, + { + "epoch": 0.68, + "grad_norm": 3.1891243338684485, + "learning_rate": 2.4493484742857316e-06, + "loss": 0.4653, + "step": 8323 + }, + { + "epoch": 0.68, + "grad_norm": 3.5908340128294984, + "learning_rate": 2.4482099850562496e-06, + "loss": 0.6931, + "step": 8324 + }, + { + "epoch": 0.68, + "grad_norm": 3.3660223577689057, + "learning_rate": 2.447071674696141e-06, + "loss": 0.4745, + "step": 8325 + }, + { + "epoch": 0.68, + "grad_norm": 3.8100675807839512, + "learning_rate": 2.4459335432851977e-06, + "loss": 0.9717, + "step": 8326 + }, + { + "epoch": 0.68, + "grad_norm": 4.339816506479468, + "learning_rate": 2.4447955909031973e-06, + "loss": 1.1936, + "step": 8327 + }, + { + "epoch": 0.68, + "grad_norm": 4.270165734853613, + "learning_rate": 2.443657817629908e-06, + "loss": 1.266, + "step": 8328 + }, + { + "epoch": 0.68, + "grad_norm": 1.1624670879267303, + "learning_rate": 2.442520223545078e-06, + "loss": 0.1972, + "step": 8329 + }, + { + "epoch": 0.68, + "grad_norm": 3.0678233836269317, + "learning_rate": 2.4413828087284504e-06, + "loss": 0.2616, + "step": 8330 + }, + { + "epoch": 0.68, + "grad_norm": 4.583266213174848, + "learning_rate": 2.440245573259753e-06, + "loss": 0.9658, + "step": 8331 + }, + { + "epoch": 0.68, + "grad_norm": 4.120895013381585, + "learning_rate": 2.4391085172187005e-06, + "loss": 0.6695, + "step": 8332 + }, + { + "epoch": 0.68, + "grad_norm": 2.680942057669382, + "learning_rate": 2.437971640684998e-06, + "loss": 0.5309, + "step": 8333 + }, + { + "epoch": 0.68, + "grad_norm": 5.200500902511551, + "learning_rate": 2.4368349437383314e-06, + "loss": 1.583, + "step": 8334 + }, + { + "epoch": 0.68, + "grad_norm": 4.202678182159394, + "learning_rate": 2.4356984264583806e-06, + "loss": 0.8447, + "step": 8335 + }, + { + "epoch": 0.68, + "grad_norm": 2.9494206946055037, + "learning_rate": 2.43456208892481e-06, + "loss": 0.5021, + "step": 8336 + }, + { + "epoch": 0.68, + "grad_norm": 1.2127148178284841, + "learning_rate": 2.433425931217272e-06, + "loss": 0.1644, + "step": 8337 + }, + { + "epoch": 0.68, + "grad_norm": 4.0608201347588615, + "learning_rate": 2.4322899534154085e-06, + "loss": 0.9022, + "step": 8338 + }, + { + "epoch": 0.68, + "grad_norm": 4.657162564141952, + "learning_rate": 2.4311541555988433e-06, + "loss": 0.8073, + "step": 8339 + }, + { + "epoch": 0.68, + "grad_norm": 3.011262415440675, + "learning_rate": 2.430018537847193e-06, + "loss": 0.4338, + "step": 8340 + }, + { + "epoch": 0.68, + "grad_norm": 4.61578686555714, + "learning_rate": 2.4288831002400574e-06, + "loss": 1.118, + "step": 8341 + }, + { + "epoch": 0.68, + "grad_norm": 4.3072127189866585, + "learning_rate": 2.427747842857027e-06, + "loss": 0.9043, + "step": 8342 + }, + { + "epoch": 0.68, + "grad_norm": 2.891045801602025, + "learning_rate": 2.4266127657776777e-06, + "loss": 0.6071, + "step": 8343 + }, + { + "epoch": 0.68, + "grad_norm": 4.199359033982037, + "learning_rate": 2.4254778690815743e-06, + "loss": 0.8963, + "step": 8344 + }, + { + "epoch": 0.68, + "grad_norm": 4.821403285387405, + "learning_rate": 2.42434315284827e-06, + "loss": 0.7843, + "step": 8345 + }, + { + "epoch": 0.68, + "grad_norm": 2.576793763179655, + "learning_rate": 2.4232086171572993e-06, + "loss": 0.3895, + "step": 8346 + }, + { + "epoch": 0.68, + "grad_norm": 4.1108494663998565, + "learning_rate": 2.4220742620881906e-06, + "loss": 0.9387, + "step": 8347 + }, + { + "epoch": 0.68, + "grad_norm": 4.403348312465952, + "learning_rate": 2.420940087720457e-06, + "loss": 0.6194, + "step": 8348 + }, + { + "epoch": 0.68, + "grad_norm": 2.4689509241681535, + "learning_rate": 2.4198060941335987e-06, + "loss": 0.5313, + "step": 8349 + }, + { + "epoch": 0.68, + "grad_norm": 2.4975703045065374, + "learning_rate": 2.4186722814071043e-06, + "loss": 0.5038, + "step": 8350 + }, + { + "epoch": 0.68, + "grad_norm": 1.4524371048512315, + "learning_rate": 2.4175386496204513e-06, + "loss": 0.3769, + "step": 8351 + }, + { + "epoch": 0.68, + "grad_norm": 5.33869852814967, + "learning_rate": 2.416405198853098e-06, + "loss": 0.9853, + "step": 8352 + }, + { + "epoch": 0.68, + "grad_norm": 2.479560559718521, + "learning_rate": 2.415271929184496e-06, + "loss": 0.3689, + "step": 8353 + }, + { + "epoch": 0.68, + "grad_norm": 3.584706802703382, + "learning_rate": 2.4141388406940852e-06, + "loss": 0.5739, + "step": 8354 + }, + { + "epoch": 0.68, + "grad_norm": 3.314608982252052, + "learning_rate": 2.413005933461286e-06, + "loss": 0.6743, + "step": 8355 + }, + { + "epoch": 0.68, + "grad_norm": 3.9993676635570075, + "learning_rate": 2.4118732075655144e-06, + "loss": 0.7207, + "step": 8356 + }, + { + "epoch": 0.68, + "grad_norm": 5.742565487556265, + "learning_rate": 2.410740663086165e-06, + "loss": 0.722, + "step": 8357 + }, + { + "epoch": 0.68, + "grad_norm": 2.9454738545193697, + "learning_rate": 2.409608300102627e-06, + "loss": 0.6875, + "step": 8358 + }, + { + "epoch": 0.68, + "grad_norm": 1.9855888175733343, + "learning_rate": 2.4084761186942734e-06, + "loss": 0.3532, + "step": 8359 + }, + { + "epoch": 0.68, + "grad_norm": 3.011871695896224, + "learning_rate": 2.4073441189404657e-06, + "loss": 0.5849, + "step": 8360 + }, + { + "epoch": 0.68, + "grad_norm": 3.309430061951973, + "learning_rate": 2.4062123009205525e-06, + "loss": 0.5575, + "step": 8361 + }, + { + "epoch": 0.68, + "grad_norm": 2.5880549181074146, + "learning_rate": 2.40508066471387e-06, + "loss": 0.3133, + "step": 8362 + }, + { + "epoch": 0.68, + "grad_norm": 3.3269320174595007, + "learning_rate": 2.403949210399738e-06, + "loss": 0.7437, + "step": 8363 + }, + { + "epoch": 0.68, + "grad_norm": 4.732629529579054, + "learning_rate": 2.4028179380574684e-06, + "loss": 0.7449, + "step": 8364 + }, + { + "epoch": 0.68, + "grad_norm": 4.433683946189046, + "learning_rate": 2.4016868477663586e-06, + "loss": 0.3557, + "step": 8365 + }, + { + "epoch": 0.68, + "grad_norm": 4.778321708777118, + "learning_rate": 2.400555939605693e-06, + "loss": 0.8214, + "step": 8366 + }, + { + "epoch": 0.68, + "grad_norm": 3.4557327901594586, + "learning_rate": 2.3994252136547426e-06, + "loss": 0.6301, + "step": 8367 + }, + { + "epoch": 0.68, + "grad_norm": 4.922938761690179, + "learning_rate": 2.398294669992769e-06, + "loss": 1.1046, + "step": 8368 + }, + { + "epoch": 0.68, + "grad_norm": 3.402524175059253, + "learning_rate": 2.397164308699014e-06, + "loss": 0.6317, + "step": 8369 + }, + { + "epoch": 0.68, + "grad_norm": 3.8519980969718537, + "learning_rate": 2.396034129852716e-06, + "loss": 0.9654, + "step": 8370 + }, + { + "epoch": 0.68, + "grad_norm": 3.287095492080058, + "learning_rate": 2.3949041335330914e-06, + "loss": 0.5373, + "step": 8371 + }, + { + "epoch": 0.68, + "grad_norm": 2.4762121347365547, + "learning_rate": 2.3937743198193493e-06, + "loss": 0.3418, + "step": 8372 + }, + { + "epoch": 0.68, + "grad_norm": 4.290139824225999, + "learning_rate": 2.3926446887906867e-06, + "loss": 0.6132, + "step": 8373 + }, + { + "epoch": 0.68, + "grad_norm": 3.6184754356381212, + "learning_rate": 2.3915152405262824e-06, + "loss": 0.5967, + "step": 8374 + }, + { + "epoch": 0.68, + "grad_norm": 4.263744342832122, + "learning_rate": 2.390385975105308e-06, + "loss": 0.6789, + "step": 8375 + }, + { + "epoch": 0.68, + "grad_norm": 3.8193430996779605, + "learning_rate": 2.3892568926069186e-06, + "loss": 0.8379, + "step": 8376 + }, + { + "epoch": 0.68, + "grad_norm": 3.210336423817697, + "learning_rate": 2.3881279931102602e-06, + "loss": 0.5075, + "step": 8377 + }, + { + "epoch": 0.68, + "grad_norm": 4.762238365586828, + "learning_rate": 2.386999276694462e-06, + "loss": 1.466, + "step": 8378 + }, + { + "epoch": 0.68, + "grad_norm": 5.179692768554767, + "learning_rate": 2.3858707434386447e-06, + "loss": 1.0722, + "step": 8379 + }, + { + "epoch": 0.68, + "grad_norm": 1.3857918978716115, + "learning_rate": 2.3847423934219094e-06, + "loss": 0.2436, + "step": 8380 + }, + { + "epoch": 0.69, + "grad_norm": 4.555509662623433, + "learning_rate": 2.383614226723351e-06, + "loss": 0.7331, + "step": 8381 + }, + { + "epoch": 0.69, + "grad_norm": 3.9332803872256683, + "learning_rate": 2.382486243422049e-06, + "loss": 0.5333, + "step": 8382 + }, + { + "epoch": 0.69, + "grad_norm": 3.0154124204619657, + "learning_rate": 2.381358443597069e-06, + "loss": 0.5797, + "step": 8383 + }, + { + "epoch": 0.69, + "grad_norm": 3.465284294569563, + "learning_rate": 2.3802308273274682e-06, + "loss": 0.7175, + "step": 8384 + }, + { + "epoch": 0.69, + "grad_norm": 3.100309840782991, + "learning_rate": 2.379103394692284e-06, + "loss": 0.4776, + "step": 8385 + }, + { + "epoch": 0.69, + "grad_norm": 3.2263436218775166, + "learning_rate": 2.3779761457705443e-06, + "loss": 0.635, + "step": 8386 + }, + { + "epoch": 0.69, + "grad_norm": 2.996779188540018, + "learning_rate": 2.376849080641268e-06, + "loss": 0.578, + "step": 8387 + }, + { + "epoch": 0.69, + "grad_norm": 2.8123089400031707, + "learning_rate": 2.375722199383454e-06, + "loss": 0.5775, + "step": 8388 + }, + { + "epoch": 0.69, + "grad_norm": 2.928601130154547, + "learning_rate": 2.374595502076092e-06, + "loss": 0.6623, + "step": 8389 + }, + { + "epoch": 0.69, + "grad_norm": 2.828229489008046, + "learning_rate": 2.37346898879816e-06, + "loss": 0.3584, + "step": 8390 + }, + { + "epoch": 0.69, + "grad_norm": 6.349399585739406, + "learning_rate": 2.372342659628623e-06, + "loss": 1.202, + "step": 8391 + }, + { + "epoch": 0.69, + "grad_norm": 4.303782777829738, + "learning_rate": 2.371216514646428e-06, + "loss": 0.6508, + "step": 8392 + }, + { + "epoch": 0.69, + "grad_norm": 3.7366792714066372, + "learning_rate": 2.3700905539305147e-06, + "loss": 0.6159, + "step": 8393 + }, + { + "epoch": 0.69, + "grad_norm": 4.456788330418907, + "learning_rate": 2.3689647775598084e-06, + "loss": 0.9043, + "step": 8394 + }, + { + "epoch": 0.69, + "grad_norm": 4.601094095713435, + "learning_rate": 2.3678391856132203e-06, + "loss": 0.8011, + "step": 8395 + }, + { + "epoch": 0.69, + "grad_norm": 5.441912768334063, + "learning_rate": 2.366713778169653e-06, + "loss": 1.1761, + "step": 8396 + }, + { + "epoch": 0.69, + "grad_norm": 5.56436476136244, + "learning_rate": 2.365588555307987e-06, + "loss": 1.0629, + "step": 8397 + }, + { + "epoch": 0.69, + "grad_norm": 4.026481870763221, + "learning_rate": 2.364463517107099e-06, + "loss": 0.7102, + "step": 8398 + }, + { + "epoch": 0.69, + "grad_norm": 3.1710057109812704, + "learning_rate": 2.363338663645848e-06, + "loss": 0.5157, + "step": 8399 + }, + { + "epoch": 0.69, + "grad_norm": 2.685673201066071, + "learning_rate": 2.362213995003082e-06, + "loss": 0.6991, + "step": 8400 + }, + { + "epoch": 0.69, + "grad_norm": 3.694032386329679, + "learning_rate": 2.3610895112576372e-06, + "loss": 0.7543, + "step": 8401 + }, + { + "epoch": 0.69, + "grad_norm": 3.799422054752237, + "learning_rate": 2.359965212488331e-06, + "loss": 0.7221, + "step": 8402 + }, + { + "epoch": 0.69, + "grad_norm": 3.511747862169461, + "learning_rate": 2.3588410987739763e-06, + "loss": 0.6048, + "step": 8403 + }, + { + "epoch": 0.69, + "grad_norm": 2.7636626894282883, + "learning_rate": 2.3577171701933638e-06, + "loss": 0.517, + "step": 8404 + }, + { + "epoch": 0.69, + "grad_norm": 3.0276506358452417, + "learning_rate": 2.3565934268252787e-06, + "loss": 0.5485, + "step": 8405 + }, + { + "epoch": 0.69, + "grad_norm": 4.492551641659161, + "learning_rate": 2.35546986874849e-06, + "loss": 1.0428, + "step": 8406 + }, + { + "epoch": 0.69, + "grad_norm": 2.7812895303791634, + "learning_rate": 2.354346496041755e-06, + "loss": 0.5173, + "step": 8407 + }, + { + "epoch": 0.69, + "grad_norm": 5.335214488177956, + "learning_rate": 2.353223308783818e-06, + "loss": 1.0085, + "step": 8408 + }, + { + "epoch": 0.69, + "grad_norm": 4.818332111917985, + "learning_rate": 2.3521003070534065e-06, + "loss": 0.9629, + "step": 8409 + }, + { + "epoch": 0.69, + "grad_norm": 4.515678266661508, + "learning_rate": 2.35097749092924e-06, + "loss": 0.9099, + "step": 8410 + }, + { + "epoch": 0.69, + "grad_norm": 5.35139416211975, + "learning_rate": 2.349854860490023e-06, + "loss": 1.1736, + "step": 8411 + }, + { + "epoch": 0.69, + "grad_norm": 3.887872785684858, + "learning_rate": 2.3487324158144463e-06, + "loss": 0.8037, + "step": 8412 + }, + { + "epoch": 0.69, + "grad_norm": 4.827747082554949, + "learning_rate": 2.347610156981191e-06, + "loss": 1.2159, + "step": 8413 + }, + { + "epoch": 0.69, + "grad_norm": 4.086398257738493, + "learning_rate": 2.3464880840689187e-06, + "loss": 0.692, + "step": 8414 + }, + { + "epoch": 0.69, + "grad_norm": 4.767076008447161, + "learning_rate": 2.345366197156283e-06, + "loss": 0.7561, + "step": 8415 + }, + { + "epoch": 0.69, + "grad_norm": 4.47313845423047, + "learning_rate": 2.3442444963219246e-06, + "loss": 0.6902, + "step": 8416 + }, + { + "epoch": 0.69, + "grad_norm": 4.653822729716385, + "learning_rate": 2.3431229816444704e-06, + "loss": 1.0065, + "step": 8417 + }, + { + "epoch": 0.69, + "grad_norm": 5.483539108307993, + "learning_rate": 2.3420016532025312e-06, + "loss": 1.0273, + "step": 8418 + }, + { + "epoch": 0.69, + "grad_norm": 5.6041543053271745, + "learning_rate": 2.34088051107471e-06, + "loss": 0.7899, + "step": 8419 + }, + { + "epoch": 0.69, + "grad_norm": 3.3378761203948524, + "learning_rate": 2.3397595553395903e-06, + "loss": 0.5621, + "step": 8420 + }, + { + "epoch": 0.69, + "grad_norm": 3.6709496478302652, + "learning_rate": 2.3386387860757487e-06, + "loss": 0.4824, + "step": 8421 + }, + { + "epoch": 0.69, + "grad_norm": 3.699658076987211, + "learning_rate": 2.337518203361746e-06, + "loss": 0.8225, + "step": 8422 + }, + { + "epoch": 0.69, + "grad_norm": 4.009785215675382, + "learning_rate": 2.33639780727613e-06, + "loss": 1.0486, + "step": 8423 + }, + { + "epoch": 0.69, + "grad_norm": 5.150383172250053, + "learning_rate": 2.3352775978974355e-06, + "loss": 0.9872, + "step": 8424 + }, + { + "epoch": 0.69, + "grad_norm": 2.66804436038213, + "learning_rate": 2.334157575304186e-06, + "loss": 0.5993, + "step": 8425 + }, + { + "epoch": 0.69, + "grad_norm": 3.642409328570551, + "learning_rate": 2.3330377395748878e-06, + "loss": 0.5431, + "step": 8426 + }, + { + "epoch": 0.69, + "grad_norm": 4.604401620902554, + "learning_rate": 2.331918090788037e-06, + "loss": 0.9804, + "step": 8427 + }, + { + "epoch": 0.69, + "grad_norm": 3.9247541734623117, + "learning_rate": 2.3307986290221162e-06, + "loss": 0.8486, + "step": 8428 + }, + { + "epoch": 0.69, + "grad_norm": 3.126764203684734, + "learning_rate": 2.329679354355595e-06, + "loss": 0.5038, + "step": 8429 + }, + { + "epoch": 0.69, + "grad_norm": 4.196092083621251, + "learning_rate": 2.32856026686693e-06, + "loss": 0.882, + "step": 8430 + }, + { + "epoch": 0.69, + "grad_norm": 2.9754652441676246, + "learning_rate": 2.3274413666345665e-06, + "loss": 0.2893, + "step": 8431 + }, + { + "epoch": 0.69, + "grad_norm": 3.1589207779363084, + "learning_rate": 2.32632265373693e-06, + "loss": 0.4983, + "step": 8432 + }, + { + "epoch": 0.69, + "grad_norm": 3.651300803151679, + "learning_rate": 2.325204128252441e-06, + "loss": 0.8112, + "step": 8433 + }, + { + "epoch": 0.69, + "grad_norm": 3.402444537328381, + "learning_rate": 2.3240857902595002e-06, + "loss": 0.4586, + "step": 8434 + }, + { + "epoch": 0.69, + "grad_norm": 4.410746347629286, + "learning_rate": 2.3229676398365e-06, + "loss": 1.0668, + "step": 8435 + }, + { + "epoch": 0.69, + "grad_norm": 4.030115535323992, + "learning_rate": 2.32184967706182e-06, + "loss": 0.9424, + "step": 8436 + }, + { + "epoch": 0.69, + "grad_norm": 3.7400689708001287, + "learning_rate": 2.3207319020138197e-06, + "loss": 0.5923, + "step": 8437 + }, + { + "epoch": 0.69, + "grad_norm": 3.384599097966671, + "learning_rate": 2.319614314770853e-06, + "loss": 0.8099, + "step": 8438 + }, + { + "epoch": 0.69, + "grad_norm": 4.172882423162879, + "learning_rate": 2.3184969154112585e-06, + "loss": 0.4573, + "step": 8439 + }, + { + "epoch": 0.69, + "grad_norm": 2.6910280218831772, + "learning_rate": 2.3173797040133595e-06, + "loss": 0.4993, + "step": 8440 + }, + { + "epoch": 0.69, + "grad_norm": 3.786782603564133, + "learning_rate": 2.3162626806554687e-06, + "loss": 0.746, + "step": 8441 + }, + { + "epoch": 0.69, + "grad_norm": 4.511630429143293, + "learning_rate": 2.3151458454158867e-06, + "loss": 0.656, + "step": 8442 + }, + { + "epoch": 0.69, + "grad_norm": 4.738657960078739, + "learning_rate": 2.3140291983728936e-06, + "loss": 0.9872, + "step": 8443 + }, + { + "epoch": 0.69, + "grad_norm": 2.782023982418744, + "learning_rate": 2.312912739604765e-06, + "loss": 0.3056, + "step": 8444 + }, + { + "epoch": 0.69, + "grad_norm": 2.234880690556221, + "learning_rate": 2.311796469189759e-06, + "loss": 0.469, + "step": 8445 + }, + { + "epoch": 0.69, + "grad_norm": 2.070585301608283, + "learning_rate": 2.310680387206121e-06, + "loss": 0.3329, + "step": 8446 + }, + { + "epoch": 0.69, + "grad_norm": 5.966473252835259, + "learning_rate": 2.309564493732086e-06, + "loss": 1.1336, + "step": 8447 + }, + { + "epoch": 0.69, + "grad_norm": 2.5164535722361734, + "learning_rate": 2.3084487888458697e-06, + "loss": 0.5251, + "step": 8448 + }, + { + "epoch": 0.69, + "grad_norm": 4.947278176717033, + "learning_rate": 2.3073332726256807e-06, + "loss": 1.2562, + "step": 8449 + }, + { + "epoch": 0.69, + "grad_norm": 4.970368894447125, + "learning_rate": 2.3062179451497095e-06, + "loss": 0.9964, + "step": 8450 + }, + { + "epoch": 0.69, + "grad_norm": 4.754486466883372, + "learning_rate": 2.305102806496137e-06, + "loss": 1.2073, + "step": 8451 + }, + { + "epoch": 0.69, + "grad_norm": 2.720551063877111, + "learning_rate": 2.303987856743129e-06, + "loss": 0.5452, + "step": 8452 + }, + { + "epoch": 0.69, + "grad_norm": 4.287684194272693, + "learning_rate": 2.3028730959688417e-06, + "loss": 0.7301, + "step": 8453 + }, + { + "epoch": 0.69, + "grad_norm": 5.782316811576312, + "learning_rate": 2.30175852425141e-06, + "loss": 0.9407, + "step": 8454 + }, + { + "epoch": 0.69, + "grad_norm": 2.008913663948316, + "learning_rate": 2.3006441416689633e-06, + "loss": 0.3661, + "step": 8455 + }, + { + "epoch": 0.69, + "grad_norm": 3.318025086232537, + "learning_rate": 2.2995299482996146e-06, + "loss": 0.6932, + "step": 8456 + }, + { + "epoch": 0.69, + "grad_norm": 3.502260191231009, + "learning_rate": 2.2984159442214637e-06, + "loss": 0.625, + "step": 8457 + }, + { + "epoch": 0.69, + "grad_norm": 3.8011882692816124, + "learning_rate": 2.2973021295125985e-06, + "loss": 0.607, + "step": 8458 + }, + { + "epoch": 0.69, + "grad_norm": 4.626678714352333, + "learning_rate": 2.2961885042510927e-06, + "loss": 1.0223, + "step": 8459 + }, + { + "epoch": 0.69, + "grad_norm": 4.755927228048318, + "learning_rate": 2.2950750685150045e-06, + "loss": 0.8516, + "step": 8460 + }, + { + "epoch": 0.69, + "grad_norm": 3.896744191915756, + "learning_rate": 2.293961822382382e-06, + "loss": 0.6381, + "step": 8461 + }, + { + "epoch": 0.69, + "grad_norm": 3.604139263103291, + "learning_rate": 2.2928487659312594e-06, + "loss": 0.5576, + "step": 8462 + }, + { + "epoch": 0.69, + "grad_norm": 4.739531463436227, + "learning_rate": 2.291735899239658e-06, + "loss": 0.9477, + "step": 8463 + }, + { + "epoch": 0.69, + "grad_norm": 4.074405910792501, + "learning_rate": 2.2906232223855824e-06, + "loss": 0.7331, + "step": 8464 + }, + { + "epoch": 0.69, + "grad_norm": 3.6067943074762, + "learning_rate": 2.289510735447029e-06, + "loss": 0.8256, + "step": 8465 + }, + { + "epoch": 0.69, + "grad_norm": 5.256104320093683, + "learning_rate": 2.288398438501976e-06, + "loss": 1.0537, + "step": 8466 + }, + { + "epoch": 0.69, + "grad_norm": 2.9481643168142435, + "learning_rate": 2.2872863316283906e-06, + "loss": 0.5068, + "step": 8467 + }, + { + "epoch": 0.69, + "grad_norm": 3.7919291921357177, + "learning_rate": 2.2861744149042275e-06, + "loss": 0.7032, + "step": 8468 + }, + { + "epoch": 0.69, + "grad_norm": 2.4314916854245245, + "learning_rate": 2.285062688407428e-06, + "loss": 0.4225, + "step": 8469 + }, + { + "epoch": 0.69, + "grad_norm": 2.039513891459796, + "learning_rate": 2.283951152215918e-06, + "loss": 0.3852, + "step": 8470 + }, + { + "epoch": 0.69, + "grad_norm": 2.839655352129458, + "learning_rate": 2.282839806407614e-06, + "loss": 0.5821, + "step": 8471 + }, + { + "epoch": 0.69, + "grad_norm": 4.067054657714496, + "learning_rate": 2.2817286510604125e-06, + "loss": 0.7038, + "step": 8472 + }, + { + "epoch": 0.69, + "grad_norm": 3.5436743447807757, + "learning_rate": 2.280617686252203e-06, + "loss": 0.7465, + "step": 8473 + }, + { + "epoch": 0.69, + "grad_norm": 2.627480036880374, + "learning_rate": 2.279506912060859e-06, + "loss": 0.5592, + "step": 8474 + }, + { + "epoch": 0.69, + "grad_norm": 3.5964562108123093, + "learning_rate": 2.2783963285642403e-06, + "loss": 0.6405, + "step": 8475 + }, + { + "epoch": 0.69, + "grad_norm": 4.097271718922358, + "learning_rate": 2.2772859358401962e-06, + "loss": 1.1977, + "step": 8476 + }, + { + "epoch": 0.69, + "grad_norm": 5.390702689594146, + "learning_rate": 2.2761757339665576e-06, + "loss": 0.6897, + "step": 8477 + }, + { + "epoch": 0.69, + "grad_norm": 3.4958214177554683, + "learning_rate": 2.2750657230211452e-06, + "loss": 0.8866, + "step": 8478 + }, + { + "epoch": 0.69, + "grad_norm": 4.150754538967326, + "learning_rate": 2.2739559030817687e-06, + "loss": 0.811, + "step": 8479 + }, + { + "epoch": 0.69, + "grad_norm": 4.187699471097217, + "learning_rate": 2.272846274226218e-06, + "loss": 1.2004, + "step": 8480 + }, + { + "epoch": 0.69, + "grad_norm": 2.077357679192823, + "learning_rate": 2.2717368365322747e-06, + "loss": 0.3078, + "step": 8481 + }, + { + "epoch": 0.69, + "grad_norm": 5.514300456812758, + "learning_rate": 2.2706275900777075e-06, + "loss": 0.936, + "step": 8482 + }, + { + "epoch": 0.69, + "grad_norm": 4.7356245830003685, + "learning_rate": 2.2695185349402664e-06, + "loss": 0.9696, + "step": 8483 + }, + { + "epoch": 0.69, + "grad_norm": 4.098094550770222, + "learning_rate": 2.2684096711976926e-06, + "loss": 0.8091, + "step": 8484 + }, + { + "epoch": 0.69, + "grad_norm": 4.791551196522658, + "learning_rate": 2.2673009989277136e-06, + "loss": 0.6636, + "step": 8485 + }, + { + "epoch": 0.69, + "grad_norm": 4.060922227655955, + "learning_rate": 2.266192518208041e-06, + "loss": 0.5593, + "step": 8486 + }, + { + "epoch": 0.69, + "grad_norm": 3.8403922186175374, + "learning_rate": 2.2650842291163755e-06, + "loss": 0.7785, + "step": 8487 + }, + { + "epoch": 0.69, + "grad_norm": 2.6763855490718926, + "learning_rate": 2.2639761317304047e-06, + "loss": 0.4132, + "step": 8488 + }, + { + "epoch": 0.69, + "grad_norm": 4.600935939884642, + "learning_rate": 2.262868226127799e-06, + "loss": 1.0364, + "step": 8489 + }, + { + "epoch": 0.69, + "grad_norm": 3.982098126888399, + "learning_rate": 2.261760512386218e-06, + "loss": 0.9001, + "step": 8490 + }, + { + "epoch": 0.69, + "grad_norm": 2.4049285672036613, + "learning_rate": 2.260652990583308e-06, + "loss": 0.4007, + "step": 8491 + }, + { + "epoch": 0.69, + "grad_norm": 1.9316133492785437, + "learning_rate": 2.259545660796702e-06, + "loss": 0.4114, + "step": 8492 + }, + { + "epoch": 0.69, + "grad_norm": 3.183810875278495, + "learning_rate": 2.2584385231040202e-06, + "loss": 0.5691, + "step": 8493 + }, + { + "epoch": 0.69, + "grad_norm": 4.435946658191863, + "learning_rate": 2.2573315775828655e-06, + "loss": 0.9812, + "step": 8494 + }, + { + "epoch": 0.69, + "grad_norm": 4.831926589661101, + "learning_rate": 2.2562248243108305e-06, + "loss": 0.8021, + "step": 8495 + }, + { + "epoch": 0.69, + "grad_norm": 3.4545849790556784, + "learning_rate": 2.255118263365496e-06, + "loss": 0.7481, + "step": 8496 + }, + { + "epoch": 0.69, + "grad_norm": 3.1328991820797314, + "learning_rate": 2.254011894824424e-06, + "loss": 0.5929, + "step": 8497 + }, + { + "epoch": 0.69, + "grad_norm": 3.2939912257282327, + "learning_rate": 2.2529057187651675e-06, + "loss": 0.7333, + "step": 8498 + }, + { + "epoch": 0.69, + "grad_norm": 4.835804670368205, + "learning_rate": 2.2517997352652663e-06, + "loss": 1.0314, + "step": 8499 + }, + { + "epoch": 0.69, + "grad_norm": 3.942105641728355, + "learning_rate": 2.2506939444022423e-06, + "loss": 1.1971, + "step": 8500 + }, + { + "epoch": 0.69, + "grad_norm": 4.051373609684334, + "learning_rate": 2.249588346253607e-06, + "loss": 0.9748, + "step": 8501 + }, + { + "epoch": 0.69, + "grad_norm": 3.5037761957391838, + "learning_rate": 2.2484829408968593e-06, + "loss": 0.3577, + "step": 8502 + }, + { + "epoch": 0.7, + "grad_norm": 4.378439313967647, + "learning_rate": 2.247377728409483e-06, + "loss": 0.9042, + "step": 8503 + }, + { + "epoch": 0.7, + "grad_norm": 4.074400812627169, + "learning_rate": 2.246272708868948e-06, + "loss": 0.647, + "step": 8504 + }, + { + "epoch": 0.7, + "grad_norm": 2.423967160613771, + "learning_rate": 2.245167882352714e-06, + "loss": 0.513, + "step": 8505 + }, + { + "epoch": 0.7, + "grad_norm": 3.1295114828424264, + "learning_rate": 2.24406324893822e-06, + "loss": 0.6978, + "step": 8506 + }, + { + "epoch": 0.7, + "grad_norm": 4.43587445844676, + "learning_rate": 2.2429588087028993e-06, + "loss": 0.6342, + "step": 8507 + }, + { + "epoch": 0.7, + "grad_norm": 3.0636716675478004, + "learning_rate": 2.2418545617241665e-06, + "loss": 0.5572, + "step": 8508 + }, + { + "epoch": 0.7, + "grad_norm": 4.740701769536277, + "learning_rate": 2.2407505080794257e-06, + "loss": 1.0533, + "step": 8509 + }, + { + "epoch": 0.7, + "grad_norm": 3.052240351497302, + "learning_rate": 2.239646647846068e-06, + "loss": 0.6739, + "step": 8510 + }, + { + "epoch": 0.7, + "grad_norm": 3.4173514237523195, + "learning_rate": 2.2385429811014654e-06, + "loss": 0.7716, + "step": 8511 + }, + { + "epoch": 0.7, + "grad_norm": 3.8918025523546453, + "learning_rate": 2.2374395079229837e-06, + "loss": 0.7011, + "step": 8512 + }, + { + "epoch": 0.7, + "grad_norm": 3.4332955544209383, + "learning_rate": 2.236336228387968e-06, + "loss": 0.5024, + "step": 8513 + }, + { + "epoch": 0.7, + "grad_norm": 5.512813254636162, + "learning_rate": 2.235233142573755e-06, + "loss": 0.8418, + "step": 8514 + }, + { + "epoch": 0.7, + "grad_norm": 3.401873909504076, + "learning_rate": 2.2341302505576663e-06, + "loss": 0.5164, + "step": 8515 + }, + { + "epoch": 0.7, + "grad_norm": 2.8503631153374727, + "learning_rate": 2.233027552417012e-06, + "loss": 0.3192, + "step": 8516 + }, + { + "epoch": 0.7, + "grad_norm": 3.240333467526782, + "learning_rate": 2.2319250482290826e-06, + "loss": 0.7147, + "step": 8517 + }, + { + "epoch": 0.7, + "grad_norm": 4.160602406918777, + "learning_rate": 2.2308227380711605e-06, + "loss": 0.9787, + "step": 8518 + }, + { + "epoch": 0.7, + "grad_norm": 5.500392377011563, + "learning_rate": 2.229720622020513e-06, + "loss": 1.2103, + "step": 8519 + }, + { + "epoch": 0.7, + "grad_norm": 2.6966145907371657, + "learning_rate": 2.2286187001543936e-06, + "loss": 0.6251, + "step": 8520 + }, + { + "epoch": 0.7, + "grad_norm": 3.258155138649007, + "learning_rate": 2.2275169725500424e-06, + "loss": 0.471, + "step": 8521 + }, + { + "epoch": 0.7, + "grad_norm": 4.217948330809415, + "learning_rate": 2.226415439284687e-06, + "loss": 1.0303, + "step": 8522 + }, + { + "epoch": 0.7, + "grad_norm": 5.27214185715932, + "learning_rate": 2.2253141004355367e-06, + "loss": 1.1734, + "step": 8523 + }, + { + "epoch": 0.7, + "grad_norm": 3.254554209891187, + "learning_rate": 2.2242129560797933e-06, + "loss": 0.5383, + "step": 8524 + }, + { + "epoch": 0.7, + "grad_norm": 3.3820214523269243, + "learning_rate": 2.2231120062946405e-06, + "loss": 0.5971, + "step": 8525 + }, + { + "epoch": 0.7, + "grad_norm": 3.635763140895818, + "learning_rate": 2.2220112511572533e-06, + "loss": 0.6923, + "step": 8526 + }, + { + "epoch": 0.7, + "grad_norm": 5.1862781005744125, + "learning_rate": 2.2209106907447853e-06, + "loss": 0.8966, + "step": 8527 + }, + { + "epoch": 0.7, + "grad_norm": 5.845679683065618, + "learning_rate": 2.2198103251343856e-06, + "loss": 1.4883, + "step": 8528 + }, + { + "epoch": 0.7, + "grad_norm": 2.715864144147195, + "learning_rate": 2.2187101544031807e-06, + "loss": 0.3922, + "step": 8529 + }, + { + "epoch": 0.7, + "grad_norm": 3.7776553147772858, + "learning_rate": 2.2176101786282893e-06, + "loss": 0.994, + "step": 8530 + }, + { + "epoch": 0.7, + "grad_norm": 4.880908010643266, + "learning_rate": 2.216510397886816e-06, + "loss": 0.57, + "step": 8531 + }, + { + "epoch": 0.7, + "grad_norm": 3.80791939885431, + "learning_rate": 2.21541081225585e-06, + "loss": 0.5118, + "step": 8532 + }, + { + "epoch": 0.7, + "grad_norm": 4.443131290143332, + "learning_rate": 2.214311421812467e-06, + "loss": 1.2995, + "step": 8533 + }, + { + "epoch": 0.7, + "grad_norm": 4.320553504408982, + "learning_rate": 2.2132122266337326e-06, + "loss": 1.0689, + "step": 8534 + }, + { + "epoch": 0.7, + "grad_norm": 4.136281392446387, + "learning_rate": 2.2121132267966907e-06, + "loss": 0.6286, + "step": 8535 + }, + { + "epoch": 0.7, + "grad_norm": 4.513037998640322, + "learning_rate": 2.211014422378378e-06, + "loss": 0.8584, + "step": 8536 + }, + { + "epoch": 0.7, + "grad_norm": 4.308923289937153, + "learning_rate": 2.2099158134558175e-06, + "loss": 0.9038, + "step": 8537 + }, + { + "epoch": 0.7, + "grad_norm": 2.8295110576552265, + "learning_rate": 2.2088174001060154e-06, + "loss": 0.6404, + "step": 8538 + }, + { + "epoch": 0.7, + "grad_norm": 3.0955056702896093, + "learning_rate": 2.2077191824059685e-06, + "loss": 0.2939, + "step": 8539 + }, + { + "epoch": 0.7, + "grad_norm": 4.883527411650362, + "learning_rate": 2.2066211604326533e-06, + "loss": 1.0983, + "step": 8540 + }, + { + "epoch": 0.7, + "grad_norm": 3.306017936599753, + "learning_rate": 2.2055233342630372e-06, + "loss": 0.7268, + "step": 8541 + }, + { + "epoch": 0.7, + "grad_norm": 4.035332985908827, + "learning_rate": 2.204425703974076e-06, + "loss": 0.5606, + "step": 8542 + }, + { + "epoch": 0.7, + "grad_norm": 3.3892802423350403, + "learning_rate": 2.2033282696427046e-06, + "loss": 0.5014, + "step": 8543 + }, + { + "epoch": 0.7, + "grad_norm": 5.318912314596869, + "learning_rate": 2.2022310313458506e-06, + "loss": 0.9304, + "step": 8544 + }, + { + "epoch": 0.7, + "grad_norm": 4.10509095348596, + "learning_rate": 2.201133989160427e-06, + "loss": 0.8174, + "step": 8545 + }, + { + "epoch": 0.7, + "grad_norm": 3.525349725727181, + "learning_rate": 2.200037143163328e-06, + "loss": 0.6178, + "step": 8546 + }, + { + "epoch": 0.7, + "grad_norm": 3.3115879393343763, + "learning_rate": 2.19894049343144e-06, + "loss": 0.7725, + "step": 8547 + }, + { + "epoch": 0.7, + "grad_norm": 2.3625063552278305, + "learning_rate": 2.1978440400416334e-06, + "loss": 0.5882, + "step": 8548 + }, + { + "epoch": 0.7, + "grad_norm": 3.257631859288316, + "learning_rate": 2.1967477830707644e-06, + "loss": 0.4958, + "step": 8549 + }, + { + "epoch": 0.7, + "grad_norm": 3.1902414133312607, + "learning_rate": 2.195651722595676e-06, + "loss": 0.4216, + "step": 8550 + }, + { + "epoch": 0.7, + "grad_norm": 3.37316639130754, + "learning_rate": 2.1945558586931994e-06, + "loss": 0.8959, + "step": 8551 + }, + { + "epoch": 0.7, + "grad_norm": 2.787099690497614, + "learning_rate": 2.1934601914401454e-06, + "loss": 0.4605, + "step": 8552 + }, + { + "epoch": 0.7, + "grad_norm": 4.080559088720089, + "learning_rate": 2.1923647209133182e-06, + "loss": 1.084, + "step": 8553 + }, + { + "epoch": 0.7, + "grad_norm": 4.811972776542819, + "learning_rate": 2.1912694471895053e-06, + "loss": 1.3616, + "step": 8554 + }, + { + "epoch": 0.7, + "grad_norm": 2.9511910177455705, + "learning_rate": 2.1901743703454804e-06, + "loss": 0.7359, + "step": 8555 + }, + { + "epoch": 0.7, + "grad_norm": 2.457071580024648, + "learning_rate": 2.1890794904580054e-06, + "loss": 0.2913, + "step": 8556 + }, + { + "epoch": 0.7, + "grad_norm": 6.205523943575567, + "learning_rate": 2.187984807603823e-06, + "loss": 0.9544, + "step": 8557 + }, + { + "epoch": 0.7, + "grad_norm": 2.9482181278145814, + "learning_rate": 2.18689032185967e-06, + "loss": 0.4539, + "step": 8558 + }, + { + "epoch": 0.7, + "grad_norm": 4.634360609800492, + "learning_rate": 2.1857960333022605e-06, + "loss": 0.7313, + "step": 8559 + }, + { + "epoch": 0.7, + "grad_norm": 3.2260508333855866, + "learning_rate": 2.1847019420083014e-06, + "loss": 0.7196, + "step": 8560 + }, + { + "epoch": 0.7, + "grad_norm": 4.2922376954795105, + "learning_rate": 2.1836080480544847e-06, + "loss": 1.0641, + "step": 8561 + }, + { + "epoch": 0.7, + "grad_norm": 4.930928441831621, + "learning_rate": 2.182514351517488e-06, + "loss": 0.9905, + "step": 8562 + }, + { + "epoch": 0.7, + "grad_norm": 4.876421767273682, + "learning_rate": 2.1814208524739723e-06, + "loss": 1.2121, + "step": 8563 + }, + { + "epoch": 0.7, + "grad_norm": 5.4454066148723985, + "learning_rate": 2.1803275510005876e-06, + "loss": 0.9244, + "step": 8564 + }, + { + "epoch": 0.7, + "grad_norm": 4.20130506058506, + "learning_rate": 2.1792344471739708e-06, + "loss": 0.4858, + "step": 8565 + }, + { + "epoch": 0.7, + "grad_norm": 2.7595268897264678, + "learning_rate": 2.178141541070743e-06, + "loss": 0.5205, + "step": 8566 + }, + { + "epoch": 0.7, + "grad_norm": 3.694964593190989, + "learning_rate": 2.177048832767513e-06, + "loss": 0.8947, + "step": 8567 + }, + { + "epoch": 0.7, + "grad_norm": 5.112295196327415, + "learning_rate": 2.1759563223408754e-06, + "loss": 1.0051, + "step": 8568 + }, + { + "epoch": 0.7, + "grad_norm": 3.065277813148473, + "learning_rate": 2.174864009867408e-06, + "loss": 0.3969, + "step": 8569 + }, + { + "epoch": 0.7, + "grad_norm": 4.397957121363607, + "learning_rate": 2.173771895423678e-06, + "loss": 1.0383, + "step": 8570 + }, + { + "epoch": 0.7, + "grad_norm": 3.3720889266708243, + "learning_rate": 2.1726799790862384e-06, + "loss": 0.5068, + "step": 8571 + }, + { + "epoch": 0.7, + "grad_norm": 1.143698773057102, + "learning_rate": 2.17158826093163e-06, + "loss": 0.1789, + "step": 8572 + }, + { + "epoch": 0.7, + "grad_norm": 4.673864608620568, + "learning_rate": 2.170496741036373e-06, + "loss": 1.0408, + "step": 8573 + }, + { + "epoch": 0.7, + "grad_norm": 3.5684299500075745, + "learning_rate": 2.1694054194769827e-06, + "loss": 0.3384, + "step": 8574 + }, + { + "epoch": 0.7, + "grad_norm": 5.626045869543701, + "learning_rate": 2.1683142963299513e-06, + "loss": 0.9893, + "step": 8575 + }, + { + "epoch": 0.7, + "grad_norm": 2.8760043551483974, + "learning_rate": 2.1672233716717644e-06, + "loss": 0.7937, + "step": 8576 + }, + { + "epoch": 0.7, + "grad_norm": 4.966860606458185, + "learning_rate": 2.166132645578891e-06, + "loss": 1.2491, + "step": 8577 + }, + { + "epoch": 0.7, + "grad_norm": 3.7472715777648595, + "learning_rate": 2.165042118127786e-06, + "loss": 0.5268, + "step": 8578 + }, + { + "epoch": 0.7, + "grad_norm": 5.278882749479071, + "learning_rate": 2.1639517893948926e-06, + "loss": 1.8395, + "step": 8579 + }, + { + "epoch": 0.7, + "grad_norm": 3.4903472991909483, + "learning_rate": 2.162861659456634e-06, + "loss": 0.6644, + "step": 8580 + }, + { + "epoch": 0.7, + "grad_norm": 2.1172260213709335, + "learning_rate": 2.161771728389427e-06, + "loss": 0.3231, + "step": 8581 + }, + { + "epoch": 0.7, + "grad_norm": 4.556372738087387, + "learning_rate": 2.1606819962696684e-06, + "loss": 0.7001, + "step": 8582 + }, + { + "epoch": 0.7, + "grad_norm": 3.651860880286598, + "learning_rate": 2.159592463173746e-06, + "loss": 0.7687, + "step": 8583 + }, + { + "epoch": 0.7, + "grad_norm": 3.119051509401893, + "learning_rate": 2.1585031291780302e-06, + "loss": 0.4643, + "step": 8584 + }, + { + "epoch": 0.7, + "grad_norm": 4.4095767126412575, + "learning_rate": 2.1574139943588807e-06, + "loss": 0.7777, + "step": 8585 + }, + { + "epoch": 0.7, + "grad_norm": 2.109160471129687, + "learning_rate": 2.156325058792637e-06, + "loss": 0.4316, + "step": 8586 + }, + { + "epoch": 0.7, + "grad_norm": 5.725993390309628, + "learning_rate": 2.1552363225556316e-06, + "loss": 0.9986, + "step": 8587 + }, + { + "epoch": 0.7, + "grad_norm": 3.528936179212356, + "learning_rate": 2.154147785724181e-06, + "loss": 0.6542, + "step": 8588 + }, + { + "epoch": 0.7, + "grad_norm": 2.940343252172774, + "learning_rate": 2.153059448374584e-06, + "loss": 0.5965, + "step": 8589 + }, + { + "epoch": 0.7, + "grad_norm": 2.614609999316483, + "learning_rate": 2.151971310583129e-06, + "loss": 0.5886, + "step": 8590 + }, + { + "epoch": 0.7, + "grad_norm": 3.928864239744701, + "learning_rate": 2.150883372426093e-06, + "loss": 0.7739, + "step": 8591 + }, + { + "epoch": 0.7, + "grad_norm": 3.89137734702575, + "learning_rate": 2.149795633979731e-06, + "loss": 0.7796, + "step": 8592 + }, + { + "epoch": 0.7, + "grad_norm": 4.166206859131393, + "learning_rate": 2.1487080953202912e-06, + "loss": 0.6869, + "step": 8593 + }, + { + "epoch": 0.7, + "grad_norm": 3.716597618134034, + "learning_rate": 2.147620756524004e-06, + "loss": 0.5369, + "step": 8594 + }, + { + "epoch": 0.7, + "grad_norm": 3.704012501500497, + "learning_rate": 2.1465336176670893e-06, + "loss": 0.804, + "step": 8595 + }, + { + "epoch": 0.7, + "grad_norm": 3.8171949102624865, + "learning_rate": 2.145446678825751e-06, + "loss": 0.8484, + "step": 8596 + }, + { + "epoch": 0.7, + "grad_norm": 4.5497017487230424, + "learning_rate": 2.144359940076176e-06, + "loss": 0.7923, + "step": 8597 + }, + { + "epoch": 0.7, + "grad_norm": 4.219090379332379, + "learning_rate": 2.1432734014945417e-06, + "loss": 0.6083, + "step": 8598 + }, + { + "epoch": 0.7, + "grad_norm": 4.0059478905592725, + "learning_rate": 2.1421870631570083e-06, + "loss": 0.8369, + "step": 8599 + }, + { + "epoch": 0.7, + "grad_norm": 4.82593726098331, + "learning_rate": 2.1411009251397257e-06, + "loss": 0.7378, + "step": 8600 + }, + { + "epoch": 0.7, + "grad_norm": 4.2473968851947275, + "learning_rate": 2.140014987518826e-06, + "loss": 0.8269, + "step": 8601 + }, + { + "epoch": 0.7, + "grad_norm": 2.875191924636826, + "learning_rate": 2.13892925037043e-06, + "loss": 0.694, + "step": 8602 + }, + { + "epoch": 0.7, + "grad_norm": 3.177039573557034, + "learning_rate": 2.1378437137706413e-06, + "loss": 0.4679, + "step": 8603 + }, + { + "epoch": 0.7, + "grad_norm": 4.005304744198496, + "learning_rate": 2.136758377795552e-06, + "loss": 0.8098, + "step": 8604 + }, + { + "epoch": 0.7, + "grad_norm": 4.245893912971883, + "learning_rate": 2.1356732425212406e-06, + "loss": 1.0079, + "step": 8605 + }, + { + "epoch": 0.7, + "grad_norm": 4.2508512678952, + "learning_rate": 2.1345883080237684e-06, + "loss": 0.7945, + "step": 8606 + }, + { + "epoch": 0.7, + "grad_norm": 3.246922879484139, + "learning_rate": 2.133503574379185e-06, + "loss": 0.465, + "step": 8607 + }, + { + "epoch": 0.7, + "grad_norm": 2.7239696989996944, + "learning_rate": 2.1324190416635275e-06, + "loss": 0.6172, + "step": 8608 + }, + { + "epoch": 0.7, + "grad_norm": 4.058882977329405, + "learning_rate": 2.131334709952814e-06, + "loss": 0.8411, + "step": 8609 + }, + { + "epoch": 0.7, + "grad_norm": 6.055853011000352, + "learning_rate": 2.1302505793230534e-06, + "loss": 1.2077, + "step": 8610 + }, + { + "epoch": 0.7, + "grad_norm": 3.238300224717042, + "learning_rate": 2.129166649850237e-06, + "loss": 0.5948, + "step": 8611 + }, + { + "epoch": 0.7, + "grad_norm": 2.8779211339993243, + "learning_rate": 2.128082921610345e-06, + "loss": 0.3973, + "step": 8612 + }, + { + "epoch": 0.7, + "grad_norm": 4.3118160894168165, + "learning_rate": 2.1269993946793414e-06, + "loss": 0.6056, + "step": 8613 + }, + { + "epoch": 0.7, + "grad_norm": 4.916006967340428, + "learning_rate": 2.1259160691331794e-06, + "loss": 0.5635, + "step": 8614 + }, + { + "epoch": 0.7, + "grad_norm": 3.3416903112367136, + "learning_rate": 2.1248329450477904e-06, + "loss": 0.8378, + "step": 8615 + }, + { + "epoch": 0.7, + "grad_norm": 4.3951543935934, + "learning_rate": 2.1237500224990994e-06, + "loss": 0.5687, + "step": 8616 + }, + { + "epoch": 0.7, + "grad_norm": 4.875284908893516, + "learning_rate": 2.122667301563014e-06, + "loss": 0.8145, + "step": 8617 + }, + { + "epoch": 0.7, + "grad_norm": 3.8377233678186777, + "learning_rate": 2.121584782315429e-06, + "loss": 0.3168, + "step": 8618 + }, + { + "epoch": 0.7, + "grad_norm": 3.5394341484148724, + "learning_rate": 2.1205024648322254e-06, + "loss": 0.5774, + "step": 8619 + }, + { + "epoch": 0.7, + "grad_norm": 4.066940806716527, + "learning_rate": 2.1194203491892657e-06, + "loss": 0.6829, + "step": 8620 + }, + { + "epoch": 0.7, + "grad_norm": 3.0735806068333074, + "learning_rate": 2.1183384354624053e-06, + "loss": 0.5733, + "step": 8621 + }, + { + "epoch": 0.7, + "grad_norm": 4.357778850248498, + "learning_rate": 2.117256723727477e-06, + "loss": 0.5016, + "step": 8622 + }, + { + "epoch": 0.7, + "grad_norm": 5.96812977430919, + "learning_rate": 2.1161752140603077e-06, + "loss": 1.1085, + "step": 8623 + }, + { + "epoch": 0.7, + "grad_norm": 3.0458089164222693, + "learning_rate": 2.1150939065367042e-06, + "loss": 0.8446, + "step": 8624 + }, + { + "epoch": 0.7, + "grad_norm": 3.750234262423207, + "learning_rate": 2.114012801232465e-06, + "loss": 0.7071, + "step": 8625 + }, + { + "epoch": 0.71, + "grad_norm": 3.686081707197256, + "learning_rate": 2.1129318982233673e-06, + "loss": 0.7895, + "step": 8626 + }, + { + "epoch": 0.71, + "grad_norm": 4.589439961308198, + "learning_rate": 2.1118511975851786e-06, + "loss": 0.8295, + "step": 8627 + }, + { + "epoch": 0.71, + "grad_norm": 4.381214266267079, + "learning_rate": 2.1107706993936517e-06, + "loss": 1.0563, + "step": 8628 + }, + { + "epoch": 0.71, + "grad_norm": 3.7484594029638356, + "learning_rate": 2.109690403724525e-06, + "loss": 0.7031, + "step": 8629 + }, + { + "epoch": 0.71, + "grad_norm": 3.878346462910791, + "learning_rate": 2.1086103106535214e-06, + "loss": 0.5772, + "step": 8630 + }, + { + "epoch": 0.71, + "grad_norm": 3.6799841612222877, + "learning_rate": 2.1075304202563545e-06, + "loss": 0.4902, + "step": 8631 + }, + { + "epoch": 0.71, + "grad_norm": 3.3065349872163217, + "learning_rate": 2.106450732608715e-06, + "loss": 0.5272, + "step": 8632 + }, + { + "epoch": 0.71, + "grad_norm": 4.312670386870131, + "learning_rate": 2.105371247786286e-06, + "loss": 0.6805, + "step": 8633 + }, + { + "epoch": 0.71, + "grad_norm": 4.36764524941005, + "learning_rate": 2.1042919658647354e-06, + "loss": 0.6347, + "step": 8634 + }, + { + "epoch": 0.71, + "grad_norm": 1.0413073985924002, + "learning_rate": 2.1032128869197177e-06, + "loss": 0.14, + "step": 8635 + }, + { + "epoch": 0.71, + "grad_norm": 5.05262620919622, + "learning_rate": 2.102134011026868e-06, + "loss": 1.2925, + "step": 8636 + }, + { + "epoch": 0.71, + "grad_norm": 3.59306747824626, + "learning_rate": 2.1010553382618137e-06, + "loss": 0.6344, + "step": 8637 + }, + { + "epoch": 0.71, + "grad_norm": 5.6625150732439735, + "learning_rate": 2.099976868700163e-06, + "loss": 1.0499, + "step": 8638 + }, + { + "epoch": 0.71, + "grad_norm": 3.1795958342969723, + "learning_rate": 2.0988986024175124e-06, + "loss": 0.5804, + "step": 8639 + }, + { + "epoch": 0.71, + "grad_norm": 5.363379536622431, + "learning_rate": 2.097820539489444e-06, + "loss": 1.1703, + "step": 8640 + }, + { + "epoch": 0.71, + "grad_norm": 3.0096364022705266, + "learning_rate": 2.096742679991526e-06, + "loss": 0.7554, + "step": 8641 + }, + { + "epoch": 0.71, + "grad_norm": 3.455715040513336, + "learning_rate": 2.0956650239993125e-06, + "loss": 0.8611, + "step": 8642 + }, + { + "epoch": 0.71, + "grad_norm": 1.35198840252164, + "learning_rate": 2.0945875715883395e-06, + "loss": 0.1979, + "step": 8643 + }, + { + "epoch": 0.71, + "grad_norm": 4.093523913388497, + "learning_rate": 2.0935103228341334e-06, + "loss": 0.7536, + "step": 8644 + }, + { + "epoch": 0.71, + "grad_norm": 3.354438838369588, + "learning_rate": 2.092433277812204e-06, + "loss": 0.3875, + "step": 8645 + }, + { + "epoch": 0.71, + "grad_norm": 3.097243062777404, + "learning_rate": 2.091356436598049e-06, + "loss": 0.555, + "step": 8646 + }, + { + "epoch": 0.71, + "grad_norm": 4.154097071235236, + "learning_rate": 2.0902797992671485e-06, + "loss": 0.6429, + "step": 8647 + }, + { + "epoch": 0.71, + "grad_norm": 3.794055154080764, + "learning_rate": 2.0892033658949734e-06, + "loss": 1.0275, + "step": 8648 + }, + { + "epoch": 0.71, + "grad_norm": 3.1779714128589682, + "learning_rate": 2.088127136556972e-06, + "loss": 0.8594, + "step": 8649 + }, + { + "epoch": 0.71, + "grad_norm": 2.656850932064562, + "learning_rate": 2.087051111328586e-06, + "loss": 0.3271, + "step": 8650 + }, + { + "epoch": 0.71, + "grad_norm": 4.103798675756965, + "learning_rate": 2.0859752902852425e-06, + "loss": 0.6619, + "step": 8651 + }, + { + "epoch": 0.71, + "grad_norm": 2.213857090780219, + "learning_rate": 2.084899673502347e-06, + "loss": 0.4175, + "step": 8652 + }, + { + "epoch": 0.71, + "grad_norm": 2.799983173929279, + "learning_rate": 2.0838242610552974e-06, + "loss": 0.3823, + "step": 8653 + }, + { + "epoch": 0.71, + "grad_norm": 2.879700441990109, + "learning_rate": 2.082749053019478e-06, + "loss": 0.217, + "step": 8654 + }, + { + "epoch": 0.71, + "grad_norm": 2.2683120918792414, + "learning_rate": 2.081674049470252e-06, + "loss": 0.2636, + "step": 8655 + }, + { + "epoch": 0.71, + "grad_norm": 3.2220936779577714, + "learning_rate": 2.080599250482975e-06, + "loss": 0.6549, + "step": 8656 + }, + { + "epoch": 0.71, + "grad_norm": 3.4491755965852686, + "learning_rate": 2.0795246561329853e-06, + "loss": 0.7324, + "step": 8657 + }, + { + "epoch": 0.71, + "grad_norm": 5.355232166552895, + "learning_rate": 2.078450266495607e-06, + "loss": 1.1377, + "step": 8658 + }, + { + "epoch": 0.71, + "grad_norm": 3.7165940900652568, + "learning_rate": 2.077376081646152e-06, + "loss": 0.4958, + "step": 8659 + }, + { + "epoch": 0.71, + "grad_norm": 4.523644529564438, + "learning_rate": 2.0763021016599126e-06, + "loss": 0.8052, + "step": 8660 + }, + { + "epoch": 0.71, + "grad_norm": 3.658025097991605, + "learning_rate": 2.075228326612172e-06, + "loss": 0.6866, + "step": 8661 + }, + { + "epoch": 0.71, + "grad_norm": 4.67246750953226, + "learning_rate": 2.074154756578197e-06, + "loss": 1.0907, + "step": 8662 + }, + { + "epoch": 0.71, + "grad_norm": 2.4601611546585773, + "learning_rate": 2.0730813916332406e-06, + "loss": 0.4387, + "step": 8663 + }, + { + "epoch": 0.71, + "grad_norm": 5.437873537315912, + "learning_rate": 2.0720082318525405e-06, + "loss": 1.1664, + "step": 8664 + }, + { + "epoch": 0.71, + "grad_norm": 4.804471556316076, + "learning_rate": 2.070935277311322e-06, + "loss": 1.148, + "step": 8665 + }, + { + "epoch": 0.71, + "grad_norm": 2.3163855581907216, + "learning_rate": 2.0698625280847917e-06, + "loss": 0.3145, + "step": 8666 + }, + { + "epoch": 0.71, + "grad_norm": 4.303632904484169, + "learning_rate": 2.0687899842481486e-06, + "loss": 1.0898, + "step": 8667 + }, + { + "epoch": 0.71, + "grad_norm": 5.116890375410322, + "learning_rate": 2.0677176458765686e-06, + "loss": 0.9163, + "step": 8668 + }, + { + "epoch": 0.71, + "grad_norm": 3.764200063004666, + "learning_rate": 2.06664551304522e-06, + "loss": 0.4481, + "step": 8669 + }, + { + "epoch": 0.71, + "grad_norm": 4.14798796756444, + "learning_rate": 2.0655735858292554e-06, + "loss": 0.4932, + "step": 8670 + }, + { + "epoch": 0.71, + "grad_norm": 7.033735479867001, + "learning_rate": 2.0645018643038132e-06, + "loss": 1.2896, + "step": 8671 + }, + { + "epoch": 0.71, + "grad_norm": 2.9940314904487093, + "learning_rate": 2.0634303485440133e-06, + "loss": 0.7398, + "step": 8672 + }, + { + "epoch": 0.71, + "grad_norm": 2.5589449579387455, + "learning_rate": 2.0623590386249665e-06, + "loss": 0.3774, + "step": 8673 + }, + { + "epoch": 0.71, + "grad_norm": 5.08627383768658, + "learning_rate": 2.0612879346217655e-06, + "loss": 1.1062, + "step": 8674 + }, + { + "epoch": 0.71, + "grad_norm": 4.217803109246731, + "learning_rate": 2.0602170366094916e-06, + "loss": 0.7815, + "step": 8675 + }, + { + "epoch": 0.71, + "grad_norm": 3.417057549848958, + "learning_rate": 2.059146344663211e-06, + "loss": 0.6506, + "step": 8676 + }, + { + "epoch": 0.71, + "grad_norm": 4.1929784125578, + "learning_rate": 2.0580758588579712e-06, + "loss": 1.117, + "step": 8677 + }, + { + "epoch": 0.71, + "grad_norm": 2.326294409846503, + "learning_rate": 2.057005579268811e-06, + "loss": 0.432, + "step": 8678 + }, + { + "epoch": 0.71, + "grad_norm": 2.8798573621145542, + "learning_rate": 2.055935505970751e-06, + "loss": 0.5126, + "step": 8679 + }, + { + "epoch": 0.71, + "grad_norm": 3.567847232897861, + "learning_rate": 2.0548656390388e-06, + "loss": 0.579, + "step": 8680 + }, + { + "epoch": 0.71, + "grad_norm": 6.5674547113868655, + "learning_rate": 2.0537959785479517e-06, + "loss": 1.1413, + "step": 8681 + }, + { + "epoch": 0.71, + "grad_norm": 5.061943075033028, + "learning_rate": 2.052726524573182e-06, + "loss": 1.0565, + "step": 8682 + }, + { + "epoch": 0.71, + "grad_norm": 4.504154197985221, + "learning_rate": 2.0516572771894577e-06, + "loss": 0.7166, + "step": 8683 + }, + { + "epoch": 0.71, + "grad_norm": 4.522642446726566, + "learning_rate": 2.0505882364717254e-06, + "loss": 0.9991, + "step": 8684 + }, + { + "epoch": 0.71, + "grad_norm": 4.329741163430596, + "learning_rate": 2.049519402494922e-06, + "loss": 0.8487, + "step": 8685 + }, + { + "epoch": 0.71, + "grad_norm": 4.969283958897433, + "learning_rate": 2.048450775333968e-06, + "loss": 0.7496, + "step": 8686 + }, + { + "epoch": 0.71, + "grad_norm": 5.608275774674261, + "learning_rate": 2.0473823550637694e-06, + "loss": 1.44, + "step": 8687 + }, + { + "epoch": 0.71, + "grad_norm": 4.140075850975716, + "learning_rate": 2.04631414175922e-06, + "loss": 0.7435, + "step": 8688 + }, + { + "epoch": 0.71, + "grad_norm": 3.7000203667267555, + "learning_rate": 2.045246135495192e-06, + "loss": 0.7559, + "step": 8689 + }, + { + "epoch": 0.71, + "grad_norm": 5.45115455835716, + "learning_rate": 2.0441783363465517e-06, + "loss": 1.0826, + "step": 8690 + }, + { + "epoch": 0.71, + "grad_norm": 5.183676669097272, + "learning_rate": 2.043110744388146e-06, + "loss": 0.8767, + "step": 8691 + }, + { + "epoch": 0.71, + "grad_norm": 4.424393510654391, + "learning_rate": 2.042043359694808e-06, + "loss": 0.876, + "step": 8692 + }, + { + "epoch": 0.71, + "grad_norm": 3.366657168875679, + "learning_rate": 2.0409761823413583e-06, + "loss": 0.5922, + "step": 8693 + }, + { + "epoch": 0.71, + "grad_norm": 4.664891129493486, + "learning_rate": 2.039909212402602e-06, + "loss": 0.9714, + "step": 8694 + }, + { + "epoch": 0.71, + "grad_norm": 5.585462203946919, + "learning_rate": 2.038842449953326e-06, + "loss": 0.7847, + "step": 8695 + }, + { + "epoch": 0.71, + "grad_norm": 4.767976704969888, + "learning_rate": 2.037775895068307e-06, + "loss": 1.002, + "step": 8696 + }, + { + "epoch": 0.71, + "grad_norm": 4.176040644881519, + "learning_rate": 2.0367095478223076e-06, + "loss": 0.6875, + "step": 8697 + }, + { + "epoch": 0.71, + "grad_norm": 2.6062123742969523, + "learning_rate": 2.035643408290071e-06, + "loss": 0.3599, + "step": 8698 + }, + { + "epoch": 0.71, + "grad_norm": 4.103258869530679, + "learning_rate": 2.034577476546331e-06, + "loss": 0.6648, + "step": 8699 + }, + { + "epoch": 0.71, + "grad_norm": 5.065527963652389, + "learning_rate": 2.033511752665806e-06, + "loss": 1.0606, + "step": 8700 + }, + { + "epoch": 0.71, + "grad_norm": 4.89701876913943, + "learning_rate": 2.0324462367231953e-06, + "loss": 0.9328, + "step": 8701 + }, + { + "epoch": 0.71, + "grad_norm": 4.282913577894604, + "learning_rate": 2.031380928793188e-06, + "loss": 1.1449, + "step": 8702 + }, + { + "epoch": 0.71, + "grad_norm": 2.093768652859012, + "learning_rate": 2.0303158289504583e-06, + "loss": 0.5894, + "step": 8703 + }, + { + "epoch": 0.71, + "grad_norm": 3.5779395972063974, + "learning_rate": 2.0292509372696652e-06, + "loss": 0.5017, + "step": 8704 + }, + { + "epoch": 0.71, + "grad_norm": 2.4919811415807835, + "learning_rate": 2.028186253825454e-06, + "loss": 0.5298, + "step": 8705 + }, + { + "epoch": 0.71, + "grad_norm": 3.553309452899958, + "learning_rate": 2.027121778692451e-06, + "loss": 0.6475, + "step": 8706 + }, + { + "epoch": 0.71, + "grad_norm": 2.8409447438725413, + "learning_rate": 2.026057511945274e-06, + "loss": 0.5548, + "step": 8707 + }, + { + "epoch": 0.71, + "grad_norm": 4.901430709220455, + "learning_rate": 2.0249934536585223e-06, + "loss": 1.1477, + "step": 8708 + }, + { + "epoch": 0.71, + "grad_norm": 4.586139984985449, + "learning_rate": 2.023929603906783e-06, + "loss": 1.0993, + "step": 8709 + }, + { + "epoch": 0.71, + "grad_norm": 5.95086982419721, + "learning_rate": 2.0228659627646257e-06, + "loss": 1.1894, + "step": 8710 + }, + { + "epoch": 0.71, + "grad_norm": 3.8891510456627842, + "learning_rate": 2.02180253030661e-06, + "loss": 0.5169, + "step": 8711 + }, + { + "epoch": 0.71, + "grad_norm": 3.984012817968478, + "learning_rate": 2.020739306607274e-06, + "loss": 0.7946, + "step": 8712 + }, + { + "epoch": 0.71, + "grad_norm": 4.3204634258703845, + "learning_rate": 2.0196762917411466e-06, + "loss": 1.0686, + "step": 8713 + }, + { + "epoch": 0.71, + "grad_norm": 3.26589177246894, + "learning_rate": 2.018613485782743e-06, + "loss": 0.9131, + "step": 8714 + }, + { + "epoch": 0.71, + "grad_norm": 4.39516109327882, + "learning_rate": 2.0175508888065563e-06, + "loss": 0.7271, + "step": 8715 + }, + { + "epoch": 0.71, + "grad_norm": 4.154450605412165, + "learning_rate": 2.0164885008870755e-06, + "loss": 0.7811, + "step": 8716 + }, + { + "epoch": 0.71, + "grad_norm": 3.72573839412881, + "learning_rate": 2.0154263220987642e-06, + "loss": 0.7865, + "step": 8717 + }, + { + "epoch": 0.71, + "grad_norm": 2.212752989508091, + "learning_rate": 2.014364352516079e-06, + "loss": 0.3108, + "step": 8718 + }, + { + "epoch": 0.71, + "grad_norm": 4.321230885821851, + "learning_rate": 2.013302592213459e-06, + "loss": 0.8986, + "step": 8719 + }, + { + "epoch": 0.71, + "grad_norm": 4.524244018089491, + "learning_rate": 2.0122410412653294e-06, + "loss": 0.5415, + "step": 8720 + }, + { + "epoch": 0.71, + "grad_norm": 4.248701771187896, + "learning_rate": 2.0111796997460997e-06, + "loss": 1.0274, + "step": 8721 + }, + { + "epoch": 0.71, + "grad_norm": 3.8017975856320105, + "learning_rate": 2.010118567730167e-06, + "loss": 0.7047, + "step": 8722 + }, + { + "epoch": 0.71, + "grad_norm": 3.6514091243692848, + "learning_rate": 2.0090576452919095e-06, + "loss": 0.5102, + "step": 8723 + }, + { + "epoch": 0.71, + "grad_norm": 5.198097127062312, + "learning_rate": 2.0079969325056947e-06, + "loss": 0.9692, + "step": 8724 + }, + { + "epoch": 0.71, + "grad_norm": 3.892927176413908, + "learning_rate": 2.006936429445873e-06, + "loss": 0.4655, + "step": 8725 + }, + { + "epoch": 0.71, + "grad_norm": 4.656582107206415, + "learning_rate": 2.005876136186782e-06, + "loss": 1.165, + "step": 8726 + }, + { + "epoch": 0.71, + "grad_norm": 1.7809918320027742, + "learning_rate": 2.0048160528027438e-06, + "loss": 0.4113, + "step": 8727 + }, + { + "epoch": 0.71, + "grad_norm": 4.950507539669394, + "learning_rate": 2.003756179368067e-06, + "loss": 1.0302, + "step": 8728 + }, + { + "epoch": 0.71, + "grad_norm": 2.698170971533982, + "learning_rate": 2.00269651595704e-06, + "loss": 0.5472, + "step": 8729 + }, + { + "epoch": 0.71, + "grad_norm": 4.94494712874036, + "learning_rate": 2.0016370626439454e-06, + "loss": 0.7062, + "step": 8730 + }, + { + "epoch": 0.71, + "grad_norm": 4.128425975096584, + "learning_rate": 2.000577819503041e-06, + "loss": 0.5132, + "step": 8731 + }, + { + "epoch": 0.71, + "grad_norm": 4.225973200281443, + "learning_rate": 1.9995187866085786e-06, + "loss": 0.4715, + "step": 8732 + }, + { + "epoch": 0.71, + "grad_norm": 3.4534934469698375, + "learning_rate": 1.998459964034791e-06, + "loss": 0.5686, + "step": 8733 + }, + { + "epoch": 0.71, + "grad_norm": 4.407785171635663, + "learning_rate": 1.9974013518558993e-06, + "loss": 0.615, + "step": 8734 + }, + { + "epoch": 0.71, + "grad_norm": 3.2206323327255437, + "learning_rate": 1.996342950146103e-06, + "loss": 0.4733, + "step": 8735 + }, + { + "epoch": 0.71, + "grad_norm": 2.6897605026812093, + "learning_rate": 1.995284758979594e-06, + "loss": 0.3708, + "step": 8736 + }, + { + "epoch": 0.71, + "grad_norm": 3.7424303998891046, + "learning_rate": 1.9942267784305475e-06, + "loss": 0.6507, + "step": 8737 + }, + { + "epoch": 0.71, + "grad_norm": 3.741602852999754, + "learning_rate": 1.9931690085731225e-06, + "loss": 0.5439, + "step": 8738 + }, + { + "epoch": 0.71, + "grad_norm": 4.083622347875406, + "learning_rate": 1.9921114494814657e-06, + "loss": 0.8254, + "step": 8739 + }, + { + "epoch": 0.71, + "grad_norm": 2.4516868864992776, + "learning_rate": 1.991054101229704e-06, + "loss": 0.5842, + "step": 8740 + }, + { + "epoch": 0.71, + "grad_norm": 2.7441013518235735, + "learning_rate": 1.9899969638919554e-06, + "loss": 0.2529, + "step": 8741 + }, + { + "epoch": 0.71, + "grad_norm": 4.86480139464636, + "learning_rate": 1.9889400375423196e-06, + "loss": 0.9784, + "step": 8742 + }, + { + "epoch": 0.71, + "grad_norm": 4.910379725924154, + "learning_rate": 1.987883322254883e-06, + "loss": 0.7598, + "step": 8743 + }, + { + "epoch": 0.71, + "grad_norm": 4.881084209076858, + "learning_rate": 1.9868268181037186e-06, + "loss": 0.6126, + "step": 8744 + }, + { + "epoch": 0.71, + "grad_norm": 3.692959313734585, + "learning_rate": 1.9857705251628796e-06, + "loss": 0.8387, + "step": 8745 + }, + { + "epoch": 0.71, + "grad_norm": 4.09280388117034, + "learning_rate": 1.98471444350641e-06, + "loss": 0.764, + "step": 8746 + }, + { + "epoch": 0.71, + "grad_norm": 4.425010778688737, + "learning_rate": 1.9836585732083334e-06, + "loss": 0.9115, + "step": 8747 + }, + { + "epoch": 0.72, + "grad_norm": 1.6966358191954547, + "learning_rate": 1.982602914342664e-06, + "loss": 0.3736, + "step": 8748 + }, + { + "epoch": 0.72, + "grad_norm": 3.9070587122298943, + "learning_rate": 1.9815474669833985e-06, + "loss": 0.7047, + "step": 8749 + }, + { + "epoch": 0.72, + "grad_norm": 2.9935935008073384, + "learning_rate": 1.9804922312045193e-06, + "loss": 0.3165, + "step": 8750 + }, + { + "epoch": 0.72, + "grad_norm": 3.733577568711758, + "learning_rate": 1.9794372070799955e-06, + "loss": 0.7414, + "step": 8751 + }, + { + "epoch": 0.72, + "grad_norm": 4.562508336536102, + "learning_rate": 1.978382394683776e-06, + "loss": 0.8799, + "step": 8752 + }, + { + "epoch": 0.72, + "grad_norm": 4.489307752013866, + "learning_rate": 1.9773277940898007e-06, + "loss": 0.9381, + "step": 8753 + }, + { + "epoch": 0.72, + "grad_norm": 5.075482943719641, + "learning_rate": 1.9762734053719923e-06, + "loss": 0.9186, + "step": 8754 + }, + { + "epoch": 0.72, + "grad_norm": 4.720913936362509, + "learning_rate": 1.975219228604259e-06, + "loss": 1.1617, + "step": 8755 + }, + { + "epoch": 0.72, + "grad_norm": 5.289513198260462, + "learning_rate": 1.9741652638604952e-06, + "loss": 0.9923, + "step": 8756 + }, + { + "epoch": 0.72, + "grad_norm": 5.238267357501408, + "learning_rate": 1.9731115112145765e-06, + "loss": 1.1233, + "step": 8757 + }, + { + "epoch": 0.72, + "grad_norm": 4.7919538037923735, + "learning_rate": 1.9720579707403677e-06, + "loss": 1.1525, + "step": 8758 + }, + { + "epoch": 0.72, + "grad_norm": 3.342877462199351, + "learning_rate": 1.9710046425117175e-06, + "loss": 0.6129, + "step": 8759 + }, + { + "epoch": 0.72, + "grad_norm": 5.035221423131665, + "learning_rate": 1.9699515266024614e-06, + "loss": 0.9605, + "step": 8760 + }, + { + "epoch": 0.72, + "grad_norm": 5.81820395917385, + "learning_rate": 1.968898623086415e-06, + "loss": 1.3903, + "step": 8761 + }, + { + "epoch": 0.72, + "grad_norm": 4.519836837579079, + "learning_rate": 1.967845932037385e-06, + "loss": 0.8829, + "step": 8762 + }, + { + "epoch": 0.72, + "grad_norm": 3.4543995131134193, + "learning_rate": 1.966793453529158e-06, + "loss": 0.7544, + "step": 8763 + }, + { + "epoch": 0.72, + "grad_norm": 1.9214886394824024, + "learning_rate": 1.9657411876355086e-06, + "loss": 0.2512, + "step": 8764 + }, + { + "epoch": 0.72, + "grad_norm": 3.9448899170277545, + "learning_rate": 1.9646891344301972e-06, + "loss": 0.9378, + "step": 8765 + }, + { + "epoch": 0.72, + "grad_norm": 3.2417350766028163, + "learning_rate": 1.9636372939869677e-06, + "loss": 0.4702, + "step": 8766 + }, + { + "epoch": 0.72, + "grad_norm": 5.483562613077437, + "learning_rate": 1.9625856663795495e-06, + "loss": 1.1118, + "step": 8767 + }, + { + "epoch": 0.72, + "grad_norm": 3.59874945966871, + "learning_rate": 1.9615342516816595e-06, + "loss": 0.5948, + "step": 8768 + }, + { + "epoch": 0.72, + "grad_norm": 5.248160818585475, + "learning_rate": 1.9604830499669927e-06, + "loss": 1.2771, + "step": 8769 + }, + { + "epoch": 0.72, + "grad_norm": 3.0465365777007856, + "learning_rate": 1.959432061309236e-06, + "loss": 0.6711, + "step": 8770 + }, + { + "epoch": 0.72, + "grad_norm": 4.113986014902147, + "learning_rate": 1.9583812857820595e-06, + "loss": 0.8002, + "step": 8771 + }, + { + "epoch": 0.72, + "grad_norm": 2.5081778890051227, + "learning_rate": 1.9573307234591177e-06, + "loss": 0.4122, + "step": 8772 + }, + { + "epoch": 0.72, + "grad_norm": 2.12267022546443, + "learning_rate": 1.956280374414051e-06, + "loss": 0.1497, + "step": 8773 + }, + { + "epoch": 0.72, + "grad_norm": 2.569631610718901, + "learning_rate": 1.9552302387204847e-06, + "loss": 0.4071, + "step": 8774 + }, + { + "epoch": 0.72, + "grad_norm": 3.095527411898248, + "learning_rate": 1.9541803164520264e-06, + "loss": 0.6379, + "step": 8775 + }, + { + "epoch": 0.72, + "grad_norm": 3.0478573325617098, + "learning_rate": 1.9531306076822738e-06, + "loss": 0.723, + "step": 8776 + }, + { + "epoch": 0.72, + "grad_norm": 3.275622302591984, + "learning_rate": 1.952081112484804e-06, + "loss": 0.5652, + "step": 8777 + }, + { + "epoch": 0.72, + "grad_norm": 2.985750730985547, + "learning_rate": 1.951031830933184e-06, + "loss": 0.5842, + "step": 8778 + }, + { + "epoch": 0.72, + "grad_norm": 3.4450346943209933, + "learning_rate": 1.9499827631009644e-06, + "loss": 0.4955, + "step": 8779 + }, + { + "epoch": 0.72, + "grad_norm": 3.2878789755454196, + "learning_rate": 1.948933909061678e-06, + "loss": 0.4982, + "step": 8780 + }, + { + "epoch": 0.72, + "grad_norm": 2.9108891559935954, + "learning_rate": 1.9478852688888467e-06, + "loss": 0.4906, + "step": 8781 + }, + { + "epoch": 0.72, + "grad_norm": 3.1479115211412, + "learning_rate": 1.946836842655975e-06, + "loss": 0.2868, + "step": 8782 + }, + { + "epoch": 0.72, + "grad_norm": 3.778667943474665, + "learning_rate": 1.9457886304365533e-06, + "loss": 0.7543, + "step": 8783 + }, + { + "epoch": 0.72, + "grad_norm": 4.356567930598804, + "learning_rate": 1.9447406323040562e-06, + "loss": 0.7398, + "step": 8784 + }, + { + "epoch": 0.72, + "grad_norm": 2.540616076380403, + "learning_rate": 1.9436928483319467e-06, + "loss": 0.7212, + "step": 8785 + }, + { + "epoch": 0.72, + "grad_norm": 5.11103211585045, + "learning_rate": 1.942645278593665e-06, + "loss": 0.8136, + "step": 8786 + }, + { + "epoch": 0.72, + "grad_norm": 3.5630316440371184, + "learning_rate": 1.9415979231626443e-06, + "loss": 0.6716, + "step": 8787 + }, + { + "epoch": 0.72, + "grad_norm": 3.1048582060855607, + "learning_rate": 1.940550782112299e-06, + "loss": 0.7179, + "step": 8788 + }, + { + "epoch": 0.72, + "grad_norm": 4.122237785674999, + "learning_rate": 1.9395038555160285e-06, + "loss": 0.7376, + "step": 8789 + }, + { + "epoch": 0.72, + "grad_norm": 2.965063265640028, + "learning_rate": 1.93845714344722e-06, + "loss": 0.3742, + "step": 8790 + }, + { + "epoch": 0.72, + "grad_norm": 4.4587164331302445, + "learning_rate": 1.9374106459792406e-06, + "loss": 0.8145, + "step": 8791 + }, + { + "epoch": 0.72, + "grad_norm": 3.4020388320522774, + "learning_rate": 1.9363643631854483e-06, + "loss": 0.6054, + "step": 8792 + }, + { + "epoch": 0.72, + "grad_norm": 4.382568314360286, + "learning_rate": 1.9353182951391793e-06, + "loss": 0.8411, + "step": 8793 + }, + { + "epoch": 0.72, + "grad_norm": 3.2978674612559438, + "learning_rate": 1.93427244191376e-06, + "loss": 0.5739, + "step": 8794 + }, + { + "epoch": 0.72, + "grad_norm": 3.3205900275988025, + "learning_rate": 1.9332268035825006e-06, + "loss": 0.4264, + "step": 8795 + }, + { + "epoch": 0.72, + "grad_norm": 4.372332606382506, + "learning_rate": 1.9321813802186972e-06, + "loss": 1.0462, + "step": 8796 + }, + { + "epoch": 0.72, + "grad_norm": 4.77716927624133, + "learning_rate": 1.931136171895627e-06, + "loss": 1.1175, + "step": 8797 + }, + { + "epoch": 0.72, + "grad_norm": 3.4186599091483756, + "learning_rate": 1.9300911786865544e-06, + "loss": 0.6359, + "step": 8798 + }, + { + "epoch": 0.72, + "grad_norm": 5.371971117921947, + "learning_rate": 1.92904640066473e-06, + "loss": 1.0489, + "step": 8799 + }, + { + "epoch": 0.72, + "grad_norm": 4.125585132588605, + "learning_rate": 1.9280018379033884e-06, + "loss": 0.8945, + "step": 8800 + }, + { + "epoch": 0.72, + "grad_norm": 4.69143808714922, + "learning_rate": 1.926957490475748e-06, + "loss": 0.5875, + "step": 8801 + }, + { + "epoch": 0.72, + "grad_norm": 5.550472159142619, + "learning_rate": 1.925913358455016e-06, + "loss": 0.8768, + "step": 8802 + }, + { + "epoch": 0.72, + "grad_norm": 3.636115589262085, + "learning_rate": 1.9248694419143776e-06, + "loss": 0.741, + "step": 8803 + }, + { + "epoch": 0.72, + "grad_norm": 3.041781653491779, + "learning_rate": 1.923825740927008e-06, + "loss": 0.507, + "step": 8804 + }, + { + "epoch": 0.72, + "grad_norm": 3.4530873656903327, + "learning_rate": 1.922782255566066e-06, + "loss": 0.7788, + "step": 8805 + }, + { + "epoch": 0.72, + "grad_norm": 4.45092229345249, + "learning_rate": 1.921738985904696e-06, + "loss": 0.9431, + "step": 8806 + }, + { + "epoch": 0.72, + "grad_norm": 5.465998075197892, + "learning_rate": 1.9206959320160286e-06, + "loss": 1.1356, + "step": 8807 + }, + { + "epoch": 0.72, + "grad_norm": 4.004891858662268, + "learning_rate": 1.9196530939731727e-06, + "loss": 1.044, + "step": 8808 + }, + { + "epoch": 0.72, + "grad_norm": 4.106135688800069, + "learning_rate": 1.9186104718492315e-06, + "loss": 1.1551, + "step": 8809 + }, + { + "epoch": 0.72, + "grad_norm": 7.0940084033406805, + "learning_rate": 1.917568065717284e-06, + "loss": 1.6003, + "step": 8810 + }, + { + "epoch": 0.72, + "grad_norm": 5.167711096702554, + "learning_rate": 1.9165258756504003e-06, + "loss": 0.9334, + "step": 8811 + }, + { + "epoch": 0.72, + "grad_norm": 2.4616970813945356, + "learning_rate": 1.9154839017216336e-06, + "loss": 0.4413, + "step": 8812 + }, + { + "epoch": 0.72, + "grad_norm": 2.3795637550189124, + "learning_rate": 1.914442144004021e-06, + "loss": 0.3302, + "step": 8813 + }, + { + "epoch": 0.72, + "grad_norm": 3.510062021432281, + "learning_rate": 1.913400602570588e-06, + "loss": 0.8562, + "step": 8814 + }, + { + "epoch": 0.72, + "grad_norm": 3.57704329704265, + "learning_rate": 1.9123592774943383e-06, + "loss": 0.6834, + "step": 8815 + }, + { + "epoch": 0.72, + "grad_norm": 3.4098023225599374, + "learning_rate": 1.911318168848265e-06, + "loss": 0.5838, + "step": 8816 + }, + { + "epoch": 0.72, + "grad_norm": 4.032834390685839, + "learning_rate": 1.9102772767053467e-06, + "loss": 0.9763, + "step": 8817 + }, + { + "epoch": 0.72, + "grad_norm": 3.883315323065235, + "learning_rate": 1.909236601138545e-06, + "loss": 0.8635, + "step": 8818 + }, + { + "epoch": 0.72, + "grad_norm": 3.9993275066765177, + "learning_rate": 1.908196142220808e-06, + "loss": 0.3683, + "step": 8819 + }, + { + "epoch": 0.72, + "grad_norm": 4.358880564580176, + "learning_rate": 1.9071559000250633e-06, + "loss": 0.8507, + "step": 8820 + }, + { + "epoch": 0.72, + "grad_norm": 3.209064044779201, + "learning_rate": 1.906115874624231e-06, + "loss": 0.548, + "step": 8821 + }, + { + "epoch": 0.72, + "grad_norm": 2.7333119421762047, + "learning_rate": 1.905076066091211e-06, + "loss": 0.6069, + "step": 8822 + }, + { + "epoch": 0.72, + "grad_norm": 3.60706101308045, + "learning_rate": 1.904036474498891e-06, + "loss": 0.6224, + "step": 8823 + }, + { + "epoch": 0.72, + "grad_norm": 4.546860315142606, + "learning_rate": 1.9029970999201387e-06, + "loss": 0.8429, + "step": 8824 + }, + { + "epoch": 0.72, + "grad_norm": 3.8835937404593306, + "learning_rate": 1.9019579424278133e-06, + "loss": 0.6663, + "step": 8825 + }, + { + "epoch": 0.72, + "grad_norm": 3.967240733651116, + "learning_rate": 1.900919002094752e-06, + "loss": 0.4519, + "step": 8826 + }, + { + "epoch": 0.72, + "grad_norm": 2.90936097733532, + "learning_rate": 1.8998802789937815e-06, + "loss": 0.5688, + "step": 8827 + }, + { + "epoch": 0.72, + "grad_norm": 4.5351784039807725, + "learning_rate": 1.898841773197711e-06, + "loss": 0.5924, + "step": 8828 + }, + { + "epoch": 0.72, + "grad_norm": 2.772796655438025, + "learning_rate": 1.8978034847793364e-06, + "loss": 0.2402, + "step": 8829 + }, + { + "epoch": 0.72, + "grad_norm": 3.2992077415812853, + "learning_rate": 1.8967654138114366e-06, + "loss": 0.5012, + "step": 8830 + }, + { + "epoch": 0.72, + "grad_norm": 2.9005704020665144, + "learning_rate": 1.895727560366778e-06, + "loss": 0.4381, + "step": 8831 + }, + { + "epoch": 0.72, + "grad_norm": 3.394153183451265, + "learning_rate": 1.8946899245181056e-06, + "loss": 0.7508, + "step": 8832 + }, + { + "epoch": 0.72, + "grad_norm": 2.9745202934259174, + "learning_rate": 1.893652506338155e-06, + "loss": 0.6123, + "step": 8833 + }, + { + "epoch": 0.72, + "grad_norm": 4.3302935435473335, + "learning_rate": 1.892615305899645e-06, + "loss": 1.2427, + "step": 8834 + }, + { + "epoch": 0.72, + "grad_norm": 3.408078375774764, + "learning_rate": 1.8915783232752788e-06, + "loss": 0.557, + "step": 8835 + }, + { + "epoch": 0.72, + "grad_norm": 4.289875698847576, + "learning_rate": 1.8905415585377458e-06, + "loss": 0.7968, + "step": 8836 + }, + { + "epoch": 0.72, + "grad_norm": 5.004776925266335, + "learning_rate": 1.8895050117597152e-06, + "loss": 1.0907, + "step": 8837 + }, + { + "epoch": 0.72, + "grad_norm": 3.6106359538734822, + "learning_rate": 1.8884686830138465e-06, + "loss": 0.7138, + "step": 8838 + }, + { + "epoch": 0.72, + "grad_norm": 4.4332554602896295, + "learning_rate": 1.8874325723727831e-06, + "loss": 0.9122, + "step": 8839 + }, + { + "epoch": 0.72, + "grad_norm": 3.869664636302528, + "learning_rate": 1.8863966799091492e-06, + "loss": 0.7228, + "step": 8840 + }, + { + "epoch": 0.72, + "grad_norm": 3.979299170617078, + "learning_rate": 1.885361005695558e-06, + "loss": 0.653, + "step": 8841 + }, + { + "epoch": 0.72, + "grad_norm": 3.048090927646827, + "learning_rate": 1.8843255498046065e-06, + "loss": 0.6582, + "step": 8842 + }, + { + "epoch": 0.72, + "grad_norm": 3.7555027623016293, + "learning_rate": 1.8832903123088725e-06, + "loss": 0.7517, + "step": 8843 + }, + { + "epoch": 0.72, + "grad_norm": 4.254266635051504, + "learning_rate": 1.882255293280924e-06, + "loss": 0.9816, + "step": 8844 + }, + { + "epoch": 0.72, + "grad_norm": 3.299368885423742, + "learning_rate": 1.8812204927933108e-06, + "loss": 0.7299, + "step": 8845 + }, + { + "epoch": 0.72, + "grad_norm": 3.2000511906844875, + "learning_rate": 1.8801859109185682e-06, + "loss": 0.5642, + "step": 8846 + }, + { + "epoch": 0.72, + "grad_norm": 4.618016929122934, + "learning_rate": 1.879151547729216e-06, + "loss": 0.7709, + "step": 8847 + }, + { + "epoch": 0.72, + "grad_norm": 5.2500007244119935, + "learning_rate": 1.87811740329776e-06, + "loss": 1.1265, + "step": 8848 + }, + { + "epoch": 0.72, + "grad_norm": 3.7510578134177135, + "learning_rate": 1.8770834776966855e-06, + "loss": 0.682, + "step": 8849 + }, + { + "epoch": 0.72, + "grad_norm": 2.617896038858609, + "learning_rate": 1.8760497709984683e-06, + "loss": 0.4952, + "step": 8850 + }, + { + "epoch": 0.72, + "grad_norm": 3.9926633184568883, + "learning_rate": 1.8750162832755669e-06, + "loss": 0.6637, + "step": 8851 + }, + { + "epoch": 0.72, + "grad_norm": 2.8388230579044857, + "learning_rate": 1.873983014600424e-06, + "loss": 0.4729, + "step": 8852 + }, + { + "epoch": 0.72, + "grad_norm": 4.664516812490497, + "learning_rate": 1.8729499650454691e-06, + "loss": 0.9084, + "step": 8853 + }, + { + "epoch": 0.72, + "grad_norm": 3.0321434265716807, + "learning_rate": 1.8719171346831106e-06, + "loss": 0.656, + "step": 8854 + }, + { + "epoch": 0.72, + "grad_norm": 3.8632003393609278, + "learning_rate": 1.8708845235857498e-06, + "loss": 0.6406, + "step": 8855 + }, + { + "epoch": 0.72, + "grad_norm": 2.9673550677903306, + "learning_rate": 1.8698521318257635e-06, + "loss": 0.5056, + "step": 8856 + }, + { + "epoch": 0.72, + "grad_norm": 4.9194441145925065, + "learning_rate": 1.8688199594755208e-06, + "loss": 0.853, + "step": 8857 + }, + { + "epoch": 0.72, + "grad_norm": 3.0424142971342847, + "learning_rate": 1.8677880066073718e-06, + "loss": 0.5882, + "step": 8858 + }, + { + "epoch": 0.72, + "grad_norm": 3.4533604590139966, + "learning_rate": 1.866756273293654e-06, + "loss": 0.6266, + "step": 8859 + }, + { + "epoch": 0.72, + "grad_norm": 1.1540424959588524, + "learning_rate": 1.8657247596066834e-06, + "loss": 0.1113, + "step": 8860 + }, + { + "epoch": 0.72, + "grad_norm": 2.937330664401966, + "learning_rate": 1.8646934656187671e-06, + "loss": 0.5732, + "step": 8861 + }, + { + "epoch": 0.72, + "grad_norm": 5.884979876373652, + "learning_rate": 1.863662391402194e-06, + "loss": 0.6385, + "step": 8862 + }, + { + "epoch": 0.72, + "grad_norm": 4.175052027188275, + "learning_rate": 1.862631537029238e-06, + "loss": 0.9419, + "step": 8863 + }, + { + "epoch": 0.72, + "grad_norm": 4.946973992101543, + "learning_rate": 1.8616009025721572e-06, + "loss": 0.8249, + "step": 8864 + }, + { + "epoch": 0.72, + "grad_norm": 3.1444148879339875, + "learning_rate": 1.860570488103196e-06, + "loss": 0.4323, + "step": 8865 + }, + { + "epoch": 0.72, + "grad_norm": 5.590713657852092, + "learning_rate": 1.85954029369458e-06, + "loss": 0.7788, + "step": 8866 + }, + { + "epoch": 0.72, + "grad_norm": 1.320357196050719, + "learning_rate": 1.858510319418521e-06, + "loss": 0.1759, + "step": 8867 + }, + { + "epoch": 0.72, + "grad_norm": 4.33373464431892, + "learning_rate": 1.8574805653472178e-06, + "loss": 0.7957, + "step": 8868 + }, + { + "epoch": 0.72, + "grad_norm": 3.852412842501961, + "learning_rate": 1.8564510315528517e-06, + "loss": 0.6909, + "step": 8869 + }, + { + "epoch": 0.72, + "grad_norm": 2.0505451876407883, + "learning_rate": 1.8554217181075862e-06, + "loss": 0.3888, + "step": 8870 + }, + { + "epoch": 0.73, + "grad_norm": 3.758074011530697, + "learning_rate": 1.8543926250835749e-06, + "loss": 0.7105, + "step": 8871 + }, + { + "epoch": 0.73, + "grad_norm": 4.977808151424315, + "learning_rate": 1.8533637525529485e-06, + "loss": 1.0953, + "step": 8872 + }, + { + "epoch": 0.73, + "grad_norm": 5.328100722581435, + "learning_rate": 1.8523351005878293e-06, + "loss": 1.1907, + "step": 8873 + }, + { + "epoch": 0.73, + "grad_norm": 2.510754729106961, + "learning_rate": 1.8513066692603204e-06, + "loss": 0.4291, + "step": 8874 + }, + { + "epoch": 0.73, + "grad_norm": 2.8439070213468214, + "learning_rate": 1.8502784586425116e-06, + "loss": 0.5486, + "step": 8875 + }, + { + "epoch": 0.73, + "grad_norm": 4.4516253149991885, + "learning_rate": 1.849250468806476e-06, + "loss": 0.8449, + "step": 8876 + }, + { + "epoch": 0.73, + "grad_norm": 4.592603633187335, + "learning_rate": 1.8482226998242692e-06, + "loss": 0.8437, + "step": 8877 + }, + { + "epoch": 0.73, + "grad_norm": 3.0296036545205443, + "learning_rate": 1.8471951517679348e-06, + "loss": 0.4635, + "step": 8878 + }, + { + "epoch": 0.73, + "grad_norm": 1.8423645483049949, + "learning_rate": 1.846167824709499e-06, + "loss": 0.2186, + "step": 8879 + }, + { + "epoch": 0.73, + "grad_norm": 4.848669674508421, + "learning_rate": 1.845140718720973e-06, + "loss": 1.1526, + "step": 8880 + }, + { + "epoch": 0.73, + "grad_norm": 4.037018099967917, + "learning_rate": 1.844113833874353e-06, + "loss": 0.8278, + "step": 8881 + }, + { + "epoch": 0.73, + "grad_norm": 4.3710378600880455, + "learning_rate": 1.8430871702416198e-06, + "loss": 0.8264, + "step": 8882 + }, + { + "epoch": 0.73, + "grad_norm": 4.301263634150496, + "learning_rate": 1.8420607278947362e-06, + "loss": 0.791, + "step": 8883 + }, + { + "epoch": 0.73, + "grad_norm": 2.0243568108169714, + "learning_rate": 1.8410345069056517e-06, + "loss": 0.3202, + "step": 8884 + }, + { + "epoch": 0.73, + "grad_norm": 2.9360226309033695, + "learning_rate": 1.840008507346302e-06, + "loss": 0.4884, + "step": 8885 + }, + { + "epoch": 0.73, + "grad_norm": 4.49497390112113, + "learning_rate": 1.838982729288602e-06, + "loss": 0.9013, + "step": 8886 + }, + { + "epoch": 0.73, + "grad_norm": 2.954276049008714, + "learning_rate": 1.8379571728044559e-06, + "loss": 0.6735, + "step": 8887 + }, + { + "epoch": 0.73, + "grad_norm": 2.947073723266408, + "learning_rate": 1.8369318379657526e-06, + "loss": 0.4891, + "step": 8888 + }, + { + "epoch": 0.73, + "grad_norm": 3.085480599916667, + "learning_rate": 1.8359067248443602e-06, + "loss": 0.3471, + "step": 8889 + }, + { + "epoch": 0.73, + "grad_norm": 3.0392365817723133, + "learning_rate": 1.8348818335121355e-06, + "loss": 0.6283, + "step": 8890 + }, + { + "epoch": 0.73, + "grad_norm": 5.6103342856232805, + "learning_rate": 1.8338571640409203e-06, + "loss": 0.9192, + "step": 8891 + }, + { + "epoch": 0.73, + "grad_norm": 3.587916812618091, + "learning_rate": 1.8328327165025384e-06, + "loss": 0.7832, + "step": 8892 + }, + { + "epoch": 0.73, + "grad_norm": 4.671478660931107, + "learning_rate": 1.8318084909687995e-06, + "loss": 0.8664, + "step": 8893 + }, + { + "epoch": 0.73, + "grad_norm": 3.428929659668447, + "learning_rate": 1.8307844875114993e-06, + "loss": 0.3476, + "step": 8894 + }, + { + "epoch": 0.73, + "grad_norm": 3.1948707359979056, + "learning_rate": 1.8297607062024125e-06, + "loss": 0.717, + "step": 8895 + }, + { + "epoch": 0.73, + "grad_norm": 4.149110950463409, + "learning_rate": 1.828737147113303e-06, + "loss": 0.7536, + "step": 8896 + }, + { + "epoch": 0.73, + "grad_norm": 2.8963364537499205, + "learning_rate": 1.827713810315918e-06, + "loss": 0.4259, + "step": 8897 + }, + { + "epoch": 0.73, + "grad_norm": 3.3710584652754862, + "learning_rate": 1.8266906958819892e-06, + "loss": 0.5665, + "step": 8898 + }, + { + "epoch": 0.73, + "grad_norm": 4.246601455389945, + "learning_rate": 1.8256678038832342e-06, + "loss": 0.7157, + "step": 8899 + }, + { + "epoch": 0.73, + "grad_norm": 2.7252475510620577, + "learning_rate": 1.8246451343913497e-06, + "loss": 0.4024, + "step": 8900 + }, + { + "epoch": 0.73, + "grad_norm": 2.2154462309831513, + "learning_rate": 1.8236226874780233e-06, + "loss": 0.3761, + "step": 8901 + }, + { + "epoch": 0.73, + "grad_norm": 4.463399088426534, + "learning_rate": 1.822600463214922e-06, + "loss": 0.7889, + "step": 8902 + }, + { + "epoch": 0.73, + "grad_norm": 4.617610782224787, + "learning_rate": 1.8215784616736993e-06, + "loss": 1.2148, + "step": 8903 + }, + { + "epoch": 0.73, + "grad_norm": 4.454984366704652, + "learning_rate": 1.8205566829259942e-06, + "loss": 0.813, + "step": 8904 + }, + { + "epoch": 0.73, + "grad_norm": 5.205670702293653, + "learning_rate": 1.8195351270434303e-06, + "loss": 1.1278, + "step": 8905 + }, + { + "epoch": 0.73, + "grad_norm": 2.285411479543468, + "learning_rate": 1.8185137940976111e-06, + "loss": 0.5392, + "step": 8906 + }, + { + "epoch": 0.73, + "grad_norm": 3.7137924721743736, + "learning_rate": 1.8174926841601294e-06, + "loss": 0.6425, + "step": 8907 + }, + { + "epoch": 0.73, + "grad_norm": 5.725552627995343, + "learning_rate": 1.816471797302559e-06, + "loss": 1.1603, + "step": 8908 + }, + { + "epoch": 0.73, + "grad_norm": 3.415861053345675, + "learning_rate": 1.8154511335964619e-06, + "loss": 0.5348, + "step": 8909 + }, + { + "epoch": 0.73, + "grad_norm": 3.1246213158237968, + "learning_rate": 1.8144306931133809e-06, + "loss": 0.2999, + "step": 8910 + }, + { + "epoch": 0.73, + "grad_norm": 3.7435419776111294, + "learning_rate": 1.8134104759248461e-06, + "loss": 0.369, + "step": 8911 + }, + { + "epoch": 0.73, + "grad_norm": 3.9848728694727753, + "learning_rate": 1.8123904821023675e-06, + "loss": 0.8606, + "step": 8912 + }, + { + "epoch": 0.73, + "grad_norm": 2.2749438745423083, + "learning_rate": 1.8113707117174433e-06, + "loss": 0.6244, + "step": 8913 + }, + { + "epoch": 0.73, + "grad_norm": 5.182642877270864, + "learning_rate": 1.8103511648415556e-06, + "loss": 1.3483, + "step": 8914 + }, + { + "epoch": 0.73, + "grad_norm": 5.0969152987592725, + "learning_rate": 1.8093318415461698e-06, + "loss": 0.8401, + "step": 8915 + }, + { + "epoch": 0.73, + "grad_norm": 5.307504013633494, + "learning_rate": 1.8083127419027375e-06, + "loss": 1.1576, + "step": 8916 + }, + { + "epoch": 0.73, + "grad_norm": 3.888400058596468, + "learning_rate": 1.80729386598269e-06, + "loss": 1.08, + "step": 8917 + }, + { + "epoch": 0.73, + "grad_norm": 4.491074432520151, + "learning_rate": 1.8062752138574497e-06, + "loss": 1.6848, + "step": 8918 + }, + { + "epoch": 0.73, + "grad_norm": 4.445135276899701, + "learning_rate": 1.805256785598416e-06, + "loss": 0.8892, + "step": 8919 + }, + { + "epoch": 0.73, + "grad_norm": 4.234531444079373, + "learning_rate": 1.804238581276978e-06, + "loss": 0.8545, + "step": 8920 + }, + { + "epoch": 0.73, + "grad_norm": 5.143084059745439, + "learning_rate": 1.8032206009645077e-06, + "loss": 0.7199, + "step": 8921 + }, + { + "epoch": 0.73, + "grad_norm": 4.042827522492736, + "learning_rate": 1.8022028447323619e-06, + "loss": 0.6258, + "step": 8922 + }, + { + "epoch": 0.73, + "grad_norm": 2.826884504598852, + "learning_rate": 1.8011853126518786e-06, + "loss": 0.5258, + "step": 8923 + }, + { + "epoch": 0.73, + "grad_norm": 5.623346055293667, + "learning_rate": 1.8001680047943836e-06, + "loss": 1.0562, + "step": 8924 + }, + { + "epoch": 0.73, + "grad_norm": 2.9842091905897403, + "learning_rate": 1.7991509212311858e-06, + "loss": 0.219, + "step": 8925 + }, + { + "epoch": 0.73, + "grad_norm": 4.48664500069927, + "learning_rate": 1.798134062033578e-06, + "loss": 0.7726, + "step": 8926 + }, + { + "epoch": 0.73, + "grad_norm": 2.1405945631380128, + "learning_rate": 1.7971174272728381e-06, + "loss": 0.3926, + "step": 8927 + }, + { + "epoch": 0.73, + "grad_norm": 4.0999316996457, + "learning_rate": 1.7961010170202293e-06, + "loss": 0.9603, + "step": 8928 + }, + { + "epoch": 0.73, + "grad_norm": 3.7755183810759707, + "learning_rate": 1.7950848313469944e-06, + "loss": 0.7153, + "step": 8929 + }, + { + "epoch": 0.73, + "grad_norm": 3.424073871148489, + "learning_rate": 1.7940688703243641e-06, + "loss": 0.4287, + "step": 8930 + }, + { + "epoch": 0.73, + "grad_norm": 2.195063690892858, + "learning_rate": 1.7930531340235546e-06, + "loss": 0.3746, + "step": 8931 + }, + { + "epoch": 0.73, + "grad_norm": 5.073234184739771, + "learning_rate": 1.7920376225157648e-06, + "loss": 0.801, + "step": 8932 + }, + { + "epoch": 0.73, + "grad_norm": 3.9950993521702953, + "learning_rate": 1.7910223358721751e-06, + "loss": 1.2285, + "step": 8933 + }, + { + "epoch": 0.73, + "grad_norm": 2.8398240441967366, + "learning_rate": 1.7900072741639557e-06, + "loss": 0.4967, + "step": 8934 + }, + { + "epoch": 0.73, + "grad_norm": 3.9808438863197506, + "learning_rate": 1.7889924374622552e-06, + "loss": 0.7187, + "step": 8935 + }, + { + "epoch": 0.73, + "grad_norm": 3.3805642712079598, + "learning_rate": 1.7879778258382103e-06, + "loss": 0.7522, + "step": 8936 + }, + { + "epoch": 0.73, + "grad_norm": 5.326232272044018, + "learning_rate": 1.786963439362941e-06, + "loss": 1.0033, + "step": 8937 + }, + { + "epoch": 0.73, + "grad_norm": 2.0476375259141353, + "learning_rate": 1.7859492781075511e-06, + "loss": 0.3842, + "step": 8938 + }, + { + "epoch": 0.73, + "grad_norm": 4.872031597441457, + "learning_rate": 1.7849353421431316e-06, + "loss": 0.7099, + "step": 8939 + }, + { + "epoch": 0.73, + "grad_norm": 3.912564714887852, + "learning_rate": 1.7839216315407498e-06, + "loss": 0.8623, + "step": 8940 + }, + { + "epoch": 0.73, + "grad_norm": 4.953381229946058, + "learning_rate": 1.782908146371466e-06, + "loss": 0.7803, + "step": 8941 + }, + { + "epoch": 0.73, + "grad_norm": 2.460200700473453, + "learning_rate": 1.7818948867063201e-06, + "loss": 0.7107, + "step": 8942 + }, + { + "epoch": 0.73, + "grad_norm": 5.176029544680021, + "learning_rate": 1.780881852616338e-06, + "loss": 0.9858, + "step": 8943 + }, + { + "epoch": 0.73, + "grad_norm": 2.753092611167436, + "learning_rate": 1.7798690441725275e-06, + "loss": 0.3707, + "step": 8944 + }, + { + "epoch": 0.73, + "grad_norm": 3.948615487037133, + "learning_rate": 1.7788564614458853e-06, + "loss": 0.9159, + "step": 8945 + }, + { + "epoch": 0.73, + "grad_norm": 2.8964014081785283, + "learning_rate": 1.7778441045073846e-06, + "loss": 0.5617, + "step": 8946 + }, + { + "epoch": 0.73, + "grad_norm": 2.4158829649665705, + "learning_rate": 1.7768319734279894e-06, + "loss": 0.5745, + "step": 8947 + }, + { + "epoch": 0.73, + "grad_norm": 3.9495655938363736, + "learning_rate": 1.775820068278647e-06, + "loss": 0.8235, + "step": 8948 + }, + { + "epoch": 0.73, + "grad_norm": 2.3442309344872614, + "learning_rate": 1.7748083891302847e-06, + "loss": 0.2646, + "step": 8949 + }, + { + "epoch": 0.73, + "grad_norm": 3.7840108594247086, + "learning_rate": 1.7737969360538187e-06, + "loss": 0.5071, + "step": 8950 + }, + { + "epoch": 0.73, + "grad_norm": 4.488174423512263, + "learning_rate": 1.7727857091201477e-06, + "loss": 0.986, + "step": 8951 + }, + { + "epoch": 0.73, + "grad_norm": 3.4000460221903386, + "learning_rate": 1.771774708400153e-06, + "loss": 0.6213, + "step": 8952 + }, + { + "epoch": 0.73, + "grad_norm": 3.7478045033289273, + "learning_rate": 1.7707639339647015e-06, + "loss": 0.8226, + "step": 8953 + }, + { + "epoch": 0.73, + "grad_norm": 4.02873919600009, + "learning_rate": 1.7697533858846444e-06, + "loss": 0.6703, + "step": 8954 + }, + { + "epoch": 0.73, + "grad_norm": 3.461477367730992, + "learning_rate": 1.7687430642308167e-06, + "loss": 0.5836, + "step": 8955 + }, + { + "epoch": 0.73, + "grad_norm": 3.9165398389129913, + "learning_rate": 1.7677329690740397e-06, + "loss": 0.7629, + "step": 8956 + }, + { + "epoch": 0.73, + "grad_norm": 4.06870891643253, + "learning_rate": 1.7667231004851132e-06, + "loss": 0.8262, + "step": 8957 + }, + { + "epoch": 0.73, + "grad_norm": 2.8948921154450318, + "learning_rate": 1.7657134585348257e-06, + "loss": 0.3847, + "step": 8958 + }, + { + "epoch": 0.73, + "grad_norm": 4.278096040896495, + "learning_rate": 1.7647040432939494e-06, + "loss": 1.0448, + "step": 8959 + }, + { + "epoch": 0.73, + "grad_norm": 2.350081697197041, + "learning_rate": 1.7636948548332394e-06, + "loss": 0.447, + "step": 8960 + }, + { + "epoch": 0.73, + "grad_norm": 4.743675239557907, + "learning_rate": 1.762685893223436e-06, + "loss": 0.8835, + "step": 8961 + }, + { + "epoch": 0.73, + "grad_norm": 5.1219408922093415, + "learning_rate": 1.7616771585352638e-06, + "loss": 0.6029, + "step": 8962 + }, + { + "epoch": 0.73, + "grad_norm": 2.932424093664737, + "learning_rate": 1.7606686508394278e-06, + "loss": 0.4565, + "step": 8963 + }, + { + "epoch": 0.73, + "grad_norm": 4.350284755062462, + "learning_rate": 1.759660370206624e-06, + "loss": 0.8739, + "step": 8964 + }, + { + "epoch": 0.73, + "grad_norm": 1.8392207856522242, + "learning_rate": 1.7586523167075243e-06, + "loss": 0.3384, + "step": 8965 + }, + { + "epoch": 0.73, + "grad_norm": 2.047881033882093, + "learning_rate": 1.7576444904127909e-06, + "loss": 0.3368, + "step": 8966 + }, + { + "epoch": 0.73, + "grad_norm": 3.5023817371920622, + "learning_rate": 1.7566368913930677e-06, + "loss": 0.5931, + "step": 8967 + }, + { + "epoch": 0.73, + "grad_norm": 3.0312836485738424, + "learning_rate": 1.7556295197189849e-06, + "loss": 0.4801, + "step": 8968 + }, + { + "epoch": 0.73, + "grad_norm": 4.760566696496184, + "learning_rate": 1.754622375461152e-06, + "loss": 0.8811, + "step": 8969 + }, + { + "epoch": 0.73, + "grad_norm": 3.2345251307797587, + "learning_rate": 1.753615458690166e-06, + "loss": 0.5812, + "step": 8970 + }, + { + "epoch": 0.73, + "grad_norm": 6.045578240916088, + "learning_rate": 1.7526087694766086e-06, + "loss": 1.1997, + "step": 8971 + }, + { + "epoch": 0.73, + "grad_norm": 3.481839428642073, + "learning_rate": 1.7516023078910438e-06, + "loss": 0.9402, + "step": 8972 + }, + { + "epoch": 0.73, + "grad_norm": 3.04590137193262, + "learning_rate": 1.7505960740040196e-06, + "loss": 0.7429, + "step": 8973 + }, + { + "epoch": 0.73, + "grad_norm": 1.2468634629602602, + "learning_rate": 1.749590067886071e-06, + "loss": 0.1524, + "step": 8974 + }, + { + "epoch": 0.73, + "grad_norm": 4.637452769824487, + "learning_rate": 1.7485842896077116e-06, + "loss": 0.8302, + "step": 8975 + }, + { + "epoch": 0.73, + "grad_norm": 3.7895625674392375, + "learning_rate": 1.7475787392394427e-06, + "loss": 0.6474, + "step": 8976 + }, + { + "epoch": 0.73, + "grad_norm": 2.601124604564186, + "learning_rate": 1.7465734168517501e-06, + "loss": 0.2956, + "step": 8977 + }, + { + "epoch": 0.73, + "grad_norm": 3.4072381897503887, + "learning_rate": 1.7455683225151037e-06, + "loss": 0.9623, + "step": 8978 + }, + { + "epoch": 0.73, + "grad_norm": 3.9706115422857633, + "learning_rate": 1.7445634562999526e-06, + "loss": 0.6604, + "step": 8979 + }, + { + "epoch": 0.73, + "grad_norm": 3.308798596479263, + "learning_rate": 1.7435588182767371e-06, + "loss": 0.5646, + "step": 8980 + }, + { + "epoch": 0.73, + "grad_norm": 2.188892167150614, + "learning_rate": 1.7425544085158747e-06, + "loss": 0.3848, + "step": 8981 + }, + { + "epoch": 0.73, + "grad_norm": 5.398272377558863, + "learning_rate": 1.741550227087772e-06, + "loss": 0.703, + "step": 8982 + }, + { + "epoch": 0.73, + "grad_norm": 4.811819056468078, + "learning_rate": 1.7405462740628177e-06, + "loss": 1.0272, + "step": 8983 + }, + { + "epoch": 0.73, + "grad_norm": 3.4139978187096194, + "learning_rate": 1.7395425495113838e-06, + "loss": 0.3504, + "step": 8984 + }, + { + "epoch": 0.73, + "grad_norm": 3.703841869876057, + "learning_rate": 1.7385390535038299e-06, + "loss": 0.5092, + "step": 8985 + }, + { + "epoch": 0.73, + "grad_norm": 5.341951956627426, + "learning_rate": 1.7375357861104924e-06, + "loss": 0.9809, + "step": 8986 + }, + { + "epoch": 0.73, + "grad_norm": 3.687216573165631, + "learning_rate": 1.7365327474016979e-06, + "loss": 0.5291, + "step": 8987 + }, + { + "epoch": 0.73, + "grad_norm": 3.2601727656097803, + "learning_rate": 1.7355299374477558e-06, + "loss": 0.6321, + "step": 8988 + }, + { + "epoch": 0.73, + "grad_norm": 5.022852657978894, + "learning_rate": 1.7345273563189575e-06, + "loss": 0.9331, + "step": 8989 + }, + { + "epoch": 0.73, + "grad_norm": 3.4543615795409, + "learning_rate": 1.7335250040855805e-06, + "loss": 0.5367, + "step": 8990 + }, + { + "epoch": 0.73, + "grad_norm": 5.405105561209966, + "learning_rate": 1.7325228808178862e-06, + "loss": 0.9613, + "step": 8991 + }, + { + "epoch": 0.73, + "grad_norm": 3.899093810149087, + "learning_rate": 1.7315209865861165e-06, + "loss": 0.7388, + "step": 8992 + }, + { + "epoch": 0.74, + "grad_norm": 5.012980749679616, + "learning_rate": 1.730519321460501e-06, + "loss": 0.9296, + "step": 8993 + }, + { + "epoch": 0.74, + "grad_norm": 4.868724219782896, + "learning_rate": 1.7295178855112537e-06, + "loss": 1.0129, + "step": 8994 + }, + { + "epoch": 0.74, + "grad_norm": 3.921950322463082, + "learning_rate": 1.7285166788085683e-06, + "loss": 0.7011, + "step": 8995 + }, + { + "epoch": 0.74, + "grad_norm": 3.467594249950734, + "learning_rate": 1.7275157014226274e-06, + "loss": 0.6228, + "step": 8996 + }, + { + "epoch": 0.74, + "grad_norm": 5.208537067500457, + "learning_rate": 1.7265149534235925e-06, + "loss": 0.8779, + "step": 8997 + }, + { + "epoch": 0.74, + "grad_norm": 4.587388261769327, + "learning_rate": 1.7255144348816134e-06, + "loss": 0.7871, + "step": 8998 + }, + { + "epoch": 0.74, + "grad_norm": 3.725501053167962, + "learning_rate": 1.7245141458668213e-06, + "loss": 0.6805, + "step": 8999 + }, + { + "epoch": 0.74, + "grad_norm": 4.741787604017313, + "learning_rate": 1.7235140864493327e-06, + "loss": 1.0489, + "step": 9000 + }, + { + "epoch": 0.74, + "grad_norm": 3.2071984650447534, + "learning_rate": 1.7225142566992476e-06, + "loss": 0.8655, + "step": 9001 + }, + { + "epoch": 0.74, + "grad_norm": 4.985035641680996, + "learning_rate": 1.7215146566866508e-06, + "loss": 0.8706, + "step": 9002 + }, + { + "epoch": 0.74, + "grad_norm": 4.47775958261579, + "learning_rate": 1.7205152864816071e-06, + "loss": 0.828, + "step": 9003 + }, + { + "epoch": 0.74, + "grad_norm": 3.7970700653958174, + "learning_rate": 1.7195161461541692e-06, + "loss": 0.3933, + "step": 9004 + }, + { + "epoch": 0.74, + "grad_norm": 5.013903958013123, + "learning_rate": 1.7185172357743729e-06, + "loss": 0.7536, + "step": 9005 + }, + { + "epoch": 0.74, + "grad_norm": 3.2442239307638188, + "learning_rate": 1.7175185554122375e-06, + "loss": 0.6861, + "step": 9006 + }, + { + "epoch": 0.74, + "grad_norm": 2.2467894939410833, + "learning_rate": 1.7165201051377657e-06, + "loss": 0.3923, + "step": 9007 + }, + { + "epoch": 0.74, + "grad_norm": 4.5533531296390075, + "learning_rate": 1.7155218850209465e-06, + "loss": 0.92, + "step": 9008 + }, + { + "epoch": 0.74, + "grad_norm": 4.097363179597376, + "learning_rate": 1.7145238951317473e-06, + "loss": 0.688, + "step": 9009 + }, + { + "epoch": 0.74, + "grad_norm": 1.6374955130177864, + "learning_rate": 1.7135261355401246e-06, + "loss": 0.2497, + "step": 9010 + }, + { + "epoch": 0.74, + "grad_norm": 4.751931732962066, + "learning_rate": 1.712528606316019e-06, + "loss": 1.0268, + "step": 9011 + }, + { + "epoch": 0.74, + "grad_norm": 1.3351512488767607, + "learning_rate": 1.7115313075293488e-06, + "loss": 0.2148, + "step": 9012 + }, + { + "epoch": 0.74, + "grad_norm": 4.28499191945056, + "learning_rate": 1.710534239250023e-06, + "loss": 0.6547, + "step": 9013 + }, + { + "epoch": 0.74, + "grad_norm": 3.7785812157892913, + "learning_rate": 1.7095374015479326e-06, + "loss": 0.6309, + "step": 9014 + }, + { + "epoch": 0.74, + "grad_norm": 4.166814066008341, + "learning_rate": 1.7085407944929488e-06, + "loss": 0.8211, + "step": 9015 + }, + { + "epoch": 0.74, + "grad_norm": 4.734193097427948, + "learning_rate": 1.7075444181549305e-06, + "loss": 0.92, + "step": 9016 + }, + { + "epoch": 0.74, + "grad_norm": 3.9215893062088756, + "learning_rate": 1.7065482726037196e-06, + "loss": 0.4628, + "step": 9017 + }, + { + "epoch": 0.74, + "grad_norm": 6.093119807275033, + "learning_rate": 1.7055523579091422e-06, + "loss": 1.0065, + "step": 9018 + }, + { + "epoch": 0.74, + "grad_norm": 4.323869904369412, + "learning_rate": 1.704556674141008e-06, + "loss": 0.7332, + "step": 9019 + }, + { + "epoch": 0.74, + "grad_norm": 1.5696067383535437, + "learning_rate": 1.7035612213691083e-06, + "loss": 0.3485, + "step": 9020 + }, + { + "epoch": 0.74, + "grad_norm": 2.530204008821355, + "learning_rate": 1.7025659996632198e-06, + "loss": 0.4463, + "step": 9021 + }, + { + "epoch": 0.74, + "grad_norm": 3.2497473771022154, + "learning_rate": 1.7015710090931047e-06, + "loss": 0.5451, + "step": 9022 + }, + { + "epoch": 0.74, + "grad_norm": 1.9079734640877182, + "learning_rate": 1.7005762497285078e-06, + "loss": 0.3221, + "step": 9023 + }, + { + "epoch": 0.74, + "grad_norm": 2.956229764069996, + "learning_rate": 1.6995817216391559e-06, + "loss": 0.7575, + "step": 9024 + }, + { + "epoch": 0.74, + "grad_norm": 3.8665180173730653, + "learning_rate": 1.698587424894763e-06, + "loss": 0.8057, + "step": 9025 + }, + { + "epoch": 0.74, + "grad_norm": 3.7188981255366276, + "learning_rate": 1.6975933595650229e-06, + "loss": 0.6104, + "step": 9026 + }, + { + "epoch": 0.74, + "grad_norm": 1.9620310746669432, + "learning_rate": 1.6965995257196177e-06, + "loss": 0.3023, + "step": 9027 + }, + { + "epoch": 0.74, + "grad_norm": 3.13494334777588, + "learning_rate": 1.6956059234282079e-06, + "loss": 0.4358, + "step": 9028 + }, + { + "epoch": 0.74, + "grad_norm": 3.4396347140855688, + "learning_rate": 1.6946125527604419e-06, + "loss": 0.4067, + "step": 9029 + }, + { + "epoch": 0.74, + "grad_norm": 2.7671237129759287, + "learning_rate": 1.6936194137859508e-06, + "loss": 0.7057, + "step": 9030 + }, + { + "epoch": 0.74, + "grad_norm": 3.9593379420039243, + "learning_rate": 1.6926265065743507e-06, + "loss": 0.47, + "step": 9031 + }, + { + "epoch": 0.74, + "grad_norm": 3.659273991675345, + "learning_rate": 1.6916338311952373e-06, + "loss": 0.6953, + "step": 9032 + }, + { + "epoch": 0.74, + "grad_norm": 4.132124704254727, + "learning_rate": 1.6906413877181948e-06, + "loss": 0.7218, + "step": 9033 + }, + { + "epoch": 0.74, + "grad_norm": 4.13717100686943, + "learning_rate": 1.6896491762127882e-06, + "loss": 0.6954, + "step": 9034 + }, + { + "epoch": 0.74, + "grad_norm": 4.821745647616559, + "learning_rate": 1.6886571967485677e-06, + "loss": 0.7649, + "step": 9035 + }, + { + "epoch": 0.74, + "grad_norm": 3.6486101795414934, + "learning_rate": 1.6876654493950666e-06, + "loss": 0.7128, + "step": 9036 + }, + { + "epoch": 0.74, + "grad_norm": 4.829340506404127, + "learning_rate": 1.6866739342218042e-06, + "loss": 0.7762, + "step": 9037 + }, + { + "epoch": 0.74, + "grad_norm": 5.418065515804254, + "learning_rate": 1.6856826512982772e-06, + "loss": 1.0962, + "step": 9038 + }, + { + "epoch": 0.74, + "grad_norm": 1.282175126871541, + "learning_rate": 1.6846916006939724e-06, + "loss": 0.1717, + "step": 9039 + }, + { + "epoch": 0.74, + "grad_norm": 4.689916488916796, + "learning_rate": 1.6837007824783586e-06, + "loss": 0.6698, + "step": 9040 + }, + { + "epoch": 0.74, + "grad_norm": 5.710826508344211, + "learning_rate": 1.6827101967208887e-06, + "loss": 1.0579, + "step": 9041 + }, + { + "epoch": 0.74, + "grad_norm": 3.8698950145246562, + "learning_rate": 1.6817198434909954e-06, + "loss": 0.7306, + "step": 9042 + }, + { + "epoch": 0.74, + "grad_norm": 4.822068675912592, + "learning_rate": 1.6807297228581016e-06, + "loss": 0.9391, + "step": 9043 + }, + { + "epoch": 0.74, + "grad_norm": 5.440054907992409, + "learning_rate": 1.6797398348916073e-06, + "loss": 0.8555, + "step": 9044 + }, + { + "epoch": 0.74, + "grad_norm": 2.6654135564767585, + "learning_rate": 1.6787501796609001e-06, + "loss": 0.3411, + "step": 9045 + }, + { + "epoch": 0.74, + "grad_norm": 3.0646110765873495, + "learning_rate": 1.6777607572353516e-06, + "loss": 0.3515, + "step": 9046 + }, + { + "epoch": 0.74, + "grad_norm": 3.809911256440361, + "learning_rate": 1.676771567684316e-06, + "loss": 0.9619, + "step": 9047 + }, + { + "epoch": 0.74, + "grad_norm": 5.248089390765532, + "learning_rate": 1.675782611077132e-06, + "loss": 0.9298, + "step": 9048 + }, + { + "epoch": 0.74, + "grad_norm": 5.046653300658506, + "learning_rate": 1.6747938874831182e-06, + "loss": 1.0072, + "step": 9049 + }, + { + "epoch": 0.74, + "grad_norm": 3.9210787418348643, + "learning_rate": 1.6738053969715818e-06, + "loss": 0.6771, + "step": 9050 + }, + { + "epoch": 0.74, + "grad_norm": 5.7392379153531135, + "learning_rate": 1.672817139611811e-06, + "loss": 1.1881, + "step": 9051 + }, + { + "epoch": 0.74, + "grad_norm": 4.729634737365383, + "learning_rate": 1.6718291154730792e-06, + "loss": 0.8296, + "step": 9052 + }, + { + "epoch": 0.74, + "grad_norm": 7.292886378742193, + "learning_rate": 1.6708413246246418e-06, + "loss": 1.7863, + "step": 9053 + }, + { + "epoch": 0.74, + "grad_norm": 4.091853622035211, + "learning_rate": 1.6698537671357406e-06, + "loss": 0.5248, + "step": 9054 + }, + { + "epoch": 0.74, + "grad_norm": 2.034897188619175, + "learning_rate": 1.6688664430755964e-06, + "loss": 0.3249, + "step": 9055 + }, + { + "epoch": 0.74, + "grad_norm": 4.212051184388152, + "learning_rate": 1.6678793525134167e-06, + "loss": 0.5452, + "step": 9056 + }, + { + "epoch": 0.74, + "grad_norm": 5.587073637312639, + "learning_rate": 1.6668924955183952e-06, + "loss": 1.2495, + "step": 9057 + }, + { + "epoch": 0.74, + "grad_norm": 3.4326452585038636, + "learning_rate": 1.665905872159702e-06, + "loss": 0.6913, + "step": 9058 + }, + { + "epoch": 0.74, + "grad_norm": 5.64344670227028, + "learning_rate": 1.6649194825064991e-06, + "loss": 1.0694, + "step": 9059 + }, + { + "epoch": 0.74, + "grad_norm": 2.987460457256301, + "learning_rate": 1.6639333266279244e-06, + "loss": 0.6845, + "step": 9060 + }, + { + "epoch": 0.74, + "grad_norm": 3.392028734809809, + "learning_rate": 1.662947404593105e-06, + "loss": 0.7027, + "step": 9061 + }, + { + "epoch": 0.74, + "grad_norm": 3.08408362057497, + "learning_rate": 1.6619617164711493e-06, + "loss": 0.6966, + "step": 9062 + }, + { + "epoch": 0.74, + "grad_norm": 4.3109251101137485, + "learning_rate": 1.6609762623311504e-06, + "loss": 0.7128, + "step": 9063 + }, + { + "epoch": 0.74, + "grad_norm": 4.243086851835061, + "learning_rate": 1.659991042242184e-06, + "loss": 1.1725, + "step": 9064 + }, + { + "epoch": 0.74, + "grad_norm": 4.204599777941349, + "learning_rate": 1.6590060562733111e-06, + "loss": 0.5851, + "step": 9065 + }, + { + "epoch": 0.74, + "grad_norm": 3.9517819510821637, + "learning_rate": 1.6580213044935723e-06, + "loss": 0.5402, + "step": 9066 + }, + { + "epoch": 0.74, + "grad_norm": 5.053050927242831, + "learning_rate": 1.6570367869719955e-06, + "loss": 0.5968, + "step": 9067 + }, + { + "epoch": 0.74, + "grad_norm": 4.239009288234037, + "learning_rate": 1.656052503777591e-06, + "loss": 0.7625, + "step": 9068 + }, + { + "epoch": 0.74, + "grad_norm": 2.87110788114826, + "learning_rate": 1.6550684549793539e-06, + "loss": 0.5221, + "step": 9069 + }, + { + "epoch": 0.74, + "grad_norm": 3.766413422379634, + "learning_rate": 1.6540846406462602e-06, + "loss": 0.4895, + "step": 9070 + }, + { + "epoch": 0.74, + "grad_norm": 4.844991604320573, + "learning_rate": 1.6531010608472736e-06, + "loss": 1.3181, + "step": 9071 + }, + { + "epoch": 0.74, + "grad_norm": 4.687473886208534, + "learning_rate": 1.6521177156513351e-06, + "loss": 0.9449, + "step": 9072 + }, + { + "epoch": 0.74, + "grad_norm": 2.255207412004284, + "learning_rate": 1.6511346051273768e-06, + "loss": 0.3351, + "step": 9073 + }, + { + "epoch": 0.74, + "grad_norm": 4.047676731344719, + "learning_rate": 1.6501517293443064e-06, + "loss": 0.8867, + "step": 9074 + }, + { + "epoch": 0.74, + "grad_norm": 4.065878701202547, + "learning_rate": 1.6491690883710209e-06, + "loss": 0.6451, + "step": 9075 + }, + { + "epoch": 0.74, + "grad_norm": 3.5443967112541754, + "learning_rate": 1.6481866822763997e-06, + "loss": 0.4899, + "step": 9076 + }, + { + "epoch": 0.74, + "grad_norm": 3.9996127676580624, + "learning_rate": 1.6472045111293072e-06, + "loss": 0.6043, + "step": 9077 + }, + { + "epoch": 0.74, + "grad_norm": 3.0559692397812004, + "learning_rate": 1.6462225749985845e-06, + "loss": 0.8296, + "step": 9078 + }, + { + "epoch": 0.74, + "grad_norm": 4.266144230809973, + "learning_rate": 1.645240873953064e-06, + "loss": 0.8095, + "step": 9079 + }, + { + "epoch": 0.74, + "grad_norm": 3.841030181663696, + "learning_rate": 1.6442594080615581e-06, + "loss": 0.6333, + "step": 9080 + }, + { + "epoch": 0.74, + "grad_norm": 3.7709648439262544, + "learning_rate": 1.6432781773928636e-06, + "loss": 0.8523, + "step": 9081 + }, + { + "epoch": 0.74, + "grad_norm": 4.394573663876398, + "learning_rate": 1.6422971820157623e-06, + "loss": 0.791, + "step": 9082 + }, + { + "epoch": 0.74, + "grad_norm": 4.9444633502066395, + "learning_rate": 1.6413164219990136e-06, + "loss": 0.8428, + "step": 9083 + }, + { + "epoch": 0.74, + "grad_norm": 1.9488231025805063, + "learning_rate": 1.640335897411367e-06, + "loss": 0.3488, + "step": 9084 + }, + { + "epoch": 0.74, + "grad_norm": 1.9429043504182903, + "learning_rate": 1.6393556083215528e-06, + "loss": 0.3321, + "step": 9085 + }, + { + "epoch": 0.74, + "grad_norm": 4.459417834868013, + "learning_rate": 1.6383755547982844e-06, + "loss": 0.5809, + "step": 9086 + }, + { + "epoch": 0.74, + "grad_norm": 3.792858688546865, + "learning_rate": 1.6373957369102616e-06, + "loss": 0.8116, + "step": 9087 + }, + { + "epoch": 0.74, + "grad_norm": 3.1192732924982023, + "learning_rate": 1.636416154726162e-06, + "loss": 0.4053, + "step": 9088 + }, + { + "epoch": 0.74, + "grad_norm": 3.621300593467316, + "learning_rate": 1.6354368083146532e-06, + "loss": 0.574, + "step": 9089 + }, + { + "epoch": 0.74, + "grad_norm": 3.7491013378790123, + "learning_rate": 1.63445769774438e-06, + "loss": 0.5252, + "step": 9090 + }, + { + "epoch": 0.74, + "grad_norm": 4.417582731462544, + "learning_rate": 1.6334788230839753e-06, + "loss": 0.9197, + "step": 9091 + }, + { + "epoch": 0.74, + "grad_norm": 5.077667303709065, + "learning_rate": 1.6325001844020538e-06, + "loss": 1.2808, + "step": 9092 + }, + { + "epoch": 0.74, + "grad_norm": 2.9993091285089273, + "learning_rate": 1.6315217817672142e-06, + "loss": 0.558, + "step": 9093 + }, + { + "epoch": 0.74, + "grad_norm": 4.99726956482455, + "learning_rate": 1.6305436152480392e-06, + "loss": 0.8917, + "step": 9094 + }, + { + "epoch": 0.74, + "grad_norm": 1.970521738608599, + "learning_rate": 1.6295656849130914e-06, + "loss": 0.3274, + "step": 9095 + }, + { + "epoch": 0.74, + "grad_norm": 4.221441371096554, + "learning_rate": 1.628587990830921e-06, + "loss": 0.6971, + "step": 9096 + }, + { + "epoch": 0.74, + "grad_norm": 3.870364589941543, + "learning_rate": 1.6276105330700599e-06, + "loss": 0.5622, + "step": 9097 + }, + { + "epoch": 0.74, + "grad_norm": 4.1773326866114715, + "learning_rate": 1.6266333116990242e-06, + "loss": 0.6384, + "step": 9098 + }, + { + "epoch": 0.74, + "grad_norm": 2.2829276113017065, + "learning_rate": 1.6256563267863135e-06, + "loss": 0.348, + "step": 9099 + }, + { + "epoch": 0.74, + "grad_norm": 3.420048364811174, + "learning_rate": 1.6246795784004076e-06, + "loss": 0.5856, + "step": 9100 + }, + { + "epoch": 0.74, + "grad_norm": 3.7505987876192113, + "learning_rate": 1.6237030666097736e-06, + "loss": 0.7085, + "step": 9101 + }, + { + "epoch": 0.74, + "grad_norm": 3.8063053381010703, + "learning_rate": 1.6227267914828615e-06, + "loss": 1.0319, + "step": 9102 + }, + { + "epoch": 0.74, + "grad_norm": 3.804431995865752, + "learning_rate": 1.6217507530881048e-06, + "loss": 0.5913, + "step": 9103 + }, + { + "epoch": 0.74, + "grad_norm": 3.687685126129331, + "learning_rate": 1.6207749514939164e-06, + "loss": 0.7277, + "step": 9104 + }, + { + "epoch": 0.74, + "grad_norm": 4.062244202441285, + "learning_rate": 1.6197993867686973e-06, + "loss": 0.7456, + "step": 9105 + }, + { + "epoch": 0.74, + "grad_norm": 2.3369190904442183, + "learning_rate": 1.6188240589808325e-06, + "loss": 0.4212, + "step": 9106 + }, + { + "epoch": 0.74, + "grad_norm": 4.1709524686338915, + "learning_rate": 1.6178489681986842e-06, + "loss": 0.9092, + "step": 9107 + }, + { + "epoch": 0.74, + "grad_norm": 3.3744359543461533, + "learning_rate": 1.616874114490604e-06, + "loss": 0.6864, + "step": 9108 + }, + { + "epoch": 0.74, + "grad_norm": 2.58724493813051, + "learning_rate": 1.6158994979249255e-06, + "loss": 0.4519, + "step": 9109 + }, + { + "epoch": 0.74, + "grad_norm": 2.6247163893013767, + "learning_rate": 1.6149251185699643e-06, + "loss": 0.237, + "step": 9110 + }, + { + "epoch": 0.74, + "grad_norm": 3.723515891720296, + "learning_rate": 1.613950976494022e-06, + "loss": 0.655, + "step": 9111 + }, + { + "epoch": 0.74, + "grad_norm": 2.9683408496964163, + "learning_rate": 1.6129770717653781e-06, + "loss": 0.4724, + "step": 9112 + }, + { + "epoch": 0.74, + "grad_norm": 4.158516849075152, + "learning_rate": 1.6120034044523015e-06, + "loss": 0.9067, + "step": 9113 + }, + { + "epoch": 0.74, + "grad_norm": 4.8198622674689116, + "learning_rate": 1.6110299746230419e-06, + "loss": 1.0292, + "step": 9114 + }, + { + "epoch": 0.75, + "grad_norm": 3.7482896303080935, + "learning_rate": 1.6100567823458319e-06, + "loss": 0.5969, + "step": 9115 + }, + { + "epoch": 0.75, + "grad_norm": 4.44067565129291, + "learning_rate": 1.6090838276888882e-06, + "loss": 0.9371, + "step": 9116 + }, + { + "epoch": 0.75, + "grad_norm": 3.450233594803811, + "learning_rate": 1.6081111107204127e-06, + "loss": 0.4965, + "step": 9117 + }, + { + "epoch": 0.75, + "grad_norm": 2.5899593889008092, + "learning_rate": 1.6071386315085851e-06, + "loss": 0.5021, + "step": 9118 + }, + { + "epoch": 0.75, + "grad_norm": 1.9734439489250912, + "learning_rate": 1.606166390121574e-06, + "loss": 0.3827, + "step": 9119 + }, + { + "epoch": 0.75, + "grad_norm": 2.34945668400547, + "learning_rate": 1.60519438662753e-06, + "loss": 0.4631, + "step": 9120 + }, + { + "epoch": 0.75, + "grad_norm": 4.239324985671788, + "learning_rate": 1.6042226210945838e-06, + "loss": 0.5508, + "step": 9121 + }, + { + "epoch": 0.75, + "grad_norm": 3.7334163443070567, + "learning_rate": 1.6032510935908551e-06, + "loss": 0.5746, + "step": 9122 + }, + { + "epoch": 0.75, + "grad_norm": 4.6674181829994215, + "learning_rate": 1.6022798041844407e-06, + "loss": 0.7384, + "step": 9123 + }, + { + "epoch": 0.75, + "grad_norm": 4.929516148507866, + "learning_rate": 1.6013087529434247e-06, + "loss": 1.1649, + "step": 9124 + }, + { + "epoch": 0.75, + "grad_norm": 2.765302163788124, + "learning_rate": 1.6003379399358742e-06, + "loss": 0.3319, + "step": 9125 + }, + { + "epoch": 0.75, + "grad_norm": 4.751803140552971, + "learning_rate": 1.5993673652298386e-06, + "loss": 0.8956, + "step": 9126 + }, + { + "epoch": 0.75, + "grad_norm": 5.303559736955207, + "learning_rate": 1.5983970288933509e-06, + "loss": 1.1144, + "step": 9127 + }, + { + "epoch": 0.75, + "grad_norm": 4.0912639283894245, + "learning_rate": 1.5974269309944296e-06, + "loss": 0.6042, + "step": 9128 + }, + { + "epoch": 0.75, + "grad_norm": 4.236076518778019, + "learning_rate": 1.5964570716010708e-06, + "loss": 0.7398, + "step": 9129 + }, + { + "epoch": 0.75, + "grad_norm": 4.891067061234153, + "learning_rate": 1.595487450781259e-06, + "loss": 0.8603, + "step": 9130 + }, + { + "epoch": 0.75, + "grad_norm": 4.186974074359808, + "learning_rate": 1.5945180686029598e-06, + "loss": 0.8253, + "step": 9131 + }, + { + "epoch": 0.75, + "grad_norm": 3.4969354792471243, + "learning_rate": 1.593548925134124e-06, + "loss": 0.3732, + "step": 9132 + }, + { + "epoch": 0.75, + "grad_norm": 5.246678029315557, + "learning_rate": 1.5925800204426833e-06, + "loss": 0.9658, + "step": 9133 + }, + { + "epoch": 0.75, + "grad_norm": 5.004737255489296, + "learning_rate": 1.5916113545965562e-06, + "loss": 0.6139, + "step": 9134 + }, + { + "epoch": 0.75, + "grad_norm": 2.4217889762322553, + "learning_rate": 1.5906429276636376e-06, + "loss": 0.3275, + "step": 9135 + }, + { + "epoch": 0.75, + "grad_norm": 3.798173938948157, + "learning_rate": 1.589674739711814e-06, + "loss": 0.9051, + "step": 9136 + }, + { + "epoch": 0.75, + "grad_norm": 2.621642259837425, + "learning_rate": 1.5887067908089472e-06, + "loss": 0.3025, + "step": 9137 + }, + { + "epoch": 0.75, + "grad_norm": 3.7705583656212167, + "learning_rate": 1.5877390810228888e-06, + "loss": 0.5332, + "step": 9138 + }, + { + "epoch": 0.75, + "grad_norm": 1.1907773558344406, + "learning_rate": 1.5867716104214725e-06, + "loss": 0.1737, + "step": 9139 + }, + { + "epoch": 0.75, + "grad_norm": 3.354378021368676, + "learning_rate": 1.5858043790725096e-06, + "loss": 0.6442, + "step": 9140 + }, + { + "epoch": 0.75, + "grad_norm": 3.819461979920989, + "learning_rate": 1.5848373870438016e-06, + "loss": 0.2835, + "step": 9141 + }, + { + "epoch": 0.75, + "grad_norm": 2.5942531966284172, + "learning_rate": 1.58387063440313e-06, + "loss": 0.4373, + "step": 9142 + }, + { + "epoch": 0.75, + "grad_norm": 4.81101872617202, + "learning_rate": 1.58290412121826e-06, + "loss": 0.8678, + "step": 9143 + }, + { + "epoch": 0.75, + "grad_norm": 3.625409574628458, + "learning_rate": 1.5819378475569396e-06, + "loss": 0.6234, + "step": 9144 + }, + { + "epoch": 0.75, + "grad_norm": 4.659023434089425, + "learning_rate": 1.5809718134869024e-06, + "loss": 0.7718, + "step": 9145 + }, + { + "epoch": 0.75, + "grad_norm": 4.435425429651767, + "learning_rate": 1.5800060190758592e-06, + "loss": 0.9598, + "step": 9146 + }, + { + "epoch": 0.75, + "grad_norm": 2.1947682057148135, + "learning_rate": 1.5790404643915108e-06, + "loss": 0.3132, + "step": 9147 + }, + { + "epoch": 0.75, + "grad_norm": 3.460252436859948, + "learning_rate": 1.5780751495015379e-06, + "loss": 0.5249, + "step": 9148 + }, + { + "epoch": 0.75, + "grad_norm": 3.794571630750243, + "learning_rate": 1.5771100744736039e-06, + "loss": 0.7865, + "step": 9149 + }, + { + "epoch": 0.75, + "grad_norm": 2.601783932317756, + "learning_rate": 1.5761452393753596e-06, + "loss": 0.5184, + "step": 9150 + }, + { + "epoch": 0.75, + "grad_norm": 4.265490982404797, + "learning_rate": 1.5751806442744315e-06, + "loss": 1.082, + "step": 9151 + }, + { + "epoch": 0.75, + "grad_norm": 6.026421806841435, + "learning_rate": 1.5742162892384372e-06, + "loss": 1.2085, + "step": 9152 + }, + { + "epoch": 0.75, + "grad_norm": 3.885104571684317, + "learning_rate": 1.57325217433497e-06, + "loss": 0.9064, + "step": 9153 + }, + { + "epoch": 0.75, + "grad_norm": 2.5114201265328964, + "learning_rate": 1.5722882996316125e-06, + "loss": 0.4936, + "step": 9154 + }, + { + "epoch": 0.75, + "grad_norm": 3.8762422337914098, + "learning_rate": 1.5713246651959275e-06, + "loss": 0.735, + "step": 9155 + }, + { + "epoch": 0.75, + "grad_norm": 5.451621781366753, + "learning_rate": 1.570361271095462e-06, + "loss": 1.0967, + "step": 9156 + }, + { + "epoch": 0.75, + "grad_norm": 4.57069746566801, + "learning_rate": 1.5693981173977468e-06, + "loss": 0.6759, + "step": 9157 + }, + { + "epoch": 0.75, + "grad_norm": 4.530243213795102, + "learning_rate": 1.568435204170292e-06, + "loss": 0.8214, + "step": 9158 + }, + { + "epoch": 0.75, + "grad_norm": 4.486699756452967, + "learning_rate": 1.5674725314805955e-06, + "loss": 1.3609, + "step": 9159 + }, + { + "epoch": 0.75, + "grad_norm": 4.165946902347504, + "learning_rate": 1.5665100993961358e-06, + "loss": 0.9671, + "step": 9160 + }, + { + "epoch": 0.75, + "grad_norm": 5.834807390806405, + "learning_rate": 1.565547907984376e-06, + "loss": 1.0393, + "step": 9161 + }, + { + "epoch": 0.75, + "grad_norm": 5.4886776264223816, + "learning_rate": 1.564585957312762e-06, + "loss": 1.4576, + "step": 9162 + }, + { + "epoch": 0.75, + "grad_norm": 4.392651734475036, + "learning_rate": 1.5636242474487207e-06, + "loss": 0.5364, + "step": 9163 + }, + { + "epoch": 0.75, + "grad_norm": 5.270991036499043, + "learning_rate": 1.5626627784596638e-06, + "loss": 1.3793, + "step": 9164 + }, + { + "epoch": 0.75, + "grad_norm": 2.2656349137900924, + "learning_rate": 1.5617015504129867e-06, + "loss": 0.3967, + "step": 9165 + }, + { + "epoch": 0.75, + "grad_norm": 4.42978305101965, + "learning_rate": 1.560740563376069e-06, + "loss": 0.7694, + "step": 9166 + }, + { + "epoch": 0.75, + "grad_norm": 6.110916215287073, + "learning_rate": 1.5597798174162693e-06, + "loss": 0.8729, + "step": 9167 + }, + { + "epoch": 0.75, + "grad_norm": 3.143223280894732, + "learning_rate": 1.5588193126009332e-06, + "loss": 0.587, + "step": 9168 + }, + { + "epoch": 0.75, + "grad_norm": 4.703192537288563, + "learning_rate": 1.557859048997386e-06, + "loss": 1.0459, + "step": 9169 + }, + { + "epoch": 0.75, + "grad_norm": 3.171765841636195, + "learning_rate": 1.5568990266729394e-06, + "loss": 0.4245, + "step": 9170 + }, + { + "epoch": 0.75, + "grad_norm": 4.923832562043842, + "learning_rate": 1.5559392456948863e-06, + "loss": 1.0203, + "step": 9171 + }, + { + "epoch": 0.75, + "grad_norm": 3.2433702013284265, + "learning_rate": 1.5549797061305039e-06, + "loss": 0.7897, + "step": 9172 + }, + { + "epoch": 0.75, + "grad_norm": 4.151892778553473, + "learning_rate": 1.5540204080470512e-06, + "loss": 1.1794, + "step": 9173 + }, + { + "epoch": 0.75, + "grad_norm": 3.868456145903217, + "learning_rate": 1.5530613515117721e-06, + "loss": 0.4656, + "step": 9174 + }, + { + "epoch": 0.75, + "grad_norm": 2.6448226324506305, + "learning_rate": 1.5521025365918895e-06, + "loss": 0.4984, + "step": 9175 + }, + { + "epoch": 0.75, + "grad_norm": 4.23419139243431, + "learning_rate": 1.5511439633546143e-06, + "loss": 0.5805, + "step": 9176 + }, + { + "epoch": 0.75, + "grad_norm": 3.0798081955034866, + "learning_rate": 1.5501856318671376e-06, + "loss": 0.6739, + "step": 9177 + }, + { + "epoch": 0.75, + "grad_norm": 4.303341304803665, + "learning_rate": 1.5492275421966346e-06, + "loss": 0.9649, + "step": 9178 + }, + { + "epoch": 0.75, + "grad_norm": 3.2423816550283253, + "learning_rate": 1.5482696944102643e-06, + "loss": 0.7856, + "step": 9179 + }, + { + "epoch": 0.75, + "grad_norm": 3.566822051382423, + "learning_rate": 1.5473120885751652e-06, + "loss": 0.7349, + "step": 9180 + }, + { + "epoch": 0.75, + "grad_norm": 4.187762857595358, + "learning_rate": 1.5463547247584621e-06, + "loss": 0.6959, + "step": 9181 + }, + { + "epoch": 0.75, + "grad_norm": 2.9031055664051526, + "learning_rate": 1.5453976030272645e-06, + "loss": 0.7711, + "step": 9182 + }, + { + "epoch": 0.75, + "grad_norm": 3.1449051622626314, + "learning_rate": 1.5444407234486585e-06, + "loss": 0.5387, + "step": 9183 + }, + { + "epoch": 0.75, + "grad_norm": 4.46496397396142, + "learning_rate": 1.5434840860897194e-06, + "loss": 1.0059, + "step": 9184 + }, + { + "epoch": 0.75, + "grad_norm": 4.073251346906123, + "learning_rate": 1.5425276910175046e-06, + "loss": 0.9449, + "step": 9185 + }, + { + "epoch": 0.75, + "grad_norm": 2.155958714244286, + "learning_rate": 1.5415715382990504e-06, + "loss": 0.427, + "step": 9186 + }, + { + "epoch": 0.75, + "grad_norm": 2.203303357750263, + "learning_rate": 1.54061562800138e-06, + "loss": 0.3593, + "step": 9187 + }, + { + "epoch": 0.75, + "grad_norm": 4.2183995353328285, + "learning_rate": 1.5396599601914986e-06, + "loss": 0.5619, + "step": 9188 + }, + { + "epoch": 0.75, + "grad_norm": 3.199122158242305, + "learning_rate": 1.5387045349363948e-06, + "loss": 0.6847, + "step": 9189 + }, + { + "epoch": 0.75, + "grad_norm": 3.7695706975246277, + "learning_rate": 1.537749352303039e-06, + "loss": 0.7021, + "step": 9190 + }, + { + "epoch": 0.75, + "grad_norm": 3.6356070296975225, + "learning_rate": 1.5367944123583884e-06, + "loss": 0.466, + "step": 9191 + }, + { + "epoch": 0.75, + "grad_norm": 4.199184907548104, + "learning_rate": 1.535839715169375e-06, + "loss": 0.7087, + "step": 9192 + }, + { + "epoch": 0.75, + "grad_norm": 4.531727412732683, + "learning_rate": 1.5348852608029218e-06, + "loss": 0.841, + "step": 9193 + }, + { + "epoch": 0.75, + "grad_norm": 3.345802142439073, + "learning_rate": 1.5339310493259318e-06, + "loss": 0.5765, + "step": 9194 + }, + { + "epoch": 0.75, + "grad_norm": 3.1223332428130117, + "learning_rate": 1.5329770808052908e-06, + "loss": 0.603, + "step": 9195 + }, + { + "epoch": 0.75, + "grad_norm": 2.8725909950301562, + "learning_rate": 1.5320233553078694e-06, + "loss": 0.2948, + "step": 9196 + }, + { + "epoch": 0.75, + "grad_norm": 2.6803796890064913, + "learning_rate": 1.5310698729005163e-06, + "loss": 0.3243, + "step": 9197 + }, + { + "epoch": 0.75, + "grad_norm": 3.310145402547282, + "learning_rate": 1.5301166336500701e-06, + "loss": 0.7332, + "step": 9198 + }, + { + "epoch": 0.75, + "grad_norm": 3.252048882124109, + "learning_rate": 1.5291636376233453e-06, + "loss": 0.5481, + "step": 9199 + }, + { + "epoch": 0.75, + "grad_norm": 2.9315753599609495, + "learning_rate": 1.5282108848871445e-06, + "loss": 0.614, + "step": 9200 + }, + { + "epoch": 0.75, + "grad_norm": 3.032979560741048, + "learning_rate": 1.5272583755082516e-06, + "loss": 0.7907, + "step": 9201 + }, + { + "epoch": 0.75, + "grad_norm": 1.2516871167184147, + "learning_rate": 1.5263061095534343e-06, + "loss": 0.1925, + "step": 9202 + }, + { + "epoch": 0.75, + "grad_norm": 3.881963340392816, + "learning_rate": 1.5253540870894395e-06, + "loss": 0.7426, + "step": 9203 + }, + { + "epoch": 0.75, + "grad_norm": 3.977219673175557, + "learning_rate": 1.5244023081830018e-06, + "loss": 0.7551, + "step": 9204 + }, + { + "epoch": 0.75, + "grad_norm": 2.988781431535191, + "learning_rate": 1.5234507729008363e-06, + "loss": 0.4727, + "step": 9205 + }, + { + "epoch": 0.75, + "grad_norm": 3.142425806367947, + "learning_rate": 1.5224994813096417e-06, + "loss": 0.6021, + "step": 9206 + }, + { + "epoch": 0.75, + "grad_norm": 4.4882982513011305, + "learning_rate": 1.5215484334760988e-06, + "loss": 0.9421, + "step": 9207 + }, + { + "epoch": 0.75, + "grad_norm": 5.227574799732071, + "learning_rate": 1.5205976294668745e-06, + "loss": 0.8865, + "step": 9208 + }, + { + "epoch": 0.75, + "grad_norm": 2.294329145390786, + "learning_rate": 1.5196470693486127e-06, + "loss": 0.3568, + "step": 9209 + }, + { + "epoch": 0.75, + "grad_norm": 2.3143291348855994, + "learning_rate": 1.5186967531879443e-06, + "loss": 0.3798, + "step": 9210 + }, + { + "epoch": 0.75, + "grad_norm": 6.1684971311613435, + "learning_rate": 1.517746681051483e-06, + "loss": 1.1759, + "step": 9211 + }, + { + "epoch": 0.75, + "grad_norm": 4.807693670268817, + "learning_rate": 1.5167968530058263e-06, + "loss": 1.0803, + "step": 9212 + }, + { + "epoch": 0.75, + "grad_norm": 4.06193247946862, + "learning_rate": 1.5158472691175491e-06, + "loss": 0.9869, + "step": 9213 + }, + { + "epoch": 0.75, + "grad_norm": 4.603334917657383, + "learning_rate": 1.5148979294532157e-06, + "loss": 0.8715, + "step": 9214 + }, + { + "epoch": 0.75, + "grad_norm": 4.506355503448254, + "learning_rate": 1.5139488340793718e-06, + "loss": 0.8412, + "step": 9215 + }, + { + "epoch": 0.75, + "grad_norm": 3.3900375758386714, + "learning_rate": 1.512999983062542e-06, + "loss": 0.4941, + "step": 9216 + }, + { + "epoch": 0.75, + "grad_norm": 3.615903306345567, + "learning_rate": 1.5120513764692373e-06, + "loss": 0.6802, + "step": 9217 + }, + { + "epoch": 0.75, + "grad_norm": 4.350467604248198, + "learning_rate": 1.5111030143659516e-06, + "loss": 0.5037, + "step": 9218 + }, + { + "epoch": 0.75, + "grad_norm": 4.471453585618678, + "learning_rate": 1.5101548968191626e-06, + "loss": 0.6427, + "step": 9219 + }, + { + "epoch": 0.75, + "grad_norm": 3.454923873453871, + "learning_rate": 1.509207023895326e-06, + "loss": 1.0145, + "step": 9220 + }, + { + "epoch": 0.75, + "grad_norm": 2.56444356398917, + "learning_rate": 1.5082593956608848e-06, + "loss": 0.3001, + "step": 9221 + }, + { + "epoch": 0.75, + "grad_norm": 3.721004611228858, + "learning_rate": 1.5073120121822642e-06, + "loss": 0.9185, + "step": 9222 + }, + { + "epoch": 0.75, + "grad_norm": 2.0366899485017664, + "learning_rate": 1.5063648735258713e-06, + "loss": 0.3418, + "step": 9223 + }, + { + "epoch": 0.75, + "grad_norm": 3.1813444894396907, + "learning_rate": 1.5054179797580959e-06, + "loss": 0.3646, + "step": 9224 + }, + { + "epoch": 0.75, + "grad_norm": 4.625454279907865, + "learning_rate": 1.5044713309453135e-06, + "loss": 0.9152, + "step": 9225 + }, + { + "epoch": 0.75, + "grad_norm": 2.708317774378133, + "learning_rate": 1.5035249271538766e-06, + "loss": 0.5606, + "step": 9226 + }, + { + "epoch": 0.75, + "grad_norm": 5.811622971525766, + "learning_rate": 1.5025787684501259e-06, + "loss": 1.402, + "step": 9227 + }, + { + "epoch": 0.75, + "grad_norm": 3.1435508201419733, + "learning_rate": 1.5016328549003822e-06, + "loss": 0.5919, + "step": 9228 + }, + { + "epoch": 0.75, + "grad_norm": 3.422794846371174, + "learning_rate": 1.5006871865709527e-06, + "loss": 0.9311, + "step": 9229 + }, + { + "epoch": 0.75, + "grad_norm": 3.5595881510978926, + "learning_rate": 1.4997417635281204e-06, + "loss": 0.5799, + "step": 9230 + }, + { + "epoch": 0.75, + "grad_norm": 3.0844228003472995, + "learning_rate": 1.4987965858381587e-06, + "loss": 0.4529, + "step": 9231 + }, + { + "epoch": 0.75, + "grad_norm": 3.2559439791737455, + "learning_rate": 1.4978516535673176e-06, + "loss": 0.4913, + "step": 9232 + }, + { + "epoch": 0.75, + "grad_norm": 3.7266898885025044, + "learning_rate": 1.4969069667818342e-06, + "loss": 1.016, + "step": 9233 + }, + { + "epoch": 0.75, + "grad_norm": 1.6652470891476823, + "learning_rate": 1.495962525547927e-06, + "loss": 0.3189, + "step": 9234 + }, + { + "epoch": 0.75, + "grad_norm": 3.8723780905297405, + "learning_rate": 1.4950183299317972e-06, + "loss": 0.5337, + "step": 9235 + }, + { + "epoch": 0.75, + "grad_norm": 3.299827963645423, + "learning_rate": 1.4940743799996282e-06, + "loss": 0.8833, + "step": 9236 + }, + { + "epoch": 0.75, + "grad_norm": 1.4783504472332694, + "learning_rate": 1.4931306758175896e-06, + "loss": 0.1693, + "step": 9237 + }, + { + "epoch": 0.76, + "grad_norm": 1.9569216401301468, + "learning_rate": 1.4921872174518264e-06, + "loss": 0.3565, + "step": 9238 + }, + { + "epoch": 0.76, + "grad_norm": 3.9978458167392135, + "learning_rate": 1.491244004968474e-06, + "loss": 0.9464, + "step": 9239 + }, + { + "epoch": 0.76, + "grad_norm": 3.694128103080308, + "learning_rate": 1.4903010384336465e-06, + "loss": 0.4128, + "step": 9240 + }, + { + "epoch": 0.76, + "grad_norm": 3.4706648723140625, + "learning_rate": 1.4893583179134414e-06, + "loss": 0.568, + "step": 9241 + }, + { + "epoch": 0.76, + "grad_norm": 5.648688088377756, + "learning_rate": 1.488415843473942e-06, + "loss": 1.097, + "step": 9242 + }, + { + "epoch": 0.76, + "grad_norm": 4.266772854792492, + "learning_rate": 1.4874736151812075e-06, + "loss": 1.2822, + "step": 9243 + }, + { + "epoch": 0.76, + "grad_norm": 4.411746530543385, + "learning_rate": 1.4865316331012862e-06, + "loss": 0.8683, + "step": 9244 + }, + { + "epoch": 0.76, + "grad_norm": 3.772661039761835, + "learning_rate": 1.4855898973002087e-06, + "loss": 0.5959, + "step": 9245 + }, + { + "epoch": 0.76, + "grad_norm": 3.814719328692366, + "learning_rate": 1.484648407843982e-06, + "loss": 0.5103, + "step": 9246 + }, + { + "epoch": 0.76, + "grad_norm": 3.507064544108054, + "learning_rate": 1.483707164798604e-06, + "loss": 0.6051, + "step": 9247 + }, + { + "epoch": 0.76, + "grad_norm": 4.202915654401037, + "learning_rate": 1.4827661682300521e-06, + "loss": 1.0418, + "step": 9248 + }, + { + "epoch": 0.76, + "grad_norm": 3.469431937363606, + "learning_rate": 1.4818254182042834e-06, + "loss": 0.6728, + "step": 9249 + }, + { + "epoch": 0.76, + "grad_norm": 2.4053054748930736, + "learning_rate": 1.4808849147872417e-06, + "loss": 0.4348, + "step": 9250 + }, + { + "epoch": 0.76, + "grad_norm": 2.7957141907992247, + "learning_rate": 1.4799446580448517e-06, + "loss": 0.597, + "step": 9251 + }, + { + "epoch": 0.76, + "grad_norm": 4.380756083812477, + "learning_rate": 1.4790046480430226e-06, + "loss": 0.7895, + "step": 9252 + }, + { + "epoch": 0.76, + "grad_norm": 3.542878811181817, + "learning_rate": 1.4780648848476436e-06, + "loss": 0.7149, + "step": 9253 + }, + { + "epoch": 0.76, + "grad_norm": 2.6472417985590964, + "learning_rate": 1.4771253685245907e-06, + "loss": 0.3477, + "step": 9254 + }, + { + "epoch": 0.76, + "grad_norm": 2.8230679422187395, + "learning_rate": 1.476186099139716e-06, + "loss": 0.4883, + "step": 9255 + }, + { + "epoch": 0.76, + "grad_norm": 2.998509200929273, + "learning_rate": 1.47524707675886e-06, + "loss": 0.4907, + "step": 9256 + }, + { + "epoch": 0.76, + "grad_norm": 5.727298671205657, + "learning_rate": 1.4743083014478443e-06, + "loss": 1.0115, + "step": 9257 + }, + { + "epoch": 0.76, + "grad_norm": 4.258019229535348, + "learning_rate": 1.4733697732724728e-06, + "loss": 0.8451, + "step": 9258 + }, + { + "epoch": 0.76, + "grad_norm": 3.0894962139530087, + "learning_rate": 1.472431492298534e-06, + "loss": 0.7153, + "step": 9259 + }, + { + "epoch": 0.76, + "grad_norm": 3.140265764565909, + "learning_rate": 1.4714934585917933e-06, + "loss": 0.5697, + "step": 9260 + }, + { + "epoch": 0.76, + "grad_norm": 4.008713959072589, + "learning_rate": 1.4705556722180075e-06, + "loss": 0.6259, + "step": 9261 + }, + { + "epoch": 0.76, + "grad_norm": 5.148159226128686, + "learning_rate": 1.4696181332429065e-06, + "loss": 1.1498, + "step": 9262 + }, + { + "epoch": 0.76, + "grad_norm": 4.23851846708833, + "learning_rate": 1.4686808417322107e-06, + "loss": 0.845, + "step": 9263 + }, + { + "epoch": 0.76, + "grad_norm": 2.2197524105620805, + "learning_rate": 1.4677437977516197e-06, + "loss": 0.5212, + "step": 9264 + }, + { + "epoch": 0.76, + "grad_norm": 5.469659448279783, + "learning_rate": 1.4668070013668173e-06, + "loss": 0.9465, + "step": 9265 + }, + { + "epoch": 0.76, + "grad_norm": 3.8821891357359815, + "learning_rate": 1.465870452643466e-06, + "loss": 0.8122, + "step": 9266 + }, + { + "epoch": 0.76, + "grad_norm": 4.459784063330109, + "learning_rate": 1.464934151647215e-06, + "loss": 0.9195, + "step": 9267 + }, + { + "epoch": 0.76, + "grad_norm": 3.3430373100392607, + "learning_rate": 1.4639980984436957e-06, + "loss": 0.5985, + "step": 9268 + }, + { + "epoch": 0.76, + "grad_norm": 3.594680733843876, + "learning_rate": 1.463062293098521e-06, + "loss": 0.5093, + "step": 9269 + }, + { + "epoch": 0.76, + "grad_norm": 4.152942092158016, + "learning_rate": 1.4621267356772867e-06, + "loss": 0.7401, + "step": 9270 + }, + { + "epoch": 0.76, + "grad_norm": 4.643754547322705, + "learning_rate": 1.461191426245573e-06, + "loss": 0.7576, + "step": 9271 + }, + { + "epoch": 0.76, + "grad_norm": 1.4539657131529113, + "learning_rate": 1.4602563648689378e-06, + "loss": 0.1889, + "step": 9272 + }, + { + "epoch": 0.76, + "grad_norm": 6.884785121400726, + "learning_rate": 1.459321551612926e-06, + "loss": 1.0614, + "step": 9273 + }, + { + "epoch": 0.76, + "grad_norm": 2.28683900158588, + "learning_rate": 1.458386986543065e-06, + "loss": 0.3655, + "step": 9274 + }, + { + "epoch": 0.76, + "grad_norm": 4.380924904423539, + "learning_rate": 1.4574526697248643e-06, + "loss": 0.8172, + "step": 9275 + }, + { + "epoch": 0.76, + "grad_norm": 3.336139561522197, + "learning_rate": 1.4565186012238126e-06, + "loss": 0.4901, + "step": 9276 + }, + { + "epoch": 0.76, + "grad_norm": 5.045062288953997, + "learning_rate": 1.4555847811053875e-06, + "loss": 0.9607, + "step": 9277 + }, + { + "epoch": 0.76, + "grad_norm": 3.236958797578773, + "learning_rate": 1.4546512094350424e-06, + "loss": 0.7349, + "step": 9278 + }, + { + "epoch": 0.76, + "grad_norm": 3.9752286428550136, + "learning_rate": 1.4537178862782175e-06, + "loss": 0.7118, + "step": 9279 + }, + { + "epoch": 0.76, + "grad_norm": 3.75898198448468, + "learning_rate": 1.4527848117003357e-06, + "loss": 1.1713, + "step": 9280 + }, + { + "epoch": 0.76, + "grad_norm": 4.520741778373616, + "learning_rate": 1.4518519857668012e-06, + "loss": 0.8197, + "step": 9281 + }, + { + "epoch": 0.76, + "grad_norm": 3.377246753506351, + "learning_rate": 1.4509194085430024e-06, + "loss": 0.7396, + "step": 9282 + }, + { + "epoch": 0.76, + "grad_norm": 4.599893595646473, + "learning_rate": 1.4499870800943055e-06, + "loss": 0.4715, + "step": 9283 + }, + { + "epoch": 0.76, + "grad_norm": 5.1112529821371355, + "learning_rate": 1.4490550004860655e-06, + "loss": 1.0465, + "step": 9284 + }, + { + "epoch": 0.76, + "grad_norm": 3.127834502096852, + "learning_rate": 1.4481231697836152e-06, + "loss": 0.6399, + "step": 9285 + }, + { + "epoch": 0.76, + "grad_norm": 1.5725140266229545, + "learning_rate": 1.447191588052273e-06, + "loss": 0.1532, + "step": 9286 + }, + { + "epoch": 0.76, + "grad_norm": 4.859033873850176, + "learning_rate": 1.446260255357339e-06, + "loss": 1.1814, + "step": 9287 + }, + { + "epoch": 0.76, + "grad_norm": 1.951917050503969, + "learning_rate": 1.4453291717640966e-06, + "loss": 0.2644, + "step": 9288 + }, + { + "epoch": 0.76, + "grad_norm": 3.300478741110975, + "learning_rate": 1.4443983373378078e-06, + "loss": 0.4078, + "step": 9289 + }, + { + "epoch": 0.76, + "grad_norm": 3.5342038298764753, + "learning_rate": 1.4434677521437213e-06, + "loss": 0.4432, + "step": 9290 + }, + { + "epoch": 0.76, + "grad_norm": 4.663422152939918, + "learning_rate": 1.442537416247069e-06, + "loss": 0.9114, + "step": 9291 + }, + { + "epoch": 0.76, + "grad_norm": 3.431581004164061, + "learning_rate": 1.44160732971306e-06, + "loss": 0.7277, + "step": 9292 + }, + { + "epoch": 0.76, + "grad_norm": 3.913245516633208, + "learning_rate": 1.4406774926068912e-06, + "loss": 1.0555, + "step": 9293 + }, + { + "epoch": 0.76, + "grad_norm": 3.899105723703307, + "learning_rate": 1.4397479049937413e-06, + "loss": 0.6237, + "step": 9294 + }, + { + "epoch": 0.76, + "grad_norm": 3.7982967597812856, + "learning_rate": 1.4388185669387678e-06, + "loss": 0.79, + "step": 9295 + }, + { + "epoch": 0.76, + "grad_norm": 4.478167377412812, + "learning_rate": 1.437889478507114e-06, + "loss": 0.8015, + "step": 9296 + }, + { + "epoch": 0.76, + "grad_norm": 5.683330777135807, + "learning_rate": 1.4369606397639058e-06, + "loss": 0.8996, + "step": 9297 + }, + { + "epoch": 0.76, + "grad_norm": 2.0947825710626913, + "learning_rate": 1.4360320507742503e-06, + "loss": 0.3233, + "step": 9298 + }, + { + "epoch": 0.76, + "grad_norm": 3.9859594539972947, + "learning_rate": 1.4351037116032391e-06, + "loss": 0.8938, + "step": 9299 + }, + { + "epoch": 0.76, + "grad_norm": 2.531021143798802, + "learning_rate": 1.4341756223159414e-06, + "loss": 0.2781, + "step": 9300 + }, + { + "epoch": 0.76, + "grad_norm": 2.355153049487024, + "learning_rate": 1.4332477829774144e-06, + "loss": 0.3262, + "step": 9301 + }, + { + "epoch": 0.76, + "grad_norm": 5.000948490375274, + "learning_rate": 1.432320193652695e-06, + "loss": 0.7266, + "step": 9302 + }, + { + "epoch": 0.76, + "grad_norm": 5.299111288992087, + "learning_rate": 1.4313928544068033e-06, + "loss": 0.7665, + "step": 9303 + }, + { + "epoch": 0.76, + "grad_norm": 3.5906372535420803, + "learning_rate": 1.430465765304742e-06, + "loss": 0.7717, + "step": 9304 + }, + { + "epoch": 0.76, + "grad_norm": 4.570204564671903, + "learning_rate": 1.429538926411498e-06, + "loss": 0.8141, + "step": 9305 + }, + { + "epoch": 0.76, + "grad_norm": 4.044477247782365, + "learning_rate": 1.4286123377920342e-06, + "loss": 1.0105, + "step": 9306 + }, + { + "epoch": 0.76, + "grad_norm": 4.814453038391482, + "learning_rate": 1.4276859995113047e-06, + "loss": 1.3584, + "step": 9307 + }, + { + "epoch": 0.76, + "grad_norm": 3.5677901090269843, + "learning_rate": 1.4267599116342384e-06, + "loss": 0.7221, + "step": 9308 + }, + { + "epoch": 0.76, + "grad_norm": 2.9836186775557545, + "learning_rate": 1.4258340742257516e-06, + "loss": 0.5854, + "step": 9309 + }, + { + "epoch": 0.76, + "grad_norm": 4.789849731644247, + "learning_rate": 1.4249084873507412e-06, + "loss": 1.1888, + "step": 9310 + }, + { + "epoch": 0.76, + "grad_norm": 3.761888111510434, + "learning_rate": 1.423983151074088e-06, + "loss": 0.5422, + "step": 9311 + }, + { + "epoch": 0.76, + "grad_norm": 5.060486241860016, + "learning_rate": 1.4230580654606523e-06, + "loss": 1.1927, + "step": 9312 + }, + { + "epoch": 0.76, + "grad_norm": 5.9496662520122205, + "learning_rate": 1.422133230575279e-06, + "loss": 1.3018, + "step": 9313 + }, + { + "epoch": 0.76, + "grad_norm": 4.520326361871921, + "learning_rate": 1.4212086464827957e-06, + "loss": 0.9233, + "step": 9314 + }, + { + "epoch": 0.76, + "grad_norm": 6.280643679190604, + "learning_rate": 1.420284313248011e-06, + "loss": 1.4922, + "step": 9315 + }, + { + "epoch": 0.76, + "grad_norm": 4.251869664324042, + "learning_rate": 1.419360230935717e-06, + "loss": 0.6898, + "step": 9316 + }, + { + "epoch": 0.76, + "grad_norm": 3.65519446900931, + "learning_rate": 1.4184363996106888e-06, + "loss": 1.1632, + "step": 9317 + }, + { + "epoch": 0.76, + "grad_norm": 4.138380698480845, + "learning_rate": 1.417512819337681e-06, + "loss": 0.9813, + "step": 9318 + }, + { + "epoch": 0.76, + "grad_norm": 3.404739869469795, + "learning_rate": 1.4165894901814337e-06, + "loss": 0.8594, + "step": 9319 + }, + { + "epoch": 0.76, + "grad_norm": 2.8786235118438714, + "learning_rate": 1.4156664122066678e-06, + "loss": 0.4921, + "step": 9320 + }, + { + "epoch": 0.76, + "grad_norm": 4.155279089720612, + "learning_rate": 1.414743585478089e-06, + "loss": 1.0607, + "step": 9321 + }, + { + "epoch": 0.76, + "grad_norm": 2.7988268307826685, + "learning_rate": 1.41382101006038e-06, + "loss": 0.3708, + "step": 9322 + }, + { + "epoch": 0.76, + "grad_norm": 1.0743694371725223, + "learning_rate": 1.412898686018211e-06, + "loss": 0.1034, + "step": 9323 + }, + { + "epoch": 0.76, + "grad_norm": 2.3399958967063275, + "learning_rate": 1.411976613416235e-06, + "loss": 0.3909, + "step": 9324 + }, + { + "epoch": 0.76, + "grad_norm": 4.74287426460022, + "learning_rate": 1.4110547923190816e-06, + "loss": 0.962, + "step": 9325 + }, + { + "epoch": 0.76, + "grad_norm": 4.6131435057312045, + "learning_rate": 1.4101332227913677e-06, + "loss": 0.7313, + "step": 9326 + }, + { + "epoch": 0.76, + "grad_norm": 4.610089113972206, + "learning_rate": 1.409211904897692e-06, + "loss": 0.8497, + "step": 9327 + }, + { + "epoch": 0.76, + "grad_norm": 5.298292588477315, + "learning_rate": 1.4082908387026362e-06, + "loss": 0.9706, + "step": 9328 + }, + { + "epoch": 0.76, + "grad_norm": 5.271103752048412, + "learning_rate": 1.40737002427076e-06, + "loss": 1.0153, + "step": 9329 + }, + { + "epoch": 0.76, + "grad_norm": 3.1823134658759185, + "learning_rate": 1.4064494616666096e-06, + "loss": 0.4829, + "step": 9330 + }, + { + "epoch": 0.76, + "grad_norm": 2.532047882558935, + "learning_rate": 1.405529150954713e-06, + "loss": 0.2629, + "step": 9331 + }, + { + "epoch": 0.76, + "grad_norm": 4.042392721562254, + "learning_rate": 1.4046090921995798e-06, + "loss": 0.6231, + "step": 9332 + }, + { + "epoch": 0.76, + "grad_norm": 4.070483187386631, + "learning_rate": 1.4036892854657019e-06, + "loss": 0.7259, + "step": 9333 + }, + { + "epoch": 0.76, + "grad_norm": 3.1113878681312666, + "learning_rate": 1.4027697308175554e-06, + "loss": 0.4745, + "step": 9334 + }, + { + "epoch": 0.76, + "grad_norm": 4.709747769472877, + "learning_rate": 1.4018504283195938e-06, + "loss": 1.1229, + "step": 9335 + }, + { + "epoch": 0.76, + "grad_norm": 1.9946288181004355, + "learning_rate": 1.4009313780362582e-06, + "loss": 0.3971, + "step": 9336 + }, + { + "epoch": 0.76, + "grad_norm": 4.925474563921446, + "learning_rate": 1.4000125800319702e-06, + "loss": 0.9125, + "step": 9337 + }, + { + "epoch": 0.76, + "grad_norm": 2.7557709471588145, + "learning_rate": 1.399094034371134e-06, + "loss": 0.5966, + "step": 9338 + }, + { + "epoch": 0.76, + "grad_norm": 4.1324359061760685, + "learning_rate": 1.398175741118134e-06, + "loss": 0.8441, + "step": 9339 + }, + { + "epoch": 0.76, + "grad_norm": 3.233867463157931, + "learning_rate": 1.3972577003373406e-06, + "loss": 0.6437, + "step": 9340 + }, + { + "epoch": 0.76, + "grad_norm": 3.818645870947484, + "learning_rate": 1.3963399120931014e-06, + "loss": 0.5644, + "step": 9341 + }, + { + "epoch": 0.76, + "grad_norm": 5.272791124987817, + "learning_rate": 1.395422376449751e-06, + "loss": 0.9272, + "step": 9342 + }, + { + "epoch": 0.76, + "grad_norm": 4.315108114203056, + "learning_rate": 1.3945050934716054e-06, + "loss": 0.9911, + "step": 9343 + }, + { + "epoch": 0.76, + "grad_norm": 5.992811384045854, + "learning_rate": 1.3935880632229614e-06, + "loss": 1.4808, + "step": 9344 + }, + { + "epoch": 0.76, + "grad_norm": 5.115425303865779, + "learning_rate": 1.3926712857681002e-06, + "loss": 1.2953, + "step": 9345 + }, + { + "epoch": 0.76, + "grad_norm": 0.7229788244527938, + "learning_rate": 1.3917547611712818e-06, + "loss": 0.1135, + "step": 9346 + }, + { + "epoch": 0.76, + "grad_norm": 3.296847285183059, + "learning_rate": 1.3908384894967514e-06, + "loss": 0.4233, + "step": 9347 + }, + { + "epoch": 0.76, + "grad_norm": 4.8014559482617996, + "learning_rate": 1.3899224708087356e-06, + "loss": 0.7937, + "step": 9348 + }, + { + "epoch": 0.76, + "grad_norm": 2.5560120219233435, + "learning_rate": 1.3890067051714435e-06, + "loss": 0.5396, + "step": 9349 + }, + { + "epoch": 0.76, + "grad_norm": 2.245313196667449, + "learning_rate": 1.3880911926490658e-06, + "loss": 0.4887, + "step": 9350 + }, + { + "epoch": 0.76, + "grad_norm": 3.983509551692768, + "learning_rate": 1.3871759333057783e-06, + "loss": 0.7939, + "step": 9351 + }, + { + "epoch": 0.76, + "grad_norm": 3.5050686399806574, + "learning_rate": 1.3862609272057337e-06, + "loss": 0.7862, + "step": 9352 + }, + { + "epoch": 0.76, + "grad_norm": 4.121032184419712, + "learning_rate": 1.3853461744130703e-06, + "loss": 0.4938, + "step": 9353 + }, + { + "epoch": 0.76, + "grad_norm": 3.9763811056618734, + "learning_rate": 1.3844316749919113e-06, + "loss": 1.0544, + "step": 9354 + }, + { + "epoch": 0.76, + "grad_norm": 4.587943342699081, + "learning_rate": 1.3835174290063553e-06, + "loss": 1.1476, + "step": 9355 + }, + { + "epoch": 0.76, + "grad_norm": 3.1870902498506797, + "learning_rate": 1.3826034365204876e-06, + "loss": 0.6916, + "step": 9356 + }, + { + "epoch": 0.76, + "grad_norm": 4.174244088434585, + "learning_rate": 1.3816896975983784e-06, + "loss": 0.7123, + "step": 9357 + }, + { + "epoch": 0.76, + "grad_norm": 3.739528189373238, + "learning_rate": 1.380776212304073e-06, + "loss": 0.7124, + "step": 9358 + }, + { + "epoch": 0.76, + "grad_norm": 3.591639188404987, + "learning_rate": 1.379862980701604e-06, + "loss": 0.5921, + "step": 9359 + }, + { + "epoch": 0.77, + "grad_norm": 3.73035731361834, + "learning_rate": 1.378950002854985e-06, + "loss": 0.6594, + "step": 9360 + }, + { + "epoch": 0.77, + "grad_norm": 4.607527610861995, + "learning_rate": 1.378037278828212e-06, + "loss": 0.8706, + "step": 9361 + }, + { + "epoch": 0.77, + "grad_norm": 5.870706821427959, + "learning_rate": 1.3771248086852646e-06, + "loss": 1.1595, + "step": 9362 + }, + { + "epoch": 0.77, + "grad_norm": 4.951566000798771, + "learning_rate": 1.3762125924900998e-06, + "loss": 1.2864, + "step": 9363 + }, + { + "epoch": 0.77, + "grad_norm": 1.0097441103001916, + "learning_rate": 1.3753006303066612e-06, + "loss": 0.1232, + "step": 9364 + }, + { + "epoch": 0.77, + "grad_norm": 5.441261479938119, + "learning_rate": 1.3743889221988744e-06, + "loss": 0.895, + "step": 9365 + }, + { + "epoch": 0.77, + "grad_norm": 2.9012789096869036, + "learning_rate": 1.3734774682306446e-06, + "loss": 0.515, + "step": 9366 + }, + { + "epoch": 0.77, + "grad_norm": 4.148929030640898, + "learning_rate": 1.372566268465862e-06, + "loss": 0.7201, + "step": 9367 + }, + { + "epoch": 0.77, + "grad_norm": 4.385343386382513, + "learning_rate": 1.3716553229683989e-06, + "loss": 0.726, + "step": 9368 + }, + { + "epoch": 0.77, + "grad_norm": 4.111237908195454, + "learning_rate": 1.3707446318021051e-06, + "loss": 0.8248, + "step": 9369 + }, + { + "epoch": 0.77, + "grad_norm": 3.449998081835322, + "learning_rate": 1.3698341950308198e-06, + "loss": 0.7886, + "step": 9370 + }, + { + "epoch": 0.77, + "grad_norm": 4.300561376325745, + "learning_rate": 1.3689240127183572e-06, + "loss": 0.8067, + "step": 9371 + }, + { + "epoch": 0.77, + "grad_norm": 4.250771097655397, + "learning_rate": 1.3680140849285196e-06, + "loss": 0.779, + "step": 9372 + }, + { + "epoch": 0.77, + "grad_norm": 1.5043779709019471, + "learning_rate": 1.3671044117250875e-06, + "loss": 0.2278, + "step": 9373 + }, + { + "epoch": 0.77, + "grad_norm": 3.6883155849475115, + "learning_rate": 1.366194993171827e-06, + "loss": 0.4736, + "step": 9374 + }, + { + "epoch": 0.77, + "grad_norm": 3.793560869027931, + "learning_rate": 1.3652858293324823e-06, + "loss": 0.9813, + "step": 9375 + }, + { + "epoch": 0.77, + "grad_norm": 2.328732050350097, + "learning_rate": 1.3643769202707824e-06, + "loss": 0.2956, + "step": 9376 + }, + { + "epoch": 0.77, + "grad_norm": 4.89171571364176, + "learning_rate": 1.3634682660504379e-06, + "loss": 1.0947, + "step": 9377 + }, + { + "epoch": 0.77, + "grad_norm": 3.4013676591947335, + "learning_rate": 1.362559866735142e-06, + "loss": 0.7116, + "step": 9378 + }, + { + "epoch": 0.77, + "grad_norm": 6.549481225715749, + "learning_rate": 1.3616517223885707e-06, + "loss": 0.8947, + "step": 9379 + }, + { + "epoch": 0.77, + "grad_norm": 3.999966242113525, + "learning_rate": 1.3607438330743778e-06, + "loss": 0.8584, + "step": 9380 + }, + { + "epoch": 0.77, + "grad_norm": 4.9561831618363765, + "learning_rate": 1.3598361988562037e-06, + "loss": 0.8152, + "step": 9381 + }, + { + "epoch": 0.77, + "grad_norm": 4.874434061208297, + "learning_rate": 1.3589288197976707e-06, + "loss": 0.6809, + "step": 9382 + }, + { + "epoch": 0.77, + "grad_norm": 5.074947674701273, + "learning_rate": 1.358021695962381e-06, + "loss": 0.7936, + "step": 9383 + }, + { + "epoch": 0.77, + "grad_norm": 4.760480366466265, + "learning_rate": 1.3571148274139223e-06, + "loss": 0.7204, + "step": 9384 + }, + { + "epoch": 0.77, + "grad_norm": 4.5202573924317875, + "learning_rate": 1.356208214215859e-06, + "loss": 0.8679, + "step": 9385 + }, + { + "epoch": 0.77, + "grad_norm": 3.7399295576934057, + "learning_rate": 1.3553018564317432e-06, + "loss": 0.6583, + "step": 9386 + }, + { + "epoch": 0.77, + "grad_norm": 2.5029310094502546, + "learning_rate": 1.354395754125104e-06, + "loss": 0.4584, + "step": 9387 + }, + { + "epoch": 0.77, + "grad_norm": 4.787278806573176, + "learning_rate": 1.3534899073594566e-06, + "loss": 0.9349, + "step": 9388 + }, + { + "epoch": 0.77, + "grad_norm": 3.8050894166241926, + "learning_rate": 1.352584316198297e-06, + "loss": 0.7285, + "step": 9389 + }, + { + "epoch": 0.77, + "grad_norm": 4.330536045395092, + "learning_rate": 1.351678980705104e-06, + "loss": 1.2079, + "step": 9390 + }, + { + "epoch": 0.77, + "grad_norm": 3.253642168070775, + "learning_rate": 1.3507739009433374e-06, + "loss": 0.6539, + "step": 9391 + }, + { + "epoch": 0.77, + "grad_norm": 6.463133979464778, + "learning_rate": 1.3498690769764378e-06, + "loss": 1.4632, + "step": 9392 + }, + { + "epoch": 0.77, + "grad_norm": 4.254093796324293, + "learning_rate": 1.3489645088678305e-06, + "loss": 1.1103, + "step": 9393 + }, + { + "epoch": 0.77, + "grad_norm": 2.596333662761472, + "learning_rate": 1.348060196680922e-06, + "loss": 0.5275, + "step": 9394 + }, + { + "epoch": 0.77, + "grad_norm": 3.251869759713248, + "learning_rate": 1.3471561404791e-06, + "loss": 0.6351, + "step": 9395 + }, + { + "epoch": 0.77, + "grad_norm": 2.355014066464097, + "learning_rate": 1.3462523403257355e-06, + "loss": 0.3062, + "step": 9396 + }, + { + "epoch": 0.77, + "grad_norm": 3.033922825683495, + "learning_rate": 1.3453487962841821e-06, + "loss": 0.5902, + "step": 9397 + }, + { + "epoch": 0.77, + "grad_norm": 4.126571279478452, + "learning_rate": 1.3444455084177716e-06, + "loss": 0.6138, + "step": 9398 + }, + { + "epoch": 0.77, + "grad_norm": 1.7703313801155975, + "learning_rate": 1.343542476789822e-06, + "loss": 0.3165, + "step": 9399 + }, + { + "epoch": 0.77, + "grad_norm": 3.172617171443144, + "learning_rate": 1.3426397014636334e-06, + "loss": 0.4856, + "step": 9400 + }, + { + "epoch": 0.77, + "grad_norm": 4.597938554244449, + "learning_rate": 1.3417371825024832e-06, + "loss": 0.9556, + "step": 9401 + }, + { + "epoch": 0.77, + "grad_norm": 4.4172447302014675, + "learning_rate": 1.3408349199696374e-06, + "loss": 0.9496, + "step": 9402 + }, + { + "epoch": 0.77, + "grad_norm": 4.725445692558316, + "learning_rate": 1.3399329139283375e-06, + "loss": 1.241, + "step": 9403 + }, + { + "epoch": 0.77, + "grad_norm": 4.7472402363942745, + "learning_rate": 1.3390311644418113e-06, + "loss": 1.1577, + "step": 9404 + }, + { + "epoch": 0.77, + "grad_norm": 2.658514626642248, + "learning_rate": 1.3381296715732678e-06, + "loss": 0.215, + "step": 9405 + }, + { + "epoch": 0.77, + "grad_norm": 3.9845902825761854, + "learning_rate": 1.3372284353858983e-06, + "loss": 0.5422, + "step": 9406 + }, + { + "epoch": 0.77, + "grad_norm": 3.8423579845115987, + "learning_rate": 1.3363274559428747e-06, + "loss": 0.7291, + "step": 9407 + }, + { + "epoch": 0.77, + "grad_norm": 4.410067348783659, + "learning_rate": 1.335426733307354e-06, + "loss": 0.7367, + "step": 9408 + }, + { + "epoch": 0.77, + "grad_norm": 4.537893849743915, + "learning_rate": 1.3345262675424691e-06, + "loss": 0.8426, + "step": 9409 + }, + { + "epoch": 0.77, + "grad_norm": 2.876409687775638, + "learning_rate": 1.333626058711341e-06, + "loss": 0.679, + "step": 9410 + }, + { + "epoch": 0.77, + "grad_norm": 5.163415705623005, + "learning_rate": 1.3327261068770698e-06, + "loss": 1.0723, + "step": 9411 + }, + { + "epoch": 0.77, + "grad_norm": 5.045484247563682, + "learning_rate": 1.331826412102738e-06, + "loss": 0.8043, + "step": 9412 + }, + { + "epoch": 0.77, + "grad_norm": 4.400894042145848, + "learning_rate": 1.3309269744514114e-06, + "loss": 0.8605, + "step": 9413 + }, + { + "epoch": 0.77, + "grad_norm": 3.41461886481942, + "learning_rate": 1.3300277939861372e-06, + "loss": 0.4978, + "step": 9414 + }, + { + "epoch": 0.77, + "grad_norm": 4.885506898324602, + "learning_rate": 1.3291288707699417e-06, + "loss": 0.615, + "step": 9415 + }, + { + "epoch": 0.77, + "grad_norm": 5.452241834588947, + "learning_rate": 1.328230204865838e-06, + "loss": 1.2617, + "step": 9416 + }, + { + "epoch": 0.77, + "grad_norm": 5.098897694714867, + "learning_rate": 1.327331796336816e-06, + "loss": 1.0752, + "step": 9417 + }, + { + "epoch": 0.77, + "grad_norm": 4.66589764611867, + "learning_rate": 1.3264336452458514e-06, + "loss": 0.9541, + "step": 9418 + }, + { + "epoch": 0.77, + "grad_norm": 3.275492019155368, + "learning_rate": 1.3255357516559025e-06, + "loss": 0.4241, + "step": 9419 + }, + { + "epoch": 0.77, + "grad_norm": 3.9066721077298308, + "learning_rate": 1.3246381156299048e-06, + "loss": 0.886, + "step": 9420 + }, + { + "epoch": 0.77, + "grad_norm": 2.1401612817685454, + "learning_rate": 1.3237407372307792e-06, + "loss": 0.3662, + "step": 9421 + }, + { + "epoch": 0.77, + "grad_norm": 4.326368524937841, + "learning_rate": 1.3228436165214298e-06, + "loss": 0.7366, + "step": 9422 + }, + { + "epoch": 0.77, + "grad_norm": 2.3468037609646273, + "learning_rate": 1.321946753564739e-06, + "loss": 0.489, + "step": 9423 + }, + { + "epoch": 0.77, + "grad_norm": 3.2502527737540756, + "learning_rate": 1.3210501484235744e-06, + "loss": 0.5933, + "step": 9424 + }, + { + "epoch": 0.77, + "grad_norm": 2.2470551800294674, + "learning_rate": 1.3201538011607845e-06, + "loss": 0.3139, + "step": 9425 + }, + { + "epoch": 0.77, + "grad_norm": 4.489138232154664, + "learning_rate": 1.3192577118391975e-06, + "loss": 0.7102, + "step": 9426 + }, + { + "epoch": 0.77, + "grad_norm": 1.8914064841933165, + "learning_rate": 1.318361880521626e-06, + "loss": 0.4059, + "step": 9427 + }, + { + "epoch": 0.77, + "grad_norm": 4.063844683388602, + "learning_rate": 1.3174663072708637e-06, + "loss": 0.6773, + "step": 9428 + }, + { + "epoch": 0.77, + "grad_norm": 3.50563997940386, + "learning_rate": 1.3165709921496873e-06, + "loss": 0.714, + "step": 9429 + }, + { + "epoch": 0.77, + "grad_norm": 3.1772613011563724, + "learning_rate": 1.3156759352208554e-06, + "loss": 0.5923, + "step": 9430 + }, + { + "epoch": 0.77, + "grad_norm": 2.2247310279851917, + "learning_rate": 1.3147811365471048e-06, + "loss": 0.3034, + "step": 9431 + }, + { + "epoch": 0.77, + "grad_norm": 3.25694837834173, + "learning_rate": 1.3138865961911585e-06, + "loss": 0.8065, + "step": 9432 + }, + { + "epoch": 0.77, + "grad_norm": 4.787558627022996, + "learning_rate": 1.312992314215721e-06, + "loss": 0.9318, + "step": 9433 + }, + { + "epoch": 0.77, + "grad_norm": 2.661175799036978, + "learning_rate": 1.3120982906834745e-06, + "loss": 0.7151, + "step": 9434 + }, + { + "epoch": 0.77, + "grad_norm": 5.371334083380921, + "learning_rate": 1.3112045256570888e-06, + "loss": 1.0989, + "step": 9435 + }, + { + "epoch": 0.77, + "grad_norm": 5.424428779312583, + "learning_rate": 1.3103110191992118e-06, + "loss": 1.0371, + "step": 9436 + }, + { + "epoch": 0.77, + "grad_norm": 3.7596546812463707, + "learning_rate": 1.3094177713724765e-06, + "loss": 0.7684, + "step": 9437 + }, + { + "epoch": 0.77, + "grad_norm": 4.099396301324731, + "learning_rate": 1.308524782239492e-06, + "loss": 0.8312, + "step": 9438 + }, + { + "epoch": 0.77, + "grad_norm": 3.6834372052861317, + "learning_rate": 1.3076320518628554e-06, + "loss": 0.5639, + "step": 9439 + }, + { + "epoch": 0.77, + "grad_norm": 1.2043445807513642, + "learning_rate": 1.3067395803051425e-06, + "loss": 0.2032, + "step": 9440 + }, + { + "epoch": 0.77, + "grad_norm": 4.290269026486594, + "learning_rate": 1.3058473676289118e-06, + "loss": 0.7477, + "step": 9441 + }, + { + "epoch": 0.77, + "grad_norm": 4.87168361128787, + "learning_rate": 1.3049554138967052e-06, + "loss": 1.0051, + "step": 9442 + }, + { + "epoch": 0.77, + "grad_norm": 1.492575685994819, + "learning_rate": 1.304063719171042e-06, + "loss": 0.1826, + "step": 9443 + }, + { + "epoch": 0.77, + "grad_norm": 3.6077122922831446, + "learning_rate": 1.3031722835144266e-06, + "loss": 0.4407, + "step": 9444 + }, + { + "epoch": 0.77, + "grad_norm": 3.58056453786561, + "learning_rate": 1.302281106989346e-06, + "loss": 0.5929, + "step": 9445 + }, + { + "epoch": 0.77, + "grad_norm": 4.254577200822444, + "learning_rate": 1.3013901896582677e-06, + "loss": 0.8297, + "step": 9446 + }, + { + "epoch": 0.77, + "grad_norm": 4.348001334484949, + "learning_rate": 1.3004995315836417e-06, + "loss": 1.0947, + "step": 9447 + }, + { + "epoch": 0.77, + "grad_norm": 1.1117699308226239, + "learning_rate": 1.2996091328278965e-06, + "loss": 0.1159, + "step": 9448 + }, + { + "epoch": 0.77, + "grad_norm": 2.1536886378875844, + "learning_rate": 1.2987189934534488e-06, + "loss": 0.4287, + "step": 9449 + }, + { + "epoch": 0.77, + "grad_norm": 3.998286991655185, + "learning_rate": 1.29782911352269e-06, + "loss": 0.7968, + "step": 9450 + }, + { + "epoch": 0.77, + "grad_norm": 5.112248998883585, + "learning_rate": 1.2969394930979984e-06, + "loss": 1.1064, + "step": 9451 + }, + { + "epoch": 0.77, + "grad_norm": 2.889365596754016, + "learning_rate": 1.2960501322417323e-06, + "loss": 0.4504, + "step": 9452 + }, + { + "epoch": 0.77, + "grad_norm": 4.3994968910567405, + "learning_rate": 1.2951610310162326e-06, + "loss": 0.8025, + "step": 9453 + }, + { + "epoch": 0.77, + "grad_norm": 3.7173719745240255, + "learning_rate": 1.2942721894838227e-06, + "loss": 0.6403, + "step": 9454 + }, + { + "epoch": 0.77, + "grad_norm": 3.8722081746417203, + "learning_rate": 1.2933836077068036e-06, + "loss": 0.645, + "step": 9455 + }, + { + "epoch": 0.77, + "grad_norm": 4.660927074059716, + "learning_rate": 1.2924952857474622e-06, + "loss": 0.9383, + "step": 9456 + }, + { + "epoch": 0.77, + "grad_norm": 3.21218245222277, + "learning_rate": 1.291607223668066e-06, + "loss": 0.2733, + "step": 9457 + }, + { + "epoch": 0.77, + "grad_norm": 3.122593583023933, + "learning_rate": 1.2907194215308644e-06, + "loss": 0.6487, + "step": 9458 + }, + { + "epoch": 0.77, + "grad_norm": 5.298242692595961, + "learning_rate": 1.2898318793980903e-06, + "loss": 0.9055, + "step": 9459 + }, + { + "epoch": 0.77, + "grad_norm": 5.478740435234801, + "learning_rate": 1.288944597331953e-06, + "loss": 1.3584, + "step": 9460 + }, + { + "epoch": 0.77, + "grad_norm": 4.834110280911659, + "learning_rate": 1.288057575394649e-06, + "loss": 0.8821, + "step": 9461 + }, + { + "epoch": 0.77, + "grad_norm": 3.4921116431704475, + "learning_rate": 1.2871708136483546e-06, + "loss": 0.8571, + "step": 9462 + }, + { + "epoch": 0.77, + "grad_norm": 4.40767678423515, + "learning_rate": 1.2862843121552293e-06, + "loss": 0.6731, + "step": 9463 + }, + { + "epoch": 0.77, + "grad_norm": 3.4209369222793753, + "learning_rate": 1.28539807097741e-06, + "loss": 0.6032, + "step": 9464 + }, + { + "epoch": 0.77, + "grad_norm": 2.98294933471214, + "learning_rate": 1.2845120901770214e-06, + "loss": 0.5268, + "step": 9465 + }, + { + "epoch": 0.77, + "grad_norm": 2.8442712477361876, + "learning_rate": 1.2836263698161638e-06, + "loss": 0.527, + "step": 9466 + }, + { + "epoch": 0.77, + "grad_norm": 2.020183840794972, + "learning_rate": 1.2827409099569237e-06, + "loss": 0.4005, + "step": 9467 + }, + { + "epoch": 0.77, + "grad_norm": 4.753830099766738, + "learning_rate": 1.2818557106613689e-06, + "loss": 0.9774, + "step": 9468 + }, + { + "epoch": 0.77, + "grad_norm": 4.753391100022944, + "learning_rate": 1.280970771991547e-06, + "loss": 0.6152, + "step": 9469 + }, + { + "epoch": 0.77, + "grad_norm": 4.5802211426804424, + "learning_rate": 1.2800860940094889e-06, + "loss": 0.8815, + "step": 9470 + }, + { + "epoch": 0.77, + "grad_norm": 2.932757418212026, + "learning_rate": 1.2792016767772081e-06, + "loss": 0.6008, + "step": 9471 + }, + { + "epoch": 0.77, + "grad_norm": 4.060272391727669, + "learning_rate": 1.278317520356695e-06, + "loss": 0.7747, + "step": 9472 + }, + { + "epoch": 0.77, + "grad_norm": 3.391174501253654, + "learning_rate": 1.2774336248099272e-06, + "loss": 0.5028, + "step": 9473 + }, + { + "epoch": 0.77, + "grad_norm": 4.10834258261621, + "learning_rate": 1.2765499901988616e-06, + "loss": 0.4337, + "step": 9474 + }, + { + "epoch": 0.77, + "grad_norm": 3.6488900764648484, + "learning_rate": 1.275666616585437e-06, + "loss": 0.8388, + "step": 9475 + }, + { + "epoch": 0.77, + "grad_norm": 3.0377874146013193, + "learning_rate": 1.2747835040315741e-06, + "loss": 0.7262, + "step": 9476 + }, + { + "epoch": 0.77, + "grad_norm": 3.020586427498907, + "learning_rate": 1.2739006525991772e-06, + "loss": 0.4525, + "step": 9477 + }, + { + "epoch": 0.77, + "grad_norm": 5.296675081507472, + "learning_rate": 1.273018062350127e-06, + "loss": 1.4636, + "step": 9478 + }, + { + "epoch": 0.77, + "grad_norm": 2.586643243293631, + "learning_rate": 1.2721357333462918e-06, + "loss": 0.5183, + "step": 9479 + }, + { + "epoch": 0.77, + "grad_norm": 2.8351867377323905, + "learning_rate": 1.2712536656495167e-06, + "loss": 0.5367, + "step": 9480 + }, + { + "epoch": 0.77, + "grad_norm": 4.886196342458603, + "learning_rate": 1.2703718593216324e-06, + "loss": 0.7353, + "step": 9481 + }, + { + "epoch": 0.78, + "grad_norm": 3.554634043073279, + "learning_rate": 1.2694903144244509e-06, + "loss": 0.703, + "step": 9482 + }, + { + "epoch": 0.78, + "grad_norm": 5.4791022781511325, + "learning_rate": 1.2686090310197613e-06, + "loss": 0.7757, + "step": 9483 + }, + { + "epoch": 0.78, + "grad_norm": 3.7325192961946816, + "learning_rate": 1.2677280091693395e-06, + "loss": 0.7382, + "step": 9484 + }, + { + "epoch": 0.78, + "grad_norm": 4.521604735262762, + "learning_rate": 1.2668472489349416e-06, + "loss": 1.0818, + "step": 9485 + }, + { + "epoch": 0.78, + "grad_norm": 3.000064290386803, + "learning_rate": 1.2659667503783047e-06, + "loss": 0.2897, + "step": 9486 + }, + { + "epoch": 0.78, + "grad_norm": 3.1188878553316743, + "learning_rate": 1.265086513561148e-06, + "loss": 0.4462, + "step": 9487 + }, + { + "epoch": 0.78, + "grad_norm": 6.865122818758232, + "learning_rate": 1.2642065385451736e-06, + "loss": 1.4022, + "step": 9488 + }, + { + "epoch": 0.78, + "grad_norm": 6.057940881244001, + "learning_rate": 1.263326825392061e-06, + "loss": 1.1208, + "step": 9489 + }, + { + "epoch": 0.78, + "grad_norm": 4.035237632457478, + "learning_rate": 1.2624473741634764e-06, + "loss": 0.8167, + "step": 9490 + }, + { + "epoch": 0.78, + "grad_norm": 6.389708678626936, + "learning_rate": 1.2615681849210648e-06, + "loss": 1.2094, + "step": 9491 + }, + { + "epoch": 0.78, + "grad_norm": 3.9356074826170615, + "learning_rate": 1.260689257726454e-06, + "loss": 0.767, + "step": 9492 + }, + { + "epoch": 0.78, + "grad_norm": 3.229117168492173, + "learning_rate": 1.2598105926412536e-06, + "loss": 0.5801, + "step": 9493 + }, + { + "epoch": 0.78, + "grad_norm": 3.9430776553172686, + "learning_rate": 1.2589321897270523e-06, + "loss": 0.5619, + "step": 9494 + }, + { + "epoch": 0.78, + "grad_norm": 3.8731757871049557, + "learning_rate": 1.2580540490454246e-06, + "loss": 0.689, + "step": 9495 + }, + { + "epoch": 0.78, + "grad_norm": 3.9330079288498623, + "learning_rate": 1.2571761706579216e-06, + "loss": 0.5508, + "step": 9496 + }, + { + "epoch": 0.78, + "grad_norm": 4.306828303358857, + "learning_rate": 1.2562985546260804e-06, + "loss": 0.5306, + "step": 9497 + }, + { + "epoch": 0.78, + "grad_norm": 2.9891319839435906, + "learning_rate": 1.2554212010114176e-06, + "loss": 0.784, + "step": 9498 + }, + { + "epoch": 0.78, + "grad_norm": 3.56118675712117, + "learning_rate": 1.2545441098754336e-06, + "loss": 0.7236, + "step": 9499 + }, + { + "epoch": 0.78, + "grad_norm": 4.921038626828936, + "learning_rate": 1.2536672812796057e-06, + "loss": 0.7664, + "step": 9500 + }, + { + "epoch": 0.78, + "grad_norm": 4.779705067114796, + "learning_rate": 1.252790715285398e-06, + "loss": 0.7688, + "step": 9501 + }, + { + "epoch": 0.78, + "grad_norm": 4.0120798055306865, + "learning_rate": 1.2519144119542526e-06, + "loss": 0.7212, + "step": 9502 + }, + { + "epoch": 0.78, + "grad_norm": 2.3584976659903703, + "learning_rate": 1.251038371347595e-06, + "loss": 0.3485, + "step": 9503 + }, + { + "epoch": 0.78, + "grad_norm": 4.913187163884777, + "learning_rate": 1.2501625935268325e-06, + "loss": 1.0688, + "step": 9504 + }, + { + "epoch": 0.78, + "grad_norm": 4.87134469525131, + "learning_rate": 1.2492870785533539e-06, + "loss": 0.8905, + "step": 9505 + }, + { + "epoch": 0.78, + "grad_norm": 3.8669210139522288, + "learning_rate": 1.248411826488527e-06, + "loss": 0.771, + "step": 9506 + }, + { + "epoch": 0.78, + "grad_norm": 3.366795829316916, + "learning_rate": 1.2475368373937035e-06, + "loss": 0.4478, + "step": 9507 + }, + { + "epoch": 0.78, + "grad_norm": 3.815116094891325, + "learning_rate": 1.2466621113302174e-06, + "loss": 0.7628, + "step": 9508 + }, + { + "epoch": 0.78, + "grad_norm": 3.944149278920771, + "learning_rate": 1.2457876483593839e-06, + "loss": 0.5918, + "step": 9509 + }, + { + "epoch": 0.78, + "grad_norm": 4.0388836112634285, + "learning_rate": 1.2449134485424969e-06, + "loss": 0.5214, + "step": 9510 + }, + { + "epoch": 0.78, + "grad_norm": 4.327183866109166, + "learning_rate": 1.244039511940836e-06, + "loss": 0.8289, + "step": 9511 + }, + { + "epoch": 0.78, + "grad_norm": 3.7530568667721487, + "learning_rate": 1.2431658386156576e-06, + "loss": 0.4686, + "step": 9512 + }, + { + "epoch": 0.78, + "grad_norm": 4.480329489320254, + "learning_rate": 1.2422924286282045e-06, + "loss": 0.6048, + "step": 9513 + }, + { + "epoch": 0.78, + "grad_norm": 2.1567143431506794, + "learning_rate": 1.2414192820396987e-06, + "loss": 0.4192, + "step": 9514 + }, + { + "epoch": 0.78, + "grad_norm": 5.82164518572063, + "learning_rate": 1.2405463989113437e-06, + "loss": 0.8892, + "step": 9515 + }, + { + "epoch": 0.78, + "grad_norm": 5.676772750999895, + "learning_rate": 1.2396737793043246e-06, + "loss": 0.6857, + "step": 9516 + }, + { + "epoch": 0.78, + "grad_norm": 4.92710925047657, + "learning_rate": 1.2388014232798102e-06, + "loss": 0.8158, + "step": 9517 + }, + { + "epoch": 0.78, + "grad_norm": 3.815218867450044, + "learning_rate": 1.237929330898946e-06, + "loss": 0.628, + "step": 9518 + }, + { + "epoch": 0.78, + "grad_norm": 1.5802954859651215, + "learning_rate": 1.2370575022228632e-06, + "loss": 0.2097, + "step": 9519 + }, + { + "epoch": 0.78, + "grad_norm": 3.087610106969515, + "learning_rate": 1.2361859373126727e-06, + "loss": 0.5337, + "step": 9520 + }, + { + "epoch": 0.78, + "grad_norm": 2.819997553881703, + "learning_rate": 1.2353146362294682e-06, + "loss": 0.5812, + "step": 9521 + }, + { + "epoch": 0.78, + "grad_norm": 4.129135410511098, + "learning_rate": 1.234443599034325e-06, + "loss": 0.9036, + "step": 9522 + }, + { + "epoch": 0.78, + "grad_norm": 3.448219456451351, + "learning_rate": 1.2335728257882962e-06, + "loss": 0.5669, + "step": 9523 + }, + { + "epoch": 0.78, + "grad_norm": 1.3452401635169344, + "learning_rate": 1.232702316552421e-06, + "loss": 0.2998, + "step": 9524 + }, + { + "epoch": 0.78, + "grad_norm": 5.8183723015943, + "learning_rate": 1.2318320713877191e-06, + "loss": 0.9775, + "step": 9525 + }, + { + "epoch": 0.78, + "grad_norm": 5.135468860988995, + "learning_rate": 1.230962090355189e-06, + "loss": 1.212, + "step": 9526 + }, + { + "epoch": 0.78, + "grad_norm": 4.61340378905854, + "learning_rate": 1.2300923735158133e-06, + "loss": 0.8543, + "step": 9527 + }, + { + "epoch": 0.78, + "grad_norm": 3.5275409270739684, + "learning_rate": 1.2292229209305567e-06, + "loss": 0.5539, + "step": 9528 + }, + { + "epoch": 0.78, + "grad_norm": 3.0836387895110375, + "learning_rate": 1.2283537326603611e-06, + "loss": 0.4498, + "step": 9529 + }, + { + "epoch": 0.78, + "grad_norm": 2.9828518185080988, + "learning_rate": 1.2274848087661555e-06, + "loss": 0.4785, + "step": 9530 + }, + { + "epoch": 0.78, + "grad_norm": 3.4538261017643257, + "learning_rate": 1.2266161493088463e-06, + "loss": 0.5159, + "step": 9531 + }, + { + "epoch": 0.78, + "grad_norm": 3.2069474774767754, + "learning_rate": 1.225747754349323e-06, + "loss": 0.5612, + "step": 9532 + }, + { + "epoch": 0.78, + "grad_norm": 5.064186344941871, + "learning_rate": 1.2248796239484567e-06, + "loss": 0.9742, + "step": 9533 + }, + { + "epoch": 0.78, + "grad_norm": 2.8123722929754025, + "learning_rate": 1.2240117581671013e-06, + "loss": 0.5447, + "step": 9534 + }, + { + "epoch": 0.78, + "grad_norm": 4.408214932514735, + "learning_rate": 1.2231441570660863e-06, + "loss": 0.8532, + "step": 9535 + }, + { + "epoch": 0.78, + "grad_norm": 5.284974680679537, + "learning_rate": 1.222276820706229e-06, + "loss": 1.443, + "step": 9536 + }, + { + "epoch": 0.78, + "grad_norm": 2.978184535275699, + "learning_rate": 1.2214097491483262e-06, + "loss": 0.438, + "step": 9537 + }, + { + "epoch": 0.78, + "grad_norm": 4.719536409936797, + "learning_rate": 1.2205429424531556e-06, + "loss": 0.823, + "step": 9538 + }, + { + "epoch": 0.78, + "grad_norm": 3.636600975898947, + "learning_rate": 1.2196764006814764e-06, + "loss": 0.7855, + "step": 9539 + }, + { + "epoch": 0.78, + "grad_norm": 3.313568214669807, + "learning_rate": 1.2188101238940309e-06, + "loss": 0.5635, + "step": 9540 + }, + { + "epoch": 0.78, + "grad_norm": 4.827929767912652, + "learning_rate": 1.2179441121515384e-06, + "loss": 1.1951, + "step": 9541 + }, + { + "epoch": 0.78, + "grad_norm": 5.83635248764761, + "learning_rate": 1.2170783655147056e-06, + "loss": 0.9488, + "step": 9542 + }, + { + "epoch": 0.78, + "grad_norm": 3.730236472623352, + "learning_rate": 1.2162128840442145e-06, + "loss": 0.7573, + "step": 9543 + }, + { + "epoch": 0.78, + "grad_norm": 2.8731020795937416, + "learning_rate": 1.2153476678007331e-06, + "loss": 0.7127, + "step": 9544 + }, + { + "epoch": 0.78, + "grad_norm": 4.5919527598477154, + "learning_rate": 1.2144827168449109e-06, + "loss": 0.7943, + "step": 9545 + }, + { + "epoch": 0.78, + "grad_norm": 4.010204035251635, + "learning_rate": 1.2136180312373742e-06, + "loss": 0.898, + "step": 9546 + }, + { + "epoch": 0.78, + "grad_norm": 2.8695847466899043, + "learning_rate": 1.212753611038735e-06, + "loss": 0.5351, + "step": 9547 + }, + { + "epoch": 0.78, + "grad_norm": 4.32016422722352, + "learning_rate": 1.2118894563095857e-06, + "loss": 1.0733, + "step": 9548 + }, + { + "epoch": 0.78, + "grad_norm": 4.568638969482633, + "learning_rate": 1.2110255671104997e-06, + "loss": 0.6358, + "step": 9549 + }, + { + "epoch": 0.78, + "grad_norm": 4.875271015558787, + "learning_rate": 1.2101619435020311e-06, + "loss": 0.9305, + "step": 9550 + }, + { + "epoch": 0.78, + "grad_norm": 3.022347320932427, + "learning_rate": 1.2092985855447193e-06, + "loss": 0.6068, + "step": 9551 + }, + { + "epoch": 0.78, + "grad_norm": 2.3295190510012524, + "learning_rate": 1.2084354932990772e-06, + "loss": 0.3397, + "step": 9552 + }, + { + "epoch": 0.78, + "grad_norm": 3.2833774245157272, + "learning_rate": 1.207572666825606e-06, + "loss": 0.6509, + "step": 9553 + }, + { + "epoch": 0.78, + "grad_norm": 4.484701560564738, + "learning_rate": 1.2067101061847869e-06, + "loss": 1.1058, + "step": 9554 + }, + { + "epoch": 0.78, + "grad_norm": 4.690085106932299, + "learning_rate": 1.2058478114370803e-06, + "loss": 1.0041, + "step": 9555 + }, + { + "epoch": 0.78, + "grad_norm": 3.456193791753844, + "learning_rate": 1.2049857826429317e-06, + "loss": 0.712, + "step": 9556 + }, + { + "epoch": 0.78, + "grad_norm": 4.223199212769577, + "learning_rate": 1.2041240198627617e-06, + "loss": 0.7078, + "step": 9557 + }, + { + "epoch": 0.78, + "grad_norm": 2.8785252470799803, + "learning_rate": 1.2032625231569805e-06, + "loss": 0.5626, + "step": 9558 + }, + { + "epoch": 0.78, + "grad_norm": 3.4123505230738402, + "learning_rate": 1.2024012925859712e-06, + "loss": 0.6199, + "step": 9559 + }, + { + "epoch": 0.78, + "grad_norm": 3.939580602094508, + "learning_rate": 1.201540328210104e-06, + "loss": 0.5855, + "step": 9560 + }, + { + "epoch": 0.78, + "grad_norm": 1.135372367422248, + "learning_rate": 1.200679630089729e-06, + "loss": 0.1252, + "step": 9561 + }, + { + "epoch": 0.78, + "grad_norm": 4.966481781191274, + "learning_rate": 1.1998191982851787e-06, + "loss": 0.9955, + "step": 9562 + }, + { + "epoch": 0.78, + "grad_norm": 2.694849907298931, + "learning_rate": 1.1989590328567623e-06, + "loss": 0.4365, + "step": 9563 + }, + { + "epoch": 0.78, + "grad_norm": 4.8035534047839255, + "learning_rate": 1.1980991338647757e-06, + "loss": 1.099, + "step": 9564 + }, + { + "epoch": 0.78, + "grad_norm": 4.229682790370691, + "learning_rate": 1.1972395013694944e-06, + "loss": 0.7544, + "step": 9565 + }, + { + "epoch": 0.78, + "grad_norm": 4.3258366679237765, + "learning_rate": 1.1963801354311738e-06, + "loss": 0.7798, + "step": 9566 + }, + { + "epoch": 0.78, + "grad_norm": 4.395452828167987, + "learning_rate": 1.1955210361100521e-06, + "loss": 0.8726, + "step": 9567 + }, + { + "epoch": 0.78, + "grad_norm": 5.194726586130052, + "learning_rate": 1.1946622034663507e-06, + "loss": 0.873, + "step": 9568 + }, + { + "epoch": 0.78, + "grad_norm": 3.329469032605557, + "learning_rate": 1.1938036375602662e-06, + "loss": 0.4734, + "step": 9569 + }, + { + "epoch": 0.78, + "grad_norm": 4.477324477489711, + "learning_rate": 1.1929453384519818e-06, + "loss": 0.8422, + "step": 9570 + }, + { + "epoch": 0.78, + "grad_norm": 4.86605519298845, + "learning_rate": 1.1920873062016613e-06, + "loss": 0.7282, + "step": 9571 + }, + { + "epoch": 0.78, + "grad_norm": 1.8644296246619503, + "learning_rate": 1.1912295408694496e-06, + "loss": 0.3902, + "step": 9572 + }, + { + "epoch": 0.78, + "grad_norm": 2.126756509218199, + "learning_rate": 1.1903720425154702e-06, + "loss": 0.3178, + "step": 9573 + }, + { + "epoch": 0.78, + "grad_norm": 3.1785592662935267, + "learning_rate": 1.1895148111998323e-06, + "loss": 0.2606, + "step": 9574 + }, + { + "epoch": 0.78, + "grad_norm": 2.8275021525166313, + "learning_rate": 1.1886578469826215e-06, + "loss": 0.5906, + "step": 9575 + }, + { + "epoch": 0.78, + "grad_norm": 4.148790268356531, + "learning_rate": 1.1878011499239083e-06, + "loss": 0.8455, + "step": 9576 + }, + { + "epoch": 0.78, + "grad_norm": 4.830681000824894, + "learning_rate": 1.1869447200837443e-06, + "loss": 0.701, + "step": 9577 + }, + { + "epoch": 0.78, + "grad_norm": 4.3820428705803245, + "learning_rate": 1.1860885575221603e-06, + "loss": 0.8088, + "step": 9578 + }, + { + "epoch": 0.78, + "grad_norm": 2.3999850220494645, + "learning_rate": 1.1852326622991712e-06, + "loss": 0.2845, + "step": 9579 + }, + { + "epoch": 0.78, + "grad_norm": 3.5818051654165215, + "learning_rate": 1.1843770344747712e-06, + "loss": 0.8162, + "step": 9580 + }, + { + "epoch": 0.78, + "grad_norm": 3.3386044856674983, + "learning_rate": 1.183521674108934e-06, + "loss": 0.4882, + "step": 9581 + }, + { + "epoch": 0.78, + "grad_norm": 4.6539031385252825, + "learning_rate": 1.1826665812616183e-06, + "loss": 1.1817, + "step": 9582 + }, + { + "epoch": 0.78, + "grad_norm": 4.833873751597924, + "learning_rate": 1.1818117559927622e-06, + "loss": 0.5528, + "step": 9583 + }, + { + "epoch": 0.78, + "grad_norm": 4.237939933708527, + "learning_rate": 1.1809571983622846e-06, + "loss": 0.7856, + "step": 9584 + }, + { + "epoch": 0.78, + "grad_norm": 4.505227537156948, + "learning_rate": 1.1801029084300891e-06, + "loss": 0.6273, + "step": 9585 + }, + { + "epoch": 0.78, + "grad_norm": 1.783948077996453, + "learning_rate": 1.1792488862560536e-06, + "loss": 0.3235, + "step": 9586 + }, + { + "epoch": 0.78, + "grad_norm": 6.02804589596132, + "learning_rate": 1.1783951319000437e-06, + "loss": 1.1752, + "step": 9587 + }, + { + "epoch": 0.78, + "grad_norm": 3.4292192697448938, + "learning_rate": 1.177541645421904e-06, + "loss": 0.3607, + "step": 9588 + }, + { + "epoch": 0.78, + "grad_norm": 2.202673607295536, + "learning_rate": 1.1766884268814587e-06, + "loss": 0.4486, + "step": 9589 + }, + { + "epoch": 0.78, + "grad_norm": 3.989876397149936, + "learning_rate": 1.1758354763385154e-06, + "loss": 0.812, + "step": 9590 + }, + { + "epoch": 0.78, + "grad_norm": 3.622833772162186, + "learning_rate": 1.174982793852864e-06, + "loss": 0.5832, + "step": 9591 + }, + { + "epoch": 0.78, + "grad_norm": 2.178032237414417, + "learning_rate": 1.1741303794842706e-06, + "loss": 0.3657, + "step": 9592 + }, + { + "epoch": 0.78, + "grad_norm": 3.852195921600761, + "learning_rate": 1.1732782332924874e-06, + "loss": 0.8374, + "step": 9593 + }, + { + "epoch": 0.78, + "grad_norm": 4.771156324016596, + "learning_rate": 1.1724263553372462e-06, + "loss": 0.9483, + "step": 9594 + }, + { + "epoch": 0.78, + "grad_norm": 3.292588240670132, + "learning_rate": 1.1715747456782594e-06, + "loss": 0.706, + "step": 9595 + }, + { + "epoch": 0.78, + "grad_norm": 2.5559926633483454, + "learning_rate": 1.1707234043752219e-06, + "loss": 0.5238, + "step": 9596 + }, + { + "epoch": 0.78, + "grad_norm": 4.615783434419688, + "learning_rate": 1.1698723314878102e-06, + "loss": 0.7433, + "step": 9597 + }, + { + "epoch": 0.78, + "grad_norm": 3.940962409567911, + "learning_rate": 1.1690215270756777e-06, + "loss": 0.9237, + "step": 9598 + }, + { + "epoch": 0.78, + "grad_norm": 2.6793765422907363, + "learning_rate": 1.168170991198464e-06, + "loss": 0.3631, + "step": 9599 + }, + { + "epoch": 0.78, + "grad_norm": 2.8653290532875517, + "learning_rate": 1.1673207239157874e-06, + "loss": 0.6689, + "step": 9600 + }, + { + "epoch": 0.78, + "grad_norm": 2.976737564961005, + "learning_rate": 1.1664707252872481e-06, + "loss": 0.6642, + "step": 9601 + }, + { + "epoch": 0.78, + "grad_norm": 3.9914614639763992, + "learning_rate": 1.165620995372429e-06, + "loss": 0.9346, + "step": 9602 + }, + { + "epoch": 0.78, + "grad_norm": 3.1113270695900557, + "learning_rate": 1.164771534230889e-06, + "loss": 0.7333, + "step": 9603 + }, + { + "epoch": 0.78, + "grad_norm": 4.8905432502915245, + "learning_rate": 1.1639223419221756e-06, + "loss": 0.8442, + "step": 9604 + }, + { + "epoch": 0.79, + "grad_norm": 3.450603824396783, + "learning_rate": 1.1630734185058096e-06, + "loss": 0.5446, + "step": 9605 + }, + { + "epoch": 0.79, + "grad_norm": 5.014716241071041, + "learning_rate": 1.162224764041298e-06, + "loss": 0.7966, + "step": 9606 + }, + { + "epoch": 0.79, + "grad_norm": 4.548753052212068, + "learning_rate": 1.1613763785881294e-06, + "loss": 0.8736, + "step": 9607 + }, + { + "epoch": 0.79, + "grad_norm": 5.183787050875972, + "learning_rate": 1.1605282622057718e-06, + "loss": 0.836, + "step": 9608 + }, + { + "epoch": 0.79, + "grad_norm": 3.605722267159306, + "learning_rate": 1.1596804149536723e-06, + "loss": 0.7694, + "step": 9609 + }, + { + "epoch": 0.79, + "grad_norm": 3.536756127578316, + "learning_rate": 1.1588328368912622e-06, + "loss": 0.3536, + "step": 9610 + }, + { + "epoch": 0.79, + "grad_norm": 4.338837264580741, + "learning_rate": 1.157985528077954e-06, + "loss": 0.7896, + "step": 9611 + }, + { + "epoch": 0.79, + "grad_norm": 4.002047789875118, + "learning_rate": 1.1571384885731395e-06, + "loss": 0.8901, + "step": 9612 + }, + { + "epoch": 0.79, + "grad_norm": 2.805848427243978, + "learning_rate": 1.1562917184361926e-06, + "loss": 0.3782, + "step": 9613 + }, + { + "epoch": 0.79, + "grad_norm": 5.156441147164578, + "learning_rate": 1.1554452177264703e-06, + "loss": 0.7537, + "step": 9614 + }, + { + "epoch": 0.79, + "grad_norm": 4.1989631116228505, + "learning_rate": 1.1545989865033047e-06, + "loss": 0.6675, + "step": 9615 + }, + { + "epoch": 0.79, + "grad_norm": 3.536201511039549, + "learning_rate": 1.1537530248260154e-06, + "loss": 0.7386, + "step": 9616 + }, + { + "epoch": 0.79, + "grad_norm": 5.430036824389245, + "learning_rate": 1.1529073327538997e-06, + "loss": 1.0954, + "step": 9617 + }, + { + "epoch": 0.79, + "grad_norm": 4.497001618649554, + "learning_rate": 1.1520619103462387e-06, + "loss": 0.5644, + "step": 9618 + }, + { + "epoch": 0.79, + "grad_norm": 5.06404922181396, + "learning_rate": 1.1512167576622906e-06, + "loss": 0.8917, + "step": 9619 + }, + { + "epoch": 0.79, + "grad_norm": 2.4379380659815215, + "learning_rate": 1.150371874761299e-06, + "loss": 0.4712, + "step": 9620 + }, + { + "epoch": 0.79, + "grad_norm": 3.695014470124851, + "learning_rate": 1.1495272617024839e-06, + "loss": 0.7112, + "step": 9621 + }, + { + "epoch": 0.79, + "grad_norm": 4.497502645969183, + "learning_rate": 1.1486829185450504e-06, + "loss": 0.9781, + "step": 9622 + }, + { + "epoch": 0.79, + "grad_norm": 4.00480616008217, + "learning_rate": 1.147838845348183e-06, + "loss": 0.8317, + "step": 9623 + }, + { + "epoch": 0.79, + "grad_norm": 4.394024203815417, + "learning_rate": 1.1469950421710486e-06, + "loss": 0.744, + "step": 9624 + }, + { + "epoch": 0.79, + "grad_norm": 4.134570910059159, + "learning_rate": 1.1461515090727943e-06, + "loss": 0.5166, + "step": 9625 + }, + { + "epoch": 0.79, + "grad_norm": 3.2031665141097005, + "learning_rate": 1.1453082461125465e-06, + "loss": 0.7629, + "step": 9626 + }, + { + "epoch": 0.79, + "grad_norm": 6.273922961464934, + "learning_rate": 1.1444652533494143e-06, + "loss": 1.3982, + "step": 9627 + }, + { + "epoch": 0.79, + "grad_norm": 4.001798258529542, + "learning_rate": 1.1436225308424885e-06, + "loss": 0.504, + "step": 9628 + }, + { + "epoch": 0.79, + "grad_norm": 4.180857843732888, + "learning_rate": 1.1427800786508402e-06, + "loss": 0.8142, + "step": 9629 + }, + { + "epoch": 0.79, + "grad_norm": 2.3082931776411604, + "learning_rate": 1.141937896833522e-06, + "loss": 0.5347, + "step": 9630 + }, + { + "epoch": 0.79, + "grad_norm": 4.208351716277365, + "learning_rate": 1.1410959854495684e-06, + "loss": 0.922, + "step": 9631 + }, + { + "epoch": 0.79, + "grad_norm": 7.570266375131698, + "learning_rate": 1.1402543445579905e-06, + "loss": 0.8222, + "step": 9632 + }, + { + "epoch": 0.79, + "grad_norm": 3.74728306378593, + "learning_rate": 1.1394129742177856e-06, + "loss": 0.7746, + "step": 9633 + }, + { + "epoch": 0.79, + "grad_norm": 3.1241305119409484, + "learning_rate": 1.1385718744879298e-06, + "loss": 0.5867, + "step": 9634 + }, + { + "epoch": 0.79, + "grad_norm": 3.49803535696854, + "learning_rate": 1.1377310454273821e-06, + "loss": 0.5357, + "step": 9635 + }, + { + "epoch": 0.79, + "grad_norm": 4.578844931449383, + "learning_rate": 1.1368904870950782e-06, + "loss": 1.0146, + "step": 9636 + }, + { + "epoch": 0.79, + "grad_norm": 2.642038477169935, + "learning_rate": 1.1360501995499396e-06, + "loss": 0.5953, + "step": 9637 + }, + { + "epoch": 0.79, + "grad_norm": 3.7415699599739503, + "learning_rate": 1.135210182850865e-06, + "loss": 0.7982, + "step": 9638 + }, + { + "epoch": 0.79, + "grad_norm": 5.355420188822306, + "learning_rate": 1.1343704370567371e-06, + "loss": 1.0446, + "step": 9639 + }, + { + "epoch": 0.79, + "grad_norm": 5.726871691223727, + "learning_rate": 1.1335309622264184e-06, + "loss": 1.3793, + "step": 9640 + }, + { + "epoch": 0.79, + "grad_norm": 5.050570243004476, + "learning_rate": 1.1326917584187518e-06, + "loss": 0.5335, + "step": 9641 + }, + { + "epoch": 0.79, + "grad_norm": 3.4912518680649693, + "learning_rate": 1.1318528256925642e-06, + "loss": 0.5002, + "step": 9642 + }, + { + "epoch": 0.79, + "grad_norm": 3.2478760852442887, + "learning_rate": 1.131014164106658e-06, + "loss": 0.4667, + "step": 9643 + }, + { + "epoch": 0.79, + "grad_norm": 2.2488126936794814, + "learning_rate": 1.1301757737198206e-06, + "loss": 0.4027, + "step": 9644 + }, + { + "epoch": 0.79, + "grad_norm": 4.655049431304579, + "learning_rate": 1.1293376545908202e-06, + "loss": 0.9878, + "step": 9645 + }, + { + "epoch": 0.79, + "grad_norm": 3.700968314352387, + "learning_rate": 1.1284998067784047e-06, + "loss": 0.7646, + "step": 9646 + }, + { + "epoch": 0.79, + "grad_norm": 4.00981019441697, + "learning_rate": 1.1276622303413043e-06, + "loss": 0.7577, + "step": 9647 + }, + { + "epoch": 0.79, + "grad_norm": 5.716290315788904, + "learning_rate": 1.1268249253382303e-06, + "loss": 0.7271, + "step": 9648 + }, + { + "epoch": 0.79, + "grad_norm": 4.505095945581661, + "learning_rate": 1.1259878918278717e-06, + "loss": 0.852, + "step": 9649 + }, + { + "epoch": 0.79, + "grad_norm": 4.338128790190635, + "learning_rate": 1.1251511298689015e-06, + "loss": 0.4403, + "step": 9650 + }, + { + "epoch": 0.79, + "grad_norm": 4.241555005222036, + "learning_rate": 1.1243146395199762e-06, + "loss": 0.9327, + "step": 9651 + }, + { + "epoch": 0.79, + "grad_norm": 3.5833214319323226, + "learning_rate": 1.1234784208397254e-06, + "loss": 0.7184, + "step": 9652 + }, + { + "epoch": 0.79, + "grad_norm": 2.954642674831938, + "learning_rate": 1.122642473886767e-06, + "loss": 0.5282, + "step": 9653 + }, + { + "epoch": 0.79, + "grad_norm": 4.72774250321284, + "learning_rate": 1.121806798719698e-06, + "loss": 0.9484, + "step": 9654 + }, + { + "epoch": 0.79, + "grad_norm": 4.9282920460344455, + "learning_rate": 1.120971395397093e-06, + "loss": 1.0189, + "step": 9655 + }, + { + "epoch": 0.79, + "grad_norm": 4.462960738027389, + "learning_rate": 1.120136263977512e-06, + "loss": 0.8744, + "step": 9656 + }, + { + "epoch": 0.79, + "grad_norm": 2.760146405525162, + "learning_rate": 1.1193014045194934e-06, + "loss": 0.4105, + "step": 9657 + }, + { + "epoch": 0.79, + "grad_norm": 3.2457823955428218, + "learning_rate": 1.1184668170815572e-06, + "loss": 0.4571, + "step": 9658 + }, + { + "epoch": 0.79, + "grad_norm": 5.166433479661119, + "learning_rate": 1.117632501722205e-06, + "loss": 0.7967, + "step": 9659 + }, + { + "epoch": 0.79, + "grad_norm": 2.560238295755665, + "learning_rate": 1.1167984584999197e-06, + "loss": 0.3852, + "step": 9660 + }, + { + "epoch": 0.79, + "grad_norm": 4.640499295917284, + "learning_rate": 1.1159646874731612e-06, + "loss": 0.7281, + "step": 9661 + }, + { + "epoch": 0.79, + "grad_norm": 3.491885857740138, + "learning_rate": 1.1151311887003747e-06, + "loss": 0.6245, + "step": 9662 + }, + { + "epoch": 0.79, + "grad_norm": 4.0728974812486936, + "learning_rate": 1.1142979622399853e-06, + "loss": 0.4791, + "step": 9663 + }, + { + "epoch": 0.79, + "grad_norm": 5.048737541139456, + "learning_rate": 1.1134650081503978e-06, + "loss": 1.1579, + "step": 9664 + }, + { + "epoch": 0.79, + "grad_norm": 5.080432675687764, + "learning_rate": 1.1126323264900002e-06, + "loss": 0.9797, + "step": 9665 + }, + { + "epoch": 0.79, + "grad_norm": 2.7141021438171187, + "learning_rate": 1.1117999173171574e-06, + "loss": 0.2737, + "step": 9666 + }, + { + "epoch": 0.79, + "grad_norm": 3.4687110977368856, + "learning_rate": 1.1109677806902203e-06, + "loss": 0.9324, + "step": 9667 + }, + { + "epoch": 0.79, + "grad_norm": 3.114467256724678, + "learning_rate": 1.1101359166675151e-06, + "loss": 0.5055, + "step": 9668 + }, + { + "epoch": 0.79, + "grad_norm": 4.174587067675621, + "learning_rate": 1.1093043253073538e-06, + "loss": 0.7105, + "step": 9669 + }, + { + "epoch": 0.79, + "grad_norm": 4.8009407831622655, + "learning_rate": 1.1084730066680267e-06, + "loss": 1.0176, + "step": 9670 + }, + { + "epoch": 0.79, + "grad_norm": 6.475202699175426, + "learning_rate": 1.107641960807807e-06, + "loss": 1.4798, + "step": 9671 + }, + { + "epoch": 0.79, + "grad_norm": 3.953644598963639, + "learning_rate": 1.1068111877849448e-06, + "loss": 0.5494, + "step": 9672 + }, + { + "epoch": 0.79, + "grad_norm": 3.8234011788648514, + "learning_rate": 1.1059806876576756e-06, + "loss": 0.7237, + "step": 9673 + }, + { + "epoch": 0.79, + "grad_norm": 4.920376867534809, + "learning_rate": 1.1051504604842128e-06, + "loss": 0.6737, + "step": 9674 + }, + { + "epoch": 0.79, + "grad_norm": 2.50548118931816, + "learning_rate": 1.104320506322753e-06, + "loss": 0.3674, + "step": 9675 + }, + { + "epoch": 0.79, + "grad_norm": 4.377095839238889, + "learning_rate": 1.1034908252314714e-06, + "loss": 0.8886, + "step": 9676 + }, + { + "epoch": 0.79, + "grad_norm": 3.931779261768232, + "learning_rate": 1.1026614172685263e-06, + "loss": 0.8574, + "step": 9677 + }, + { + "epoch": 0.79, + "grad_norm": 3.4732582543823733, + "learning_rate": 1.1018322824920535e-06, + "loss": 0.4539, + "step": 9678 + }, + { + "epoch": 0.79, + "grad_norm": 3.568627885041344, + "learning_rate": 1.1010034209601727e-06, + "loss": 0.7445, + "step": 9679 + }, + { + "epoch": 0.79, + "grad_norm": 4.593252938454156, + "learning_rate": 1.1001748327309835e-06, + "loss": 0.8384, + "step": 9680 + }, + { + "epoch": 0.79, + "grad_norm": 5.594480343185368, + "learning_rate": 1.0993465178625678e-06, + "loss": 1.0378, + "step": 9681 + }, + { + "epoch": 0.79, + "grad_norm": 4.46661245479926, + "learning_rate": 1.0985184764129847e-06, + "loss": 0.8683, + "step": 9682 + }, + { + "epoch": 0.79, + "grad_norm": 5.046667478036286, + "learning_rate": 1.0976907084402776e-06, + "loss": 0.9989, + "step": 9683 + }, + { + "epoch": 0.79, + "grad_norm": 3.233168789512369, + "learning_rate": 1.0968632140024683e-06, + "loss": 0.7017, + "step": 9684 + }, + { + "epoch": 0.79, + "grad_norm": 4.852547466308722, + "learning_rate": 1.096035993157561e-06, + "loss": 0.7889, + "step": 9685 + }, + { + "epoch": 0.79, + "grad_norm": 5.7693518060992535, + "learning_rate": 1.09520904596354e-06, + "loss": 1.301, + "step": 9686 + }, + { + "epoch": 0.79, + "grad_norm": 1.2474182959207654, + "learning_rate": 1.0943823724783719e-06, + "loss": 0.1488, + "step": 9687 + }, + { + "epoch": 0.79, + "grad_norm": 1.3677448085476038, + "learning_rate": 1.0935559727600032e-06, + "loss": 0.2045, + "step": 9688 + }, + { + "epoch": 0.79, + "grad_norm": 3.6865572644403133, + "learning_rate": 1.0927298468663582e-06, + "loss": 0.8139, + "step": 9689 + }, + { + "epoch": 0.79, + "grad_norm": 5.141834242533534, + "learning_rate": 1.0919039948553467e-06, + "loss": 1.1825, + "step": 9690 + }, + { + "epoch": 0.79, + "grad_norm": 2.9705959149538557, + "learning_rate": 1.0910784167848576e-06, + "loss": 0.5733, + "step": 9691 + }, + { + "epoch": 0.79, + "grad_norm": 3.5182354410933647, + "learning_rate": 1.090253112712759e-06, + "loss": 0.4627, + "step": 9692 + }, + { + "epoch": 0.79, + "grad_norm": 3.3906113326699754, + "learning_rate": 1.0894280826969022e-06, + "loss": 0.6566, + "step": 9693 + }, + { + "epoch": 0.79, + "grad_norm": 3.356231320302676, + "learning_rate": 1.0886033267951196e-06, + "loss": 0.7698, + "step": 9694 + }, + { + "epoch": 0.79, + "grad_norm": 4.742199082679943, + "learning_rate": 1.0877788450652199e-06, + "loss": 0.8193, + "step": 9695 + }, + { + "epoch": 0.79, + "grad_norm": 3.7612987299533867, + "learning_rate": 1.086954637564997e-06, + "loss": 0.8393, + "step": 9696 + }, + { + "epoch": 0.79, + "grad_norm": 3.8863342177328155, + "learning_rate": 1.0861307043522256e-06, + "loss": 0.8334, + "step": 9697 + }, + { + "epoch": 0.79, + "grad_norm": 3.3466674184223657, + "learning_rate": 1.085307045484657e-06, + "loss": 0.4735, + "step": 9698 + }, + { + "epoch": 0.79, + "grad_norm": 2.6521837509972768, + "learning_rate": 1.0844836610200282e-06, + "loss": 0.5738, + "step": 9699 + }, + { + "epoch": 0.79, + "grad_norm": 4.035367081615931, + "learning_rate": 1.0836605510160558e-06, + "loss": 0.6813, + "step": 9700 + }, + { + "epoch": 0.79, + "grad_norm": 4.302242602103993, + "learning_rate": 1.0828377155304332e-06, + "loss": 1.1911, + "step": 9701 + }, + { + "epoch": 0.79, + "grad_norm": 4.31959290473899, + "learning_rate": 1.082015154620839e-06, + "loss": 1.1347, + "step": 9702 + }, + { + "epoch": 0.79, + "grad_norm": 6.198033537390311, + "learning_rate": 1.0811928683449318e-06, + "loss": 0.9468, + "step": 9703 + }, + { + "epoch": 0.79, + "grad_norm": 4.581324713236814, + "learning_rate": 1.0803708567603493e-06, + "loss": 0.756, + "step": 9704 + }, + { + "epoch": 0.79, + "grad_norm": 2.227710907963709, + "learning_rate": 1.0795491199247133e-06, + "loss": 0.3648, + "step": 9705 + }, + { + "epoch": 0.79, + "grad_norm": 3.7858849391276843, + "learning_rate": 1.0787276578956207e-06, + "loss": 0.6991, + "step": 9706 + }, + { + "epoch": 0.79, + "grad_norm": 5.430426207389419, + "learning_rate": 1.0779064707306536e-06, + "loss": 1.1752, + "step": 9707 + }, + { + "epoch": 0.79, + "grad_norm": 3.0675236089607156, + "learning_rate": 1.077085558487374e-06, + "loss": 0.5475, + "step": 9708 + }, + { + "epoch": 0.79, + "grad_norm": 4.074170239303381, + "learning_rate": 1.076264921223324e-06, + "loss": 0.8862, + "step": 9709 + }, + { + "epoch": 0.79, + "grad_norm": 4.14174743579678, + "learning_rate": 1.0754445589960273e-06, + "loss": 1.2746, + "step": 9710 + }, + { + "epoch": 0.79, + "grad_norm": 3.7197812301940547, + "learning_rate": 1.0746244718629883e-06, + "loss": 0.6069, + "step": 9711 + }, + { + "epoch": 0.79, + "grad_norm": 4.275241200789883, + "learning_rate": 1.0738046598816891e-06, + "loss": 0.6855, + "step": 9712 + }, + { + "epoch": 0.79, + "grad_norm": 3.9473450395501235, + "learning_rate": 1.0729851231095983e-06, + "loss": 0.5234, + "step": 9713 + }, + { + "epoch": 0.79, + "grad_norm": 4.624716296959476, + "learning_rate": 1.0721658616041581e-06, + "loss": 1.0341, + "step": 9714 + }, + { + "epoch": 0.79, + "grad_norm": 3.18194587295983, + "learning_rate": 1.0713468754227968e-06, + "loss": 0.6545, + "step": 9715 + }, + { + "epoch": 0.79, + "grad_norm": 3.67836566277044, + "learning_rate": 1.0705281646229227e-06, + "loss": 0.5493, + "step": 9716 + }, + { + "epoch": 0.79, + "grad_norm": 3.899433654390494, + "learning_rate": 1.0697097292619241e-06, + "loss": 0.9894, + "step": 9717 + }, + { + "epoch": 0.79, + "grad_norm": 4.512719535347329, + "learning_rate": 1.0688915693971675e-06, + "loss": 0.9985, + "step": 9718 + }, + { + "epoch": 0.79, + "grad_norm": 1.598644989378368, + "learning_rate": 1.0680736850860034e-06, + "loss": 0.2486, + "step": 9719 + }, + { + "epoch": 0.79, + "grad_norm": 5.536177573138868, + "learning_rate": 1.0672560763857626e-06, + "loss": 0.9369, + "step": 9720 + }, + { + "epoch": 0.79, + "grad_norm": 5.5585393591174235, + "learning_rate": 1.066438743353755e-06, + "loss": 1.2031, + "step": 9721 + }, + { + "epoch": 0.79, + "grad_norm": 5.132905482869254, + "learning_rate": 1.065621686047274e-06, + "loss": 1.0333, + "step": 9722 + }, + { + "epoch": 0.79, + "grad_norm": 3.5796683304053936, + "learning_rate": 1.0648049045235891e-06, + "loss": 0.6483, + "step": 9723 + }, + { + "epoch": 0.79, + "grad_norm": 3.0256589186041563, + "learning_rate": 1.0639883988399547e-06, + "loss": 0.7105, + "step": 9724 + }, + { + "epoch": 0.79, + "grad_norm": 6.390922431470076, + "learning_rate": 1.0631721690536034e-06, + "loss": 0.6557, + "step": 9725 + }, + { + "epoch": 0.79, + "grad_norm": 2.1932750602916995, + "learning_rate": 1.0623562152217503e-06, + "loss": 0.3335, + "step": 9726 + }, + { + "epoch": 0.8, + "grad_norm": 5.586248012852279, + "learning_rate": 1.0615405374015913e-06, + "loss": 0.6929, + "step": 9727 + }, + { + "epoch": 0.8, + "grad_norm": 2.199738806020164, + "learning_rate": 1.060725135650299e-06, + "loss": 0.1826, + "step": 9728 + }, + { + "epoch": 0.8, + "grad_norm": 4.001062988625413, + "learning_rate": 1.059910010025032e-06, + "loss": 0.5847, + "step": 9729 + }, + { + "epoch": 0.8, + "grad_norm": 4.126468899802749, + "learning_rate": 1.0590951605829247e-06, + "loss": 0.9041, + "step": 9730 + }, + { + "epoch": 0.8, + "grad_norm": 4.114543696558655, + "learning_rate": 1.0582805873810959e-06, + "loss": 1.0175, + "step": 9731 + }, + { + "epoch": 0.8, + "grad_norm": 5.226737038244887, + "learning_rate": 1.0574662904766432e-06, + "loss": 0.9064, + "step": 9732 + }, + { + "epoch": 0.8, + "grad_norm": 2.348937215859812, + "learning_rate": 1.0566522699266457e-06, + "loss": 0.398, + "step": 9733 + }, + { + "epoch": 0.8, + "grad_norm": 4.014006204755118, + "learning_rate": 1.0558385257881637e-06, + "loss": 0.8706, + "step": 9734 + }, + { + "epoch": 0.8, + "grad_norm": 2.926980563123289, + "learning_rate": 1.0550250581182353e-06, + "loss": 0.4947, + "step": 9735 + }, + { + "epoch": 0.8, + "grad_norm": 4.774309576349225, + "learning_rate": 1.054211866973881e-06, + "loss": 1.4669, + "step": 9736 + }, + { + "epoch": 0.8, + "grad_norm": 6.424093652109563, + "learning_rate": 1.053398952412103e-06, + "loss": 1.2049, + "step": 9737 + }, + { + "epoch": 0.8, + "grad_norm": 3.4743138653694388, + "learning_rate": 1.052586314489883e-06, + "loss": 0.6989, + "step": 9738 + }, + { + "epoch": 0.8, + "grad_norm": 3.101626911918696, + "learning_rate": 1.051773953264183e-06, + "loss": 0.4607, + "step": 9739 + }, + { + "epoch": 0.8, + "grad_norm": 3.6506214341008953, + "learning_rate": 1.0509618687919476e-06, + "loss": 1.0164, + "step": 9740 + }, + { + "epoch": 0.8, + "grad_norm": 2.475059228286483, + "learning_rate": 1.0501500611300974e-06, + "loss": 0.4186, + "step": 9741 + }, + { + "epoch": 0.8, + "grad_norm": 3.1277552416511982, + "learning_rate": 1.049338530335538e-06, + "loss": 0.4358, + "step": 9742 + }, + { + "epoch": 0.8, + "grad_norm": 5.009423272641236, + "learning_rate": 1.0485272764651543e-06, + "loss": 0.7617, + "step": 9743 + }, + { + "epoch": 0.8, + "grad_norm": 2.815574150796619, + "learning_rate": 1.0477162995758133e-06, + "loss": 0.5747, + "step": 9744 + }, + { + "epoch": 0.8, + "grad_norm": 2.4430449005631587, + "learning_rate": 1.0469055997243578e-06, + "loss": 0.3788, + "step": 9745 + }, + { + "epoch": 0.8, + "grad_norm": 2.962221865232792, + "learning_rate": 1.0460951769676175e-06, + "loss": 0.353, + "step": 9746 + }, + { + "epoch": 0.8, + "grad_norm": 3.4239971821571578, + "learning_rate": 1.0452850313623958e-06, + "loss": 0.573, + "step": 9747 + }, + { + "epoch": 0.8, + "grad_norm": 1.0293272781540777, + "learning_rate": 1.0444751629654831e-06, + "loss": 0.1605, + "step": 9748 + }, + { + "epoch": 0.8, + "grad_norm": 5.101613154249267, + "learning_rate": 1.0436655718336464e-06, + "loss": 0.6731, + "step": 9749 + }, + { + "epoch": 0.8, + "grad_norm": 1.113365293730688, + "learning_rate": 1.0428562580236358e-06, + "loss": 0.175, + "step": 9750 + }, + { + "epoch": 0.8, + "grad_norm": 5.105455382948563, + "learning_rate": 1.0420472215921807e-06, + "loss": 1.1378, + "step": 9751 + }, + { + "epoch": 0.8, + "grad_norm": 3.7861055323893917, + "learning_rate": 1.0412384625959887e-06, + "loss": 1.08, + "step": 9752 + }, + { + "epoch": 0.8, + "grad_norm": 2.675653711131224, + "learning_rate": 1.0404299810917523e-06, + "loss": 0.4479, + "step": 9753 + }, + { + "epoch": 0.8, + "grad_norm": 4.335133964402627, + "learning_rate": 1.0396217771361422e-06, + "loss": 0.5115, + "step": 9754 + }, + { + "epoch": 0.8, + "grad_norm": 4.980571756794141, + "learning_rate": 1.0388138507858098e-06, + "loss": 1.1913, + "step": 9755 + }, + { + "epoch": 0.8, + "grad_norm": 3.8508596544370906, + "learning_rate": 1.0380062020973875e-06, + "loss": 0.6614, + "step": 9756 + }, + { + "epoch": 0.8, + "grad_norm": 4.914488203777597, + "learning_rate": 1.037198831127489e-06, + "loss": 0.7279, + "step": 9757 + }, + { + "epoch": 0.8, + "grad_norm": 3.7708154404824783, + "learning_rate": 1.036391737932705e-06, + "loss": 0.7997, + "step": 9758 + }, + { + "epoch": 0.8, + "grad_norm": 4.380952909168642, + "learning_rate": 1.0355849225696102e-06, + "loss": 0.7888, + "step": 9759 + }, + { + "epoch": 0.8, + "grad_norm": 4.42437832779043, + "learning_rate": 1.0347783850947606e-06, + "loss": 0.5281, + "step": 9760 + }, + { + "epoch": 0.8, + "grad_norm": 5.508575678821504, + "learning_rate": 1.0339721255646885e-06, + "loss": 1.3033, + "step": 9761 + }, + { + "epoch": 0.8, + "grad_norm": 3.93822300897433, + "learning_rate": 1.0331661440359114e-06, + "loss": 0.7981, + "step": 9762 + }, + { + "epoch": 0.8, + "grad_norm": 4.311374690150685, + "learning_rate": 1.0323604405649224e-06, + "loss": 0.8647, + "step": 9763 + }, + { + "epoch": 0.8, + "grad_norm": 3.0385444435253683, + "learning_rate": 1.0315550152081988e-06, + "loss": 0.5561, + "step": 9764 + }, + { + "epoch": 0.8, + "grad_norm": 3.508625854582256, + "learning_rate": 1.0307498680221988e-06, + "loss": 0.4562, + "step": 9765 + }, + { + "epoch": 0.8, + "grad_norm": 2.6243096831556865, + "learning_rate": 1.029944999063358e-06, + "loss": 0.2955, + "step": 9766 + }, + { + "epoch": 0.8, + "grad_norm": 2.7360148103370268, + "learning_rate": 1.0291404083880957e-06, + "loss": 0.3317, + "step": 9767 + }, + { + "epoch": 0.8, + "grad_norm": 5.045643567688735, + "learning_rate": 1.0283360960528104e-06, + "loss": 0.9755, + "step": 9768 + }, + { + "epoch": 0.8, + "grad_norm": 4.892143673884387, + "learning_rate": 1.027532062113879e-06, + "loss": 0.8521, + "step": 9769 + }, + { + "epoch": 0.8, + "grad_norm": 4.793368296844829, + "learning_rate": 1.0267283066276618e-06, + "loss": 1.1566, + "step": 9770 + }, + { + "epoch": 0.8, + "grad_norm": 1.9362162470033908, + "learning_rate": 1.0259248296504986e-06, + "loss": 0.1881, + "step": 9771 + }, + { + "epoch": 0.8, + "grad_norm": 2.454044760591829, + "learning_rate": 1.025121631238709e-06, + "loss": 0.5303, + "step": 9772 + }, + { + "epoch": 0.8, + "grad_norm": 2.8740149655278215, + "learning_rate": 1.0243187114485953e-06, + "loss": 0.4281, + "step": 9773 + }, + { + "epoch": 0.8, + "grad_norm": 3.395325037236486, + "learning_rate": 1.0235160703364384e-06, + "loss": 0.5552, + "step": 9774 + }, + { + "epoch": 0.8, + "grad_norm": 4.774354601146491, + "learning_rate": 1.022713707958498e-06, + "loss": 1.0098, + "step": 9775 + }, + { + "epoch": 0.8, + "grad_norm": 4.109845239922136, + "learning_rate": 1.0219116243710192e-06, + "loss": 0.4951, + "step": 9776 + }, + { + "epoch": 0.8, + "grad_norm": 3.589572101861237, + "learning_rate": 1.021109819630221e-06, + "loss": 0.9188, + "step": 9777 + }, + { + "epoch": 0.8, + "grad_norm": 2.2871748538178953, + "learning_rate": 1.0203082937923082e-06, + "loss": 0.6727, + "step": 9778 + }, + { + "epoch": 0.8, + "grad_norm": 5.300448811565389, + "learning_rate": 1.019507046913465e-06, + "loss": 0.7887, + "step": 9779 + }, + { + "epoch": 0.8, + "grad_norm": 3.7733135121297705, + "learning_rate": 1.0187060790498553e-06, + "loss": 0.821, + "step": 9780 + }, + { + "epoch": 0.8, + "grad_norm": 5.4690246160815486, + "learning_rate": 1.0179053902576214e-06, + "loss": 0.9153, + "step": 9781 + }, + { + "epoch": 0.8, + "grad_norm": 2.9915957436347544, + "learning_rate": 1.01710498059289e-06, + "loss": 0.6375, + "step": 9782 + }, + { + "epoch": 0.8, + "grad_norm": 4.057028414868053, + "learning_rate": 1.0163048501117657e-06, + "loss": 1.1214, + "step": 9783 + }, + { + "epoch": 0.8, + "grad_norm": 3.5991348622918498, + "learning_rate": 1.0155049988703342e-06, + "loss": 0.4839, + "step": 9784 + }, + { + "epoch": 0.8, + "grad_norm": 4.125793983286334, + "learning_rate": 1.014705426924663e-06, + "loss": 0.7087, + "step": 9785 + }, + { + "epoch": 0.8, + "grad_norm": 4.105332917713791, + "learning_rate": 1.013906134330796e-06, + "loss": 0.5788, + "step": 9786 + }, + { + "epoch": 0.8, + "grad_norm": 5.982479435308393, + "learning_rate": 1.013107121144762e-06, + "loss": 1.4878, + "step": 9787 + }, + { + "epoch": 0.8, + "grad_norm": 5.090355400449551, + "learning_rate": 1.012308387422567e-06, + "loss": 0.8028, + "step": 9788 + }, + { + "epoch": 0.8, + "grad_norm": 3.6454026800282806, + "learning_rate": 1.0115099332201999e-06, + "loss": 0.7259, + "step": 9789 + }, + { + "epoch": 0.8, + "grad_norm": 3.3975299683156943, + "learning_rate": 1.01071175859363e-06, + "loss": 0.4905, + "step": 9790 + }, + { + "epoch": 0.8, + "grad_norm": 4.916271590526557, + "learning_rate": 1.0099138635988026e-06, + "loss": 0.8442, + "step": 9791 + }, + { + "epoch": 0.8, + "grad_norm": 3.5728726610791397, + "learning_rate": 1.00911624829165e-06, + "loss": 0.7227, + "step": 9792 + }, + { + "epoch": 0.8, + "grad_norm": 5.115132988655575, + "learning_rate": 1.008318912728079e-06, + "loss": 0.6475, + "step": 9793 + }, + { + "epoch": 0.8, + "grad_norm": 3.904296494945222, + "learning_rate": 1.00752185696398e-06, + "loss": 0.5609, + "step": 9794 + }, + { + "epoch": 0.8, + "grad_norm": 4.899616251799633, + "learning_rate": 1.0067250810552236e-06, + "loss": 0.8103, + "step": 9795 + }, + { + "epoch": 0.8, + "grad_norm": 2.8749327110273764, + "learning_rate": 1.00592858505766e-06, + "loss": 0.5936, + "step": 9796 + }, + { + "epoch": 0.8, + "grad_norm": 3.029676630552301, + "learning_rate": 1.005132369027122e-06, + "loss": 0.6193, + "step": 9797 + }, + { + "epoch": 0.8, + "grad_norm": 4.158077519218264, + "learning_rate": 1.0043364330194178e-06, + "loss": 0.7681, + "step": 9798 + }, + { + "epoch": 0.8, + "grad_norm": 5.9746467392254266, + "learning_rate": 1.0035407770903405e-06, + "loss": 1.0636, + "step": 9799 + }, + { + "epoch": 0.8, + "grad_norm": 2.37050920443815, + "learning_rate": 1.0027454012956617e-06, + "loss": 0.4571, + "step": 9800 + }, + { + "epoch": 0.8, + "grad_norm": 6.804320790908561, + "learning_rate": 1.0019503056911346e-06, + "loss": 1.0382, + "step": 9801 + }, + { + "epoch": 0.8, + "grad_norm": 5.35103012277236, + "learning_rate": 1.0011554903324928e-06, + "loss": 1.043, + "step": 9802 + }, + { + "epoch": 0.8, + "grad_norm": 4.608746779516858, + "learning_rate": 1.0003609552754468e-06, + "loss": 0.7013, + "step": 9803 + }, + { + "epoch": 0.8, + "grad_norm": 3.1014704714558783, + "learning_rate": 9.995667005756909e-07, + "loss": 0.3483, + "step": 9804 + }, + { + "epoch": 0.8, + "grad_norm": 6.257377286497344, + "learning_rate": 9.987727262888997e-07, + "loss": 0.7886, + "step": 9805 + }, + { + "epoch": 0.8, + "grad_norm": 4.308433224780674, + "learning_rate": 9.979790324707284e-07, + "loss": 0.7515, + "step": 9806 + }, + { + "epoch": 0.8, + "grad_norm": 5.4792480740472165, + "learning_rate": 9.971856191768086e-07, + "loss": 0.9002, + "step": 9807 + }, + { + "epoch": 0.8, + "grad_norm": 4.140277972857411, + "learning_rate": 9.963924864627578e-07, + "loss": 0.9105, + "step": 9808 + }, + { + "epoch": 0.8, + "grad_norm": 3.914102260867357, + "learning_rate": 9.95599634384169e-07, + "loss": 0.9481, + "step": 9809 + }, + { + "epoch": 0.8, + "grad_norm": 1.513118226771093, + "learning_rate": 9.948070629966183e-07, + "loss": 0.169, + "step": 9810 + }, + { + "epoch": 0.8, + "grad_norm": 2.7295513628394326, + "learning_rate": 9.940147723556614e-07, + "loss": 0.6453, + "step": 9811 + }, + { + "epoch": 0.8, + "grad_norm": 3.5774326972108734, + "learning_rate": 9.932227625168356e-07, + "loss": 0.5254, + "step": 9812 + }, + { + "epoch": 0.8, + "grad_norm": 5.602790864794578, + "learning_rate": 9.924310335356563e-07, + "loss": 1.1936, + "step": 9813 + }, + { + "epoch": 0.8, + "grad_norm": 4.360797507501289, + "learning_rate": 9.91639585467622e-07, + "loss": 0.8715, + "step": 9814 + }, + { + "epoch": 0.8, + "grad_norm": 3.4461678530552238, + "learning_rate": 9.908484183682065e-07, + "loss": 0.666, + "step": 9815 + }, + { + "epoch": 0.8, + "grad_norm": 1.760828772809022, + "learning_rate": 9.900575322928696e-07, + "loss": 0.3425, + "step": 9816 + }, + { + "epoch": 0.8, + "grad_norm": 4.34303928784837, + "learning_rate": 9.892669272970485e-07, + "loss": 0.9513, + "step": 9817 + }, + { + "epoch": 0.8, + "grad_norm": 5.345416542482766, + "learning_rate": 9.884766034361604e-07, + "loss": 1.0784, + "step": 9818 + }, + { + "epoch": 0.8, + "grad_norm": 3.706912283533461, + "learning_rate": 9.876865607656045e-07, + "loss": 0.466, + "step": 9819 + }, + { + "epoch": 0.8, + "grad_norm": 2.9258263158249984, + "learning_rate": 9.868967993407603e-07, + "loss": 0.5956, + "step": 9820 + }, + { + "epoch": 0.8, + "grad_norm": 3.0342284251333296, + "learning_rate": 9.86107319216984e-07, + "loss": 0.5981, + "step": 9821 + }, + { + "epoch": 0.8, + "grad_norm": 4.143853090321942, + "learning_rate": 9.853181204496176e-07, + "loss": 0.7826, + "step": 9822 + }, + { + "epoch": 0.8, + "grad_norm": 4.106256324176616, + "learning_rate": 9.845292030939775e-07, + "loss": 0.7144, + "step": 9823 + }, + { + "epoch": 0.8, + "grad_norm": 4.946979660030996, + "learning_rate": 9.837405672053651e-07, + "loss": 0.8796, + "step": 9824 + }, + { + "epoch": 0.8, + "grad_norm": 4.094283608807544, + "learning_rate": 9.829522128390611e-07, + "loss": 0.678, + "step": 9825 + }, + { + "epoch": 0.8, + "grad_norm": 1.2068649294230744, + "learning_rate": 9.821641400503235e-07, + "loss": 0.1685, + "step": 9826 + }, + { + "epoch": 0.8, + "grad_norm": 2.9386882985967446, + "learning_rate": 9.813763488943946e-07, + "loss": 0.5521, + "step": 9827 + }, + { + "epoch": 0.8, + "grad_norm": 3.1806468055796757, + "learning_rate": 9.80588839426494e-07, + "loss": 0.5293, + "step": 9828 + }, + { + "epoch": 0.8, + "grad_norm": 5.376950259944358, + "learning_rate": 9.798016117018233e-07, + "loss": 1.2063, + "step": 9829 + }, + { + "epoch": 0.8, + "grad_norm": 3.6004839039357366, + "learning_rate": 9.790146657755633e-07, + "loss": 0.7438, + "step": 9830 + }, + { + "epoch": 0.8, + "grad_norm": 4.17331895871248, + "learning_rate": 9.782280017028777e-07, + "loss": 0.9275, + "step": 9831 + }, + { + "epoch": 0.8, + "grad_norm": 4.179139915704291, + "learning_rate": 9.774416195389046e-07, + "loss": 0.4853, + "step": 9832 + }, + { + "epoch": 0.8, + "grad_norm": 4.576781743034318, + "learning_rate": 9.766555193387683e-07, + "loss": 1.141, + "step": 9833 + }, + { + "epoch": 0.8, + "grad_norm": 4.229859663170041, + "learning_rate": 9.7586970115757e-07, + "loss": 1.069, + "step": 9834 + }, + { + "epoch": 0.8, + "grad_norm": 5.163122161832738, + "learning_rate": 9.750841650503928e-07, + "loss": 1.1175, + "step": 9835 + }, + { + "epoch": 0.8, + "grad_norm": 1.2391700379530222, + "learning_rate": 9.742989110723e-07, + "loss": 0.1761, + "step": 9836 + }, + { + "epoch": 0.8, + "grad_norm": 2.460523201070169, + "learning_rate": 9.735139392783326e-07, + "loss": 0.3726, + "step": 9837 + }, + { + "epoch": 0.8, + "grad_norm": 3.9994436411899814, + "learning_rate": 9.727292497235151e-07, + "loss": 0.8943, + "step": 9838 + }, + { + "epoch": 0.8, + "grad_norm": 3.0603174081227125, + "learning_rate": 9.719448424628514e-07, + "loss": 0.6165, + "step": 9839 + }, + { + "epoch": 0.8, + "grad_norm": 3.8683916770844027, + "learning_rate": 9.711607175513228e-07, + "loss": 0.7775, + "step": 9840 + }, + { + "epoch": 0.8, + "grad_norm": 3.3962013038998338, + "learning_rate": 9.70376875043894e-07, + "loss": 0.5398, + "step": 9841 + }, + { + "epoch": 0.8, + "grad_norm": 4.466230612997388, + "learning_rate": 9.695933149955111e-07, + "loss": 0.8821, + "step": 9842 + }, + { + "epoch": 0.8, + "grad_norm": 3.126243935036026, + "learning_rate": 9.688100374610953e-07, + "loss": 0.5089, + "step": 9843 + }, + { + "epoch": 0.8, + "grad_norm": 4.075033137777421, + "learning_rate": 9.68027042495552e-07, + "loss": 0.8537, + "step": 9844 + }, + { + "epoch": 0.8, + "grad_norm": 5.044256011589565, + "learning_rate": 9.672443301537654e-07, + "loss": 0.8834, + "step": 9845 + }, + { + "epoch": 0.8, + "grad_norm": 3.9775589520839407, + "learning_rate": 9.664619004906007e-07, + "loss": 0.7768, + "step": 9846 + }, + { + "epoch": 0.8, + "grad_norm": 3.532802096795839, + "learning_rate": 9.65679753560903e-07, + "loss": 0.7676, + "step": 9847 + }, + { + "epoch": 0.8, + "grad_norm": 4.9665152868615134, + "learning_rate": 9.648978894194983e-07, + "loss": 0.7077, + "step": 9848 + }, + { + "epoch": 0.81, + "grad_norm": 2.497109619755261, + "learning_rate": 9.641163081211891e-07, + "loss": 0.542, + "step": 9849 + }, + { + "epoch": 0.81, + "grad_norm": 4.00698599759604, + "learning_rate": 9.633350097207628e-07, + "loss": 0.8032, + "step": 9850 + }, + { + "epoch": 0.81, + "grad_norm": 3.3863573071418904, + "learning_rate": 9.62553994272985e-07, + "loss": 0.5541, + "step": 9851 + }, + { + "epoch": 0.81, + "grad_norm": 3.247677039720678, + "learning_rate": 9.61773261832601e-07, + "loss": 0.5179, + "step": 9852 + }, + { + "epoch": 0.81, + "grad_norm": 3.8418076360091997, + "learning_rate": 9.609928124543376e-07, + "loss": 0.6889, + "step": 9853 + }, + { + "epoch": 0.81, + "grad_norm": 3.118237736916841, + "learning_rate": 9.602126461929002e-07, + "loss": 0.8431, + "step": 9854 + }, + { + "epoch": 0.81, + "grad_norm": 2.686936764023641, + "learning_rate": 9.594327631029753e-07, + "loss": 0.2751, + "step": 9855 + }, + { + "epoch": 0.81, + "grad_norm": 2.640907225065898, + "learning_rate": 9.586531632392282e-07, + "loss": 0.5965, + "step": 9856 + }, + { + "epoch": 0.81, + "grad_norm": 2.207380321843211, + "learning_rate": 9.578738466563065e-07, + "loss": 0.2198, + "step": 9857 + }, + { + "epoch": 0.81, + "grad_norm": 3.2635923528591935, + "learning_rate": 9.570948134088364e-07, + "loss": 0.7252, + "step": 9858 + }, + { + "epoch": 0.81, + "grad_norm": 4.105489324566011, + "learning_rate": 9.563160635514252e-07, + "loss": 0.6897, + "step": 9859 + }, + { + "epoch": 0.81, + "grad_norm": 5.64835001657043, + "learning_rate": 9.55537597138661e-07, + "loss": 0.6154, + "step": 9860 + }, + { + "epoch": 0.81, + "grad_norm": 3.7978146569040385, + "learning_rate": 9.547594142251089e-07, + "loss": 0.7363, + "step": 9861 + }, + { + "epoch": 0.81, + "grad_norm": 1.7402329161412262, + "learning_rate": 9.539815148653163e-07, + "loss": 0.1861, + "step": 9862 + }, + { + "epoch": 0.81, + "grad_norm": 3.53530084807777, + "learning_rate": 9.532038991138115e-07, + "loss": 0.863, + "step": 9863 + }, + { + "epoch": 0.81, + "grad_norm": 4.914928019255762, + "learning_rate": 9.524265670251015e-07, + "loss": 0.7861, + "step": 9864 + }, + { + "epoch": 0.81, + "grad_norm": 3.5257229406622033, + "learning_rate": 9.516495186536751e-07, + "loss": 0.7089, + "step": 9865 + }, + { + "epoch": 0.81, + "grad_norm": 4.417659463572757, + "learning_rate": 9.508727540539981e-07, + "loss": 1.1149, + "step": 9866 + }, + { + "epoch": 0.81, + "grad_norm": 4.709118153703828, + "learning_rate": 9.500962732805192e-07, + "loss": 0.6207, + "step": 9867 + }, + { + "epoch": 0.81, + "grad_norm": 5.524006700894052, + "learning_rate": 9.493200763876658e-07, + "loss": 0.7884, + "step": 9868 + }, + { + "epoch": 0.81, + "grad_norm": 2.996789862109213, + "learning_rate": 9.485441634298482e-07, + "loss": 0.5168, + "step": 9869 + }, + { + "epoch": 0.81, + "grad_norm": 3.0079425193436378, + "learning_rate": 9.477685344614517e-07, + "loss": 0.5929, + "step": 9870 + }, + { + "epoch": 0.81, + "grad_norm": 3.3091738066070024, + "learning_rate": 9.469931895368462e-07, + "loss": 0.5454, + "step": 9871 + }, + { + "epoch": 0.81, + "grad_norm": 4.8737147416684445, + "learning_rate": 9.462181287103783e-07, + "loss": 1.1663, + "step": 9872 + }, + { + "epoch": 0.81, + "grad_norm": 2.910258465431136, + "learning_rate": 9.454433520363776e-07, + "loss": 0.5832, + "step": 9873 + }, + { + "epoch": 0.81, + "grad_norm": 4.3697371231984325, + "learning_rate": 9.446688595691522e-07, + "loss": 0.9605, + "step": 9874 + }, + { + "epoch": 0.81, + "grad_norm": 3.0701907349081323, + "learning_rate": 9.438946513629915e-07, + "loss": 0.7593, + "step": 9875 + }, + { + "epoch": 0.81, + "grad_norm": 4.494410910517011, + "learning_rate": 9.431207274721627e-07, + "loss": 0.8716, + "step": 9876 + }, + { + "epoch": 0.81, + "grad_norm": 5.872713129768486, + "learning_rate": 9.423470879509172e-07, + "loss": 1.1377, + "step": 9877 + }, + { + "epoch": 0.81, + "grad_norm": 3.8073964996011616, + "learning_rate": 9.415737328534802e-07, + "loss": 0.5902, + "step": 9878 + }, + { + "epoch": 0.81, + "grad_norm": 3.960128248000555, + "learning_rate": 9.408006622340627e-07, + "loss": 0.6662, + "step": 9879 + }, + { + "epoch": 0.81, + "grad_norm": 3.8411134500064015, + "learning_rate": 9.400278761468523e-07, + "loss": 0.6525, + "step": 9880 + }, + { + "epoch": 0.81, + "grad_norm": 5.271183552028433, + "learning_rate": 9.392553746460193e-07, + "loss": 0.8974, + "step": 9881 + }, + { + "epoch": 0.81, + "grad_norm": 3.6937126757581447, + "learning_rate": 9.384831577857135e-07, + "loss": 0.8745, + "step": 9882 + }, + { + "epoch": 0.81, + "grad_norm": 3.918194333309644, + "learning_rate": 9.37711225620061e-07, + "loss": 0.6488, + "step": 9883 + }, + { + "epoch": 0.81, + "grad_norm": 2.768034398152464, + "learning_rate": 9.36939578203172e-07, + "loss": 0.5795, + "step": 9884 + }, + { + "epoch": 0.81, + "grad_norm": 3.5521280178173114, + "learning_rate": 9.361682155891382e-07, + "loss": 0.492, + "step": 9885 + }, + { + "epoch": 0.81, + "grad_norm": 5.010274233421411, + "learning_rate": 9.353971378320248e-07, + "loss": 0.9207, + "step": 9886 + }, + { + "epoch": 0.81, + "grad_norm": 1.6367400905396001, + "learning_rate": 9.346263449858828e-07, + "loss": 0.3406, + "step": 9887 + }, + { + "epoch": 0.81, + "grad_norm": 5.299231533318858, + "learning_rate": 9.338558371047429e-07, + "loss": 0.9699, + "step": 9888 + }, + { + "epoch": 0.81, + "grad_norm": 4.455281392120958, + "learning_rate": 9.33085614242612e-07, + "loss": 0.479, + "step": 9889 + }, + { + "epoch": 0.81, + "grad_norm": 4.576101144784835, + "learning_rate": 9.323156764534797e-07, + "loss": 0.8125, + "step": 9890 + }, + { + "epoch": 0.81, + "grad_norm": 6.001248491978731, + "learning_rate": 9.315460237913159e-07, + "loss": 0.9924, + "step": 9891 + }, + { + "epoch": 0.81, + "grad_norm": 4.043072769369696, + "learning_rate": 9.3077665631007e-07, + "loss": 1.0591, + "step": 9892 + }, + { + "epoch": 0.81, + "grad_norm": 2.4866963629472107, + "learning_rate": 9.300075740636716e-07, + "loss": 0.3196, + "step": 9893 + }, + { + "epoch": 0.81, + "grad_norm": 2.8031433196931106, + "learning_rate": 9.292387771060302e-07, + "loss": 0.7287, + "step": 9894 + }, + { + "epoch": 0.81, + "grad_norm": 3.311911836138839, + "learning_rate": 9.284702654910338e-07, + "loss": 0.5071, + "step": 9895 + }, + { + "epoch": 0.81, + "grad_norm": 2.4149683062187797, + "learning_rate": 9.277020392725522e-07, + "loss": 0.3489, + "step": 9896 + }, + { + "epoch": 0.81, + "grad_norm": 4.1060268580376, + "learning_rate": 9.269340985044345e-07, + "loss": 0.9197, + "step": 9897 + }, + { + "epoch": 0.81, + "grad_norm": 1.2150900519384455, + "learning_rate": 9.261664432405109e-07, + "loss": 0.1616, + "step": 9898 + }, + { + "epoch": 0.81, + "grad_norm": 3.005804801362847, + "learning_rate": 9.253990735345914e-07, + "loss": 0.6852, + "step": 9899 + }, + { + "epoch": 0.81, + "grad_norm": 4.974940928049727, + "learning_rate": 9.246319894404632e-07, + "loss": 0.7249, + "step": 9900 + }, + { + "epoch": 0.81, + "grad_norm": 4.038730562058925, + "learning_rate": 9.238651910118973e-07, + "loss": 0.855, + "step": 9901 + }, + { + "epoch": 0.81, + "grad_norm": 3.3181916794751687, + "learning_rate": 9.230986783026413e-07, + "loss": 0.4042, + "step": 9902 + }, + { + "epoch": 0.81, + "grad_norm": 4.576947794174118, + "learning_rate": 9.223324513664245e-07, + "loss": 0.7459, + "step": 9903 + }, + { + "epoch": 0.81, + "grad_norm": 3.720413585163589, + "learning_rate": 9.215665102569577e-07, + "loss": 0.7138, + "step": 9904 + }, + { + "epoch": 0.81, + "grad_norm": 4.406165342302664, + "learning_rate": 9.208008550279296e-07, + "loss": 0.5856, + "step": 9905 + }, + { + "epoch": 0.81, + "grad_norm": 3.613024596190667, + "learning_rate": 9.20035485733008e-07, + "loss": 0.5525, + "step": 9906 + }, + { + "epoch": 0.81, + "grad_norm": 5.11050019329382, + "learning_rate": 9.192704024258426e-07, + "loss": 1.169, + "step": 9907 + }, + { + "epoch": 0.81, + "grad_norm": 4.645508394267574, + "learning_rate": 9.185056051600627e-07, + "loss": 0.8694, + "step": 9908 + }, + { + "epoch": 0.81, + "grad_norm": 3.5518757837879007, + "learning_rate": 9.177410939892772e-07, + "loss": 0.5554, + "step": 9909 + }, + { + "epoch": 0.81, + "grad_norm": 3.638157839908345, + "learning_rate": 9.169768689670749e-07, + "loss": 0.9099, + "step": 9910 + }, + { + "epoch": 0.81, + "grad_norm": 3.268801656265276, + "learning_rate": 9.162129301470258e-07, + "loss": 0.8827, + "step": 9911 + }, + { + "epoch": 0.81, + "grad_norm": 3.9296386685375695, + "learning_rate": 9.154492775826762e-07, + "loss": 0.8059, + "step": 9912 + }, + { + "epoch": 0.81, + "grad_norm": 1.9105302074156747, + "learning_rate": 9.146859113275569e-07, + "loss": 0.333, + "step": 9913 + }, + { + "epoch": 0.81, + "grad_norm": 4.412695156513237, + "learning_rate": 9.13922831435175e-07, + "loss": 0.6103, + "step": 9914 + }, + { + "epoch": 0.81, + "grad_norm": 3.5759420864804925, + "learning_rate": 9.131600379590222e-07, + "loss": 0.7136, + "step": 9915 + }, + { + "epoch": 0.81, + "grad_norm": 3.7030317891351676, + "learning_rate": 9.123975309525629e-07, + "loss": 0.7758, + "step": 9916 + }, + { + "epoch": 0.81, + "grad_norm": 5.236943959535839, + "learning_rate": 9.116353104692488e-07, + "loss": 0.8042, + "step": 9917 + }, + { + "epoch": 0.81, + "grad_norm": 4.01181731080616, + "learning_rate": 9.10873376562505e-07, + "loss": 0.5553, + "step": 9918 + }, + { + "epoch": 0.81, + "grad_norm": 2.9999144714800794, + "learning_rate": 9.10111729285742e-07, + "loss": 0.4767, + "step": 9919 + }, + { + "epoch": 0.81, + "grad_norm": 4.079564436691244, + "learning_rate": 9.093503686923477e-07, + "loss": 0.7797, + "step": 9920 + }, + { + "epoch": 0.81, + "grad_norm": 3.242097952262364, + "learning_rate": 9.0858929483569e-07, + "loss": 0.6149, + "step": 9921 + }, + { + "epoch": 0.81, + "grad_norm": 5.012721973636521, + "learning_rate": 9.078285077691179e-07, + "loss": 0.6988, + "step": 9922 + }, + { + "epoch": 0.81, + "grad_norm": 1.9548161095069063, + "learning_rate": 9.07068007545957e-07, + "loss": 0.2971, + "step": 9923 + }, + { + "epoch": 0.81, + "grad_norm": 4.129316033445483, + "learning_rate": 9.063077942195164e-07, + "loss": 0.8209, + "step": 9924 + }, + { + "epoch": 0.81, + "grad_norm": 4.084635351098784, + "learning_rate": 9.055478678430835e-07, + "loss": 1.014, + "step": 9925 + }, + { + "epoch": 0.81, + "grad_norm": 2.8229568048217413, + "learning_rate": 9.047882284699255e-07, + "loss": 0.555, + "step": 9926 + }, + { + "epoch": 0.81, + "grad_norm": 3.2574689239458445, + "learning_rate": 9.040288761532911e-07, + "loss": 0.4482, + "step": 9927 + }, + { + "epoch": 0.81, + "grad_norm": 2.806892944470939, + "learning_rate": 9.032698109464072e-07, + "loss": 0.5379, + "step": 9928 + }, + { + "epoch": 0.81, + "grad_norm": 4.991868694121024, + "learning_rate": 9.0251103290248e-07, + "loss": 1.1004, + "step": 9929 + }, + { + "epoch": 0.81, + "grad_norm": 5.024159326541445, + "learning_rate": 9.017525420746964e-07, + "loss": 0.703, + "step": 9930 + }, + { + "epoch": 0.81, + "grad_norm": 3.5474833957255796, + "learning_rate": 9.009943385162256e-07, + "loss": 0.7723, + "step": 9931 + }, + { + "epoch": 0.81, + "grad_norm": 2.7371136350187237, + "learning_rate": 9.002364222802118e-07, + "loss": 0.5427, + "step": 9932 + }, + { + "epoch": 0.81, + "grad_norm": 3.6272837149566324, + "learning_rate": 8.994787934197819e-07, + "loss": 0.8529, + "step": 9933 + }, + { + "epoch": 0.81, + "grad_norm": 3.4494997421343547, + "learning_rate": 8.987214519880449e-07, + "loss": 0.5914, + "step": 9934 + }, + { + "epoch": 0.81, + "grad_norm": 3.044674802676371, + "learning_rate": 8.979643980380837e-07, + "loss": 0.4443, + "step": 9935 + }, + { + "epoch": 0.81, + "grad_norm": 4.450958132122734, + "learning_rate": 8.972076316229661e-07, + "loss": 0.8514, + "step": 9936 + }, + { + "epoch": 0.81, + "grad_norm": 3.997254308784755, + "learning_rate": 8.964511527957382e-07, + "loss": 0.981, + "step": 9937 + }, + { + "epoch": 0.81, + "grad_norm": 3.8162827170779, + "learning_rate": 8.956949616094257e-07, + "loss": 0.9472, + "step": 9938 + }, + { + "epoch": 0.81, + "grad_norm": 1.6060584816750842, + "learning_rate": 8.949390581170341e-07, + "loss": 0.2576, + "step": 9939 + }, + { + "epoch": 0.81, + "grad_norm": 2.704545234891308, + "learning_rate": 8.941834423715512e-07, + "loss": 0.5923, + "step": 9940 + }, + { + "epoch": 0.81, + "grad_norm": 3.3713590366128905, + "learning_rate": 8.934281144259388e-07, + "loss": 0.5801, + "step": 9941 + }, + { + "epoch": 0.81, + "grad_norm": 2.4640626424930945, + "learning_rate": 8.926730743331436e-07, + "loss": 0.4416, + "step": 9942 + }, + { + "epoch": 0.81, + "grad_norm": 4.604280564084286, + "learning_rate": 8.919183221460909e-07, + "loss": 0.6132, + "step": 9943 + }, + { + "epoch": 0.81, + "grad_norm": 3.4174402956829706, + "learning_rate": 8.911638579176851e-07, + "loss": 0.5593, + "step": 9944 + }, + { + "epoch": 0.81, + "grad_norm": 3.2855548280997926, + "learning_rate": 8.904096817008129e-07, + "loss": 1.0127, + "step": 9945 + }, + { + "epoch": 0.81, + "grad_norm": 4.4040226749869, + "learning_rate": 8.896557935483352e-07, + "loss": 0.7921, + "step": 9946 + }, + { + "epoch": 0.81, + "grad_norm": 4.270894329429153, + "learning_rate": 8.889021935130987e-07, + "loss": 0.8391, + "step": 9947 + }, + { + "epoch": 0.81, + "grad_norm": 5.358811433320635, + "learning_rate": 8.881488816479278e-07, + "loss": 1.1271, + "step": 9948 + }, + { + "epoch": 0.81, + "grad_norm": 2.4049660304861966, + "learning_rate": 8.873958580056241e-07, + "loss": 0.3162, + "step": 9949 + }, + { + "epoch": 0.81, + "grad_norm": 4.207338887500905, + "learning_rate": 8.866431226389727e-07, + "loss": 0.9544, + "step": 9950 + }, + { + "epoch": 0.81, + "grad_norm": 4.194212944720493, + "learning_rate": 8.858906756007385e-07, + "loss": 0.7725, + "step": 9951 + }, + { + "epoch": 0.81, + "grad_norm": 4.024844323097988, + "learning_rate": 8.851385169436616e-07, + "loss": 0.7606, + "step": 9952 + }, + { + "epoch": 0.81, + "grad_norm": 4.96861080229688, + "learning_rate": 8.843866467204671e-07, + "loss": 1.0984, + "step": 9953 + }, + { + "epoch": 0.81, + "grad_norm": 2.333868653973619, + "learning_rate": 8.836350649838576e-07, + "loss": 0.3738, + "step": 9954 + }, + { + "epoch": 0.81, + "grad_norm": 4.347678846049028, + "learning_rate": 8.828837717865151e-07, + "loss": 0.5327, + "step": 9955 + }, + { + "epoch": 0.81, + "grad_norm": 3.5626055060594073, + "learning_rate": 8.821327671811025e-07, + "loss": 0.6182, + "step": 9956 + }, + { + "epoch": 0.81, + "grad_norm": 4.323903238945544, + "learning_rate": 8.813820512202637e-07, + "loss": 0.757, + "step": 9957 + }, + { + "epoch": 0.81, + "grad_norm": 3.207498333234816, + "learning_rate": 8.80631623956617e-07, + "loss": 0.8589, + "step": 9958 + }, + { + "epoch": 0.81, + "grad_norm": 2.567352678695993, + "learning_rate": 8.798814854427661e-07, + "loss": 0.2841, + "step": 9959 + }, + { + "epoch": 0.81, + "grad_norm": 3.4653789463370224, + "learning_rate": 8.791316357312923e-07, + "loss": 0.8585, + "step": 9960 + }, + { + "epoch": 0.81, + "grad_norm": 4.159159522826038, + "learning_rate": 8.783820748747568e-07, + "loss": 0.6917, + "step": 9961 + }, + { + "epoch": 0.81, + "grad_norm": 5.236044226099467, + "learning_rate": 8.776328029257014e-07, + "loss": 0.9786, + "step": 9962 + }, + { + "epoch": 0.81, + "grad_norm": 3.8408160000979645, + "learning_rate": 8.768838199366448e-07, + "loss": 0.8945, + "step": 9963 + }, + { + "epoch": 0.81, + "grad_norm": 6.087986763963806, + "learning_rate": 8.761351259600904e-07, + "loss": 0.8972, + "step": 9964 + }, + { + "epoch": 0.81, + "grad_norm": 2.2336086813978655, + "learning_rate": 8.753867210485145e-07, + "loss": 0.4059, + "step": 9965 + }, + { + "epoch": 0.81, + "grad_norm": 2.946149759152611, + "learning_rate": 8.746386052543793e-07, + "loss": 0.5339, + "step": 9966 + }, + { + "epoch": 0.81, + "grad_norm": 4.999607721075151, + "learning_rate": 8.738907786301242e-07, + "loss": 0.8304, + "step": 9967 + }, + { + "epoch": 0.81, + "grad_norm": 5.461783597333245, + "learning_rate": 8.731432412281705e-07, + "loss": 1.3311, + "step": 9968 + }, + { + "epoch": 0.81, + "grad_norm": 3.048219803158993, + "learning_rate": 8.723959931009135e-07, + "loss": 0.4584, + "step": 9969 + }, + { + "epoch": 0.81, + "grad_norm": 1.2166370546771277, + "learning_rate": 8.716490343007344e-07, + "loss": 0.198, + "step": 9970 + }, + { + "epoch": 0.81, + "grad_norm": 2.636804300884971, + "learning_rate": 8.709023648799908e-07, + "loss": 0.5062, + "step": 9971 + }, + { + "epoch": 0.82, + "grad_norm": 2.841325653511893, + "learning_rate": 8.701559848910224e-07, + "loss": 0.3741, + "step": 9972 + }, + { + "epoch": 0.82, + "grad_norm": 5.226905068076976, + "learning_rate": 8.694098943861457e-07, + "loss": 1.0037, + "step": 9973 + }, + { + "epoch": 0.82, + "grad_norm": 4.09276388083209, + "learning_rate": 8.686640934176604e-07, + "loss": 0.5478, + "step": 9974 + }, + { + "epoch": 0.82, + "grad_norm": 3.2628753408620814, + "learning_rate": 8.67918582037841e-07, + "loss": 0.6117, + "step": 9975 + }, + { + "epoch": 0.82, + "grad_norm": 6.094075317614659, + "learning_rate": 8.671733602989463e-07, + "loss": 1.5157, + "step": 9976 + }, + { + "epoch": 0.82, + "grad_norm": 2.9949040744102318, + "learning_rate": 8.664284282532132e-07, + "loss": 0.4868, + "step": 9977 + }, + { + "epoch": 0.82, + "grad_norm": 3.580425700118394, + "learning_rate": 8.656837859528589e-07, + "loss": 0.5457, + "step": 9978 + }, + { + "epoch": 0.82, + "grad_norm": 3.7619693330016513, + "learning_rate": 8.649394334500777e-07, + "loss": 0.5573, + "step": 9979 + }, + { + "epoch": 0.82, + "grad_norm": 2.4998835827530375, + "learning_rate": 8.641953707970468e-07, + "loss": 0.3458, + "step": 9980 + }, + { + "epoch": 0.82, + "grad_norm": 2.5884753213983136, + "learning_rate": 8.634515980459207e-07, + "loss": 0.4565, + "step": 9981 + }, + { + "epoch": 0.82, + "grad_norm": 3.5308432908564105, + "learning_rate": 8.627081152488353e-07, + "loss": 0.6351, + "step": 9982 + }, + { + "epoch": 0.82, + "grad_norm": 3.6583575584188517, + "learning_rate": 8.619649224579051e-07, + "loss": 0.7003, + "step": 9983 + }, + { + "epoch": 0.82, + "grad_norm": 4.610901179277053, + "learning_rate": 8.612220197252257e-07, + "loss": 0.8103, + "step": 9984 + }, + { + "epoch": 0.82, + "grad_norm": 3.144654102560811, + "learning_rate": 8.604794071028716e-07, + "loss": 0.6213, + "step": 9985 + }, + { + "epoch": 0.82, + "grad_norm": 4.693232625300068, + "learning_rate": 8.597370846428943e-07, + "loss": 1.0916, + "step": 9986 + }, + { + "epoch": 0.82, + "grad_norm": 4.512234857020179, + "learning_rate": 8.58995052397329e-07, + "loss": 0.8597, + "step": 9987 + }, + { + "epoch": 0.82, + "grad_norm": 2.800831230839158, + "learning_rate": 8.582533104181889e-07, + "loss": 0.3972, + "step": 9988 + }, + { + "epoch": 0.82, + "grad_norm": 2.9638615202286096, + "learning_rate": 8.575118587574666e-07, + "loss": 0.4356, + "step": 9989 + }, + { + "epoch": 0.82, + "grad_norm": 4.119015109367693, + "learning_rate": 8.567706974671353e-07, + "loss": 1.125, + "step": 9990 + }, + { + "epoch": 0.82, + "grad_norm": 4.347167414230204, + "learning_rate": 8.560298265991473e-07, + "loss": 0.9594, + "step": 9991 + }, + { + "epoch": 0.82, + "grad_norm": 3.638443725545612, + "learning_rate": 8.55289246205433e-07, + "loss": 1.0653, + "step": 9992 + }, + { + "epoch": 0.82, + "grad_norm": 3.2924314784327344, + "learning_rate": 8.54548956337905e-07, + "loss": 0.4807, + "step": 9993 + }, + { + "epoch": 0.82, + "grad_norm": 1.5310620171207978, + "learning_rate": 8.538089570484548e-07, + "loss": 0.1781, + "step": 9994 + }, + { + "epoch": 0.82, + "grad_norm": 1.353735075993639, + "learning_rate": 8.530692483889514e-07, + "loss": 0.1444, + "step": 9995 + }, + { + "epoch": 0.82, + "grad_norm": 4.664733022307317, + "learning_rate": 8.523298304112465e-07, + "loss": 0.4313, + "step": 9996 + }, + { + "epoch": 0.82, + "grad_norm": 4.8533237551889785, + "learning_rate": 8.515907031671705e-07, + "loss": 0.5627, + "step": 9997 + }, + { + "epoch": 0.82, + "grad_norm": 3.1254378915318313, + "learning_rate": 8.508518667085314e-07, + "loss": 0.4626, + "step": 9998 + }, + { + "epoch": 0.82, + "grad_norm": 4.272195748833848, + "learning_rate": 8.501133210871188e-07, + "loss": 0.5535, + "step": 9999 + }, + { + "epoch": 0.82, + "grad_norm": 3.056707904463845, + "learning_rate": 8.493750663547024e-07, + "loss": 0.6391, + "step": 10000 + }, + { + "epoch": 0.82, + "grad_norm": 3.571965715095055, + "learning_rate": 8.486371025630302e-07, + "loss": 0.8858, + "step": 10001 + }, + { + "epoch": 0.82, + "grad_norm": 4.207486077789943, + "learning_rate": 8.478994297638316e-07, + "loss": 0.7767, + "step": 10002 + }, + { + "epoch": 0.82, + "grad_norm": 1.9268246492293135, + "learning_rate": 8.471620480088117e-07, + "loss": 0.2496, + "step": 10003 + }, + { + "epoch": 0.82, + "grad_norm": 4.2457661811966805, + "learning_rate": 8.464249573496591e-07, + "loss": 0.7234, + "step": 10004 + }, + { + "epoch": 0.82, + "grad_norm": 4.185701483125576, + "learning_rate": 8.456881578380405e-07, + "loss": 0.683, + "step": 10005 + }, + { + "epoch": 0.82, + "grad_norm": 7.215225747900721, + "learning_rate": 8.449516495256022e-07, + "loss": 1.4064, + "step": 10006 + }, + { + "epoch": 0.82, + "grad_norm": 3.90089749768446, + "learning_rate": 8.442154324639706e-07, + "loss": 0.8309, + "step": 10007 + }, + { + "epoch": 0.82, + "grad_norm": 2.198820210759882, + "learning_rate": 8.434795067047524e-07, + "loss": 0.3133, + "step": 10008 + }, + { + "epoch": 0.82, + "grad_norm": 2.781826961822043, + "learning_rate": 8.427438722995301e-07, + "loss": 0.3979, + "step": 10009 + }, + { + "epoch": 0.82, + "grad_norm": 4.160628587137114, + "learning_rate": 8.420085292998714e-07, + "loss": 0.7002, + "step": 10010 + }, + { + "epoch": 0.82, + "grad_norm": 4.262311883425024, + "learning_rate": 8.412734777573178e-07, + "loss": 0.7013, + "step": 10011 + }, + { + "epoch": 0.82, + "grad_norm": 4.31000974324674, + "learning_rate": 8.405387177233948e-07, + "loss": 0.5945, + "step": 10012 + }, + { + "epoch": 0.82, + "grad_norm": 2.785005472523334, + "learning_rate": 8.398042492496056e-07, + "loss": 0.3029, + "step": 10013 + }, + { + "epoch": 0.82, + "grad_norm": 4.1983697979929, + "learning_rate": 8.390700723874346e-07, + "loss": 0.7723, + "step": 10014 + }, + { + "epoch": 0.82, + "grad_norm": 4.1287679927077345, + "learning_rate": 8.383361871883417e-07, + "loss": 0.4062, + "step": 10015 + }, + { + "epoch": 0.82, + "grad_norm": 4.16968495234099, + "learning_rate": 8.376025937037702e-07, + "loss": 0.644, + "step": 10016 + }, + { + "epoch": 0.82, + "grad_norm": 4.445627009539799, + "learning_rate": 8.368692919851424e-07, + "loss": 0.8638, + "step": 10017 + }, + { + "epoch": 0.82, + "grad_norm": 2.5414590746261316, + "learning_rate": 8.361362820838593e-07, + "loss": 0.492, + "step": 10018 + }, + { + "epoch": 0.82, + "grad_norm": 5.417243551042646, + "learning_rate": 8.354035640513014e-07, + "loss": 1.3996, + "step": 10019 + }, + { + "epoch": 0.82, + "grad_norm": 3.7135312777360125, + "learning_rate": 8.346711379388306e-07, + "loss": 0.684, + "step": 10020 + }, + { + "epoch": 0.82, + "grad_norm": 1.7078717728104544, + "learning_rate": 8.33939003797784e-07, + "loss": 0.268, + "step": 10021 + }, + { + "epoch": 0.82, + "grad_norm": 4.50771291363033, + "learning_rate": 8.332071616794829e-07, + "loss": 0.6469, + "step": 10022 + }, + { + "epoch": 0.82, + "grad_norm": 3.7552968502155464, + "learning_rate": 8.324756116352256e-07, + "loss": 0.8547, + "step": 10023 + }, + { + "epoch": 0.82, + "grad_norm": 4.2278798354341065, + "learning_rate": 8.317443537162922e-07, + "loss": 0.9567, + "step": 10024 + }, + { + "epoch": 0.82, + "grad_norm": 5.301315101902168, + "learning_rate": 8.310133879739379e-07, + "loss": 1.1694, + "step": 10025 + }, + { + "epoch": 0.82, + "grad_norm": 3.3715012305367575, + "learning_rate": 8.302827144594028e-07, + "loss": 0.5722, + "step": 10026 + }, + { + "epoch": 0.82, + "grad_norm": 4.066136984584118, + "learning_rate": 8.295523332239014e-07, + "loss": 0.5825, + "step": 10027 + }, + { + "epoch": 0.82, + "grad_norm": 2.7098705094390225, + "learning_rate": 8.288222443186317e-07, + "loss": 0.472, + "step": 10028 + }, + { + "epoch": 0.82, + "grad_norm": 4.432335998365689, + "learning_rate": 8.280924477947699e-07, + "loss": 0.8808, + "step": 10029 + }, + { + "epoch": 0.82, + "grad_norm": 3.0500464971568686, + "learning_rate": 8.273629437034708e-07, + "loss": 0.5363, + "step": 10030 + }, + { + "epoch": 0.82, + "grad_norm": 4.639329475382795, + "learning_rate": 8.266337320958718e-07, + "loss": 0.8161, + "step": 10031 + }, + { + "epoch": 0.82, + "grad_norm": 5.796479380976112, + "learning_rate": 8.25904813023084e-07, + "loss": 1.5202, + "step": 10032 + }, + { + "epoch": 0.82, + "grad_norm": 3.730708086836667, + "learning_rate": 8.251761865362035e-07, + "loss": 0.6162, + "step": 10033 + }, + { + "epoch": 0.82, + "grad_norm": 2.6349957122901544, + "learning_rate": 8.244478526863026e-07, + "loss": 0.4139, + "step": 10034 + }, + { + "epoch": 0.82, + "grad_norm": 3.723412902142466, + "learning_rate": 8.23719811524436e-07, + "loss": 0.4541, + "step": 10035 + }, + { + "epoch": 0.82, + "grad_norm": 3.794269298914522, + "learning_rate": 8.229920631016353e-07, + "loss": 0.684, + "step": 10036 + }, + { + "epoch": 0.82, + "grad_norm": 5.290127336830074, + "learning_rate": 8.222646074689133e-07, + "loss": 1.117, + "step": 10037 + }, + { + "epoch": 0.82, + "grad_norm": 4.5149409100032685, + "learning_rate": 8.215374446772595e-07, + "loss": 0.9134, + "step": 10038 + }, + { + "epoch": 0.82, + "grad_norm": 3.9944726149977607, + "learning_rate": 8.208105747776468e-07, + "loss": 0.6517, + "step": 10039 + }, + { + "epoch": 0.82, + "grad_norm": 6.588783694265223, + "learning_rate": 8.200839978210256e-07, + "loss": 1.1527, + "step": 10040 + }, + { + "epoch": 0.82, + "grad_norm": 4.934127704405573, + "learning_rate": 8.193577138583242e-07, + "loss": 0.7983, + "step": 10041 + }, + { + "epoch": 0.82, + "grad_norm": 2.9057358963851736, + "learning_rate": 8.186317229404523e-07, + "loss": 0.4775, + "step": 10042 + }, + { + "epoch": 0.82, + "grad_norm": 3.819537120002851, + "learning_rate": 8.179060251183007e-07, + "loss": 1.0176, + "step": 10043 + }, + { + "epoch": 0.82, + "grad_norm": 2.3721447545738563, + "learning_rate": 8.171806204427351e-07, + "loss": 0.3996, + "step": 10044 + }, + { + "epoch": 0.82, + "grad_norm": 3.3969713669358352, + "learning_rate": 8.164555089646048e-07, + "loss": 0.6868, + "step": 10045 + }, + { + "epoch": 0.82, + "grad_norm": 4.814354675455581, + "learning_rate": 8.157306907347357e-07, + "loss": 0.8552, + "step": 10046 + }, + { + "epoch": 0.82, + "grad_norm": 3.827139451801565, + "learning_rate": 8.150061658039354e-07, + "loss": 0.7606, + "step": 10047 + }, + { + "epoch": 0.82, + "grad_norm": 3.661073688282505, + "learning_rate": 8.142819342229913e-07, + "loss": 0.8016, + "step": 10048 + }, + { + "epoch": 0.82, + "grad_norm": 2.984591847641335, + "learning_rate": 8.135579960426659e-07, + "loss": 0.4342, + "step": 10049 + }, + { + "epoch": 0.82, + "grad_norm": 2.5052051722141777, + "learning_rate": 8.12834351313705e-07, + "loss": 0.3033, + "step": 10050 + }, + { + "epoch": 0.82, + "grad_norm": 4.131568483230662, + "learning_rate": 8.121110000868343e-07, + "loss": 0.9707, + "step": 10051 + }, + { + "epoch": 0.82, + "grad_norm": 4.872821654104947, + "learning_rate": 8.113879424127564e-07, + "loss": 0.9356, + "step": 10052 + }, + { + "epoch": 0.82, + "grad_norm": 4.499040191623966, + "learning_rate": 8.106651783421543e-07, + "loss": 0.9858, + "step": 10053 + }, + { + "epoch": 0.82, + "grad_norm": 2.2972781125121213, + "learning_rate": 8.099427079256928e-07, + "loss": 0.3593, + "step": 10054 + }, + { + "epoch": 0.82, + "grad_norm": 4.9651309608674, + "learning_rate": 8.092205312140111e-07, + "loss": 0.9283, + "step": 10055 + }, + { + "epoch": 0.82, + "grad_norm": 5.2035955637071405, + "learning_rate": 8.084986482577323e-07, + "loss": 1.4109, + "step": 10056 + }, + { + "epoch": 0.82, + "grad_norm": 1.8922151956035511, + "learning_rate": 8.077770591074574e-07, + "loss": 0.3467, + "step": 10057 + }, + { + "epoch": 0.82, + "grad_norm": 4.039653787579526, + "learning_rate": 8.070557638137649e-07, + "loss": 1.1254, + "step": 10058 + }, + { + "epoch": 0.82, + "grad_norm": 3.5731288296777097, + "learning_rate": 8.063347624272156e-07, + "loss": 0.6772, + "step": 10059 + }, + { + "epoch": 0.82, + "grad_norm": 3.200731054783732, + "learning_rate": 8.056140549983499e-07, + "loss": 0.544, + "step": 10060 + }, + { + "epoch": 0.82, + "grad_norm": 4.2915502818258195, + "learning_rate": 8.048936415776837e-07, + "loss": 0.6498, + "step": 10061 + }, + { + "epoch": 0.82, + "grad_norm": 3.621791096115158, + "learning_rate": 8.041735222157159e-07, + "loss": 0.8866, + "step": 10062 + }, + { + "epoch": 0.82, + "grad_norm": 3.5508307845696594, + "learning_rate": 8.034536969629242e-07, + "loss": 0.5232, + "step": 10063 + }, + { + "epoch": 0.82, + "grad_norm": 1.1309327404270295, + "learning_rate": 8.027341658697646e-07, + "loss": 0.1377, + "step": 10064 + }, + { + "epoch": 0.82, + "grad_norm": 3.6677811482716183, + "learning_rate": 8.020149289866746e-07, + "loss": 0.8675, + "step": 10065 + }, + { + "epoch": 0.82, + "grad_norm": 3.676930005706927, + "learning_rate": 8.012959863640674e-07, + "loss": 1.0361, + "step": 10066 + }, + { + "epoch": 0.82, + "grad_norm": 3.9075167493127307, + "learning_rate": 8.005773380523386e-07, + "loss": 0.974, + "step": 10067 + }, + { + "epoch": 0.82, + "grad_norm": 2.8272851825025835, + "learning_rate": 7.998589841018622e-07, + "loss": 0.3865, + "step": 10068 + }, + { + "epoch": 0.82, + "grad_norm": 3.773220858860217, + "learning_rate": 7.991409245629922e-07, + "loss": 0.6879, + "step": 10069 + }, + { + "epoch": 0.82, + "grad_norm": 3.78635399173605, + "learning_rate": 7.984231594860614e-07, + "loss": 0.6683, + "step": 10070 + }, + { + "epoch": 0.82, + "grad_norm": 3.5489463805363006, + "learning_rate": 7.977056889213831e-07, + "loss": 0.8355, + "step": 10071 + }, + { + "epoch": 0.82, + "grad_norm": 4.73856111582991, + "learning_rate": 7.969885129192456e-07, + "loss": 0.7822, + "step": 10072 + }, + { + "epoch": 0.82, + "grad_norm": 5.000026482958678, + "learning_rate": 7.962716315299235e-07, + "loss": 1.1317, + "step": 10073 + }, + { + "epoch": 0.82, + "grad_norm": 3.8629512984605703, + "learning_rate": 7.955550448036642e-07, + "loss": 0.4413, + "step": 10074 + }, + { + "epoch": 0.82, + "grad_norm": 2.980903230806037, + "learning_rate": 7.948387527906987e-07, + "loss": 0.4457, + "step": 10075 + }, + { + "epoch": 0.82, + "grad_norm": 5.403371770741651, + "learning_rate": 7.941227555412351e-07, + "loss": 0.8789, + "step": 10076 + }, + { + "epoch": 0.82, + "grad_norm": 3.615071009109892, + "learning_rate": 7.934070531054638e-07, + "loss": 0.5785, + "step": 10077 + }, + { + "epoch": 0.82, + "grad_norm": 4.102659969057406, + "learning_rate": 7.926916455335498e-07, + "loss": 0.8829, + "step": 10078 + }, + { + "epoch": 0.82, + "grad_norm": 4.573077847289734, + "learning_rate": 7.919765328756407e-07, + "loss": 0.93, + "step": 10079 + }, + { + "epoch": 0.82, + "grad_norm": 4.136876063217532, + "learning_rate": 7.912617151818636e-07, + "loss": 0.6846, + "step": 10080 + }, + { + "epoch": 0.82, + "grad_norm": 4.4749603064475245, + "learning_rate": 7.90547192502324e-07, + "loss": 0.8403, + "step": 10081 + }, + { + "epoch": 0.82, + "grad_norm": 3.909745880030514, + "learning_rate": 7.898329648871067e-07, + "loss": 0.6864, + "step": 10082 + }, + { + "epoch": 0.82, + "grad_norm": 1.5567840556596326, + "learning_rate": 7.891190323862762e-07, + "loss": 0.1979, + "step": 10083 + }, + { + "epoch": 0.82, + "grad_norm": 2.4274023280178167, + "learning_rate": 7.884053950498754e-07, + "loss": 0.3834, + "step": 10084 + }, + { + "epoch": 0.82, + "grad_norm": 2.737814252469486, + "learning_rate": 7.87692052927927e-07, + "loss": 0.6148, + "step": 10085 + }, + { + "epoch": 0.82, + "grad_norm": 4.9986161359124175, + "learning_rate": 7.869790060704341e-07, + "loss": 1.178, + "step": 10086 + }, + { + "epoch": 0.82, + "grad_norm": 4.548081185556967, + "learning_rate": 7.862662545273786e-07, + "loss": 0.7753, + "step": 10087 + }, + { + "epoch": 0.82, + "grad_norm": 2.760979136167705, + "learning_rate": 7.855537983487194e-07, + "loss": 0.3284, + "step": 10088 + }, + { + "epoch": 0.82, + "grad_norm": 4.172216511458501, + "learning_rate": 7.848416375843987e-07, + "loss": 0.5237, + "step": 10089 + }, + { + "epoch": 0.82, + "grad_norm": 3.0377431401690775, + "learning_rate": 7.841297722843333e-07, + "loss": 0.6412, + "step": 10090 + }, + { + "epoch": 0.82, + "grad_norm": 4.928285806833154, + "learning_rate": 7.834182024984238e-07, + "loss": 0.5305, + "step": 10091 + }, + { + "epoch": 0.82, + "grad_norm": 3.4613951992850245, + "learning_rate": 7.827069282765475e-07, + "loss": 0.3983, + "step": 10092 + }, + { + "epoch": 0.82, + "grad_norm": 4.262635263729186, + "learning_rate": 7.81995949668562e-07, + "loss": 0.9262, + "step": 10093 + }, + { + "epoch": 0.83, + "grad_norm": 3.319007183839746, + "learning_rate": 7.812852667243043e-07, + "loss": 0.3667, + "step": 10094 + }, + { + "epoch": 0.83, + "grad_norm": 4.015797994989106, + "learning_rate": 7.805748794935886e-07, + "loss": 0.9151, + "step": 10095 + }, + { + "epoch": 0.83, + "grad_norm": 1.4061862206066524, + "learning_rate": 7.798647880262111e-07, + "loss": 0.1664, + "step": 10096 + }, + { + "epoch": 0.83, + "grad_norm": 4.410668028791237, + "learning_rate": 7.791549923719455e-07, + "loss": 0.9573, + "step": 10097 + }, + { + "epoch": 0.83, + "grad_norm": 2.868404391966383, + "learning_rate": 7.784454925805457e-07, + "loss": 0.7018, + "step": 10098 + }, + { + "epoch": 0.83, + "grad_norm": 2.381961548937185, + "learning_rate": 7.777362887017448e-07, + "loss": 0.2947, + "step": 10099 + }, + { + "epoch": 0.83, + "grad_norm": 4.892050919340202, + "learning_rate": 7.770273807852557e-07, + "loss": 1.2152, + "step": 10100 + }, + { + "epoch": 0.83, + "grad_norm": 6.19196362056325, + "learning_rate": 7.763187688807677e-07, + "loss": 0.6899, + "step": 10101 + }, + { + "epoch": 0.83, + "grad_norm": 3.0527982191823737, + "learning_rate": 7.756104530379526e-07, + "loss": 0.3678, + "step": 10102 + }, + { + "epoch": 0.83, + "grad_norm": 4.1701771145805395, + "learning_rate": 7.749024333064614e-07, + "loss": 0.7823, + "step": 10103 + }, + { + "epoch": 0.83, + "grad_norm": 5.286789747108113, + "learning_rate": 7.74194709735921e-07, + "loss": 0.8221, + "step": 10104 + }, + { + "epoch": 0.83, + "grad_norm": 5.316492683343482, + "learning_rate": 7.73487282375941e-07, + "loss": 1.1504, + "step": 10105 + }, + { + "epoch": 0.83, + "grad_norm": 3.624176181992469, + "learning_rate": 7.72780151276108e-07, + "loss": 0.6505, + "step": 10106 + }, + { + "epoch": 0.83, + "grad_norm": 4.622089616941887, + "learning_rate": 7.720733164859895e-07, + "loss": 0.7511, + "step": 10107 + }, + { + "epoch": 0.83, + "grad_norm": 5.117173719225514, + "learning_rate": 7.713667780551315e-07, + "loss": 0.5868, + "step": 10108 + }, + { + "epoch": 0.83, + "grad_norm": 4.066366457292403, + "learning_rate": 7.706605360330594e-07, + "loss": 0.4654, + "step": 10109 + }, + { + "epoch": 0.83, + "grad_norm": 2.733998100948551, + "learning_rate": 7.699545904692774e-07, + "loss": 0.3261, + "step": 10110 + }, + { + "epoch": 0.83, + "grad_norm": 3.2497827288864802, + "learning_rate": 7.692489414132703e-07, + "loss": 0.742, + "step": 10111 + }, + { + "epoch": 0.83, + "grad_norm": 3.47988675054254, + "learning_rate": 7.685435889144993e-07, + "loss": 0.6233, + "step": 10112 + }, + { + "epoch": 0.83, + "grad_norm": 2.8554813264771406, + "learning_rate": 7.678385330224075e-07, + "loss": 0.4469, + "step": 10113 + }, + { + "epoch": 0.83, + "grad_norm": 4.4381940487933935, + "learning_rate": 7.671337737864159e-07, + "loss": 0.5083, + "step": 10114 + }, + { + "epoch": 0.83, + "grad_norm": 4.6863597272472095, + "learning_rate": 7.664293112559251e-07, + "loss": 1.0119, + "step": 10115 + }, + { + "epoch": 0.83, + "grad_norm": 2.4769697594820794, + "learning_rate": 7.65725145480315e-07, + "loss": 0.4439, + "step": 10116 + }, + { + "epoch": 0.83, + "grad_norm": 5.370028598981, + "learning_rate": 7.65021276508946e-07, + "loss": 0.8541, + "step": 10117 + }, + { + "epoch": 0.83, + "grad_norm": 3.4499207348327525, + "learning_rate": 7.643177043911538e-07, + "loss": 0.5175, + "step": 10118 + }, + { + "epoch": 0.83, + "grad_norm": 2.1584533314883565, + "learning_rate": 7.636144291762576e-07, + "loss": 0.2468, + "step": 10119 + }, + { + "epoch": 0.83, + "grad_norm": 4.018358478001542, + "learning_rate": 7.629114509135521e-07, + "loss": 0.9447, + "step": 10120 + }, + { + "epoch": 0.83, + "grad_norm": 3.5912207702822387, + "learning_rate": 7.62208769652314e-07, + "loss": 0.5151, + "step": 10121 + }, + { + "epoch": 0.83, + "grad_norm": 4.328108848632602, + "learning_rate": 7.615063854417981e-07, + "loss": 0.6842, + "step": 10122 + }, + { + "epoch": 0.83, + "grad_norm": 4.900438538556681, + "learning_rate": 7.608042983312397e-07, + "loss": 0.7065, + "step": 10123 + }, + { + "epoch": 0.83, + "grad_norm": 4.201546029218351, + "learning_rate": 7.601025083698499e-07, + "loss": 0.4837, + "step": 10124 + }, + { + "epoch": 0.83, + "grad_norm": 3.026673616477278, + "learning_rate": 7.594010156068221e-07, + "loss": 0.4566, + "step": 10125 + }, + { + "epoch": 0.83, + "grad_norm": 4.436617140355298, + "learning_rate": 7.586998200913282e-07, + "loss": 0.7603, + "step": 10126 + }, + { + "epoch": 0.83, + "grad_norm": 3.8850862955727252, + "learning_rate": 7.579989218725187e-07, + "loss": 0.8296, + "step": 10127 + }, + { + "epoch": 0.83, + "grad_norm": 6.665295443382982, + "learning_rate": 7.572983209995244e-07, + "loss": 0.8821, + "step": 10128 + }, + { + "epoch": 0.83, + "grad_norm": 4.59546360930877, + "learning_rate": 7.565980175214526e-07, + "loss": 0.885, + "step": 10129 + }, + { + "epoch": 0.83, + "grad_norm": 3.8774411324788542, + "learning_rate": 7.558980114873921e-07, + "loss": 0.726, + "step": 10130 + }, + { + "epoch": 0.83, + "grad_norm": 3.0323948272572405, + "learning_rate": 7.551983029464111e-07, + "loss": 0.5955, + "step": 10131 + }, + { + "epoch": 0.83, + "grad_norm": 4.266205587184443, + "learning_rate": 7.544988919475555e-07, + "loss": 0.9898, + "step": 10132 + }, + { + "epoch": 0.83, + "grad_norm": 4.56948708053087, + "learning_rate": 7.537997785398515e-07, + "loss": 0.7395, + "step": 10133 + }, + { + "epoch": 0.83, + "grad_norm": 3.4088982946641324, + "learning_rate": 7.531009627723035e-07, + "loss": 0.6821, + "step": 10134 + }, + { + "epoch": 0.83, + "grad_norm": 3.0972983217595726, + "learning_rate": 7.52402444693896e-07, + "loss": 0.6063, + "step": 10135 + }, + { + "epoch": 0.83, + "grad_norm": 3.8439494774168987, + "learning_rate": 7.517042243535899e-07, + "loss": 0.719, + "step": 10136 + }, + { + "epoch": 0.83, + "grad_norm": 2.9165481492503744, + "learning_rate": 7.5100630180033e-07, + "loss": 0.4903, + "step": 10137 + }, + { + "epoch": 0.83, + "grad_norm": 1.8094712506379884, + "learning_rate": 7.503086770830359e-07, + "loss": 0.1993, + "step": 10138 + }, + { + "epoch": 0.83, + "grad_norm": 4.732114676057692, + "learning_rate": 7.496113502506092e-07, + "loss": 1.0937, + "step": 10139 + }, + { + "epoch": 0.83, + "grad_norm": 4.573506384797262, + "learning_rate": 7.489143213519301e-07, + "loss": 1.1496, + "step": 10140 + }, + { + "epoch": 0.83, + "grad_norm": 3.1810527637860355, + "learning_rate": 7.482175904358552e-07, + "loss": 0.5457, + "step": 10141 + }, + { + "epoch": 0.83, + "grad_norm": 4.829992228202246, + "learning_rate": 7.475211575512231e-07, + "loss": 0.7806, + "step": 10142 + }, + { + "epoch": 0.83, + "grad_norm": 3.4087777526064107, + "learning_rate": 7.468250227468515e-07, + "loss": 0.5698, + "step": 10143 + }, + { + "epoch": 0.83, + "grad_norm": 4.443150332879401, + "learning_rate": 7.461291860715359e-07, + "loss": 0.9275, + "step": 10144 + }, + { + "epoch": 0.83, + "grad_norm": 3.8076918964860598, + "learning_rate": 7.45433647574052e-07, + "loss": 0.7731, + "step": 10145 + }, + { + "epoch": 0.83, + "grad_norm": 4.00916585804603, + "learning_rate": 7.447384073031527e-07, + "loss": 0.8045, + "step": 10146 + }, + { + "epoch": 0.83, + "grad_norm": 7.543639619527765, + "learning_rate": 7.440434653075723e-07, + "loss": 1.4757, + "step": 10147 + }, + { + "epoch": 0.83, + "grad_norm": 3.744778257036902, + "learning_rate": 7.43348821636023e-07, + "loss": 1.0878, + "step": 10148 + }, + { + "epoch": 0.83, + "grad_norm": 2.873126512495574, + "learning_rate": 7.426544763371974e-07, + "loss": 0.5527, + "step": 10149 + }, + { + "epoch": 0.83, + "grad_norm": 3.5157252702605923, + "learning_rate": 7.419604294597632e-07, + "loss": 0.6573, + "step": 10150 + }, + { + "epoch": 0.83, + "grad_norm": 2.6910821262864113, + "learning_rate": 7.412666810523727e-07, + "loss": 0.5749, + "step": 10151 + }, + { + "epoch": 0.83, + "grad_norm": 2.0645375077749435, + "learning_rate": 7.405732311636543e-07, + "loss": 0.2744, + "step": 10152 + }, + { + "epoch": 0.83, + "grad_norm": 6.118859734677217, + "learning_rate": 7.398800798422145e-07, + "loss": 0.8487, + "step": 10153 + }, + { + "epoch": 0.83, + "grad_norm": 2.8014057053335506, + "learning_rate": 7.391872271366407e-07, + "loss": 0.4396, + "step": 10154 + }, + { + "epoch": 0.83, + "grad_norm": 4.57035437763408, + "learning_rate": 7.384946730954995e-07, + "loss": 0.745, + "step": 10155 + }, + { + "epoch": 0.83, + "grad_norm": 2.6010854374742336, + "learning_rate": 7.378024177673354e-07, + "loss": 0.6538, + "step": 10156 + }, + { + "epoch": 0.83, + "grad_norm": 2.783812121647489, + "learning_rate": 7.371104612006741e-07, + "loss": 0.3029, + "step": 10157 + }, + { + "epoch": 0.83, + "grad_norm": 3.70317558211454, + "learning_rate": 7.364188034440161e-07, + "loss": 1.002, + "step": 10158 + }, + { + "epoch": 0.83, + "grad_norm": 3.9305650101655183, + "learning_rate": 7.357274445458446e-07, + "loss": 0.7544, + "step": 10159 + }, + { + "epoch": 0.83, + "grad_norm": 6.608343086018153, + "learning_rate": 7.35036384554621e-07, + "loss": 1.4278, + "step": 10160 + }, + { + "epoch": 0.83, + "grad_norm": 4.571341322709621, + "learning_rate": 7.343456235187857e-07, + "loss": 0.6947, + "step": 10161 + }, + { + "epoch": 0.83, + "grad_norm": 3.4511973270380194, + "learning_rate": 7.336551614867582e-07, + "loss": 0.6418, + "step": 10162 + }, + { + "epoch": 0.83, + "grad_norm": 5.478077366704615, + "learning_rate": 7.329649985069376e-07, + "loss": 1.3947, + "step": 10163 + }, + { + "epoch": 0.83, + "grad_norm": 3.4354541645576178, + "learning_rate": 7.322751346276997e-07, + "loss": 0.8503, + "step": 10164 + }, + { + "epoch": 0.83, + "grad_norm": 3.0040327011560812, + "learning_rate": 7.315855698974012e-07, + "loss": 0.5111, + "step": 10165 + }, + { + "epoch": 0.83, + "grad_norm": 3.9865675662372406, + "learning_rate": 7.308963043643791e-07, + "loss": 0.5029, + "step": 10166 + }, + { + "epoch": 0.83, + "grad_norm": 2.5068836581670593, + "learning_rate": 7.302073380769459e-07, + "loss": 0.32, + "step": 10167 + }, + { + "epoch": 0.83, + "grad_norm": 5.108309155183119, + "learning_rate": 7.295186710833973e-07, + "loss": 0.6038, + "step": 10168 + }, + { + "epoch": 0.83, + "grad_norm": 3.1470775770097736, + "learning_rate": 7.288303034320038e-07, + "loss": 0.5335, + "step": 10169 + }, + { + "epoch": 0.83, + "grad_norm": 4.930333607704734, + "learning_rate": 7.281422351710177e-07, + "loss": 0.8595, + "step": 10170 + }, + { + "epoch": 0.83, + "grad_norm": 1.1370544711017538, + "learning_rate": 7.274544663486694e-07, + "loss": 0.1652, + "step": 10171 + }, + { + "epoch": 0.83, + "grad_norm": 4.136367665352145, + "learning_rate": 7.267669970131691e-07, + "loss": 0.8134, + "step": 10172 + }, + { + "epoch": 0.83, + "grad_norm": 5.141838639356749, + "learning_rate": 7.260798272127051e-07, + "loss": 1.0518, + "step": 10173 + }, + { + "epoch": 0.83, + "grad_norm": 5.168489051573621, + "learning_rate": 7.25392956995446e-07, + "loss": 0.8956, + "step": 10174 + }, + { + "epoch": 0.83, + "grad_norm": 4.033656979157179, + "learning_rate": 7.247063864095361e-07, + "loss": 0.8774, + "step": 10175 + }, + { + "epoch": 0.83, + "grad_norm": 4.684176619776644, + "learning_rate": 7.240201155031029e-07, + "loss": 0.9595, + "step": 10176 + }, + { + "epoch": 0.83, + "grad_norm": 3.3587537054595167, + "learning_rate": 7.233341443242504e-07, + "loss": 0.4504, + "step": 10177 + }, + { + "epoch": 0.83, + "grad_norm": 4.72953453724526, + "learning_rate": 7.22648472921062e-07, + "loss": 0.8717, + "step": 10178 + }, + { + "epoch": 0.83, + "grad_norm": 4.2640117138401425, + "learning_rate": 7.219631013416007e-07, + "loss": 0.8372, + "step": 10179 + }, + { + "epoch": 0.83, + "grad_norm": 5.031931141346207, + "learning_rate": 7.212780296339095e-07, + "loss": 0.7958, + "step": 10180 + }, + { + "epoch": 0.83, + "grad_norm": 3.3418842891165044, + "learning_rate": 7.205932578460056e-07, + "loss": 0.5695, + "step": 10181 + }, + { + "epoch": 0.83, + "grad_norm": 3.416966419131427, + "learning_rate": 7.199087860258913e-07, + "loss": 0.6697, + "step": 10182 + }, + { + "epoch": 0.83, + "grad_norm": 3.022172971765802, + "learning_rate": 7.192246142215437e-07, + "loss": 0.4962, + "step": 10183 + }, + { + "epoch": 0.83, + "grad_norm": 3.3941894220163036, + "learning_rate": 7.185407424809199e-07, + "loss": 1.0106, + "step": 10184 + }, + { + "epoch": 0.83, + "grad_norm": 2.8137243834314782, + "learning_rate": 7.178571708519589e-07, + "loss": 0.5257, + "step": 10185 + }, + { + "epoch": 0.83, + "grad_norm": 4.4841375497339975, + "learning_rate": 7.171738993825728e-07, + "loss": 0.9764, + "step": 10186 + }, + { + "epoch": 0.83, + "grad_norm": 2.495239260489433, + "learning_rate": 7.164909281206573e-07, + "loss": 0.2904, + "step": 10187 + }, + { + "epoch": 0.83, + "grad_norm": 2.932504966643108, + "learning_rate": 7.158082571140857e-07, + "loss": 0.5881, + "step": 10188 + }, + { + "epoch": 0.83, + "grad_norm": 4.309706622233228, + "learning_rate": 7.151258864107107e-07, + "loss": 0.8552, + "step": 10189 + }, + { + "epoch": 0.83, + "grad_norm": 5.151604904407833, + "learning_rate": 7.144438160583633e-07, + "loss": 1.3396, + "step": 10190 + }, + { + "epoch": 0.83, + "grad_norm": 2.175446160551388, + "learning_rate": 7.137620461048544e-07, + "loss": 0.3172, + "step": 10191 + }, + { + "epoch": 0.83, + "grad_norm": 3.0943987081345203, + "learning_rate": 7.130805765979714e-07, + "loss": 0.5698, + "step": 10192 + }, + { + "epoch": 0.83, + "grad_norm": 3.836987678336197, + "learning_rate": 7.123994075854834e-07, + "loss": 0.5729, + "step": 10193 + }, + { + "epoch": 0.83, + "grad_norm": 2.072622197125855, + "learning_rate": 7.117185391151371e-07, + "loss": 0.3823, + "step": 10194 + }, + { + "epoch": 0.83, + "grad_norm": 4.870931924754225, + "learning_rate": 7.110379712346582e-07, + "loss": 0.9687, + "step": 10195 + }, + { + "epoch": 0.83, + "grad_norm": 4.549694135245669, + "learning_rate": 7.103577039917536e-07, + "loss": 1.1397, + "step": 10196 + }, + { + "epoch": 0.83, + "grad_norm": 4.443048944288806, + "learning_rate": 7.09677737434104e-07, + "loss": 0.9514, + "step": 10197 + }, + { + "epoch": 0.83, + "grad_norm": 6.412042572557696, + "learning_rate": 7.089980716093741e-07, + "loss": 1.004, + "step": 10198 + }, + { + "epoch": 0.83, + "grad_norm": 2.8298060473757447, + "learning_rate": 7.083187065652042e-07, + "loss": 0.492, + "step": 10199 + }, + { + "epoch": 0.83, + "grad_norm": 2.462097342915266, + "learning_rate": 7.076396423492154e-07, + "loss": 0.5119, + "step": 10200 + }, + { + "epoch": 0.83, + "grad_norm": 3.4503156311001444, + "learning_rate": 7.069608790090077e-07, + "loss": 0.4069, + "step": 10201 + }, + { + "epoch": 0.83, + "grad_norm": 1.514757874641178, + "learning_rate": 7.062824165921589e-07, + "loss": 0.2277, + "step": 10202 + }, + { + "epoch": 0.83, + "grad_norm": 4.0935899306461865, + "learning_rate": 7.056042551462273e-07, + "loss": 0.9142, + "step": 10203 + }, + { + "epoch": 0.83, + "grad_norm": 4.408676529971462, + "learning_rate": 7.049263947187468e-07, + "loss": 0.7984, + "step": 10204 + }, + { + "epoch": 0.83, + "grad_norm": 3.7888943397001684, + "learning_rate": 7.042488353572341e-07, + "loss": 0.8042, + "step": 10205 + }, + { + "epoch": 0.83, + "grad_norm": 3.6447457960019145, + "learning_rate": 7.035715771091828e-07, + "loss": 0.7367, + "step": 10206 + }, + { + "epoch": 0.83, + "grad_norm": 2.7901059754746016, + "learning_rate": 7.028946200220655e-07, + "loss": 0.4016, + "step": 10207 + }, + { + "epoch": 0.83, + "grad_norm": 2.768076715298043, + "learning_rate": 7.022179641433357e-07, + "loss": 0.3792, + "step": 10208 + }, + { + "epoch": 0.83, + "grad_norm": 2.9790775227827817, + "learning_rate": 7.015416095204214e-07, + "loss": 0.2407, + "step": 10209 + }, + { + "epoch": 0.83, + "grad_norm": 4.779174705300358, + "learning_rate": 7.008655562007333e-07, + "loss": 0.9499, + "step": 10210 + }, + { + "epoch": 0.83, + "grad_norm": 3.504210062203104, + "learning_rate": 7.001898042316602e-07, + "loss": 0.5965, + "step": 10211 + }, + { + "epoch": 0.83, + "grad_norm": 4.098430800876282, + "learning_rate": 6.995143536605698e-07, + "loss": 0.8216, + "step": 10212 + }, + { + "epoch": 0.83, + "grad_norm": 4.722458855400462, + "learning_rate": 6.988392045348063e-07, + "loss": 0.5898, + "step": 10213 + }, + { + "epoch": 0.83, + "grad_norm": 5.603204189743334, + "learning_rate": 6.981643569016966e-07, + "loss": 0.9451, + "step": 10214 + }, + { + "epoch": 0.83, + "grad_norm": 4.383153672975866, + "learning_rate": 6.974898108085431e-07, + "loss": 0.7547, + "step": 10215 + }, + { + "epoch": 0.84, + "grad_norm": 5.777138167338287, + "learning_rate": 6.968155663026294e-07, + "loss": 1.0101, + "step": 10216 + }, + { + "epoch": 0.84, + "grad_norm": 2.949733626148483, + "learning_rate": 6.961416234312168e-07, + "loss": 0.5266, + "step": 10217 + }, + { + "epoch": 0.84, + "grad_norm": 3.286890743214652, + "learning_rate": 6.954679822415461e-07, + "loss": 0.624, + "step": 10218 + }, + { + "epoch": 0.84, + "grad_norm": 5.196197776760363, + "learning_rate": 6.947946427808366e-07, + "loss": 1.1136, + "step": 10219 + }, + { + "epoch": 0.84, + "grad_norm": 5.8562787817039075, + "learning_rate": 6.941216050962868e-07, + "loss": 0.8969, + "step": 10220 + }, + { + "epoch": 0.84, + "grad_norm": 5.311974724124445, + "learning_rate": 6.934488692350727e-07, + "loss": 0.8868, + "step": 10221 + }, + { + "epoch": 0.84, + "grad_norm": 4.961188506136366, + "learning_rate": 6.927764352443506e-07, + "loss": 0.8205, + "step": 10222 + }, + { + "epoch": 0.84, + "grad_norm": 4.955038469711517, + "learning_rate": 6.921043031712549e-07, + "loss": 0.8039, + "step": 10223 + }, + { + "epoch": 0.84, + "grad_norm": 5.323796643908006, + "learning_rate": 6.914324730629002e-07, + "loss": 1.027, + "step": 10224 + }, + { + "epoch": 0.84, + "grad_norm": 3.0023604844410343, + "learning_rate": 6.907609449663788e-07, + "loss": 0.49, + "step": 10225 + }, + { + "epoch": 0.84, + "grad_norm": 4.357653845060114, + "learning_rate": 6.900897189287603e-07, + "loss": 0.5876, + "step": 10226 + }, + { + "epoch": 0.84, + "grad_norm": 2.68285164443623, + "learning_rate": 6.894187949970954e-07, + "loss": 0.3064, + "step": 10227 + }, + { + "epoch": 0.84, + "grad_norm": 3.3820541379404068, + "learning_rate": 6.887481732184148e-07, + "loss": 0.6108, + "step": 10228 + }, + { + "epoch": 0.84, + "grad_norm": 6.299569677418883, + "learning_rate": 6.880778536397237e-07, + "loss": 0.8979, + "step": 10229 + }, + { + "epoch": 0.84, + "grad_norm": 2.970028067271835, + "learning_rate": 6.874078363080089e-07, + "loss": 0.6938, + "step": 10230 + }, + { + "epoch": 0.84, + "grad_norm": 3.031576213495916, + "learning_rate": 6.867381212702378e-07, + "loss": 0.5359, + "step": 10231 + }, + { + "epoch": 0.84, + "grad_norm": 1.325165945870046, + "learning_rate": 6.860687085733519e-07, + "loss": 0.1823, + "step": 10232 + }, + { + "epoch": 0.84, + "grad_norm": 4.633013472211267, + "learning_rate": 6.853995982642753e-07, + "loss": 0.681, + "step": 10233 + }, + { + "epoch": 0.84, + "grad_norm": 5.529771631025134, + "learning_rate": 6.847307903899091e-07, + "loss": 0.7647, + "step": 10234 + }, + { + "epoch": 0.84, + "grad_norm": 2.125269134331355, + "learning_rate": 6.840622849971352e-07, + "loss": 0.451, + "step": 10235 + }, + { + "epoch": 0.84, + "grad_norm": 1.1441559641432029, + "learning_rate": 6.833940821328117e-07, + "loss": 0.1391, + "step": 10236 + }, + { + "epoch": 0.84, + "grad_norm": 4.361524339778844, + "learning_rate": 6.827261818437781e-07, + "loss": 0.7674, + "step": 10237 + }, + { + "epoch": 0.84, + "grad_norm": 3.0626961124778864, + "learning_rate": 6.820585841768496e-07, + "loss": 0.5342, + "step": 10238 + }, + { + "epoch": 0.84, + "grad_norm": 4.582327339796292, + "learning_rate": 6.813912891788221e-07, + "loss": 1.0739, + "step": 10239 + }, + { + "epoch": 0.84, + "grad_norm": 4.562027936198054, + "learning_rate": 6.807242968964711e-07, + "loss": 1.1472, + "step": 10240 + }, + { + "epoch": 0.84, + "grad_norm": 3.9999078913684025, + "learning_rate": 6.800576073765486e-07, + "loss": 0.7174, + "step": 10241 + }, + { + "epoch": 0.84, + "grad_norm": 3.317538348890353, + "learning_rate": 6.793912206657893e-07, + "loss": 0.4795, + "step": 10242 + }, + { + "epoch": 0.84, + "grad_norm": 3.5309894163865896, + "learning_rate": 6.787251368109005e-07, + "loss": 0.5938, + "step": 10243 + }, + { + "epoch": 0.84, + "grad_norm": 5.284058046073281, + "learning_rate": 6.780593558585746e-07, + "loss": 0.9317, + "step": 10244 + }, + { + "epoch": 0.84, + "grad_norm": 4.844064243994371, + "learning_rate": 6.773938778554773e-07, + "loss": 0.8454, + "step": 10245 + }, + { + "epoch": 0.84, + "grad_norm": 3.5893596866876116, + "learning_rate": 6.767287028482577e-07, + "loss": 0.5012, + "step": 10246 + }, + { + "epoch": 0.84, + "grad_norm": 3.3520825892480266, + "learning_rate": 6.760638308835404e-07, + "loss": 0.4746, + "step": 10247 + }, + { + "epoch": 0.84, + "grad_norm": 2.393175137395471, + "learning_rate": 6.753992620079325e-07, + "loss": 0.4163, + "step": 10248 + }, + { + "epoch": 0.84, + "grad_norm": 3.4533606783094313, + "learning_rate": 6.747349962680144e-07, + "loss": 0.5819, + "step": 10249 + }, + { + "epoch": 0.84, + "grad_norm": 2.7237269976253433, + "learning_rate": 6.74071033710349e-07, + "loss": 0.4897, + "step": 10250 + }, + { + "epoch": 0.84, + "grad_norm": 4.746376562234642, + "learning_rate": 6.734073743814779e-07, + "loss": 1.1306, + "step": 10251 + }, + { + "epoch": 0.84, + "grad_norm": 5.358985476907801, + "learning_rate": 6.727440183279205e-07, + "loss": 1.1153, + "step": 10252 + }, + { + "epoch": 0.84, + "grad_norm": 3.0330042057965145, + "learning_rate": 6.720809655961752e-07, + "loss": 0.2759, + "step": 10253 + }, + { + "epoch": 0.84, + "grad_norm": 5.387208326336032, + "learning_rate": 6.714182162327198e-07, + "loss": 0.7345, + "step": 10254 + }, + { + "epoch": 0.84, + "grad_norm": 3.9235955723959077, + "learning_rate": 6.707557702840084e-07, + "loss": 0.5651, + "step": 10255 + }, + { + "epoch": 0.84, + "grad_norm": 4.368011627009563, + "learning_rate": 6.700936277964771e-07, + "loss": 1.0694, + "step": 10256 + }, + { + "epoch": 0.84, + "grad_norm": 2.6604373487739097, + "learning_rate": 6.694317888165381e-07, + "loss": 0.3197, + "step": 10257 + }, + { + "epoch": 0.84, + "grad_norm": 4.394383222912035, + "learning_rate": 6.687702533905855e-07, + "loss": 0.7531, + "step": 10258 + }, + { + "epoch": 0.84, + "grad_norm": 2.02730281209635, + "learning_rate": 6.681090215649872e-07, + "loss": 0.3874, + "step": 10259 + }, + { + "epoch": 0.84, + "grad_norm": 3.4602941295006677, + "learning_rate": 6.674480933860938e-07, + "loss": 0.7086, + "step": 10260 + }, + { + "epoch": 0.84, + "grad_norm": 3.408342882524193, + "learning_rate": 6.667874689002352e-07, + "loss": 0.5866, + "step": 10261 + }, + { + "epoch": 0.84, + "grad_norm": 2.4173230715632665, + "learning_rate": 6.661271481537157e-07, + "loss": 0.5306, + "step": 10262 + }, + { + "epoch": 0.84, + "grad_norm": 4.969856473735861, + "learning_rate": 6.654671311928218e-07, + "loss": 0.9168, + "step": 10263 + }, + { + "epoch": 0.84, + "grad_norm": 2.6517478940535586, + "learning_rate": 6.648074180638181e-07, + "loss": 0.501, + "step": 10264 + }, + { + "epoch": 0.84, + "grad_norm": 4.653153651391398, + "learning_rate": 6.641480088129488e-07, + "loss": 0.8284, + "step": 10265 + }, + { + "epoch": 0.84, + "grad_norm": 3.850258416331185, + "learning_rate": 6.634889034864334e-07, + "loss": 0.8756, + "step": 10266 + }, + { + "epoch": 0.84, + "grad_norm": 2.985382741642617, + "learning_rate": 6.628301021304734e-07, + "loss": 0.3819, + "step": 10267 + }, + { + "epoch": 0.84, + "grad_norm": 3.380448581255921, + "learning_rate": 6.621716047912475e-07, + "loss": 0.6911, + "step": 10268 + }, + { + "epoch": 0.84, + "grad_norm": 5.063915509321915, + "learning_rate": 6.615134115149135e-07, + "loss": 0.839, + "step": 10269 + }, + { + "epoch": 0.84, + "grad_norm": 3.769133946396099, + "learning_rate": 6.608555223476088e-07, + "loss": 0.7109, + "step": 10270 + }, + { + "epoch": 0.84, + "grad_norm": 2.433360575241961, + "learning_rate": 6.60197937335449e-07, + "loss": 0.3123, + "step": 10271 + }, + { + "epoch": 0.84, + "grad_norm": 4.813186992499145, + "learning_rate": 6.595406565245255e-07, + "loss": 0.9826, + "step": 10272 + }, + { + "epoch": 0.84, + "grad_norm": 3.6466358439004374, + "learning_rate": 6.588836799609128e-07, + "loss": 0.6288, + "step": 10273 + }, + { + "epoch": 0.84, + "grad_norm": 4.693817862440512, + "learning_rate": 6.582270076906611e-07, + "loss": 0.5749, + "step": 10274 + }, + { + "epoch": 0.84, + "grad_norm": 6.070760684402229, + "learning_rate": 6.57570639759802e-07, + "loss": 1.4687, + "step": 10275 + }, + { + "epoch": 0.84, + "grad_norm": 3.607951170928873, + "learning_rate": 6.569145762143414e-07, + "loss": 0.7955, + "step": 10276 + }, + { + "epoch": 0.84, + "grad_norm": 5.837526405375264, + "learning_rate": 6.562588171002688e-07, + "loss": 0.8754, + "step": 10277 + }, + { + "epoch": 0.84, + "grad_norm": 4.214752328843424, + "learning_rate": 6.556033624635482e-07, + "loss": 0.9623, + "step": 10278 + }, + { + "epoch": 0.84, + "grad_norm": 3.033828828863102, + "learning_rate": 6.549482123501249e-07, + "loss": 0.3344, + "step": 10279 + }, + { + "epoch": 0.84, + "grad_norm": 1.3399389391733743, + "learning_rate": 6.542933668059226e-07, + "loss": 0.1505, + "step": 10280 + }, + { + "epoch": 0.84, + "grad_norm": 4.133479400402992, + "learning_rate": 6.536388258768423e-07, + "loss": 0.9193, + "step": 10281 + }, + { + "epoch": 0.84, + "grad_norm": 1.5262125922080394, + "learning_rate": 6.529845896087649e-07, + "loss": 0.1784, + "step": 10282 + }, + { + "epoch": 0.84, + "grad_norm": 3.4484274448162457, + "learning_rate": 6.523306580475508e-07, + "loss": 0.7567, + "step": 10283 + }, + { + "epoch": 0.84, + "grad_norm": 5.2346819404260065, + "learning_rate": 6.516770312390353e-07, + "loss": 1.1449, + "step": 10284 + }, + { + "epoch": 0.84, + "grad_norm": 4.732851697170656, + "learning_rate": 6.510237092290361e-07, + "loss": 0.6355, + "step": 10285 + }, + { + "epoch": 0.84, + "grad_norm": 6.066102838902472, + "learning_rate": 6.503706920633473e-07, + "loss": 1.278, + "step": 10286 + }, + { + "epoch": 0.84, + "grad_norm": 4.29385326911628, + "learning_rate": 6.49717979787744e-07, + "loss": 1.0592, + "step": 10287 + }, + { + "epoch": 0.84, + "grad_norm": 2.741642006414545, + "learning_rate": 6.490655724479789e-07, + "loss": 0.4784, + "step": 10288 + }, + { + "epoch": 0.84, + "grad_norm": 4.301411693753504, + "learning_rate": 6.48413470089781e-07, + "loss": 0.4115, + "step": 10289 + }, + { + "epoch": 0.84, + "grad_norm": 3.9701793371450704, + "learning_rate": 6.477616727588604e-07, + "loss": 0.593, + "step": 10290 + }, + { + "epoch": 0.84, + "grad_norm": 2.334020513818201, + "learning_rate": 6.471101805009062e-07, + "loss": 0.3358, + "step": 10291 + }, + { + "epoch": 0.84, + "grad_norm": 4.042172667693351, + "learning_rate": 6.464589933615839e-07, + "loss": 0.5782, + "step": 10292 + }, + { + "epoch": 0.84, + "grad_norm": 4.898454941842396, + "learning_rate": 6.458081113865395e-07, + "loss": 0.98, + "step": 10293 + }, + { + "epoch": 0.84, + "grad_norm": 3.301511079307743, + "learning_rate": 6.451575346213979e-07, + "loss": 0.6028, + "step": 10294 + }, + { + "epoch": 0.84, + "grad_norm": 2.127705479254126, + "learning_rate": 6.4450726311176e-07, + "loss": 0.2934, + "step": 10295 + }, + { + "epoch": 0.84, + "grad_norm": 3.6716391467874003, + "learning_rate": 6.438572969032075e-07, + "loss": 0.495, + "step": 10296 + }, + { + "epoch": 0.84, + "grad_norm": 2.8791457123016366, + "learning_rate": 6.432076360413003e-07, + "loss": 0.5122, + "step": 10297 + }, + { + "epoch": 0.84, + "grad_norm": 3.1169069139262344, + "learning_rate": 6.425582805715775e-07, + "loss": 0.5587, + "step": 10298 + }, + { + "epoch": 0.84, + "grad_norm": 2.140954051481482, + "learning_rate": 6.419092305395552e-07, + "loss": 0.3388, + "step": 10299 + }, + { + "epoch": 0.84, + "grad_norm": 2.4818241671767765, + "learning_rate": 6.412604859907306e-07, + "loss": 0.46, + "step": 10300 + }, + { + "epoch": 0.84, + "grad_norm": 4.585994386964461, + "learning_rate": 6.406120469705757e-07, + "loss": 0.6881, + "step": 10301 + }, + { + "epoch": 0.84, + "grad_norm": 3.3936003516441566, + "learning_rate": 6.399639135245439e-07, + "loss": 0.6548, + "step": 10302 + }, + { + "epoch": 0.84, + "grad_norm": 3.076143681485743, + "learning_rate": 6.393160856980668e-07, + "loss": 0.6639, + "step": 10303 + }, + { + "epoch": 0.84, + "grad_norm": 5.277110114326273, + "learning_rate": 6.386685635365542e-07, + "loss": 0.8738, + "step": 10304 + }, + { + "epoch": 0.84, + "grad_norm": 4.097791072129815, + "learning_rate": 6.380213470853963e-07, + "loss": 0.7573, + "step": 10305 + }, + { + "epoch": 0.84, + "grad_norm": 2.6666051053495963, + "learning_rate": 6.37374436389957e-07, + "loss": 0.6457, + "step": 10306 + }, + { + "epoch": 0.84, + "grad_norm": 6.189374771386946, + "learning_rate": 6.367278314955849e-07, + "loss": 1.1032, + "step": 10307 + }, + { + "epoch": 0.84, + "grad_norm": 3.2975087719293557, + "learning_rate": 6.360815324476016e-07, + "loss": 0.5535, + "step": 10308 + }, + { + "epoch": 0.84, + "grad_norm": 4.001540898596129, + "learning_rate": 6.354355392913114e-07, + "loss": 0.7347, + "step": 10309 + }, + { + "epoch": 0.84, + "grad_norm": 3.903098744714241, + "learning_rate": 6.347898520719947e-07, + "loss": 0.7331, + "step": 10310 + }, + { + "epoch": 0.84, + "grad_norm": 3.7852382356129795, + "learning_rate": 6.341444708349131e-07, + "loss": 0.9693, + "step": 10311 + }, + { + "epoch": 0.84, + "grad_norm": 3.788993743466064, + "learning_rate": 6.334993956253033e-07, + "loss": 0.5701, + "step": 10312 + }, + { + "epoch": 0.84, + "grad_norm": 3.4718749584589923, + "learning_rate": 6.328546264883822e-07, + "loss": 0.5135, + "step": 10313 + }, + { + "epoch": 0.84, + "grad_norm": 2.1507870284409103, + "learning_rate": 6.322101634693461e-07, + "loss": 0.5019, + "step": 10314 + }, + { + "epoch": 0.84, + "grad_norm": 3.6874629571572686, + "learning_rate": 6.315660066133689e-07, + "loss": 0.6376, + "step": 10315 + }, + { + "epoch": 0.84, + "grad_norm": 3.5487397987975715, + "learning_rate": 6.309221559656026e-07, + "loss": 0.599, + "step": 10316 + }, + { + "epoch": 0.84, + "grad_norm": 4.856529420525376, + "learning_rate": 6.302786115711806e-07, + "loss": 0.6894, + "step": 10317 + }, + { + "epoch": 0.84, + "grad_norm": 5.266462849628899, + "learning_rate": 6.296353734752098e-07, + "loss": 0.9058, + "step": 10318 + }, + { + "epoch": 0.84, + "grad_norm": 6.010416616131682, + "learning_rate": 6.289924417227789e-07, + "loss": 1.1745, + "step": 10319 + }, + { + "epoch": 0.84, + "grad_norm": 1.1058518824775168, + "learning_rate": 6.283498163589558e-07, + "loss": 0.1765, + "step": 10320 + }, + { + "epoch": 0.84, + "grad_norm": 5.802275931450062, + "learning_rate": 6.277074974287856e-07, + "loss": 1.0474, + "step": 10321 + }, + { + "epoch": 0.84, + "grad_norm": 4.797194600107948, + "learning_rate": 6.270654849772906e-07, + "loss": 0.8021, + "step": 10322 + }, + { + "epoch": 0.84, + "grad_norm": 3.4246262562427128, + "learning_rate": 6.264237790494754e-07, + "loss": 0.6082, + "step": 10323 + }, + { + "epoch": 0.84, + "grad_norm": 3.4649718930823123, + "learning_rate": 6.257823796903178e-07, + "loss": 0.6519, + "step": 10324 + }, + { + "epoch": 0.84, + "grad_norm": 3.05193623676796, + "learning_rate": 6.251412869447793e-07, + "loss": 0.3848, + "step": 10325 + }, + { + "epoch": 0.84, + "grad_norm": 5.309915119979786, + "learning_rate": 6.24500500857797e-07, + "loss": 0.8005, + "step": 10326 + }, + { + "epoch": 0.84, + "grad_norm": 4.254255006430809, + "learning_rate": 6.238600214742868e-07, + "loss": 0.58, + "step": 10327 + }, + { + "epoch": 0.84, + "grad_norm": 3.961040158682524, + "learning_rate": 6.232198488391461e-07, + "loss": 0.6447, + "step": 10328 + }, + { + "epoch": 0.84, + "grad_norm": 6.040144358348651, + "learning_rate": 6.225799829972445e-07, + "loss": 1.0723, + "step": 10329 + }, + { + "epoch": 0.84, + "grad_norm": 4.395942298388709, + "learning_rate": 6.219404239934357e-07, + "loss": 0.8869, + "step": 10330 + }, + { + "epoch": 0.84, + "grad_norm": 3.4425011509638686, + "learning_rate": 6.213011718725493e-07, + "loss": 0.3754, + "step": 10331 + }, + { + "epoch": 0.84, + "grad_norm": 2.0282056723859623, + "learning_rate": 6.206622266793949e-07, + "loss": 0.2786, + "step": 10332 + }, + { + "epoch": 0.84, + "grad_norm": 2.798718887193467, + "learning_rate": 6.200235884587596e-07, + "loss": 0.5698, + "step": 10333 + }, + { + "epoch": 0.84, + "grad_norm": 4.575555834030352, + "learning_rate": 6.193852572554104e-07, + "loss": 0.8362, + "step": 10334 + }, + { + "epoch": 0.84, + "grad_norm": 3.5107693830811364, + "learning_rate": 6.187472331140887e-07, + "loss": 0.6514, + "step": 10335 + }, + { + "epoch": 0.84, + "grad_norm": 3.401100975202584, + "learning_rate": 6.181095160795187e-07, + "loss": 0.8509, + "step": 10336 + }, + { + "epoch": 0.84, + "grad_norm": 2.982092836864028, + "learning_rate": 6.174721061964029e-07, + "loss": 0.6061, + "step": 10337 + }, + { + "epoch": 0.84, + "grad_norm": 4.069443870464548, + "learning_rate": 6.168350035094178e-07, + "loss": 0.6044, + "step": 10338 + }, + { + "epoch": 0.85, + "grad_norm": 3.6613953155838637, + "learning_rate": 6.161982080632239e-07, + "loss": 0.7821, + "step": 10339 + }, + { + "epoch": 0.85, + "grad_norm": 2.951373078869165, + "learning_rate": 6.155617199024588e-07, + "loss": 0.5071, + "step": 10340 + }, + { + "epoch": 0.85, + "grad_norm": 5.23635692772508, + "learning_rate": 6.14925539071734e-07, + "loss": 0.7125, + "step": 10341 + }, + { + "epoch": 0.85, + "grad_norm": 1.2432538500796653, + "learning_rate": 6.142896656156455e-07, + "loss": 0.1723, + "step": 10342 + }, + { + "epoch": 0.85, + "grad_norm": 2.1860302085802137, + "learning_rate": 6.136540995787649e-07, + "loss": 0.3348, + "step": 10343 + }, + { + "epoch": 0.85, + "grad_norm": 4.883965192436654, + "learning_rate": 6.13018841005642e-07, + "loss": 0.8817, + "step": 10344 + }, + { + "epoch": 0.85, + "grad_norm": 3.2281268341711797, + "learning_rate": 6.12383889940808e-07, + "loss": 0.566, + "step": 10345 + }, + { + "epoch": 0.85, + "grad_norm": 4.594241528342523, + "learning_rate": 6.117492464287666e-07, + "loss": 0.9008, + "step": 10346 + }, + { + "epoch": 0.85, + "grad_norm": 2.73225891709093, + "learning_rate": 6.111149105140052e-07, + "loss": 0.7543, + "step": 10347 + }, + { + "epoch": 0.85, + "grad_norm": 2.8994344357396984, + "learning_rate": 6.104808822409885e-07, + "loss": 0.6075, + "step": 10348 + }, + { + "epoch": 0.85, + "grad_norm": 3.8249853665661506, + "learning_rate": 6.098471616541585e-07, + "loss": 1.0379, + "step": 10349 + }, + { + "epoch": 0.85, + "grad_norm": 4.6325995942495455, + "learning_rate": 6.092137487979366e-07, + "loss": 0.7691, + "step": 10350 + }, + { + "epoch": 0.85, + "grad_norm": 5.325602957824057, + "learning_rate": 6.08580643716723e-07, + "loss": 1.0923, + "step": 10351 + }, + { + "epoch": 0.85, + "grad_norm": 3.8762181860203797, + "learning_rate": 6.079478464548938e-07, + "loss": 0.8949, + "step": 10352 + }, + { + "epoch": 0.85, + "grad_norm": 5.082466940308302, + "learning_rate": 6.073153570568074e-07, + "loss": 1.0848, + "step": 10353 + }, + { + "epoch": 0.85, + "grad_norm": 5.0532638854790966, + "learning_rate": 6.066831755667962e-07, + "loss": 1.1865, + "step": 10354 + }, + { + "epoch": 0.85, + "grad_norm": 2.5533008575699334, + "learning_rate": 6.060513020291753e-07, + "loss": 0.3113, + "step": 10355 + }, + { + "epoch": 0.85, + "grad_norm": 2.5437168783894606, + "learning_rate": 6.054197364882347e-07, + "loss": 0.6903, + "step": 10356 + }, + { + "epoch": 0.85, + "grad_norm": 3.869798916119071, + "learning_rate": 6.047884789882469e-07, + "loss": 0.653, + "step": 10357 + }, + { + "epoch": 0.85, + "grad_norm": 5.146009182397108, + "learning_rate": 6.041575295734576e-07, + "loss": 1.0095, + "step": 10358 + }, + { + "epoch": 0.85, + "grad_norm": 3.8993766307489413, + "learning_rate": 6.035268882880941e-07, + "loss": 0.4933, + "step": 10359 + }, + { + "epoch": 0.85, + "grad_norm": 5.448991672717264, + "learning_rate": 6.028965551763627e-07, + "loss": 1.195, + "step": 10360 + }, + { + "epoch": 0.85, + "grad_norm": 4.678067743838973, + "learning_rate": 6.022665302824465e-07, + "loss": 0.8999, + "step": 10361 + }, + { + "epoch": 0.85, + "grad_norm": 4.6260758312453385, + "learning_rate": 6.016368136505074e-07, + "loss": 0.7788, + "step": 10362 + }, + { + "epoch": 0.85, + "grad_norm": 6.046366637884028, + "learning_rate": 6.010074053246872e-07, + "loss": 0.8591, + "step": 10363 + }, + { + "epoch": 0.85, + "grad_norm": 3.3503412128880736, + "learning_rate": 6.003783053491025e-07, + "loss": 0.5532, + "step": 10364 + }, + { + "epoch": 0.85, + "grad_norm": 4.203630680515273, + "learning_rate": 5.99749513767851e-07, + "loss": 0.581, + "step": 10365 + }, + { + "epoch": 0.85, + "grad_norm": 2.9230485157594908, + "learning_rate": 5.99121030625009e-07, + "loss": 0.4968, + "step": 10366 + }, + { + "epoch": 0.85, + "grad_norm": 4.0128915740778766, + "learning_rate": 5.984928559646297e-07, + "loss": 0.8049, + "step": 10367 + }, + { + "epoch": 0.85, + "grad_norm": 2.5001324671821212, + "learning_rate": 5.97864989830747e-07, + "loss": 0.4365, + "step": 10368 + }, + { + "epoch": 0.85, + "grad_norm": 4.697993203346618, + "learning_rate": 5.97237432267369e-07, + "loss": 0.9607, + "step": 10369 + }, + { + "epoch": 0.85, + "grad_norm": 2.456607685228635, + "learning_rate": 5.966101833184873e-07, + "loss": 0.4056, + "step": 10370 + }, + { + "epoch": 0.85, + "grad_norm": 3.821843099442181, + "learning_rate": 5.959832430280677e-07, + "loss": 0.6264, + "step": 10371 + }, + { + "epoch": 0.85, + "grad_norm": 3.903722547084163, + "learning_rate": 5.953566114400555e-07, + "loss": 0.65, + "step": 10372 + }, + { + "epoch": 0.85, + "grad_norm": 4.238758982285471, + "learning_rate": 5.947302885983763e-07, + "loss": 0.8129, + "step": 10373 + }, + { + "epoch": 0.85, + "grad_norm": 5.1879731369082, + "learning_rate": 5.941042745469333e-07, + "loss": 0.9396, + "step": 10374 + }, + { + "epoch": 0.85, + "grad_norm": 3.6125945881433394, + "learning_rate": 5.934785693296046e-07, + "loss": 0.6156, + "step": 10375 + }, + { + "epoch": 0.85, + "grad_norm": 3.0299076473916093, + "learning_rate": 5.928531729902509e-07, + "loss": 0.638, + "step": 10376 + }, + { + "epoch": 0.85, + "grad_norm": 3.2482608712189336, + "learning_rate": 5.922280855727103e-07, + "loss": 0.4839, + "step": 10377 + }, + { + "epoch": 0.85, + "grad_norm": 5.30757637027783, + "learning_rate": 5.916033071207977e-07, + "loss": 1.0658, + "step": 10378 + }, + { + "epoch": 0.85, + "grad_norm": 4.601260972257547, + "learning_rate": 5.909788376783083e-07, + "loss": 0.7538, + "step": 10379 + }, + { + "epoch": 0.85, + "grad_norm": 4.367104308567999, + "learning_rate": 5.903546772890151e-07, + "loss": 0.7123, + "step": 10380 + }, + { + "epoch": 0.85, + "grad_norm": 4.307815347324862, + "learning_rate": 5.897308259966672e-07, + "loss": 0.7737, + "step": 10381 + }, + { + "epoch": 0.85, + "grad_norm": 3.688619950415655, + "learning_rate": 5.891072838449946e-07, + "loss": 0.9054, + "step": 10382 + }, + { + "epoch": 0.85, + "grad_norm": 4.309335159982341, + "learning_rate": 5.884840508777056e-07, + "loss": 0.6679, + "step": 10383 + }, + { + "epoch": 0.85, + "grad_norm": 6.2618730791039985, + "learning_rate": 5.878611271384865e-07, + "loss": 1.2592, + "step": 10384 + }, + { + "epoch": 0.85, + "grad_norm": 1.294292166667183, + "learning_rate": 5.872385126709995e-07, + "loss": 0.1747, + "step": 10385 + }, + { + "epoch": 0.85, + "grad_norm": 3.068858159170679, + "learning_rate": 5.866162075188892e-07, + "loss": 0.5161, + "step": 10386 + }, + { + "epoch": 0.85, + "grad_norm": 4.841731758237636, + "learning_rate": 5.859942117257749e-07, + "loss": 0.621, + "step": 10387 + }, + { + "epoch": 0.85, + "grad_norm": 3.14131196717673, + "learning_rate": 5.853725253352566e-07, + "loss": 0.6049, + "step": 10388 + }, + { + "epoch": 0.85, + "grad_norm": 3.604489147954973, + "learning_rate": 5.847511483909119e-07, + "loss": 0.5231, + "step": 10389 + }, + { + "epoch": 0.85, + "grad_norm": 1.019636781178539, + "learning_rate": 5.841300809362959e-07, + "loss": 0.1193, + "step": 10390 + }, + { + "epoch": 0.85, + "grad_norm": 3.7573726079017606, + "learning_rate": 5.835093230149447e-07, + "loss": 0.6389, + "step": 10391 + }, + { + "epoch": 0.85, + "grad_norm": 3.6155034308134195, + "learning_rate": 5.828888746703687e-07, + "loss": 0.9993, + "step": 10392 + }, + { + "epoch": 0.85, + "grad_norm": 2.5814248275023153, + "learning_rate": 5.822687359460588e-07, + "loss": 0.352, + "step": 10393 + }, + { + "epoch": 0.85, + "grad_norm": 3.959016551167589, + "learning_rate": 5.816489068854841e-07, + "loss": 0.5663, + "step": 10394 + }, + { + "epoch": 0.85, + "grad_norm": 3.4804949765515043, + "learning_rate": 5.810293875320927e-07, + "loss": 0.4629, + "step": 10395 + }, + { + "epoch": 0.85, + "grad_norm": 4.340824583344594, + "learning_rate": 5.804101779293098e-07, + "loss": 0.9673, + "step": 10396 + }, + { + "epoch": 0.85, + "grad_norm": 3.491235360132722, + "learning_rate": 5.797912781205406e-07, + "loss": 0.5077, + "step": 10397 + }, + { + "epoch": 0.85, + "grad_norm": 3.178766204446659, + "learning_rate": 5.791726881491644e-07, + "loss": 0.6482, + "step": 10398 + }, + { + "epoch": 0.85, + "grad_norm": 3.3425427217509744, + "learning_rate": 5.785544080585437e-07, + "loss": 0.5488, + "step": 10399 + }, + { + "epoch": 0.85, + "grad_norm": 4.871187595458767, + "learning_rate": 5.779364378920177e-07, + "loss": 0.8559, + "step": 10400 + }, + { + "epoch": 0.85, + "grad_norm": 4.938369484063271, + "learning_rate": 5.773187776929017e-07, + "loss": 0.9178, + "step": 10401 + }, + { + "epoch": 0.85, + "grad_norm": 3.239734217262983, + "learning_rate": 5.767014275044914e-07, + "loss": 0.6713, + "step": 10402 + }, + { + "epoch": 0.85, + "grad_norm": 4.226986391688428, + "learning_rate": 5.760843873700622e-07, + "loss": 0.7308, + "step": 10403 + }, + { + "epoch": 0.85, + "grad_norm": 3.060829800690869, + "learning_rate": 5.754676573328632e-07, + "loss": 0.5538, + "step": 10404 + }, + { + "epoch": 0.85, + "grad_norm": 2.299010938722108, + "learning_rate": 5.74851237436126e-07, + "loss": 0.3488, + "step": 10405 + }, + { + "epoch": 0.85, + "grad_norm": 3.634979349676279, + "learning_rate": 5.742351277230584e-07, + "loss": 0.6518, + "step": 10406 + }, + { + "epoch": 0.85, + "grad_norm": 4.758525735469257, + "learning_rate": 5.736193282368474e-07, + "loss": 1.0361, + "step": 10407 + }, + { + "epoch": 0.85, + "grad_norm": 5.071298839841249, + "learning_rate": 5.730038390206594e-07, + "loss": 1.0557, + "step": 10408 + }, + { + "epoch": 0.85, + "grad_norm": 4.511955381102928, + "learning_rate": 5.723886601176343e-07, + "loss": 0.8135, + "step": 10409 + }, + { + "epoch": 0.85, + "grad_norm": 3.4612770054887276, + "learning_rate": 5.717737915708954e-07, + "loss": 0.6504, + "step": 10410 + }, + { + "epoch": 0.85, + "grad_norm": 3.622655119430177, + "learning_rate": 5.711592334235416e-07, + "loss": 0.6042, + "step": 10411 + }, + { + "epoch": 0.85, + "grad_norm": 3.370232020220817, + "learning_rate": 5.705449857186518e-07, + "loss": 0.4626, + "step": 10412 + }, + { + "epoch": 0.85, + "grad_norm": 3.4557763354897095, + "learning_rate": 5.699310484992809e-07, + "loss": 0.679, + "step": 10413 + }, + { + "epoch": 0.85, + "grad_norm": 5.834903902822914, + "learning_rate": 5.693174218084652e-07, + "loss": 1.3057, + "step": 10414 + }, + { + "epoch": 0.85, + "grad_norm": 3.8354419775478115, + "learning_rate": 5.687041056892145e-07, + "loss": 0.6512, + "step": 10415 + }, + { + "epoch": 0.85, + "grad_norm": 4.903765763800176, + "learning_rate": 5.680911001845218e-07, + "loss": 0.6977, + "step": 10416 + }, + { + "epoch": 0.85, + "grad_norm": 3.8246310995433146, + "learning_rate": 5.674784053373545e-07, + "loss": 0.7733, + "step": 10417 + }, + { + "epoch": 0.85, + "grad_norm": 4.159198786553831, + "learning_rate": 5.668660211906607e-07, + "loss": 0.939, + "step": 10418 + }, + { + "epoch": 0.85, + "grad_norm": 4.1686359363891405, + "learning_rate": 5.662539477873657e-07, + "loss": 0.5318, + "step": 10419 + }, + { + "epoch": 0.85, + "grad_norm": 4.145125587558952, + "learning_rate": 5.656421851703742e-07, + "loss": 0.8105, + "step": 10420 + }, + { + "epoch": 0.85, + "grad_norm": 2.849234211932994, + "learning_rate": 5.650307333825661e-07, + "loss": 0.3998, + "step": 10421 + }, + { + "epoch": 0.85, + "grad_norm": 2.3382026475057884, + "learning_rate": 5.644195924668028e-07, + "loss": 0.4322, + "step": 10422 + }, + { + "epoch": 0.85, + "grad_norm": 2.753214314856311, + "learning_rate": 5.638087624659216e-07, + "loss": 0.3664, + "step": 10423 + }, + { + "epoch": 0.85, + "grad_norm": 2.688637428225327, + "learning_rate": 5.631982434227406e-07, + "loss": 0.5746, + "step": 10424 + }, + { + "epoch": 0.85, + "grad_norm": 3.493311422581332, + "learning_rate": 5.625880353800545e-07, + "loss": 0.6409, + "step": 10425 + }, + { + "epoch": 0.85, + "grad_norm": 4.5404767230707925, + "learning_rate": 5.619781383806345e-07, + "loss": 0.7954, + "step": 10426 + }, + { + "epoch": 0.85, + "grad_norm": 3.6385739608008625, + "learning_rate": 5.613685524672318e-07, + "loss": 0.7876, + "step": 10427 + }, + { + "epoch": 0.85, + "grad_norm": 4.242105143127419, + "learning_rate": 5.607592776825777e-07, + "loss": 0.7932, + "step": 10428 + }, + { + "epoch": 0.85, + "grad_norm": 5.131498899837967, + "learning_rate": 5.601503140693782e-07, + "loss": 0.6301, + "step": 10429 + }, + { + "epoch": 0.85, + "grad_norm": 4.150034675458454, + "learning_rate": 5.595416616703203e-07, + "loss": 0.688, + "step": 10430 + }, + { + "epoch": 0.85, + "grad_norm": 4.3659764124021025, + "learning_rate": 5.589333205280662e-07, + "loss": 0.8211, + "step": 10431 + }, + { + "epoch": 0.85, + "grad_norm": 3.9082421698650633, + "learning_rate": 5.583252906852594e-07, + "loss": 0.9047, + "step": 10432 + }, + { + "epoch": 0.85, + "grad_norm": 4.194612789126027, + "learning_rate": 5.577175721845185e-07, + "loss": 0.5861, + "step": 10433 + }, + { + "epoch": 0.85, + "grad_norm": 5.315130226747433, + "learning_rate": 5.571101650684435e-07, + "loss": 1.2291, + "step": 10434 + }, + { + "epoch": 0.85, + "grad_norm": 2.723126091057395, + "learning_rate": 5.565030693796098e-07, + "loss": 0.5973, + "step": 10435 + }, + { + "epoch": 0.85, + "grad_norm": 4.598692276891783, + "learning_rate": 5.558962851605731e-07, + "loss": 0.7576, + "step": 10436 + }, + { + "epoch": 0.85, + "grad_norm": 3.2228214330867964, + "learning_rate": 5.552898124538669e-07, + "loss": 0.5391, + "step": 10437 + }, + { + "epoch": 0.85, + "grad_norm": 4.220146649411876, + "learning_rate": 5.546836513020004e-07, + "loss": 0.6394, + "step": 10438 + }, + { + "epoch": 0.85, + "grad_norm": 2.742810055429589, + "learning_rate": 5.540778017474635e-07, + "loss": 0.2506, + "step": 10439 + }, + { + "epoch": 0.85, + "grad_norm": 4.035624938028897, + "learning_rate": 5.534722638327245e-07, + "loss": 0.652, + "step": 10440 + }, + { + "epoch": 0.85, + "grad_norm": 3.690919519405272, + "learning_rate": 5.528670376002282e-07, + "loss": 0.5183, + "step": 10441 + }, + { + "epoch": 0.85, + "grad_norm": 2.442735923919764, + "learning_rate": 5.522621230923986e-07, + "loss": 0.6551, + "step": 10442 + }, + { + "epoch": 0.85, + "grad_norm": 4.765068024938463, + "learning_rate": 5.516575203516389e-07, + "loss": 0.839, + "step": 10443 + }, + { + "epoch": 0.85, + "grad_norm": 3.2785990526068276, + "learning_rate": 5.510532294203264e-07, + "loss": 0.5858, + "step": 10444 + }, + { + "epoch": 0.85, + "grad_norm": 4.778841998641732, + "learning_rate": 5.504492503408204e-07, + "loss": 0.8196, + "step": 10445 + }, + { + "epoch": 0.85, + "grad_norm": 5.085316672922174, + "learning_rate": 5.498455831554589e-07, + "loss": 1.2685, + "step": 10446 + }, + { + "epoch": 0.85, + "grad_norm": 4.967054918121122, + "learning_rate": 5.492422279065535e-07, + "loss": 0.8716, + "step": 10447 + }, + { + "epoch": 0.85, + "grad_norm": 4.017089926733228, + "learning_rate": 5.486391846363998e-07, + "loss": 0.6948, + "step": 10448 + }, + { + "epoch": 0.85, + "grad_norm": 2.960935176016891, + "learning_rate": 5.48036453387265e-07, + "loss": 0.5706, + "step": 10449 + }, + { + "epoch": 0.85, + "grad_norm": 4.29836270974255, + "learning_rate": 5.474340342014007e-07, + "loss": 0.6272, + "step": 10450 + }, + { + "epoch": 0.85, + "grad_norm": 3.618055713945523, + "learning_rate": 5.468319271210326e-07, + "loss": 0.743, + "step": 10451 + }, + { + "epoch": 0.85, + "grad_norm": 2.777227654941848, + "learning_rate": 5.462301321883661e-07, + "loss": 0.4506, + "step": 10452 + }, + { + "epoch": 0.85, + "grad_norm": 3.755869482729697, + "learning_rate": 5.456286494455843e-07, + "loss": 0.6088, + "step": 10453 + }, + { + "epoch": 0.85, + "grad_norm": 3.6009071215185906, + "learning_rate": 5.450274789348497e-07, + "loss": 0.9747, + "step": 10454 + }, + { + "epoch": 0.85, + "grad_norm": 4.5863105045010215, + "learning_rate": 5.444266206983001e-07, + "loss": 1.3258, + "step": 10455 + }, + { + "epoch": 0.85, + "grad_norm": 4.419252475761903, + "learning_rate": 5.438260747780532e-07, + "loss": 0.7476, + "step": 10456 + }, + { + "epoch": 0.85, + "grad_norm": 4.419854446947665, + "learning_rate": 5.432258412162056e-07, + "loss": 1.1689, + "step": 10457 + }, + { + "epoch": 0.85, + "grad_norm": 2.0074298968463062, + "learning_rate": 5.4262592005483e-07, + "loss": 0.3632, + "step": 10458 + }, + { + "epoch": 0.85, + "grad_norm": 3.225202148603655, + "learning_rate": 5.420263113359791e-07, + "loss": 0.6382, + "step": 10459 + }, + { + "epoch": 0.85, + "grad_norm": 4.111944379787736, + "learning_rate": 5.414270151016843e-07, + "loss": 0.5012, + "step": 10460 + }, + { + "epoch": 0.86, + "grad_norm": 3.6107706317948147, + "learning_rate": 5.408280313939502e-07, + "loss": 0.4584, + "step": 10461 + }, + { + "epoch": 0.86, + "grad_norm": 1.9131447266796116, + "learning_rate": 5.402293602547659e-07, + "loss": 0.3841, + "step": 10462 + }, + { + "epoch": 0.86, + "grad_norm": 3.9771864336352314, + "learning_rate": 5.396310017260931e-07, + "loss": 0.8553, + "step": 10463 + }, + { + "epoch": 0.86, + "grad_norm": 4.495631673597813, + "learning_rate": 5.390329558498759e-07, + "loss": 0.9242, + "step": 10464 + }, + { + "epoch": 0.86, + "grad_norm": 5.0870467055378406, + "learning_rate": 5.384352226680356e-07, + "loss": 1.0474, + "step": 10465 + }, + { + "epoch": 0.86, + "grad_norm": 5.110527224780676, + "learning_rate": 5.378378022224679e-07, + "loss": 0.8318, + "step": 10466 + }, + { + "epoch": 0.86, + "grad_norm": 1.7325885690721872, + "learning_rate": 5.372406945550507e-07, + "loss": 0.1975, + "step": 10467 + }, + { + "epoch": 0.86, + "grad_norm": 2.80887620719976, + "learning_rate": 5.366438997076396e-07, + "loss": 0.2936, + "step": 10468 + }, + { + "epoch": 0.86, + "grad_norm": 4.850940394131275, + "learning_rate": 5.360474177220659e-07, + "loss": 1.162, + "step": 10469 + }, + { + "epoch": 0.86, + "grad_norm": 4.180895013424875, + "learning_rate": 5.354512486401409e-07, + "loss": 0.4575, + "step": 10470 + }, + { + "epoch": 0.86, + "grad_norm": 2.8108880858287897, + "learning_rate": 5.348553925036553e-07, + "loss": 0.5706, + "step": 10471 + }, + { + "epoch": 0.86, + "grad_norm": 1.8481081046692012, + "learning_rate": 5.342598493543727e-07, + "loss": 0.3095, + "step": 10472 + }, + { + "epoch": 0.86, + "grad_norm": 4.895634104931042, + "learning_rate": 5.3366461923404e-07, + "loss": 0.7126, + "step": 10473 + }, + { + "epoch": 0.86, + "grad_norm": 3.960115078884639, + "learning_rate": 5.330697021843795e-07, + "loss": 0.6181, + "step": 10474 + }, + { + "epoch": 0.86, + "grad_norm": 3.34098382409636, + "learning_rate": 5.324750982470933e-07, + "loss": 0.5289, + "step": 10475 + }, + { + "epoch": 0.86, + "grad_norm": 3.844540506448431, + "learning_rate": 5.318808074638598e-07, + "loss": 0.7852, + "step": 10476 + }, + { + "epoch": 0.86, + "grad_norm": 2.316453704559759, + "learning_rate": 5.312868298763374e-07, + "loss": 0.4248, + "step": 10477 + }, + { + "epoch": 0.86, + "grad_norm": 5.065511357492012, + "learning_rate": 5.306931655261588e-07, + "loss": 1.1797, + "step": 10478 + }, + { + "epoch": 0.86, + "grad_norm": 5.088625813835447, + "learning_rate": 5.300998144549402e-07, + "loss": 0.7932, + "step": 10479 + }, + { + "epoch": 0.86, + "grad_norm": 4.17514705001973, + "learning_rate": 5.29506776704271e-07, + "loss": 0.5734, + "step": 10480 + }, + { + "epoch": 0.86, + "grad_norm": 3.5910957602546696, + "learning_rate": 5.289140523157205e-07, + "loss": 0.8801, + "step": 10481 + }, + { + "epoch": 0.86, + "grad_norm": 3.6013748244996906, + "learning_rate": 5.283216413308367e-07, + "loss": 0.8214, + "step": 10482 + }, + { + "epoch": 0.86, + "grad_norm": 3.6717264426904643, + "learning_rate": 5.277295437911462e-07, + "loss": 0.6352, + "step": 10483 + }, + { + "epoch": 0.86, + "grad_norm": 4.371896629040389, + "learning_rate": 5.271377597381505e-07, + "loss": 0.5906, + "step": 10484 + }, + { + "epoch": 0.86, + "grad_norm": 3.368054379749993, + "learning_rate": 5.265462892133317e-07, + "loss": 0.4887, + "step": 10485 + }, + { + "epoch": 0.86, + "grad_norm": 2.8464376430535165, + "learning_rate": 5.259551322581496e-07, + "loss": 0.3931, + "step": 10486 + }, + { + "epoch": 0.86, + "grad_norm": 3.399816522319251, + "learning_rate": 5.253642889140414e-07, + "loss": 0.4729, + "step": 10487 + }, + { + "epoch": 0.86, + "grad_norm": 3.2664370652522123, + "learning_rate": 5.247737592224239e-07, + "loss": 0.304, + "step": 10488 + }, + { + "epoch": 0.86, + "grad_norm": 3.5684092464370605, + "learning_rate": 5.241835432246888e-07, + "loss": 0.4109, + "step": 10489 + }, + { + "epoch": 0.86, + "grad_norm": 3.106307996943481, + "learning_rate": 5.235936409622083e-07, + "loss": 0.6459, + "step": 10490 + }, + { + "epoch": 0.86, + "grad_norm": 5.120738502869185, + "learning_rate": 5.230040524763325e-07, + "loss": 1.2144, + "step": 10491 + }, + { + "epoch": 0.86, + "grad_norm": 5.702304509190002, + "learning_rate": 5.224147778083882e-07, + "loss": 1.2657, + "step": 10492 + }, + { + "epoch": 0.86, + "grad_norm": 4.913241630622584, + "learning_rate": 5.218258169996825e-07, + "loss": 1.1311, + "step": 10493 + }, + { + "epoch": 0.86, + "grad_norm": 3.9066351228096754, + "learning_rate": 5.212371700914976e-07, + "loss": 0.7973, + "step": 10494 + }, + { + "epoch": 0.86, + "grad_norm": 5.202969004089788, + "learning_rate": 5.206488371250956e-07, + "loss": 1.1047, + "step": 10495 + }, + { + "epoch": 0.86, + "grad_norm": 3.6915185468451486, + "learning_rate": 5.200608181417155e-07, + "loss": 0.9084, + "step": 10496 + }, + { + "epoch": 0.86, + "grad_norm": 3.2002742632248755, + "learning_rate": 5.194731131825754e-07, + "loss": 0.6965, + "step": 10497 + }, + { + "epoch": 0.86, + "grad_norm": 3.1255230862771204, + "learning_rate": 5.188857222888699e-07, + "loss": 0.386, + "step": 10498 + }, + { + "epoch": 0.86, + "grad_norm": 4.113045842082105, + "learning_rate": 5.182986455017741e-07, + "loss": 0.7443, + "step": 10499 + }, + { + "epoch": 0.86, + "grad_norm": 3.8650408809820687, + "learning_rate": 5.177118828624395e-07, + "loss": 0.8432, + "step": 10500 + }, + { + "epoch": 0.86, + "grad_norm": 4.533967590398182, + "learning_rate": 5.171254344119941e-07, + "loss": 1.0344, + "step": 10501 + }, + { + "epoch": 0.86, + "grad_norm": 4.553799879888968, + "learning_rate": 5.165393001915464e-07, + "loss": 0.7334, + "step": 10502 + }, + { + "epoch": 0.86, + "grad_norm": 2.568057645612484, + "learning_rate": 5.159534802421817e-07, + "loss": 0.272, + "step": 10503 + }, + { + "epoch": 0.86, + "grad_norm": 2.892616461853972, + "learning_rate": 5.153679746049628e-07, + "loss": 0.2415, + "step": 10504 + }, + { + "epoch": 0.86, + "grad_norm": 3.2287915215924756, + "learning_rate": 5.147827833209334e-07, + "loss": 0.2845, + "step": 10505 + }, + { + "epoch": 0.86, + "grad_norm": 2.98277451964769, + "learning_rate": 5.141979064311098e-07, + "loss": 0.5627, + "step": 10506 + }, + { + "epoch": 0.86, + "grad_norm": 4.759412696679251, + "learning_rate": 5.136133439764907e-07, + "loss": 0.8121, + "step": 10507 + }, + { + "epoch": 0.86, + "grad_norm": 3.6435535949517086, + "learning_rate": 5.130290959980511e-07, + "loss": 0.5884, + "step": 10508 + }, + { + "epoch": 0.86, + "grad_norm": 3.5847272345069072, + "learning_rate": 5.12445162536746e-07, + "loss": 0.6392, + "step": 10509 + }, + { + "epoch": 0.86, + "grad_norm": 2.7730194330075157, + "learning_rate": 5.11861543633504e-07, + "loss": 0.5449, + "step": 10510 + }, + { + "epoch": 0.86, + "grad_norm": 4.836953255912099, + "learning_rate": 5.112782393292359e-07, + "loss": 0.8827, + "step": 10511 + }, + { + "epoch": 0.86, + "grad_norm": 2.7660198150343227, + "learning_rate": 5.106952496648276e-07, + "loss": 0.5007, + "step": 10512 + }, + { + "epoch": 0.86, + "grad_norm": 3.33503023950577, + "learning_rate": 5.101125746811447e-07, + "loss": 0.6088, + "step": 10513 + }, + { + "epoch": 0.86, + "grad_norm": 3.8980156264743555, + "learning_rate": 5.095302144190307e-07, + "loss": 0.6212, + "step": 10514 + }, + { + "epoch": 0.86, + "grad_norm": 4.84448207594496, + "learning_rate": 5.089481689193054e-07, + "loss": 0.8081, + "step": 10515 + }, + { + "epoch": 0.86, + "grad_norm": 4.64459833793355, + "learning_rate": 5.083664382227688e-07, + "loss": 0.8825, + "step": 10516 + }, + { + "epoch": 0.86, + "grad_norm": 2.043838681594187, + "learning_rate": 5.07785022370198e-07, + "loss": 0.3449, + "step": 10517 + }, + { + "epoch": 0.86, + "grad_norm": 4.154060146279045, + "learning_rate": 5.072039214023461e-07, + "loss": 0.7281, + "step": 10518 + }, + { + "epoch": 0.86, + "grad_norm": 3.96466531977924, + "learning_rate": 5.06623135359946e-07, + "loss": 0.8337, + "step": 10519 + }, + { + "epoch": 0.86, + "grad_norm": 4.676759933945837, + "learning_rate": 5.060426642837096e-07, + "loss": 0.6837, + "step": 10520 + }, + { + "epoch": 0.86, + "grad_norm": 4.565229230187216, + "learning_rate": 5.054625082143244e-07, + "loss": 0.9036, + "step": 10521 + }, + { + "epoch": 0.86, + "grad_norm": 6.105733286335197, + "learning_rate": 5.048826671924573e-07, + "loss": 0.8276, + "step": 10522 + }, + { + "epoch": 0.86, + "grad_norm": 4.453506602357791, + "learning_rate": 5.043031412587529e-07, + "loss": 0.7909, + "step": 10523 + }, + { + "epoch": 0.86, + "grad_norm": 2.4796082685018095, + "learning_rate": 5.037239304538328e-07, + "loss": 0.3556, + "step": 10524 + }, + { + "epoch": 0.86, + "grad_norm": 4.4196196199598, + "learning_rate": 5.031450348182976e-07, + "loss": 0.9265, + "step": 10525 + }, + { + "epoch": 0.86, + "grad_norm": 1.712548641642958, + "learning_rate": 5.025664543927239e-07, + "loss": 0.3009, + "step": 10526 + }, + { + "epoch": 0.86, + "grad_norm": 5.949476720099252, + "learning_rate": 5.019881892176692e-07, + "loss": 1.3988, + "step": 10527 + }, + { + "epoch": 0.86, + "grad_norm": 3.287437585260928, + "learning_rate": 5.014102393336684e-07, + "loss": 0.7728, + "step": 10528 + }, + { + "epoch": 0.86, + "grad_norm": 3.608351739883492, + "learning_rate": 5.008326047812306e-07, + "loss": 0.7378, + "step": 10529 + }, + { + "epoch": 0.86, + "grad_norm": 4.24070022130551, + "learning_rate": 5.002552856008463e-07, + "loss": 1.0254, + "step": 10530 + }, + { + "epoch": 0.86, + "grad_norm": 3.3390730038185388, + "learning_rate": 4.996782818329843e-07, + "loss": 0.6737, + "step": 10531 + }, + { + "epoch": 0.86, + "grad_norm": 4.083408045700113, + "learning_rate": 4.991015935180887e-07, + "loss": 0.742, + "step": 10532 + }, + { + "epoch": 0.86, + "grad_norm": 4.174766349101573, + "learning_rate": 4.985252206965841e-07, + "loss": 0.6072, + "step": 10533 + }, + { + "epoch": 0.86, + "grad_norm": 4.172485055619851, + "learning_rate": 4.979491634088712e-07, + "loss": 0.6658, + "step": 10534 + }, + { + "epoch": 0.86, + "grad_norm": 5.402005108799347, + "learning_rate": 4.973734216953285e-07, + "loss": 0.8842, + "step": 10535 + }, + { + "epoch": 0.86, + "grad_norm": 5.90056761758074, + "learning_rate": 4.967979955963132e-07, + "loss": 1.0879, + "step": 10536 + }, + { + "epoch": 0.86, + "grad_norm": 2.39097764998649, + "learning_rate": 4.962228851521606e-07, + "loss": 0.27, + "step": 10537 + }, + { + "epoch": 0.86, + "grad_norm": 3.060768350538714, + "learning_rate": 4.956480904031829e-07, + "loss": 0.7793, + "step": 10538 + }, + { + "epoch": 0.86, + "grad_norm": 3.0608895948984265, + "learning_rate": 4.950736113896726e-07, + "loss": 0.4767, + "step": 10539 + }, + { + "epoch": 0.86, + "grad_norm": 3.2562936229286406, + "learning_rate": 4.94499448151895e-07, + "loss": 0.5844, + "step": 10540 + }, + { + "epoch": 0.86, + "grad_norm": 3.4369122871795086, + "learning_rate": 4.939256007300997e-07, + "loss": 0.6791, + "step": 10541 + }, + { + "epoch": 0.86, + "grad_norm": 4.343019423477376, + "learning_rate": 4.933520691645078e-07, + "loss": 0.7955, + "step": 10542 + }, + { + "epoch": 0.86, + "grad_norm": 2.4430208185765965, + "learning_rate": 4.927788534953232e-07, + "loss": 0.2051, + "step": 10543 + }, + { + "epoch": 0.86, + "grad_norm": 4.375757953035753, + "learning_rate": 4.922059537627249e-07, + "loss": 0.8462, + "step": 10544 + }, + { + "epoch": 0.86, + "grad_norm": 4.787432113919724, + "learning_rate": 4.916333700068732e-07, + "loss": 0.7809, + "step": 10545 + }, + { + "epoch": 0.86, + "grad_norm": 3.277283723496338, + "learning_rate": 4.910611022679002e-07, + "loss": 0.8131, + "step": 10546 + }, + { + "epoch": 0.86, + "grad_norm": 4.915573451282051, + "learning_rate": 4.904891505859211e-07, + "loss": 0.7347, + "step": 10547 + }, + { + "epoch": 0.86, + "grad_norm": 4.869333183646448, + "learning_rate": 4.899175150010266e-07, + "loss": 1.0938, + "step": 10548 + }, + { + "epoch": 0.86, + "grad_norm": 4.985754897390461, + "learning_rate": 4.893461955532869e-07, + "loss": 0.7476, + "step": 10549 + }, + { + "epoch": 0.86, + "grad_norm": 5.600746051641911, + "learning_rate": 4.887751922827483e-07, + "loss": 1.5825, + "step": 10550 + }, + { + "epoch": 0.86, + "grad_norm": 4.604774612678929, + "learning_rate": 4.882045052294371e-07, + "loss": 0.7399, + "step": 10551 + }, + { + "epoch": 0.86, + "grad_norm": 5.329949394899275, + "learning_rate": 4.876341344333535e-07, + "loss": 0.9543, + "step": 10552 + }, + { + "epoch": 0.86, + "grad_norm": 3.183678652065017, + "learning_rate": 4.870640799344789e-07, + "loss": 0.6445, + "step": 10553 + }, + { + "epoch": 0.86, + "grad_norm": 2.619817988896082, + "learning_rate": 4.864943417727719e-07, + "loss": 0.2499, + "step": 10554 + }, + { + "epoch": 0.86, + "grad_norm": 2.7996139038893224, + "learning_rate": 4.8592491998817e-07, + "loss": 0.6501, + "step": 10555 + }, + { + "epoch": 0.86, + "grad_norm": 3.8476390589206884, + "learning_rate": 4.85355814620585e-07, + "loss": 0.5333, + "step": 10556 + }, + { + "epoch": 0.86, + "grad_norm": 1.6991815993314117, + "learning_rate": 4.847870257099102e-07, + "loss": 0.2765, + "step": 10557 + }, + { + "epoch": 0.86, + "grad_norm": 4.626329793778804, + "learning_rate": 4.842185532960142e-07, + "loss": 0.7608, + "step": 10558 + }, + { + "epoch": 0.86, + "grad_norm": 4.239040453232224, + "learning_rate": 4.836503974187446e-07, + "loss": 0.6292, + "step": 10559 + }, + { + "epoch": 0.86, + "grad_norm": 4.090350654471295, + "learning_rate": 4.830825581179266e-07, + "loss": 0.8787, + "step": 10560 + }, + { + "epoch": 0.86, + "grad_norm": 3.8632163414524663, + "learning_rate": 4.825150354333641e-07, + "loss": 0.8996, + "step": 10561 + }, + { + "epoch": 0.86, + "grad_norm": 4.597256036373677, + "learning_rate": 4.819478294048368e-07, + "loss": 0.8609, + "step": 10562 + }, + { + "epoch": 0.86, + "grad_norm": 3.8255565498335624, + "learning_rate": 4.813809400721053e-07, + "loss": 0.7586, + "step": 10563 + }, + { + "epoch": 0.86, + "grad_norm": 5.747533371702565, + "learning_rate": 4.808143674749044e-07, + "loss": 1.0456, + "step": 10564 + }, + { + "epoch": 0.86, + "grad_norm": 3.7978246930803086, + "learning_rate": 4.802481116529484e-07, + "loss": 0.8228, + "step": 10565 + }, + { + "epoch": 0.86, + "grad_norm": 4.369936623859059, + "learning_rate": 4.796821726459294e-07, + "loss": 0.9767, + "step": 10566 + }, + { + "epoch": 0.86, + "grad_norm": 4.80137353729828, + "learning_rate": 4.79116550493518e-07, + "loss": 0.759, + "step": 10567 + }, + { + "epoch": 0.86, + "grad_norm": 3.835421858453713, + "learning_rate": 4.785512452353619e-07, + "loss": 0.7454, + "step": 10568 + }, + { + "epoch": 0.86, + "grad_norm": 3.034874998674549, + "learning_rate": 4.77986256911086e-07, + "loss": 0.3137, + "step": 10569 + }, + { + "epoch": 0.86, + "grad_norm": 5.184732909644232, + "learning_rate": 4.774215855602932e-07, + "loss": 0.847, + "step": 10570 + }, + { + "epoch": 0.86, + "grad_norm": 3.991251299295491, + "learning_rate": 4.768572312225645e-07, + "loss": 0.5923, + "step": 10571 + }, + { + "epoch": 0.86, + "grad_norm": 3.1722346498282974, + "learning_rate": 4.762931939374604e-07, + "loss": 0.5217, + "step": 10572 + }, + { + "epoch": 0.86, + "grad_norm": 4.769833138335216, + "learning_rate": 4.7572947374451563e-07, + "loss": 0.6477, + "step": 10573 + }, + { + "epoch": 0.86, + "grad_norm": 3.4207100410905196, + "learning_rate": 4.751660706832456e-07, + "loss": 0.5049, + "step": 10574 + }, + { + "epoch": 0.86, + "grad_norm": 4.179637135441105, + "learning_rate": 4.746029847931405e-07, + "loss": 0.6807, + "step": 10575 + }, + { + "epoch": 0.86, + "grad_norm": 2.9932773081312645, + "learning_rate": 4.74040216113672e-07, + "loss": 0.4292, + "step": 10576 + }, + { + "epoch": 0.86, + "grad_norm": 2.1567134578312914, + "learning_rate": 4.7347776468428755e-07, + "loss": 0.2362, + "step": 10577 + }, + { + "epoch": 0.86, + "grad_norm": 2.808502722109823, + "learning_rate": 4.729156305444121e-07, + "loss": 0.2996, + "step": 10578 + }, + { + "epoch": 0.86, + "grad_norm": 3.193178091505855, + "learning_rate": 4.7235381373344877e-07, + "loss": 0.34, + "step": 10579 + }, + { + "epoch": 0.86, + "grad_norm": 4.854881487942244, + "learning_rate": 4.717923142907799e-07, + "loss": 1.1866, + "step": 10580 + }, + { + "epoch": 0.86, + "grad_norm": 1.1034841366520098, + "learning_rate": 4.7123113225576223e-07, + "loss": 0.1515, + "step": 10581 + }, + { + "epoch": 0.86, + "grad_norm": 3.9591419888299337, + "learning_rate": 4.7067026766773273e-07, + "loss": 1.0687, + "step": 10582 + }, + { + "epoch": 0.87, + "grad_norm": 5.852620985285636, + "learning_rate": 4.701097205660055e-07, + "loss": 1.2246, + "step": 10583 + }, + { + "epoch": 0.87, + "grad_norm": 2.440507337223321, + "learning_rate": 4.695494909898729e-07, + "loss": 0.4791, + "step": 10584 + }, + { + "epoch": 0.87, + "grad_norm": 6.249098578944652, + "learning_rate": 4.689895789786059e-07, + "loss": 1.2812, + "step": 10585 + }, + { + "epoch": 0.87, + "grad_norm": 4.578958964412463, + "learning_rate": 4.684299845714485e-07, + "loss": 0.5956, + "step": 10586 + }, + { + "epoch": 0.87, + "grad_norm": 2.58440156649555, + "learning_rate": 4.6787070780762833e-07, + "loss": 0.3894, + "step": 10587 + }, + { + "epoch": 0.87, + "grad_norm": 4.827490135978755, + "learning_rate": 4.6731174872634844e-07, + "loss": 1.1186, + "step": 10588 + }, + { + "epoch": 0.87, + "grad_norm": 4.8791858692725185, + "learning_rate": 4.6675310736678746e-07, + "loss": 1.1343, + "step": 10589 + }, + { + "epoch": 0.87, + "grad_norm": 2.45158548480915, + "learning_rate": 4.661947837681052e-07, + "loss": 0.3851, + "step": 10590 + }, + { + "epoch": 0.87, + "grad_norm": 4.8440433803230345, + "learning_rate": 4.656367779694382e-07, + "loss": 1.0207, + "step": 10591 + }, + { + "epoch": 0.87, + "grad_norm": 4.627636686901322, + "learning_rate": 4.650790900098989e-07, + "loss": 0.5841, + "step": 10592 + }, + { + "epoch": 0.87, + "grad_norm": 6.053180697309185, + "learning_rate": 4.6452171992857895e-07, + "loss": 0.9651, + "step": 10593 + }, + { + "epoch": 0.87, + "grad_norm": 3.9797866509653845, + "learning_rate": 4.6396466776454816e-07, + "loss": 0.6952, + "step": 10594 + }, + { + "epoch": 0.87, + "grad_norm": 5.283156809275132, + "learning_rate": 4.634079335568531e-07, + "loss": 1.3423, + "step": 10595 + }, + { + "epoch": 0.87, + "grad_norm": 4.098370401479687, + "learning_rate": 4.628515173445186e-07, + "loss": 0.9491, + "step": 10596 + }, + { + "epoch": 0.87, + "grad_norm": 5.4882523325858426, + "learning_rate": 4.6229541916654797e-07, + "loss": 0.6638, + "step": 10597 + }, + { + "epoch": 0.87, + "grad_norm": 4.21272766290156, + "learning_rate": 4.6173963906191945e-07, + "loss": 0.6257, + "step": 10598 + }, + { + "epoch": 0.87, + "grad_norm": 4.578440361226375, + "learning_rate": 4.611841770695913e-07, + "loss": 0.6912, + "step": 10599 + }, + { + "epoch": 0.87, + "grad_norm": 3.7406030006374236, + "learning_rate": 4.6062903322849963e-07, + "loss": 0.5074, + "step": 10600 + }, + { + "epoch": 0.87, + "grad_norm": 2.971787159460415, + "learning_rate": 4.600742075775572e-07, + "loss": 0.4895, + "step": 10601 + }, + { + "epoch": 0.87, + "grad_norm": 3.549420946392138, + "learning_rate": 4.5951970015565617e-07, + "loss": 0.527, + "step": 10602 + }, + { + "epoch": 0.87, + "grad_norm": 4.49758116442198, + "learning_rate": 4.5896551100166273e-07, + "loss": 0.7076, + "step": 10603 + }, + { + "epoch": 0.87, + "grad_norm": 4.221557375377842, + "learning_rate": 4.584116401544253e-07, + "loss": 0.8774, + "step": 10604 + }, + { + "epoch": 0.87, + "grad_norm": 4.704012548444121, + "learning_rate": 4.578580876527661e-07, + "loss": 0.905, + "step": 10605 + }, + { + "epoch": 0.87, + "grad_norm": 5.8933981792163435, + "learning_rate": 4.573048535354874e-07, + "loss": 1.2283, + "step": 10606 + }, + { + "epoch": 0.87, + "grad_norm": 3.6890289491684216, + "learning_rate": 4.5675193784136873e-07, + "loss": 0.7169, + "step": 10607 + }, + { + "epoch": 0.87, + "grad_norm": 3.078110281832308, + "learning_rate": 4.5619934060916747e-07, + "loss": 0.4362, + "step": 10608 + }, + { + "epoch": 0.87, + "grad_norm": 5.943365923867175, + "learning_rate": 4.556470618776171e-07, + "loss": 1.4185, + "step": 10609 + }, + { + "epoch": 0.87, + "grad_norm": 3.7983566498192602, + "learning_rate": 4.5509510168543045e-07, + "loss": 0.7491, + "step": 10610 + }, + { + "epoch": 0.87, + "grad_norm": 3.1864928678226625, + "learning_rate": 4.545434600712978e-07, + "loss": 0.6091, + "step": 10611 + }, + { + "epoch": 0.87, + "grad_norm": 4.680425124795627, + "learning_rate": 4.5399213707388645e-07, + "loss": 1.067, + "step": 10612 + }, + { + "epoch": 0.87, + "grad_norm": 3.2976887527520815, + "learning_rate": 4.5344113273184223e-07, + "loss": 0.6837, + "step": 10613 + }, + { + "epoch": 0.87, + "grad_norm": 2.247737177096619, + "learning_rate": 4.5289044708378914e-07, + "loss": 0.2153, + "step": 10614 + }, + { + "epoch": 0.87, + "grad_norm": 3.1577186393115704, + "learning_rate": 4.523400801683253e-07, + "loss": 0.4013, + "step": 10615 + }, + { + "epoch": 0.87, + "grad_norm": 5.368416540525366, + "learning_rate": 4.517900320240304e-07, + "loss": 1.1825, + "step": 10616 + }, + { + "epoch": 0.87, + "grad_norm": 4.433922677016759, + "learning_rate": 4.512403026894607e-07, + "loss": 0.6473, + "step": 10617 + }, + { + "epoch": 0.87, + "grad_norm": 5.099567381738013, + "learning_rate": 4.5069089220315e-07, + "loss": 1.078, + "step": 10618 + }, + { + "epoch": 0.87, + "grad_norm": 4.298397952514416, + "learning_rate": 4.5014180060360843e-07, + "loss": 0.7946, + "step": 10619 + }, + { + "epoch": 0.87, + "grad_norm": 5.172604711180217, + "learning_rate": 4.4959302792932645e-07, + "loss": 1.2498, + "step": 10620 + }, + { + "epoch": 0.87, + "grad_norm": 4.602856346642938, + "learning_rate": 4.490445742187688e-07, + "loss": 0.6376, + "step": 10621 + }, + { + "epoch": 0.87, + "grad_norm": 4.904344180262937, + "learning_rate": 4.484964395103808e-07, + "loss": 0.8726, + "step": 10622 + }, + { + "epoch": 0.87, + "grad_norm": 3.3263918944209077, + "learning_rate": 4.47948623842584e-07, + "loss": 0.4186, + "step": 10623 + }, + { + "epoch": 0.87, + "grad_norm": 3.617491450908338, + "learning_rate": 4.4740112725377817e-07, + "loss": 0.7986, + "step": 10624 + }, + { + "epoch": 0.87, + "grad_norm": 4.1027863871710375, + "learning_rate": 4.468539497823399e-07, + "loss": 0.7861, + "step": 10625 + }, + { + "epoch": 0.87, + "grad_norm": 2.262894681306143, + "learning_rate": 4.4630709146662623e-07, + "loss": 0.4552, + "step": 10626 + }, + { + "epoch": 0.87, + "grad_norm": 4.224403031382618, + "learning_rate": 4.4576055234496595e-07, + "loss": 0.8116, + "step": 10627 + }, + { + "epoch": 0.87, + "grad_norm": 5.141197113716972, + "learning_rate": 4.4521433245567127e-07, + "loss": 0.7435, + "step": 10628 + }, + { + "epoch": 0.87, + "grad_norm": 3.5200730619412, + "learning_rate": 4.446684318370292e-07, + "loss": 0.5186, + "step": 10629 + }, + { + "epoch": 0.87, + "grad_norm": 5.16613430209177, + "learning_rate": 4.4412285052730543e-07, + "loss": 0.8888, + "step": 10630 + }, + { + "epoch": 0.87, + "grad_norm": 6.131999128338534, + "learning_rate": 4.435775885647431e-07, + "loss": 0.9811, + "step": 10631 + }, + { + "epoch": 0.87, + "grad_norm": 3.7427335505554438, + "learning_rate": 4.4303264598756167e-07, + "loss": 0.6292, + "step": 10632 + }, + { + "epoch": 0.87, + "grad_norm": 4.312167453246354, + "learning_rate": 4.4248802283395953e-07, + "loss": 0.7849, + "step": 10633 + }, + { + "epoch": 0.87, + "grad_norm": 2.724819689411017, + "learning_rate": 4.4194371914211385e-07, + "loss": 0.2586, + "step": 10634 + }, + { + "epoch": 0.87, + "grad_norm": 4.132480578860827, + "learning_rate": 4.4139973495017584e-07, + "loss": 0.7393, + "step": 10635 + }, + { + "epoch": 0.87, + "grad_norm": 4.937206205763876, + "learning_rate": 4.4085607029627717e-07, + "loss": 0.9206, + "step": 10636 + }, + { + "epoch": 0.87, + "grad_norm": 3.1249392973486874, + "learning_rate": 4.403127252185274e-07, + "loss": 0.618, + "step": 10637 + }, + { + "epoch": 0.87, + "grad_norm": 4.403196424429233, + "learning_rate": 4.397696997550105e-07, + "loss": 0.7381, + "step": 10638 + }, + { + "epoch": 0.87, + "grad_norm": 3.567039543028251, + "learning_rate": 4.392269939437921e-07, + "loss": 0.5634, + "step": 10639 + }, + { + "epoch": 0.87, + "grad_norm": 3.854015625729932, + "learning_rate": 4.3868460782291235e-07, + "loss": 0.743, + "step": 10640 + }, + { + "epoch": 0.87, + "grad_norm": 3.9822235908521133, + "learning_rate": 4.381425414303908e-07, + "loss": 0.5683, + "step": 10641 + }, + { + "epoch": 0.87, + "grad_norm": 5.996851986473055, + "learning_rate": 4.376007948042238e-07, + "loss": 1.2439, + "step": 10642 + }, + { + "epoch": 0.87, + "grad_norm": 2.352824545867161, + "learning_rate": 4.370593679823865e-07, + "loss": 0.3105, + "step": 10643 + }, + { + "epoch": 0.87, + "grad_norm": 4.6522769651597775, + "learning_rate": 4.3651826100282844e-07, + "loss": 0.6531, + "step": 10644 + }, + { + "epoch": 0.87, + "grad_norm": 4.355682252300801, + "learning_rate": 4.3597747390348056e-07, + "loss": 0.6397, + "step": 10645 + }, + { + "epoch": 0.87, + "grad_norm": 3.389657952791804, + "learning_rate": 4.354370067222485e-07, + "loss": 0.6547, + "step": 10646 + }, + { + "epoch": 0.87, + "grad_norm": 2.5289816701473318, + "learning_rate": 4.348968594970171e-07, + "loss": 0.3732, + "step": 10647 + }, + { + "epoch": 0.87, + "grad_norm": 4.168427711599652, + "learning_rate": 4.343570322656498e-07, + "loss": 0.8713, + "step": 10648 + }, + { + "epoch": 0.87, + "grad_norm": 4.680600022944302, + "learning_rate": 4.3381752506598373e-07, + "loss": 1.2249, + "step": 10649 + }, + { + "epoch": 0.87, + "grad_norm": 2.8633096515418357, + "learning_rate": 4.33278337935838e-07, + "loss": 0.6042, + "step": 10650 + }, + { + "epoch": 0.87, + "grad_norm": 2.708536022959924, + "learning_rate": 4.3273947091300504e-07, + "loss": 0.3868, + "step": 10651 + }, + { + "epoch": 0.87, + "grad_norm": 4.033168550954183, + "learning_rate": 4.322009240352587e-07, + "loss": 0.8516, + "step": 10652 + }, + { + "epoch": 0.87, + "grad_norm": 3.8082117616109707, + "learning_rate": 4.316626973403487e-07, + "loss": 0.4788, + "step": 10653 + }, + { + "epoch": 0.87, + "grad_norm": 3.888987878913506, + "learning_rate": 4.311247908660027e-07, + "loss": 0.6127, + "step": 10654 + }, + { + "epoch": 0.87, + "grad_norm": 4.776096194500669, + "learning_rate": 4.305872046499243e-07, + "loss": 0.5333, + "step": 10655 + }, + { + "epoch": 0.87, + "grad_norm": 3.281179360056835, + "learning_rate": 4.300499387297963e-07, + "loss": 0.3566, + "step": 10656 + }, + { + "epoch": 0.87, + "grad_norm": 4.881596429018882, + "learning_rate": 4.2951299314327953e-07, + "loss": 1.1082, + "step": 10657 + }, + { + "epoch": 0.87, + "grad_norm": 2.5948203241454566, + "learning_rate": 4.2897636792801123e-07, + "loss": 0.3076, + "step": 10658 + }, + { + "epoch": 0.87, + "grad_norm": 4.400210588906877, + "learning_rate": 4.2844006312160625e-07, + "loss": 0.7595, + "step": 10659 + }, + { + "epoch": 0.87, + "grad_norm": 3.7261392672939495, + "learning_rate": 4.2790407876165783e-07, + "loss": 0.9267, + "step": 10660 + }, + { + "epoch": 0.87, + "grad_norm": 6.234313141007303, + "learning_rate": 4.2736841488573543e-07, + "loss": 1.3824, + "step": 10661 + }, + { + "epoch": 0.87, + "grad_norm": 5.613363121137143, + "learning_rate": 4.268330715313862e-07, + "loss": 0.7748, + "step": 10662 + }, + { + "epoch": 0.87, + "grad_norm": 4.808480011585258, + "learning_rate": 4.2629804873613676e-07, + "loss": 1.1513, + "step": 10663 + }, + { + "epoch": 0.87, + "grad_norm": 4.47762144904202, + "learning_rate": 4.2576334653749e-07, + "loss": 0.9783, + "step": 10664 + }, + { + "epoch": 0.87, + "grad_norm": 3.3851991281660267, + "learning_rate": 4.2522896497292465e-07, + "loss": 0.4157, + "step": 10665 + }, + { + "epoch": 0.87, + "grad_norm": 4.29677868769382, + "learning_rate": 4.2469490407990033e-07, + "loss": 0.7274, + "step": 10666 + }, + { + "epoch": 0.87, + "grad_norm": 5.316192431611226, + "learning_rate": 4.2416116389585094e-07, + "loss": 1.1019, + "step": 10667 + }, + { + "epoch": 0.87, + "grad_norm": 3.803931542736004, + "learning_rate": 4.236277444581893e-07, + "loss": 0.6968, + "step": 10668 + }, + { + "epoch": 0.87, + "grad_norm": 4.8032771517576185, + "learning_rate": 4.2309464580430614e-07, + "loss": 0.9831, + "step": 10669 + }, + { + "epoch": 0.87, + "grad_norm": 3.969830532450726, + "learning_rate": 4.2256186797156986e-07, + "loss": 0.6159, + "step": 10670 + }, + { + "epoch": 0.87, + "grad_norm": 3.424609582686892, + "learning_rate": 4.220294109973266e-07, + "loss": 0.5029, + "step": 10671 + }, + { + "epoch": 0.87, + "grad_norm": 5.8175042161609865, + "learning_rate": 4.2149727491889725e-07, + "loss": 0.9314, + "step": 10672 + }, + { + "epoch": 0.87, + "grad_norm": 3.148672746299841, + "learning_rate": 4.2096545977358294e-07, + "loss": 0.4523, + "step": 10673 + }, + { + "epoch": 0.87, + "grad_norm": 3.4410525768851987, + "learning_rate": 4.2043396559866224e-07, + "loss": 0.4707, + "step": 10674 + }, + { + "epoch": 0.87, + "grad_norm": 3.6024058720906362, + "learning_rate": 4.199027924313903e-07, + "loss": 0.8373, + "step": 10675 + }, + { + "epoch": 0.87, + "grad_norm": 3.6521156174179907, + "learning_rate": 4.1937194030899966e-07, + "loss": 0.6826, + "step": 10676 + }, + { + "epoch": 0.87, + "grad_norm": 4.334807522534701, + "learning_rate": 4.188414092687021e-07, + "loss": 1.1498, + "step": 10677 + }, + { + "epoch": 0.87, + "grad_norm": 1.9210667026365889, + "learning_rate": 4.183111993476835e-07, + "loss": 0.3626, + "step": 10678 + }, + { + "epoch": 0.87, + "grad_norm": 4.4504094068519215, + "learning_rate": 4.177813105831102e-07, + "loss": 0.9136, + "step": 10679 + }, + { + "epoch": 0.87, + "grad_norm": 2.646032517955008, + "learning_rate": 4.172517430121248e-07, + "loss": 0.3493, + "step": 10680 + }, + { + "epoch": 0.87, + "grad_norm": 2.264478556092544, + "learning_rate": 4.1672249667184974e-07, + "loss": 0.4744, + "step": 10681 + }, + { + "epoch": 0.87, + "grad_norm": 3.7508307297946977, + "learning_rate": 4.161935715993798e-07, + "loss": 0.7425, + "step": 10682 + }, + { + "epoch": 0.87, + "grad_norm": 6.403066664815627, + "learning_rate": 4.1566496783179257e-07, + "loss": 1.1371, + "step": 10683 + }, + { + "epoch": 0.87, + "grad_norm": 4.686267943481072, + "learning_rate": 4.1513668540613895e-07, + "loss": 1.2371, + "step": 10684 + }, + { + "epoch": 0.87, + "grad_norm": 3.765613706065596, + "learning_rate": 4.1460872435945046e-07, + "loss": 0.5829, + "step": 10685 + }, + { + "epoch": 0.87, + "grad_norm": 4.861893386867951, + "learning_rate": 4.1408108472873466e-07, + "loss": 1.0981, + "step": 10686 + }, + { + "epoch": 0.87, + "grad_norm": 4.095293931200413, + "learning_rate": 4.1355376655097704e-07, + "loss": 0.732, + "step": 10687 + }, + { + "epoch": 0.87, + "grad_norm": 4.023464390673694, + "learning_rate": 4.1302676986314126e-07, + "loss": 0.843, + "step": 10688 + }, + { + "epoch": 0.87, + "grad_norm": 2.7289135220673826, + "learning_rate": 4.125000947021651e-07, + "loss": 0.5838, + "step": 10689 + }, + { + "epoch": 0.87, + "grad_norm": 3.054357836962516, + "learning_rate": 4.1197374110496736e-07, + "loss": 0.2689, + "step": 10690 + }, + { + "epoch": 0.87, + "grad_norm": 4.368630862806371, + "learning_rate": 4.1144770910844287e-07, + "loss": 0.8435, + "step": 10691 + }, + { + "epoch": 0.87, + "grad_norm": 2.67632656360338, + "learning_rate": 4.1092199874946505e-07, + "loss": 0.3495, + "step": 10692 + }, + { + "epoch": 0.87, + "grad_norm": 3.8132894463745277, + "learning_rate": 4.103966100648832e-07, + "loss": 0.7751, + "step": 10693 + }, + { + "epoch": 0.87, + "grad_norm": 3.240815316954804, + "learning_rate": 4.0987154309152624e-07, + "loss": 0.5966, + "step": 10694 + }, + { + "epoch": 0.87, + "grad_norm": 3.4030123337657274, + "learning_rate": 4.0934679786619635e-07, + "loss": 0.7006, + "step": 10695 + }, + { + "epoch": 0.87, + "grad_norm": 3.607028351181928, + "learning_rate": 4.0882237442567753e-07, + "loss": 0.3138, + "step": 10696 + }, + { + "epoch": 0.87, + "grad_norm": 5.019722337630489, + "learning_rate": 4.082982728067303e-07, + "loss": 1.0796, + "step": 10697 + }, + { + "epoch": 0.87, + "grad_norm": 4.26180541436453, + "learning_rate": 4.077744930460903e-07, + "loss": 0.6425, + "step": 10698 + }, + { + "epoch": 0.87, + "grad_norm": 3.5925048951248546, + "learning_rate": 4.072510351804726e-07, + "loss": 0.7575, + "step": 10699 + }, + { + "epoch": 0.87, + "grad_norm": 4.354897154606622, + "learning_rate": 4.0672789924657065e-07, + "loss": 1.1631, + "step": 10700 + }, + { + "epoch": 0.87, + "grad_norm": 2.8872884073162206, + "learning_rate": 4.062050852810523e-07, + "loss": 0.5028, + "step": 10701 + }, + { + "epoch": 0.87, + "grad_norm": 3.504835082816432, + "learning_rate": 4.056825933205649e-07, + "loss": 0.9385, + "step": 10702 + }, + { + "epoch": 0.87, + "grad_norm": 4.3366722530498745, + "learning_rate": 4.051604234017331e-07, + "loss": 0.7271, + "step": 10703 + }, + { + "epoch": 0.87, + "grad_norm": 5.004438000810595, + "learning_rate": 4.0463857556115924e-07, + "loss": 1.0754, + "step": 10704 + }, + { + "epoch": 0.87, + "grad_norm": 4.030954108170035, + "learning_rate": 4.0411704983542186e-07, + "loss": 0.6232, + "step": 10705 + }, + { + "epoch": 0.88, + "grad_norm": 4.128803075417094, + "learning_rate": 4.0359584626107896e-07, + "loss": 0.8658, + "step": 10706 + }, + { + "epoch": 0.88, + "grad_norm": 4.140969447636292, + "learning_rate": 4.0307496487466234e-07, + "loss": 0.5852, + "step": 10707 + }, + { + "epoch": 0.88, + "grad_norm": 3.5609521974211, + "learning_rate": 4.025544057126851e-07, + "loss": 0.6727, + "step": 10708 + }, + { + "epoch": 0.88, + "grad_norm": 4.963580000879209, + "learning_rate": 4.020341688116358e-07, + "loss": 0.7234, + "step": 10709 + }, + { + "epoch": 0.88, + "grad_norm": 3.490147357337444, + "learning_rate": 4.0151425420798087e-07, + "loss": 0.6812, + "step": 10710 + }, + { + "epoch": 0.88, + "grad_norm": 4.6926098684383915, + "learning_rate": 4.009946619381649e-07, + "loss": 1.3689, + "step": 10711 + }, + { + "epoch": 0.88, + "grad_norm": 4.35012293405951, + "learning_rate": 4.004753920386073e-07, + "loss": 0.7819, + "step": 10712 + }, + { + "epoch": 0.88, + "grad_norm": 4.130592052806018, + "learning_rate": 3.999564445457088e-07, + "loss": 0.7592, + "step": 10713 + }, + { + "epoch": 0.88, + "grad_norm": 4.838509212592349, + "learning_rate": 3.994378194958426e-07, + "loss": 0.6659, + "step": 10714 + }, + { + "epoch": 0.88, + "grad_norm": 2.5971797437510538, + "learning_rate": 3.9891951692536403e-07, + "loss": 0.4575, + "step": 10715 + }, + { + "epoch": 0.88, + "grad_norm": 4.79462436955565, + "learning_rate": 3.984015368706029e-07, + "loss": 0.9588, + "step": 10716 + }, + { + "epoch": 0.88, + "grad_norm": 4.457717012861435, + "learning_rate": 3.978838793678691e-07, + "loss": 0.7413, + "step": 10717 + }, + { + "epoch": 0.88, + "grad_norm": 3.8643353666091755, + "learning_rate": 3.9736654445344583e-07, + "loss": 0.4063, + "step": 10718 + }, + { + "epoch": 0.88, + "grad_norm": 4.625635562975463, + "learning_rate": 3.968495321635973e-07, + "loss": 0.955, + "step": 10719 + }, + { + "epoch": 0.88, + "grad_norm": 4.033288993894299, + "learning_rate": 3.9633284253456306e-07, + "loss": 0.7561, + "step": 10720 + }, + { + "epoch": 0.88, + "grad_norm": 4.926926563325641, + "learning_rate": 3.9581647560256175e-07, + "loss": 0.9063, + "step": 10721 + }, + { + "epoch": 0.88, + "grad_norm": 5.649846015446501, + "learning_rate": 3.9530043140378783e-07, + "loss": 0.6667, + "step": 10722 + }, + { + "epoch": 0.88, + "grad_norm": 5.924621114151401, + "learning_rate": 3.947847099744151e-07, + "loss": 0.8411, + "step": 10723 + }, + { + "epoch": 0.88, + "grad_norm": 3.908430828876094, + "learning_rate": 3.942693113505908e-07, + "loss": 0.9769, + "step": 10724 + }, + { + "epoch": 0.88, + "grad_norm": 1.9654199707560478, + "learning_rate": 3.937542355684443e-07, + "loss": 0.5655, + "step": 10725 + }, + { + "epoch": 0.88, + "grad_norm": 2.885417670420595, + "learning_rate": 3.93239482664079e-07, + "loss": 0.6008, + "step": 10726 + }, + { + "epoch": 0.88, + "grad_norm": 4.18806641565675, + "learning_rate": 3.9272505267357817e-07, + "loss": 0.9083, + "step": 10727 + }, + { + "epoch": 0.88, + "grad_norm": 4.632116258430988, + "learning_rate": 3.9221094563299924e-07, + "loss": 0.6983, + "step": 10728 + }, + { + "epoch": 0.88, + "grad_norm": 5.415239353925382, + "learning_rate": 3.91697161578381e-07, + "loss": 1.0464, + "step": 10729 + }, + { + "epoch": 0.88, + "grad_norm": 2.5919235662955313, + "learning_rate": 3.911837005457353e-07, + "loss": 0.4949, + "step": 10730 + }, + { + "epoch": 0.88, + "grad_norm": 2.5218364501714263, + "learning_rate": 3.906705625710544e-07, + "loss": 0.5958, + "step": 10731 + }, + { + "epoch": 0.88, + "grad_norm": 4.638661032322979, + "learning_rate": 3.9015774769030737e-07, + "loss": 0.7758, + "step": 10732 + }, + { + "epoch": 0.88, + "grad_norm": 3.9578276632227913, + "learning_rate": 3.8964525593944037e-07, + "loss": 0.3478, + "step": 10733 + }, + { + "epoch": 0.88, + "grad_norm": 3.5455863004408235, + "learning_rate": 3.8913308735437695e-07, + "loss": 0.6597, + "step": 10734 + }, + { + "epoch": 0.88, + "grad_norm": 1.9097376668172084, + "learning_rate": 3.8862124197101723e-07, + "loss": 0.3072, + "step": 10735 + }, + { + "epoch": 0.88, + "grad_norm": 3.6449222677733357, + "learning_rate": 3.8810971982523925e-07, + "loss": 0.8079, + "step": 10736 + }, + { + "epoch": 0.88, + "grad_norm": 4.213293657965279, + "learning_rate": 3.875985209528993e-07, + "loss": 0.6895, + "step": 10737 + }, + { + "epoch": 0.88, + "grad_norm": 1.9313900646944315, + "learning_rate": 3.870876453898292e-07, + "loss": 0.2604, + "step": 10738 + }, + { + "epoch": 0.88, + "grad_norm": 2.680761500486827, + "learning_rate": 3.8657709317184043e-07, + "loss": 0.411, + "step": 10739 + }, + { + "epoch": 0.88, + "grad_norm": 2.6893590662009825, + "learning_rate": 3.8606686433471986e-07, + "loss": 0.3308, + "step": 10740 + }, + { + "epoch": 0.88, + "grad_norm": 3.943425828588057, + "learning_rate": 3.855569589142316e-07, + "loss": 0.5602, + "step": 10741 + }, + { + "epoch": 0.88, + "grad_norm": 1.620359041990299, + "learning_rate": 3.8504737694611884e-07, + "loss": 0.3706, + "step": 10742 + }, + { + "epoch": 0.88, + "grad_norm": 4.248144268331347, + "learning_rate": 3.8453811846610124e-07, + "loss": 0.8726, + "step": 10743 + }, + { + "epoch": 0.88, + "grad_norm": 5.475629628031191, + "learning_rate": 3.8402918350987363e-07, + "loss": 1.1841, + "step": 10744 + }, + { + "epoch": 0.88, + "grad_norm": 3.4896175540043695, + "learning_rate": 3.8352057211311187e-07, + "loss": 0.7999, + "step": 10745 + }, + { + "epoch": 0.88, + "grad_norm": 2.9714899109623616, + "learning_rate": 3.830122843114681e-07, + "loss": 0.59, + "step": 10746 + }, + { + "epoch": 0.88, + "grad_norm": 4.796663888117933, + "learning_rate": 3.825043201405687e-07, + "loss": 1.0062, + "step": 10747 + }, + { + "epoch": 0.88, + "grad_norm": 4.150614319743733, + "learning_rate": 3.819966796360214e-07, + "loss": 0.6221, + "step": 10748 + }, + { + "epoch": 0.88, + "grad_norm": 5.915798778933123, + "learning_rate": 3.8148936283340876e-07, + "loss": 0.9069, + "step": 10749 + }, + { + "epoch": 0.88, + "grad_norm": 3.0373935126550538, + "learning_rate": 3.8098236976829237e-07, + "loss": 0.5306, + "step": 10750 + }, + { + "epoch": 0.88, + "grad_norm": 3.5893956529612456, + "learning_rate": 3.804757004762105e-07, + "loss": 0.5189, + "step": 10751 + }, + { + "epoch": 0.88, + "grad_norm": 2.03046284830745, + "learning_rate": 3.7996935499267753e-07, + "loss": 0.198, + "step": 10752 + }, + { + "epoch": 0.88, + "grad_norm": 1.5964226176458247, + "learning_rate": 3.7946333335318553e-07, + "loss": 0.2255, + "step": 10753 + }, + { + "epoch": 0.88, + "grad_norm": 4.306032320380534, + "learning_rate": 3.7895763559320565e-07, + "loss": 0.597, + "step": 10754 + }, + { + "epoch": 0.88, + "grad_norm": 0.996963558315773, + "learning_rate": 3.784522617481845e-07, + "loss": 0.1405, + "step": 10755 + }, + { + "epoch": 0.88, + "grad_norm": 3.958372766379946, + "learning_rate": 3.779472118535471e-07, + "loss": 0.576, + "step": 10756 + }, + { + "epoch": 0.88, + "grad_norm": 3.1559251715806473, + "learning_rate": 3.7744248594469514e-07, + "loss": 0.6124, + "step": 10757 + }, + { + "epoch": 0.88, + "grad_norm": 4.607760712270118, + "learning_rate": 3.7693808405700693e-07, + "loss": 0.9607, + "step": 10758 + }, + { + "epoch": 0.88, + "grad_norm": 4.64135442254822, + "learning_rate": 3.764340062258404e-07, + "loss": 0.9674, + "step": 10759 + }, + { + "epoch": 0.88, + "grad_norm": 3.295047258336011, + "learning_rate": 3.7593025248652717e-07, + "loss": 0.5003, + "step": 10760 + }, + { + "epoch": 0.88, + "grad_norm": 2.1594550418128007, + "learning_rate": 3.754268228743796e-07, + "loss": 0.5412, + "step": 10761 + }, + { + "epoch": 0.88, + "grad_norm": 3.19015187923477, + "learning_rate": 3.74923717424685e-07, + "loss": 0.5206, + "step": 10762 + }, + { + "epoch": 0.88, + "grad_norm": 4.49541054523754, + "learning_rate": 3.744209361727102e-07, + "loss": 0.9129, + "step": 10763 + }, + { + "epoch": 0.88, + "grad_norm": 3.813429106006615, + "learning_rate": 3.7391847915369703e-07, + "loss": 0.8258, + "step": 10764 + }, + { + "epoch": 0.88, + "grad_norm": 4.620999661923272, + "learning_rate": 3.7341634640286507e-07, + "loss": 0.7093, + "step": 10765 + }, + { + "epoch": 0.88, + "grad_norm": 4.338545381906777, + "learning_rate": 3.729145379554128e-07, + "loss": 0.649, + "step": 10766 + }, + { + "epoch": 0.88, + "grad_norm": 5.463822813724985, + "learning_rate": 3.724130538465137e-07, + "loss": 0.6253, + "step": 10767 + }, + { + "epoch": 0.88, + "grad_norm": 1.582230910851982, + "learning_rate": 3.7191189411132145e-07, + "loss": 0.2331, + "step": 10768 + }, + { + "epoch": 0.88, + "grad_norm": 4.611934416139467, + "learning_rate": 3.7141105878496284e-07, + "loss": 1.1343, + "step": 10769 + }, + { + "epoch": 0.88, + "grad_norm": 2.332748873450337, + "learning_rate": 3.709105479025454e-07, + "loss": 0.382, + "step": 10770 + }, + { + "epoch": 0.88, + "grad_norm": 4.8338039845774174, + "learning_rate": 3.704103614991528e-07, + "loss": 0.8078, + "step": 10771 + }, + { + "epoch": 0.88, + "grad_norm": 4.871897611029115, + "learning_rate": 3.699104996098457e-07, + "loss": 1.0775, + "step": 10772 + }, + { + "epoch": 0.88, + "grad_norm": 5.132853935258872, + "learning_rate": 3.694109622696629e-07, + "loss": 1.1363, + "step": 10773 + }, + { + "epoch": 0.88, + "grad_norm": 4.776054690216075, + "learning_rate": 3.6891174951361905e-07, + "loss": 1.0977, + "step": 10774 + }, + { + "epoch": 0.88, + "grad_norm": 4.445085974684133, + "learning_rate": 3.684128613767063e-07, + "loss": 1.2064, + "step": 10775 + }, + { + "epoch": 0.88, + "grad_norm": 4.76484603760837, + "learning_rate": 3.6791429789389657e-07, + "loss": 0.5601, + "step": 10776 + }, + { + "epoch": 0.88, + "grad_norm": 2.996172031233409, + "learning_rate": 3.674160591001347e-07, + "loss": 0.5389, + "step": 10777 + }, + { + "epoch": 0.88, + "grad_norm": 4.919193116332945, + "learning_rate": 3.6691814503034607e-07, + "loss": 0.8366, + "step": 10778 + }, + { + "epoch": 0.88, + "grad_norm": 2.3764859737831223, + "learning_rate": 3.664205557194322e-07, + "loss": 0.5064, + "step": 10779 + }, + { + "epoch": 0.88, + "grad_norm": 3.426508144399529, + "learning_rate": 3.6592329120227254e-07, + "loss": 0.7038, + "step": 10780 + }, + { + "epoch": 0.88, + "grad_norm": 4.179334859223737, + "learning_rate": 3.654263515137224e-07, + "loss": 0.7756, + "step": 10781 + }, + { + "epoch": 0.88, + "grad_norm": 2.932650197611428, + "learning_rate": 3.649297366886145e-07, + "loss": 0.5344, + "step": 10782 + }, + { + "epoch": 0.88, + "grad_norm": 3.2992859885900576, + "learning_rate": 3.644334467617605e-07, + "loss": 0.4674, + "step": 10783 + }, + { + "epoch": 0.88, + "grad_norm": 3.9900866233744483, + "learning_rate": 3.6393748176794806e-07, + "loss": 0.8017, + "step": 10784 + }, + { + "epoch": 0.88, + "grad_norm": 3.4378118306854746, + "learning_rate": 3.6344184174194166e-07, + "loss": 0.5481, + "step": 10785 + }, + { + "epoch": 0.88, + "grad_norm": 4.747101774626812, + "learning_rate": 3.6294652671848506e-07, + "loss": 0.7064, + "step": 10786 + }, + { + "epoch": 0.88, + "grad_norm": 3.2071706917138627, + "learning_rate": 3.6245153673229506e-07, + "loss": 0.473, + "step": 10787 + }, + { + "epoch": 0.88, + "grad_norm": 4.71469652859469, + "learning_rate": 3.6195687181806995e-07, + "loss": 0.8903, + "step": 10788 + }, + { + "epoch": 0.88, + "grad_norm": 3.7849053291905626, + "learning_rate": 3.614625320104831e-07, + "loss": 0.6381, + "step": 10789 + }, + { + "epoch": 0.88, + "grad_norm": 4.502042212694051, + "learning_rate": 3.609685173441868e-07, + "loss": 0.8736, + "step": 10790 + }, + { + "epoch": 0.88, + "grad_norm": 4.8469244147375665, + "learning_rate": 3.604748278538073e-07, + "loss": 0.7559, + "step": 10791 + }, + { + "epoch": 0.88, + "grad_norm": 3.93578626328786, + "learning_rate": 3.599814635739518e-07, + "loss": 0.6431, + "step": 10792 + }, + { + "epoch": 0.88, + "grad_norm": 4.71900741012042, + "learning_rate": 3.5948842453920164e-07, + "loss": 0.9382, + "step": 10793 + }, + { + "epoch": 0.88, + "grad_norm": 2.776179272353354, + "learning_rate": 3.5899571078411743e-07, + "loss": 0.3649, + "step": 10794 + }, + { + "epoch": 0.88, + "grad_norm": 3.3194053237425853, + "learning_rate": 3.5850332234323604e-07, + "loss": 0.7113, + "step": 10795 + }, + { + "epoch": 0.88, + "grad_norm": 2.0038277251839287, + "learning_rate": 3.580112592510715e-07, + "loss": 0.3154, + "step": 10796 + }, + { + "epoch": 0.88, + "grad_norm": 3.146379388211302, + "learning_rate": 3.5751952154211734e-07, + "loss": 0.5958, + "step": 10797 + }, + { + "epoch": 0.88, + "grad_norm": 3.392956378574398, + "learning_rate": 3.570281092508393e-07, + "loss": 0.4243, + "step": 10798 + }, + { + "epoch": 0.88, + "grad_norm": 4.6560757309435115, + "learning_rate": 3.565370224116843e-07, + "loss": 0.83, + "step": 10799 + }, + { + "epoch": 0.88, + "grad_norm": 5.617587980117154, + "learning_rate": 3.560462610590759e-07, + "loss": 1.4945, + "step": 10800 + }, + { + "epoch": 0.88, + "grad_norm": 5.719625405410017, + "learning_rate": 3.555558252274144e-07, + "loss": 1.2418, + "step": 10801 + }, + { + "epoch": 0.88, + "grad_norm": 4.920088732077162, + "learning_rate": 3.550657149510761e-07, + "loss": 0.9742, + "step": 10802 + }, + { + "epoch": 0.88, + "grad_norm": 2.4276505680763347, + "learning_rate": 3.545759302644175e-07, + "loss": 0.3711, + "step": 10803 + }, + { + "epoch": 0.88, + "grad_norm": 4.042615404497343, + "learning_rate": 3.540864712017689e-07, + "loss": 0.6435, + "step": 10804 + }, + { + "epoch": 0.88, + "grad_norm": 5.142617960583135, + "learning_rate": 3.5359733779743887e-07, + "loss": 1.032, + "step": 10805 + }, + { + "epoch": 0.88, + "grad_norm": 5.206168299136099, + "learning_rate": 3.531085300857151e-07, + "loss": 1.0329, + "step": 10806 + }, + { + "epoch": 0.88, + "grad_norm": 4.689641946745283, + "learning_rate": 3.526200481008596e-07, + "loss": 1.1883, + "step": 10807 + }, + { + "epoch": 0.88, + "grad_norm": 4.51102607546849, + "learning_rate": 3.5213189187711383e-07, + "loss": 0.5884, + "step": 10808 + }, + { + "epoch": 0.88, + "grad_norm": 5.326322472854147, + "learning_rate": 3.516440614486943e-07, + "loss": 1.3112, + "step": 10809 + }, + { + "epoch": 0.88, + "grad_norm": 4.142139814049927, + "learning_rate": 3.5115655684979653e-07, + "loss": 0.6651, + "step": 10810 + }, + { + "epoch": 0.88, + "grad_norm": 3.5554431087528937, + "learning_rate": 3.50669378114592e-07, + "loss": 0.8697, + "step": 10811 + }, + { + "epoch": 0.88, + "grad_norm": 4.002869827342025, + "learning_rate": 3.5018252527723005e-07, + "loss": 0.7896, + "step": 10812 + }, + { + "epoch": 0.88, + "grad_norm": 5.445687922168761, + "learning_rate": 3.4969599837183677e-07, + "loss": 1.2917, + "step": 10813 + }, + { + "epoch": 0.88, + "grad_norm": 5.044010478873331, + "learning_rate": 3.4920979743251704e-07, + "loss": 0.9237, + "step": 10814 + }, + { + "epoch": 0.88, + "grad_norm": 3.9503255148776963, + "learning_rate": 3.487239224933492e-07, + "loss": 0.5253, + "step": 10815 + }, + { + "epoch": 0.88, + "grad_norm": 3.5457223215662785, + "learning_rate": 3.482383735883921e-07, + "loss": 0.8796, + "step": 10816 + }, + { + "epoch": 0.88, + "grad_norm": 5.289237360154442, + "learning_rate": 3.4775315075168014e-07, + "loss": 0.9533, + "step": 10817 + }, + { + "epoch": 0.88, + "grad_norm": 3.7949480360010073, + "learning_rate": 3.472682540172262e-07, + "loss": 0.8025, + "step": 10818 + }, + { + "epoch": 0.88, + "grad_norm": 2.30689665215385, + "learning_rate": 3.467836834190186e-07, + "loss": 0.484, + "step": 10819 + }, + { + "epoch": 0.88, + "grad_norm": 3.0670269504963104, + "learning_rate": 3.462994389910246e-07, + "loss": 0.7331, + "step": 10820 + }, + { + "epoch": 0.88, + "grad_norm": 4.098845678264885, + "learning_rate": 3.4581552076718597e-07, + "loss": 1.295, + "step": 10821 + }, + { + "epoch": 0.88, + "grad_norm": 4.553248193694052, + "learning_rate": 3.453319287814255e-07, + "loss": 0.7411, + "step": 10822 + }, + { + "epoch": 0.88, + "grad_norm": 4.688140717897219, + "learning_rate": 3.4484866306763896e-07, + "loss": 1.076, + "step": 10823 + }, + { + "epoch": 0.88, + "grad_norm": 3.3193966087256697, + "learning_rate": 3.4436572365970145e-07, + "loss": 0.4711, + "step": 10824 + }, + { + "epoch": 0.88, + "grad_norm": 3.2865368687166274, + "learning_rate": 3.438831105914653e-07, + "loss": 0.516, + "step": 10825 + }, + { + "epoch": 0.88, + "grad_norm": 2.2698605560686738, + "learning_rate": 3.4340082389676065e-07, + "loss": 0.3486, + "step": 10826 + }, + { + "epoch": 0.88, + "grad_norm": 4.251032302544349, + "learning_rate": 3.429188636093922e-07, + "loss": 0.4758, + "step": 10827 + }, + { + "epoch": 0.89, + "grad_norm": 5.176776415080748, + "learning_rate": 3.4243722976314285e-07, + "loss": 0.9426, + "step": 10828 + }, + { + "epoch": 0.89, + "grad_norm": 3.8686624142305717, + "learning_rate": 3.4195592239177455e-07, + "loss": 0.5923, + "step": 10829 + }, + { + "epoch": 0.89, + "grad_norm": 3.897109402196155, + "learning_rate": 3.4147494152902414e-07, + "loss": 0.8303, + "step": 10830 + }, + { + "epoch": 0.89, + "grad_norm": 2.5272091603729154, + "learning_rate": 3.4099428720860693e-07, + "loss": 0.4118, + "step": 10831 + }, + { + "epoch": 0.89, + "grad_norm": 4.1253640658859645, + "learning_rate": 3.4051395946421374e-07, + "loss": 0.4998, + "step": 10832 + }, + { + "epoch": 0.89, + "grad_norm": 6.410903311671439, + "learning_rate": 3.4003395832951315e-07, + "loss": 1.0841, + "step": 10833 + }, + { + "epoch": 0.89, + "grad_norm": 4.405568517002049, + "learning_rate": 3.3955428383815267e-07, + "loss": 0.825, + "step": 10834 + }, + { + "epoch": 0.89, + "grad_norm": 5.344647787528466, + "learning_rate": 3.3907493602375386e-07, + "loss": 0.6518, + "step": 10835 + }, + { + "epoch": 0.89, + "grad_norm": 4.571118190302611, + "learning_rate": 3.385959149199186e-07, + "loss": 0.6786, + "step": 10836 + }, + { + "epoch": 0.89, + "grad_norm": 4.6834737837192835, + "learning_rate": 3.3811722056022287e-07, + "loss": 0.8869, + "step": 10837 + }, + { + "epoch": 0.89, + "grad_norm": 6.3061940341381275, + "learning_rate": 3.3763885297822153e-07, + "loss": 1.5261, + "step": 10838 + }, + { + "epoch": 0.89, + "grad_norm": 3.5161441151787303, + "learning_rate": 3.371608122074455e-07, + "loss": 0.5645, + "step": 10839 + }, + { + "epoch": 0.89, + "grad_norm": 6.081472187373712, + "learning_rate": 3.36683098281404e-07, + "loss": 1.2119, + "step": 10840 + }, + { + "epoch": 0.89, + "grad_norm": 5.171022016722354, + "learning_rate": 3.36205711233582e-07, + "loss": 1.1501, + "step": 10841 + }, + { + "epoch": 0.89, + "grad_norm": 3.322989050379168, + "learning_rate": 3.3572865109744334e-07, + "loss": 0.767, + "step": 10842 + }, + { + "epoch": 0.89, + "grad_norm": 3.025603277700914, + "learning_rate": 3.3525191790642733e-07, + "loss": 0.5912, + "step": 10843 + }, + { + "epoch": 0.89, + "grad_norm": 2.5155571855514176, + "learning_rate": 3.347755116939505e-07, + "loss": 0.573, + "step": 10844 + }, + { + "epoch": 0.89, + "grad_norm": 3.9386049896185673, + "learning_rate": 3.342994324934068e-07, + "loss": 0.5738, + "step": 10845 + }, + { + "epoch": 0.89, + "grad_norm": 5.938087478700539, + "learning_rate": 3.338236803381684e-07, + "loss": 1.1193, + "step": 10846 + }, + { + "epoch": 0.89, + "grad_norm": 4.832406399777737, + "learning_rate": 3.3334825526158185e-07, + "loss": 1.208, + "step": 10847 + }, + { + "epoch": 0.89, + "grad_norm": 3.795716617602275, + "learning_rate": 3.328731572969746e-07, + "loss": 0.4324, + "step": 10848 + }, + { + "epoch": 0.89, + "grad_norm": 2.7713239105931833, + "learning_rate": 3.32398386477647e-07, + "loss": 0.4664, + "step": 10849 + }, + { + "epoch": 0.89, + "grad_norm": 4.978762454868344, + "learning_rate": 3.319239428368787e-07, + "loss": 1.4731, + "step": 10850 + }, + { + "epoch": 0.89, + "grad_norm": 4.066659139746784, + "learning_rate": 3.3144982640792633e-07, + "loss": 0.7836, + "step": 10851 + }, + { + "epoch": 0.89, + "grad_norm": 3.594254731150115, + "learning_rate": 3.309760372240245e-07, + "loss": 0.6424, + "step": 10852 + }, + { + "epoch": 0.89, + "grad_norm": 4.458815645513699, + "learning_rate": 3.3050257531838213e-07, + "loss": 0.659, + "step": 10853 + }, + { + "epoch": 0.89, + "grad_norm": 2.606011251269728, + "learning_rate": 3.300294407241883e-07, + "loss": 0.3979, + "step": 10854 + }, + { + "epoch": 0.89, + "grad_norm": 2.3601353790940625, + "learning_rate": 3.2955663347460586e-07, + "loss": 0.2911, + "step": 10855 + }, + { + "epoch": 0.89, + "grad_norm": 3.2947077288587123, + "learning_rate": 3.2908415360277777e-07, + "loss": 0.7556, + "step": 10856 + }, + { + "epoch": 0.89, + "grad_norm": 2.6814920758815863, + "learning_rate": 3.2861200114182257e-07, + "loss": 0.5605, + "step": 10857 + }, + { + "epoch": 0.89, + "grad_norm": 4.588671370200977, + "learning_rate": 3.2814017612483596e-07, + "loss": 0.9264, + "step": 10858 + }, + { + "epoch": 0.89, + "grad_norm": 5.818531071367023, + "learning_rate": 3.276686785848915e-07, + "loss": 1.2616, + "step": 10859 + }, + { + "epoch": 0.89, + "grad_norm": 3.513390776495794, + "learning_rate": 3.2719750855503886e-07, + "loss": 0.7851, + "step": 10860 + }, + { + "epoch": 0.89, + "grad_norm": 4.164370528259902, + "learning_rate": 3.267266660683044e-07, + "loss": 0.4353, + "step": 10861 + }, + { + "epoch": 0.89, + "grad_norm": 2.7186871412994567, + "learning_rate": 3.2625615115769225e-07, + "loss": 0.2845, + "step": 10862 + }, + { + "epoch": 0.89, + "grad_norm": 3.2796635993385106, + "learning_rate": 3.257859638561839e-07, + "loss": 0.4609, + "step": 10863 + }, + { + "epoch": 0.89, + "grad_norm": 4.077730283617591, + "learning_rate": 3.2531610419673675e-07, + "loss": 0.4293, + "step": 10864 + }, + { + "epoch": 0.89, + "grad_norm": 4.234160766755855, + "learning_rate": 3.248465722122868e-07, + "loss": 0.3752, + "step": 10865 + }, + { + "epoch": 0.89, + "grad_norm": 4.300505913411803, + "learning_rate": 3.24377367935747e-07, + "loss": 0.7588, + "step": 10866 + }, + { + "epoch": 0.89, + "grad_norm": 5.388942913410447, + "learning_rate": 3.2390849140000403e-07, + "loss": 0.8793, + "step": 10867 + }, + { + "epoch": 0.89, + "grad_norm": 3.0979214837702216, + "learning_rate": 3.2343994263792586e-07, + "loss": 0.5667, + "step": 10868 + }, + { + "epoch": 0.89, + "grad_norm": 4.97473027058986, + "learning_rate": 3.229717216823552e-07, + "loss": 1.0228, + "step": 10869 + }, + { + "epoch": 0.89, + "grad_norm": 4.864386867807811, + "learning_rate": 3.2250382856611193e-07, + "loss": 0.6682, + "step": 10870 + }, + { + "epoch": 0.89, + "grad_norm": 3.2722788925574453, + "learning_rate": 3.220362633219948e-07, + "loss": 0.5731, + "step": 10871 + }, + { + "epoch": 0.89, + "grad_norm": 5.379496879342514, + "learning_rate": 3.2156902598277585e-07, + "loss": 0.8015, + "step": 10872 + }, + { + "epoch": 0.89, + "grad_norm": 3.994419351663586, + "learning_rate": 3.2110211658120784e-07, + "loss": 0.6576, + "step": 10873 + }, + { + "epoch": 0.89, + "grad_norm": 3.780488642815016, + "learning_rate": 3.206355351500184e-07, + "loss": 0.6979, + "step": 10874 + }, + { + "epoch": 0.89, + "grad_norm": 4.419326211069094, + "learning_rate": 3.2016928172191377e-07, + "loss": 0.9318, + "step": 10875 + }, + { + "epoch": 0.89, + "grad_norm": 2.9204127882648647, + "learning_rate": 3.1970335632957595e-07, + "loss": 0.4459, + "step": 10876 + }, + { + "epoch": 0.89, + "grad_norm": 4.156120085557064, + "learning_rate": 3.1923775900566444e-07, + "loss": 0.6487, + "step": 10877 + }, + { + "epoch": 0.89, + "grad_norm": 2.2345160732754623, + "learning_rate": 3.1877248978281484e-07, + "loss": 0.5941, + "step": 10878 + }, + { + "epoch": 0.89, + "grad_norm": 2.4191464683206716, + "learning_rate": 3.18307548693641e-07, + "loss": 0.3887, + "step": 10879 + }, + { + "epoch": 0.89, + "grad_norm": 4.448224384755552, + "learning_rate": 3.17842935770733e-07, + "loss": 0.6698, + "step": 10880 + }, + { + "epoch": 0.89, + "grad_norm": 3.715905562647058, + "learning_rate": 3.173786510466581e-07, + "loss": 0.5083, + "step": 10881 + }, + { + "epoch": 0.89, + "grad_norm": 2.933740117090663, + "learning_rate": 3.1691469455396196e-07, + "loss": 0.5269, + "step": 10882 + }, + { + "epoch": 0.89, + "grad_norm": 3.060932225469689, + "learning_rate": 3.164510663251641e-07, + "loss": 0.5333, + "step": 10883 + }, + { + "epoch": 0.89, + "grad_norm": 5.980209462101881, + "learning_rate": 3.159877663927635e-07, + "loss": 1.1751, + "step": 10884 + }, + { + "epoch": 0.89, + "grad_norm": 2.362802845681076, + "learning_rate": 3.15524794789237e-07, + "loss": 0.3487, + "step": 10885 + }, + { + "epoch": 0.89, + "grad_norm": 3.142789023875797, + "learning_rate": 3.1506215154703424e-07, + "loss": 0.5471, + "step": 10886 + }, + { + "epoch": 0.89, + "grad_norm": 3.8573988934118852, + "learning_rate": 3.145998366985853e-07, + "loss": 0.7298, + "step": 10887 + }, + { + "epoch": 0.89, + "grad_norm": 5.515108176966938, + "learning_rate": 3.141378502762982e-07, + "loss": 1.0164, + "step": 10888 + }, + { + "epoch": 0.89, + "grad_norm": 1.8633670065312347, + "learning_rate": 3.136761923125542e-07, + "loss": 0.3277, + "step": 10889 + }, + { + "epoch": 0.89, + "grad_norm": 3.194830584266059, + "learning_rate": 3.1321486283971357e-07, + "loss": 0.4989, + "step": 10890 + }, + { + "epoch": 0.89, + "grad_norm": 3.6276186717088548, + "learning_rate": 3.127538618901144e-07, + "loss": 0.5732, + "step": 10891 + }, + { + "epoch": 0.89, + "grad_norm": 5.570089495193147, + "learning_rate": 3.122931894960707e-07, + "loss": 0.8861, + "step": 10892 + }, + { + "epoch": 0.89, + "grad_norm": 5.0135959179798055, + "learning_rate": 3.118328456898734e-07, + "loss": 0.9223, + "step": 10893 + }, + { + "epoch": 0.89, + "grad_norm": 5.141074146576756, + "learning_rate": 3.1137283050379165e-07, + "loss": 0.8711, + "step": 10894 + }, + { + "epoch": 0.89, + "grad_norm": 4.582126875808922, + "learning_rate": 3.10913143970068e-07, + "loss": 0.4177, + "step": 10895 + }, + { + "epoch": 0.89, + "grad_norm": 5.718422207878949, + "learning_rate": 3.104537861209267e-07, + "loss": 1.2523, + "step": 10896 + }, + { + "epoch": 0.89, + "grad_norm": 6.4516793569985955, + "learning_rate": 3.0999475698856583e-07, + "loss": 1.3102, + "step": 10897 + }, + { + "epoch": 0.89, + "grad_norm": 3.7206114005265314, + "learning_rate": 3.095360566051614e-07, + "loss": 0.5341, + "step": 10898 + }, + { + "epoch": 0.89, + "grad_norm": 4.436185251661134, + "learning_rate": 3.090776850028671e-07, + "loss": 0.7484, + "step": 10899 + }, + { + "epoch": 0.89, + "grad_norm": 6.066747356792933, + "learning_rate": 3.086196422138116e-07, + "loss": 1.1466, + "step": 10900 + }, + { + "epoch": 0.89, + "grad_norm": 5.889368480857732, + "learning_rate": 3.0816192827010317e-07, + "loss": 0.9629, + "step": 10901 + }, + { + "epoch": 0.89, + "grad_norm": 3.6207016500856315, + "learning_rate": 3.077045432038234e-07, + "loss": 0.4401, + "step": 10902 + }, + { + "epoch": 0.89, + "grad_norm": 5.332940686131082, + "learning_rate": 3.0724748704703435e-07, + "loss": 1.1868, + "step": 10903 + }, + { + "epoch": 0.89, + "grad_norm": 5.058409923124676, + "learning_rate": 3.0679075983177376e-07, + "loss": 0.9681, + "step": 10904 + }, + { + "epoch": 0.89, + "grad_norm": 3.279635932894341, + "learning_rate": 3.063343615900555e-07, + "loss": 0.5421, + "step": 10905 + }, + { + "epoch": 0.89, + "grad_norm": 4.102073653497036, + "learning_rate": 3.0587829235387277e-07, + "loss": 0.3761, + "step": 10906 + }, + { + "epoch": 0.89, + "grad_norm": 3.8061269220006597, + "learning_rate": 3.0542255215519177e-07, + "loss": 0.7479, + "step": 10907 + }, + { + "epoch": 0.89, + "grad_norm": 4.31739768655144, + "learning_rate": 3.0496714102595914e-07, + "loss": 1.0301, + "step": 10908 + }, + { + "epoch": 0.89, + "grad_norm": 3.5912994301111514, + "learning_rate": 3.0451205899809764e-07, + "loss": 0.5204, + "step": 10909 + }, + { + "epoch": 0.89, + "grad_norm": 4.0923652953300005, + "learning_rate": 3.0405730610350516e-07, + "loss": 0.88, + "step": 10910 + }, + { + "epoch": 0.89, + "grad_norm": 3.2713263066201983, + "learning_rate": 3.0360288237406e-07, + "loss": 0.6832, + "step": 10911 + }, + { + "epoch": 0.89, + "grad_norm": 4.892186852611783, + "learning_rate": 3.0314878784161284e-07, + "loss": 0.8156, + "step": 10912 + }, + { + "epoch": 0.89, + "grad_norm": 4.36463854352164, + "learning_rate": 3.0269502253799485e-07, + "loss": 0.7741, + "step": 10913 + }, + { + "epoch": 0.89, + "grad_norm": 2.5927460345059146, + "learning_rate": 3.0224158649501343e-07, + "loss": 0.5198, + "step": 10914 + }, + { + "epoch": 0.89, + "grad_norm": 2.516528093229124, + "learning_rate": 3.017884797444526e-07, + "loss": 0.5627, + "step": 10915 + }, + { + "epoch": 0.89, + "grad_norm": 4.563189913634584, + "learning_rate": 3.013357023180724e-07, + "loss": 0.779, + "step": 10916 + }, + { + "epoch": 0.89, + "grad_norm": 4.590422596413634, + "learning_rate": 3.008832542476109e-07, + "loss": 0.6201, + "step": 10917 + }, + { + "epoch": 0.89, + "grad_norm": 4.860653508579803, + "learning_rate": 3.0043113556478207e-07, + "loss": 1.1325, + "step": 10918 + }, + { + "epoch": 0.89, + "grad_norm": 5.0539682300732185, + "learning_rate": 2.999793463012779e-07, + "loss": 1.1529, + "step": 10919 + }, + { + "epoch": 0.89, + "grad_norm": 5.861370069172823, + "learning_rate": 2.995278864887674e-07, + "loss": 1.1975, + "step": 10920 + }, + { + "epoch": 0.89, + "grad_norm": 4.283954230503348, + "learning_rate": 2.990767561588953e-07, + "loss": 1.0167, + "step": 10921 + }, + { + "epoch": 0.89, + "grad_norm": 5.271556079490615, + "learning_rate": 2.986259553432841e-07, + "loss": 1.0017, + "step": 10922 + }, + { + "epoch": 0.89, + "grad_norm": 1.8648335588731393, + "learning_rate": 2.98175484073534e-07, + "loss": 0.2049, + "step": 10923 + }, + { + "epoch": 0.89, + "grad_norm": 4.5805437442033075, + "learning_rate": 2.977253423812193e-07, + "loss": 0.901, + "step": 10924 + }, + { + "epoch": 0.89, + "grad_norm": 2.8726092892928534, + "learning_rate": 2.9727553029789303e-07, + "loss": 0.5756, + "step": 10925 + }, + { + "epoch": 0.89, + "grad_norm": 4.73696508058446, + "learning_rate": 2.9682604785508664e-07, + "loss": 0.6246, + "step": 10926 + }, + { + "epoch": 0.89, + "grad_norm": 2.6695261519349907, + "learning_rate": 2.963768950843054e-07, + "loss": 0.5638, + "step": 10927 + }, + { + "epoch": 0.89, + "grad_norm": 5.692648401985274, + "learning_rate": 2.9592807201703486e-07, + "loss": 1.2495, + "step": 10928 + }, + { + "epoch": 0.89, + "grad_norm": 4.626581084693792, + "learning_rate": 2.9547957868473307e-07, + "loss": 0.7701, + "step": 10929 + }, + { + "epoch": 0.89, + "grad_norm": 2.8172435855293267, + "learning_rate": 2.9503141511883884e-07, + "loss": 0.5745, + "step": 10930 + }, + { + "epoch": 0.89, + "grad_norm": 2.630192348382033, + "learning_rate": 2.9458358135076693e-07, + "loss": 0.4044, + "step": 10931 + }, + { + "epoch": 0.89, + "grad_norm": 3.808703663511119, + "learning_rate": 2.9413607741190733e-07, + "loss": 0.9562, + "step": 10932 + }, + { + "epoch": 0.89, + "grad_norm": 3.929016503818397, + "learning_rate": 2.936889033336288e-07, + "loss": 0.5762, + "step": 10933 + }, + { + "epoch": 0.89, + "grad_norm": 4.436744038954424, + "learning_rate": 2.9324205914727674e-07, + "loss": 0.8342, + "step": 10934 + }, + { + "epoch": 0.89, + "grad_norm": 4.253131568729143, + "learning_rate": 2.9279554488417186e-07, + "loss": 0.6823, + "step": 10935 + }, + { + "epoch": 0.89, + "grad_norm": 2.066108099578079, + "learning_rate": 2.9234936057561336e-07, + "loss": 0.2603, + "step": 10936 + }, + { + "epoch": 0.89, + "grad_norm": 5.652093687095228, + "learning_rate": 2.919035062528769e-07, + "loss": 0.9055, + "step": 10937 + }, + { + "epoch": 0.89, + "grad_norm": 2.7600104969135906, + "learning_rate": 2.914579819472152e-07, + "loss": 0.4802, + "step": 10938 + }, + { + "epoch": 0.89, + "grad_norm": 2.9176102039855842, + "learning_rate": 2.910127876898572e-07, + "loss": 0.578, + "step": 10939 + }, + { + "epoch": 0.89, + "grad_norm": 3.72597740012647, + "learning_rate": 2.905679235120096e-07, + "loss": 0.8006, + "step": 10940 + }, + { + "epoch": 0.89, + "grad_norm": 4.30238468052772, + "learning_rate": 2.9012338944485463e-07, + "loss": 0.7967, + "step": 10941 + }, + { + "epoch": 0.89, + "grad_norm": 3.69012051220324, + "learning_rate": 2.89679185519553e-07, + "loss": 0.6067, + "step": 10942 + }, + { + "epoch": 0.89, + "grad_norm": 4.539522560333616, + "learning_rate": 2.8923531176724027e-07, + "loss": 0.8178, + "step": 10943 + }, + { + "epoch": 0.89, + "grad_norm": 4.561569584044321, + "learning_rate": 2.887917682190311e-07, + "loss": 1.2437, + "step": 10944 + }, + { + "epoch": 0.89, + "grad_norm": 4.096023672049721, + "learning_rate": 2.883485549060167e-07, + "loss": 0.7392, + "step": 10945 + }, + { + "epoch": 0.89, + "grad_norm": 3.5030239018798355, + "learning_rate": 2.879056718592627e-07, + "loss": 0.4574, + "step": 10946 + }, + { + "epoch": 0.89, + "grad_norm": 4.384378182815733, + "learning_rate": 2.8746311910981485e-07, + "loss": 0.6167, + "step": 10947 + }, + { + "epoch": 0.89, + "grad_norm": 6.413790629940976, + "learning_rate": 2.8702089668869227e-07, + "loss": 1.2176, + "step": 10948 + }, + { + "epoch": 0.89, + "grad_norm": 2.5476452228294972, + "learning_rate": 2.86579004626894e-07, + "loss": 0.4998, + "step": 10949 + }, + { + "epoch": 0.9, + "grad_norm": 5.578922777726011, + "learning_rate": 2.861374429553948e-07, + "loss": 0.8611, + "step": 10950 + }, + { + "epoch": 0.9, + "grad_norm": 4.9133060061104805, + "learning_rate": 2.856962117051465e-07, + "loss": 0.9554, + "step": 10951 + }, + { + "epoch": 0.9, + "grad_norm": 4.709850585262299, + "learning_rate": 2.852553109070766e-07, + "loss": 0.5331, + "step": 10952 + }, + { + "epoch": 0.9, + "grad_norm": 2.8510311396142125, + "learning_rate": 2.8481474059209033e-07, + "loss": 0.5582, + "step": 10953 + }, + { + "epoch": 0.9, + "grad_norm": 4.785827042551584, + "learning_rate": 2.8437450079107034e-07, + "loss": 0.6103, + "step": 10954 + }, + { + "epoch": 0.9, + "grad_norm": 4.681038383589419, + "learning_rate": 2.839345915348757e-07, + "loss": 1.1143, + "step": 10955 + }, + { + "epoch": 0.9, + "grad_norm": 4.7286450723732525, + "learning_rate": 2.8349501285434123e-07, + "loss": 0.9406, + "step": 10956 + }, + { + "epoch": 0.9, + "grad_norm": 2.4851305243777806, + "learning_rate": 2.830557647802812e-07, + "loss": 0.3931, + "step": 10957 + }, + { + "epoch": 0.9, + "grad_norm": 3.76467683896015, + "learning_rate": 2.8261684734348316e-07, + "loss": 0.9277, + "step": 10958 + }, + { + "epoch": 0.9, + "grad_norm": 3.7793588360667427, + "learning_rate": 2.8217826057471423e-07, + "loss": 0.46, + "step": 10959 + }, + { + "epoch": 0.9, + "grad_norm": 2.756079446742792, + "learning_rate": 2.817400045047164e-07, + "loss": 0.5393, + "step": 10960 + }, + { + "epoch": 0.9, + "grad_norm": 2.3470853112431773, + "learning_rate": 2.813020791642118e-07, + "loss": 0.3717, + "step": 10961 + }, + { + "epoch": 0.9, + "grad_norm": 3.240363172485346, + "learning_rate": 2.808644845838943e-07, + "loss": 0.5289, + "step": 10962 + }, + { + "epoch": 0.9, + "grad_norm": 4.777083156136472, + "learning_rate": 2.804272207944397e-07, + "loss": 0.877, + "step": 10963 + }, + { + "epoch": 0.9, + "grad_norm": 6.051156731001556, + "learning_rate": 2.799902878264965e-07, + "loss": 1.5133, + "step": 10964 + }, + { + "epoch": 0.9, + "grad_norm": 4.240849695462603, + "learning_rate": 2.7955368571069284e-07, + "loss": 0.2849, + "step": 10965 + }, + { + "epoch": 0.9, + "grad_norm": 5.070898820430699, + "learning_rate": 2.791174144776321e-07, + "loss": 0.8403, + "step": 10966 + }, + { + "epoch": 0.9, + "grad_norm": 2.889092870804847, + "learning_rate": 2.7868147415789526e-07, + "loss": 0.5334, + "step": 10967 + }, + { + "epoch": 0.9, + "grad_norm": 2.791923339154662, + "learning_rate": 2.782458647820407e-07, + "loss": 0.4653, + "step": 10968 + }, + { + "epoch": 0.9, + "grad_norm": 4.88705712106383, + "learning_rate": 2.778105863806013e-07, + "loss": 1.1669, + "step": 10969 + }, + { + "epoch": 0.9, + "grad_norm": 2.831371608629582, + "learning_rate": 2.7737563898408814e-07, + "loss": 0.3367, + "step": 10970 + }, + { + "epoch": 0.9, + "grad_norm": 6.111330519551319, + "learning_rate": 2.769410226229902e-07, + "loss": 0.879, + "step": 10971 + }, + { + "epoch": 0.9, + "grad_norm": 2.635508115964347, + "learning_rate": 2.765067373277719e-07, + "loss": 0.2942, + "step": 10972 + }, + { + "epoch": 0.9, + "grad_norm": 2.9739200019422767, + "learning_rate": 2.760727831288745e-07, + "loss": 0.3165, + "step": 10973 + }, + { + "epoch": 0.9, + "grad_norm": 4.514726040277123, + "learning_rate": 2.756391600567171e-07, + "loss": 0.9741, + "step": 10974 + }, + { + "epoch": 0.9, + "grad_norm": 4.2116695143322636, + "learning_rate": 2.7520586814169303e-07, + "loss": 0.5593, + "step": 10975 + }, + { + "epoch": 0.9, + "grad_norm": 5.753408779265016, + "learning_rate": 2.7477290741417526e-07, + "loss": 1.1868, + "step": 10976 + }, + { + "epoch": 0.9, + "grad_norm": 3.6749590100957485, + "learning_rate": 2.7434027790451346e-07, + "loss": 0.6321, + "step": 10977 + }, + { + "epoch": 0.9, + "grad_norm": 3.9872602389265652, + "learning_rate": 2.739079796430316e-07, + "loss": 0.6642, + "step": 10978 + }, + { + "epoch": 0.9, + "grad_norm": 4.466734413380715, + "learning_rate": 2.7347601266003165e-07, + "loss": 0.8289, + "step": 10979 + }, + { + "epoch": 0.9, + "grad_norm": 4.257607083269077, + "learning_rate": 2.730443769857943e-07, + "loss": 0.6408, + "step": 10980 + }, + { + "epoch": 0.9, + "grad_norm": 2.639939952215677, + "learning_rate": 2.726130726505738e-07, + "loss": 0.3134, + "step": 10981 + }, + { + "epoch": 0.9, + "grad_norm": 4.701429366481553, + "learning_rate": 2.721820996846031e-07, + "loss": 0.5872, + "step": 10982 + }, + { + "epoch": 0.9, + "grad_norm": 3.505066897522115, + "learning_rate": 2.717514581180919e-07, + "loss": 0.8732, + "step": 10983 + }, + { + "epoch": 0.9, + "grad_norm": 3.8617198443788148, + "learning_rate": 2.7132114798122557e-07, + "loss": 0.8892, + "step": 10984 + }, + { + "epoch": 0.9, + "grad_norm": 4.358700844128491, + "learning_rate": 2.708911693041683e-07, + "loss": 1.0066, + "step": 10985 + }, + { + "epoch": 0.9, + "grad_norm": 3.923681167525303, + "learning_rate": 2.7046152211705865e-07, + "loss": 0.5607, + "step": 10986 + }, + { + "epoch": 0.9, + "grad_norm": 3.404828609512687, + "learning_rate": 2.7003220645001325e-07, + "loss": 0.482, + "step": 10987 + }, + { + "epoch": 0.9, + "grad_norm": 4.090455246821734, + "learning_rate": 2.696032223331252e-07, + "loss": 0.5434, + "step": 10988 + }, + { + "epoch": 0.9, + "grad_norm": 2.77128135135535, + "learning_rate": 2.6917456979646426e-07, + "loss": 0.2923, + "step": 10989 + }, + { + "epoch": 0.9, + "grad_norm": 5.6209549505587475, + "learning_rate": 2.687462488700776e-07, + "loss": 1.2092, + "step": 10990 + }, + { + "epoch": 0.9, + "grad_norm": 3.6326542842188356, + "learning_rate": 2.683182595839889e-07, + "loss": 0.7548, + "step": 10991 + }, + { + "epoch": 0.9, + "grad_norm": 3.3388253743696557, + "learning_rate": 2.6789060196819705e-07, + "loss": 0.7041, + "step": 10992 + }, + { + "epoch": 0.9, + "grad_norm": 4.280074955140449, + "learning_rate": 2.6746327605268017e-07, + "loss": 0.769, + "step": 10993 + }, + { + "epoch": 0.9, + "grad_norm": 4.300141434768321, + "learning_rate": 2.670362818673922e-07, + "loss": 0.8973, + "step": 10994 + }, + { + "epoch": 0.9, + "grad_norm": 2.038387138665519, + "learning_rate": 2.666096194422624e-07, + "loss": 0.2636, + "step": 10995 + }, + { + "epoch": 0.9, + "grad_norm": 2.988886270242992, + "learning_rate": 2.6618328880719803e-07, + "loss": 0.6738, + "step": 10996 + }, + { + "epoch": 0.9, + "grad_norm": 4.710406907947948, + "learning_rate": 2.6575728999208404e-07, + "loss": 0.8795, + "step": 10997 + }, + { + "epoch": 0.9, + "grad_norm": 5.047388030931536, + "learning_rate": 2.653316230267805e-07, + "loss": 0.933, + "step": 10998 + }, + { + "epoch": 0.9, + "grad_norm": 4.702221769066459, + "learning_rate": 2.649062879411246e-07, + "loss": 0.8378, + "step": 10999 + }, + { + "epoch": 0.9, + "grad_norm": 4.787486009716643, + "learning_rate": 2.644812847649303e-07, + "loss": 0.5252, + "step": 11000 + }, + { + "epoch": 0.9, + "grad_norm": 1.4915548159213152, + "learning_rate": 2.640566135279893e-07, + "loss": 0.126, + "step": 11001 + }, + { + "epoch": 0.9, + "grad_norm": 4.430806272111197, + "learning_rate": 2.636322742600689e-07, + "loss": 0.9382, + "step": 11002 + }, + { + "epoch": 0.9, + "grad_norm": 3.450059826246382, + "learning_rate": 2.632082669909136e-07, + "loss": 0.541, + "step": 11003 + }, + { + "epoch": 0.9, + "grad_norm": 1.9120320385365477, + "learning_rate": 2.627845917502442e-07, + "loss": 0.3607, + "step": 11004 + }, + { + "epoch": 0.9, + "grad_norm": 4.437954820256922, + "learning_rate": 2.6236124856775793e-07, + "loss": 0.7392, + "step": 11005 + }, + { + "epoch": 0.9, + "grad_norm": 4.016078629052368, + "learning_rate": 2.6193823747313e-07, + "loss": 0.9399, + "step": 11006 + }, + { + "epoch": 0.9, + "grad_norm": 2.8336758820553998, + "learning_rate": 2.6151555849601107e-07, + "loss": 0.5607, + "step": 11007 + }, + { + "epoch": 0.9, + "grad_norm": 4.533114744036621, + "learning_rate": 2.6109321166603087e-07, + "loss": 0.9896, + "step": 11008 + }, + { + "epoch": 0.9, + "grad_norm": 4.655681551168729, + "learning_rate": 2.6067119701279175e-07, + "loss": 0.8572, + "step": 11009 + }, + { + "epoch": 0.9, + "grad_norm": 4.83599426448416, + "learning_rate": 2.6024951456587677e-07, + "loss": 0.9201, + "step": 11010 + }, + { + "epoch": 0.9, + "grad_norm": 2.348684784011196, + "learning_rate": 2.5982816435484283e-07, + "loss": 0.3516, + "step": 11011 + }, + { + "epoch": 0.9, + "grad_norm": 1.9607888032500733, + "learning_rate": 2.5940714640922516e-07, + "loss": 0.2168, + "step": 11012 + }, + { + "epoch": 0.9, + "grad_norm": 3.1593505187596693, + "learning_rate": 2.5898646075853573e-07, + "loss": 0.6291, + "step": 11013 + }, + { + "epoch": 0.9, + "grad_norm": 3.7827509657674843, + "learning_rate": 2.5856610743226265e-07, + "loss": 0.4637, + "step": 11014 + }, + { + "epoch": 0.9, + "grad_norm": 5.249129440152548, + "learning_rate": 2.5814608645987e-07, + "loss": 1.2756, + "step": 11015 + }, + { + "epoch": 0.9, + "grad_norm": 4.826583744501296, + "learning_rate": 2.5772639787080056e-07, + "loss": 1.0892, + "step": 11016 + }, + { + "epoch": 0.9, + "grad_norm": 3.078319979925435, + "learning_rate": 2.5730704169447176e-07, + "loss": 0.9656, + "step": 11017 + }, + { + "epoch": 0.9, + "grad_norm": 3.0615261040398014, + "learning_rate": 2.5688801796027895e-07, + "loss": 0.4221, + "step": 11018 + }, + { + "epoch": 0.9, + "grad_norm": 3.3212155015481817, + "learning_rate": 2.5646932669759427e-07, + "loss": 0.3062, + "step": 11019 + }, + { + "epoch": 0.9, + "grad_norm": 3.790010764557456, + "learning_rate": 2.5605096793576646e-07, + "loss": 0.4332, + "step": 11020 + }, + { + "epoch": 0.9, + "grad_norm": 3.15283502705698, + "learning_rate": 2.556329417041192e-07, + "loss": 0.5554, + "step": 11021 + }, + { + "epoch": 0.9, + "grad_norm": 5.284318512087921, + "learning_rate": 2.552152480319553e-07, + "loss": 0.6837, + "step": 11022 + }, + { + "epoch": 0.9, + "grad_norm": 3.454615123520454, + "learning_rate": 2.5479788694855343e-07, + "loss": 0.7399, + "step": 11023 + }, + { + "epoch": 0.9, + "grad_norm": 5.495421924009523, + "learning_rate": 2.5438085848316916e-07, + "loss": 0.8502, + "step": 11024 + }, + { + "epoch": 0.9, + "grad_norm": 4.745216338272797, + "learning_rate": 2.5396416266503245e-07, + "loss": 1.3025, + "step": 11025 + }, + { + "epoch": 0.9, + "grad_norm": 1.4879481153781136, + "learning_rate": 2.535477995233543e-07, + "loss": 0.1693, + "step": 11026 + }, + { + "epoch": 0.9, + "grad_norm": 3.3729847640325024, + "learning_rate": 2.531317690873181e-07, + "loss": 0.889, + "step": 11027 + }, + { + "epoch": 0.9, + "grad_norm": 4.671859726369521, + "learning_rate": 2.52716071386086e-07, + "loss": 1.1565, + "step": 11028 + }, + { + "epoch": 0.9, + "grad_norm": 2.979680538540261, + "learning_rate": 2.5230070644879757e-07, + "loss": 0.5938, + "step": 11029 + }, + { + "epoch": 0.9, + "grad_norm": 5.360130147685159, + "learning_rate": 2.518856743045672e-07, + "loss": 0.8711, + "step": 11030 + }, + { + "epoch": 0.9, + "grad_norm": 5.407689265554586, + "learning_rate": 2.514709749824884e-07, + "loss": 1.0209, + "step": 11031 + }, + { + "epoch": 0.9, + "grad_norm": 4.421968303096306, + "learning_rate": 2.510566085116273e-07, + "loss": 0.5024, + "step": 11032 + }, + { + "epoch": 0.9, + "grad_norm": 1.1873366866216424, + "learning_rate": 2.5064257492103064e-07, + "loss": 0.146, + "step": 11033 + }, + { + "epoch": 0.9, + "grad_norm": 4.834142366561029, + "learning_rate": 2.502288742397202e-07, + "loss": 1.1664, + "step": 11034 + }, + { + "epoch": 0.9, + "grad_norm": 1.3021514892264503, + "learning_rate": 2.4981550649669504e-07, + "loss": 0.1722, + "step": 11035 + }, + { + "epoch": 0.9, + "grad_norm": 2.065157901732676, + "learning_rate": 2.4940247172092924e-07, + "loss": 0.3421, + "step": 11036 + }, + { + "epoch": 0.9, + "grad_norm": 3.163428169715771, + "learning_rate": 2.489897699413768e-07, + "loss": 0.4166, + "step": 11037 + }, + { + "epoch": 0.9, + "grad_norm": 3.437325471175225, + "learning_rate": 2.4857740118696406e-07, + "loss": 0.3751, + "step": 11038 + }, + { + "epoch": 0.9, + "grad_norm": 4.199057988048138, + "learning_rate": 2.481653654865973e-07, + "loss": 0.713, + "step": 11039 + }, + { + "epoch": 0.9, + "grad_norm": 3.619639946601736, + "learning_rate": 2.47753662869159e-07, + "loss": 0.5802, + "step": 11040 + }, + { + "epoch": 0.9, + "grad_norm": 3.1151249195378137, + "learning_rate": 2.473422933635067e-07, + "loss": 0.3998, + "step": 11041 + }, + { + "epoch": 0.9, + "grad_norm": 2.940103140856555, + "learning_rate": 2.469312569984755e-07, + "loss": 0.8201, + "step": 11042 + }, + { + "epoch": 0.9, + "grad_norm": 4.039052681644886, + "learning_rate": 2.4652055380287866e-07, + "loss": 0.6402, + "step": 11043 + }, + { + "epoch": 0.9, + "grad_norm": 4.097990478563924, + "learning_rate": 2.46110183805503e-07, + "loss": 0.6293, + "step": 11044 + }, + { + "epoch": 0.9, + "grad_norm": 4.976807621091998, + "learning_rate": 2.45700147035115e-07, + "loss": 0.791, + "step": 11045 + }, + { + "epoch": 0.9, + "grad_norm": 2.9381474601172486, + "learning_rate": 2.4529044352045507e-07, + "loss": 0.5721, + "step": 11046 + }, + { + "epoch": 0.9, + "grad_norm": 2.392734855345606, + "learning_rate": 2.448810732902429e-07, + "loss": 0.2277, + "step": 11047 + }, + { + "epoch": 0.9, + "grad_norm": 5.073692288450157, + "learning_rate": 2.4447203637317396e-07, + "loss": 0.5008, + "step": 11048 + }, + { + "epoch": 0.9, + "grad_norm": 1.0443640424281004, + "learning_rate": 2.440633327979186e-07, + "loss": 0.1341, + "step": 11049 + }, + { + "epoch": 0.9, + "grad_norm": 2.94043086673372, + "learning_rate": 2.436549625931256e-07, + "loss": 0.487, + "step": 11050 + }, + { + "epoch": 0.9, + "grad_norm": 3.6347325369876495, + "learning_rate": 2.432469257874198e-07, + "loss": 0.7156, + "step": 11051 + }, + { + "epoch": 0.9, + "grad_norm": 3.1073434097379553, + "learning_rate": 2.4283922240940285e-07, + "loss": 0.3269, + "step": 11052 + }, + { + "epoch": 0.9, + "grad_norm": 2.4455566731482126, + "learning_rate": 2.4243185248765347e-07, + "loss": 0.4302, + "step": 11053 + }, + { + "epoch": 0.9, + "grad_norm": 4.0889595023649, + "learning_rate": 2.4202481605072715e-07, + "loss": 0.5646, + "step": 11054 + }, + { + "epoch": 0.9, + "grad_norm": 2.6371914942050356, + "learning_rate": 2.4161811312715336e-07, + "loss": 0.3783, + "step": 11055 + }, + { + "epoch": 0.9, + "grad_norm": 4.161157934117554, + "learning_rate": 2.41211743745442e-07, + "loss": 0.4261, + "step": 11056 + }, + { + "epoch": 0.9, + "grad_norm": 4.3216016149663545, + "learning_rate": 2.40805707934077e-07, + "loss": 0.6887, + "step": 11057 + }, + { + "epoch": 0.9, + "grad_norm": 4.645190440247131, + "learning_rate": 2.404000057215189e-07, + "loss": 0.9987, + "step": 11058 + }, + { + "epoch": 0.9, + "grad_norm": 4.5485437389266705, + "learning_rate": 2.399946371362072e-07, + "loss": 0.9675, + "step": 11059 + }, + { + "epoch": 0.9, + "grad_norm": 4.003720375727015, + "learning_rate": 2.3958960220655637e-07, + "loss": 0.814, + "step": 11060 + }, + { + "epoch": 0.9, + "grad_norm": 4.089568719659635, + "learning_rate": 2.391849009609559e-07, + "loss": 0.7647, + "step": 11061 + }, + { + "epoch": 0.9, + "grad_norm": 5.2458157362121085, + "learning_rate": 2.387805334277754e-07, + "loss": 1.0476, + "step": 11062 + }, + { + "epoch": 0.9, + "grad_norm": 3.2327623500986915, + "learning_rate": 2.3837649963535825e-07, + "loss": 0.5227, + "step": 11063 + }, + { + "epoch": 0.9, + "grad_norm": 3.340149049297397, + "learning_rate": 2.379727996120257e-07, + "loss": 0.7746, + "step": 11064 + }, + { + "epoch": 0.9, + "grad_norm": 5.503326569977153, + "learning_rate": 2.3756943338607564e-07, + "loss": 1.101, + "step": 11065 + }, + { + "epoch": 0.9, + "grad_norm": 5.111674690062485, + "learning_rate": 2.3716640098578326e-07, + "loss": 1.2175, + "step": 11066 + }, + { + "epoch": 0.9, + "grad_norm": 4.474486226758203, + "learning_rate": 2.3676370243939706e-07, + "loss": 0.7457, + "step": 11067 + }, + { + "epoch": 0.9, + "grad_norm": 4.7496111669335175, + "learning_rate": 2.363613377751456e-07, + "loss": 0.9001, + "step": 11068 + }, + { + "epoch": 0.9, + "grad_norm": 3.2549787221233446, + "learning_rate": 2.3595930702123292e-07, + "loss": 0.7977, + "step": 11069 + }, + { + "epoch": 0.9, + "grad_norm": 4.210072232193499, + "learning_rate": 2.35557610205841e-07, + "loss": 0.8362, + "step": 11070 + }, + { + "epoch": 0.9, + "grad_norm": 2.482328728267237, + "learning_rate": 2.351562473571245e-07, + "loss": 0.3899, + "step": 11071 + }, + { + "epoch": 0.9, + "grad_norm": 5.867913330107275, + "learning_rate": 2.3475521850321868e-07, + "loss": 0.971, + "step": 11072 + }, + { + "epoch": 0.91, + "grad_norm": 3.6817365818980528, + "learning_rate": 2.3435452367223333e-07, + "loss": 0.5856, + "step": 11073 + }, + { + "epoch": 0.91, + "grad_norm": 4.492208935289324, + "learning_rate": 2.3395416289225591e-07, + "loss": 1.3804, + "step": 11074 + }, + { + "epoch": 0.91, + "grad_norm": 3.522649897360985, + "learning_rate": 2.3355413619134958e-07, + "loss": 0.9513, + "step": 11075 + }, + { + "epoch": 0.91, + "grad_norm": 3.2002375390908013, + "learning_rate": 2.3315444359755468e-07, + "loss": 0.5962, + "step": 11076 + }, + { + "epoch": 0.91, + "grad_norm": 3.699768339248374, + "learning_rate": 2.3275508513888822e-07, + "loss": 0.6855, + "step": 11077 + }, + { + "epoch": 0.91, + "grad_norm": 5.2946389437483745, + "learning_rate": 2.3235606084334285e-07, + "loss": 0.5991, + "step": 11078 + }, + { + "epoch": 0.91, + "grad_norm": 4.008303054571759, + "learning_rate": 2.319573707388889e-07, + "loss": 0.6266, + "step": 11079 + }, + { + "epoch": 0.91, + "grad_norm": 3.0139603695193897, + "learning_rate": 2.3155901485347242e-07, + "loss": 0.5756, + "step": 11080 + }, + { + "epoch": 0.91, + "grad_norm": 4.228017886089983, + "learning_rate": 2.3116099321501716e-07, + "loss": 0.441, + "step": 11081 + }, + { + "epoch": 0.91, + "grad_norm": 5.21420696840283, + "learning_rate": 2.3076330585142138e-07, + "loss": 1.3786, + "step": 11082 + }, + { + "epoch": 0.91, + "grad_norm": 3.7161093803831586, + "learning_rate": 2.303659527905633e-07, + "loss": 0.5781, + "step": 11083 + }, + { + "epoch": 0.91, + "grad_norm": 3.732562707840122, + "learning_rate": 2.2996893406029396e-07, + "loss": 0.6167, + "step": 11084 + }, + { + "epoch": 0.91, + "grad_norm": 3.9199633925383037, + "learning_rate": 2.2957224968844227e-07, + "loss": 0.7024, + "step": 11085 + }, + { + "epoch": 0.91, + "grad_norm": 4.175139268446115, + "learning_rate": 2.291758997028165e-07, + "loss": 0.7179, + "step": 11086 + }, + { + "epoch": 0.91, + "grad_norm": 4.684719713666393, + "learning_rate": 2.2877988413119613e-07, + "loss": 0.814, + "step": 11087 + }, + { + "epoch": 0.91, + "grad_norm": 5.707293402429215, + "learning_rate": 2.2838420300134168e-07, + "loss": 1.1273, + "step": 11088 + }, + { + "epoch": 0.91, + "grad_norm": 5.506806733915437, + "learning_rate": 2.2798885634098934e-07, + "loss": 1.2383, + "step": 11089 + }, + { + "epoch": 0.91, + "grad_norm": 4.601802804472051, + "learning_rate": 2.2759384417784914e-07, + "loss": 0.8593, + "step": 11090 + }, + { + "epoch": 0.91, + "grad_norm": 3.6701148366908787, + "learning_rate": 2.2719916653961117e-07, + "loss": 0.8575, + "step": 11091 + }, + { + "epoch": 0.91, + "grad_norm": 3.647881541444441, + "learning_rate": 2.268048234539405e-07, + "loss": 0.5893, + "step": 11092 + }, + { + "epoch": 0.91, + "grad_norm": 4.767204519975867, + "learning_rate": 2.264108149484784e-07, + "loss": 0.9492, + "step": 11093 + }, + { + "epoch": 0.91, + "grad_norm": 4.01034330234706, + "learning_rate": 2.2601714105084438e-07, + "loss": 0.874, + "step": 11094 + }, + { + "epoch": 0.91, + "grad_norm": 3.393774300890451, + "learning_rate": 2.256238017886314e-07, + "loss": 0.5693, + "step": 11095 + }, + { + "epoch": 0.91, + "grad_norm": 3.0602238658907375, + "learning_rate": 2.2523079718941188e-07, + "loss": 0.3612, + "step": 11096 + }, + { + "epoch": 0.91, + "grad_norm": 5.202968231086185, + "learning_rate": 2.2483812728073372e-07, + "loss": 0.9142, + "step": 11097 + }, + { + "epoch": 0.91, + "grad_norm": 3.485032124470473, + "learning_rate": 2.2444579209012106e-07, + "loss": 0.4293, + "step": 11098 + }, + { + "epoch": 0.91, + "grad_norm": 3.022784020013259, + "learning_rate": 2.2405379164507524e-07, + "loss": 0.6828, + "step": 11099 + }, + { + "epoch": 0.91, + "grad_norm": 4.363452111618262, + "learning_rate": 2.2366212597307424e-07, + "loss": 0.618, + "step": 11100 + }, + { + "epoch": 0.91, + "grad_norm": 4.0461915866057945, + "learning_rate": 2.2327079510157112e-07, + "loss": 0.7344, + "step": 11101 + }, + { + "epoch": 0.91, + "grad_norm": 3.5256665808456873, + "learning_rate": 2.2287979905799672e-07, + "loss": 0.4726, + "step": 11102 + }, + { + "epoch": 0.91, + "grad_norm": 4.923703005863906, + "learning_rate": 2.2248913786975857e-07, + "loss": 1.1763, + "step": 11103 + }, + { + "epoch": 0.91, + "grad_norm": 2.6965162168198713, + "learning_rate": 2.2209881156423973e-07, + "loss": 0.3289, + "step": 11104 + }, + { + "epoch": 0.91, + "grad_norm": 3.421436369491871, + "learning_rate": 2.2170882016880112e-07, + "loss": 0.7714, + "step": 11105 + }, + { + "epoch": 0.91, + "grad_norm": 5.1651362991109435, + "learning_rate": 2.213191637107792e-07, + "loss": 0.8261, + "step": 11106 + }, + { + "epoch": 0.91, + "grad_norm": 3.392052582605556, + "learning_rate": 2.2092984221748602e-07, + "loss": 0.7156, + "step": 11107 + }, + { + "epoch": 0.91, + "grad_norm": 3.515651379382963, + "learning_rate": 2.205408557162131e-07, + "loss": 0.7332, + "step": 11108 + }, + { + "epoch": 0.91, + "grad_norm": 3.996762719331602, + "learning_rate": 2.2015220423422523e-07, + "loss": 0.6411, + "step": 11109 + }, + { + "epoch": 0.91, + "grad_norm": 4.481526049280716, + "learning_rate": 2.1976388779876623e-07, + "loss": 1.0722, + "step": 11110 + }, + { + "epoch": 0.91, + "grad_norm": 6.452038705988483, + "learning_rate": 2.1937590643705542e-07, + "loss": 1.3209, + "step": 11111 + }, + { + "epoch": 0.91, + "grad_norm": 4.40830111144125, + "learning_rate": 2.1898826017628772e-07, + "loss": 0.7558, + "step": 11112 + }, + { + "epoch": 0.91, + "grad_norm": 6.21407611714033, + "learning_rate": 2.1860094904363637e-07, + "loss": 1.1922, + "step": 11113 + }, + { + "epoch": 0.91, + "grad_norm": 2.211696547540112, + "learning_rate": 2.1821397306624915e-07, + "loss": 0.5482, + "step": 11114 + }, + { + "epoch": 0.91, + "grad_norm": 1.10794839879072, + "learning_rate": 2.1782733227125264e-07, + "loss": 0.147, + "step": 11115 + }, + { + "epoch": 0.91, + "grad_norm": 5.539108132473663, + "learning_rate": 2.174410266857474e-07, + "loss": 1.0284, + "step": 11116 + }, + { + "epoch": 0.91, + "grad_norm": 3.4530422166033254, + "learning_rate": 2.1705505633681346e-07, + "loss": 0.4119, + "step": 11117 + }, + { + "epoch": 0.91, + "grad_norm": 2.8801438058179403, + "learning_rate": 2.1666942125150358e-07, + "loss": 0.5742, + "step": 11118 + }, + { + "epoch": 0.91, + "grad_norm": 4.218928014732488, + "learning_rate": 2.1628412145685119e-07, + "loss": 0.994, + "step": 11119 + }, + { + "epoch": 0.91, + "grad_norm": 5.6833087536929625, + "learning_rate": 2.158991569798624e-07, + "loss": 1.198, + "step": 11120 + }, + { + "epoch": 0.91, + "grad_norm": 5.325366267395837, + "learning_rate": 2.1551452784752236e-07, + "loss": 1.2089, + "step": 11121 + }, + { + "epoch": 0.91, + "grad_norm": 4.651134494656022, + "learning_rate": 2.1513023408679168e-07, + "loss": 1.0882, + "step": 11122 + }, + { + "epoch": 0.91, + "grad_norm": 5.374675834672166, + "learning_rate": 2.1474627572460826e-07, + "loss": 0.9013, + "step": 11123 + }, + { + "epoch": 0.91, + "grad_norm": 4.39461346185137, + "learning_rate": 2.1436265278788448e-07, + "loss": 0.5845, + "step": 11124 + }, + { + "epoch": 0.91, + "grad_norm": 3.167430852177149, + "learning_rate": 2.1397936530351214e-07, + "loss": 0.5196, + "step": 11125 + }, + { + "epoch": 0.91, + "grad_norm": 3.3743656027738775, + "learning_rate": 2.13596413298357e-07, + "loss": 0.5903, + "step": 11126 + }, + { + "epoch": 0.91, + "grad_norm": 3.548528392729398, + "learning_rate": 2.1321379679926314e-07, + "loss": 0.4787, + "step": 11127 + }, + { + "epoch": 0.91, + "grad_norm": 2.7005002557902085, + "learning_rate": 2.1283151583305073e-07, + "loss": 0.4916, + "step": 11128 + }, + { + "epoch": 0.91, + "grad_norm": 3.631365562756313, + "learning_rate": 2.1244957042651394e-07, + "loss": 0.5243, + "step": 11129 + }, + { + "epoch": 0.91, + "grad_norm": 4.457370939567119, + "learning_rate": 2.1206796060642742e-07, + "loss": 0.8838, + "step": 11130 + }, + { + "epoch": 0.91, + "grad_norm": 3.947617050297842, + "learning_rate": 2.1168668639953925e-07, + "loss": 0.6216, + "step": 11131 + }, + { + "epoch": 0.91, + "grad_norm": 5.449892882699174, + "learning_rate": 2.1130574783257585e-07, + "loss": 1.1595, + "step": 11132 + }, + { + "epoch": 0.91, + "grad_norm": 2.1387838929076355, + "learning_rate": 2.109251449322397e-07, + "loss": 0.5245, + "step": 11133 + }, + { + "epoch": 0.91, + "grad_norm": 2.3414999678308424, + "learning_rate": 2.105448777252078e-07, + "loss": 0.3202, + "step": 11134 + }, + { + "epoch": 0.91, + "grad_norm": 4.925262482104804, + "learning_rate": 2.1016494623813722e-07, + "loss": 0.9867, + "step": 11135 + }, + { + "epoch": 0.91, + "grad_norm": 3.278064810880702, + "learning_rate": 2.0978535049765769e-07, + "loss": 0.5975, + "step": 11136 + }, + { + "epoch": 0.91, + "grad_norm": 2.7913682482536117, + "learning_rate": 2.0940609053037796e-07, + "loss": 0.2869, + "step": 11137 + }, + { + "epoch": 0.91, + "grad_norm": 3.942497206347499, + "learning_rate": 2.090271663628829e-07, + "loss": 0.8199, + "step": 11138 + }, + { + "epoch": 0.91, + "grad_norm": 5.14434134047888, + "learning_rate": 2.086485780217329e-07, + "loss": 1.3063, + "step": 11139 + }, + { + "epoch": 0.91, + "grad_norm": 5.727023327112773, + "learning_rate": 2.0827032553346615e-07, + "loss": 1.2092, + "step": 11140 + }, + { + "epoch": 0.91, + "grad_norm": 4.28386093935721, + "learning_rate": 2.0789240892459485e-07, + "loss": 0.6101, + "step": 11141 + }, + { + "epoch": 0.91, + "grad_norm": 4.052194647387861, + "learning_rate": 2.0751482822161106e-07, + "loss": 0.6826, + "step": 11142 + }, + { + "epoch": 0.91, + "grad_norm": 2.5490516592790584, + "learning_rate": 2.0713758345098033e-07, + "loss": 0.3835, + "step": 11143 + }, + { + "epoch": 0.91, + "grad_norm": 2.908189906576185, + "learning_rate": 2.0676067463914651e-07, + "loss": 0.5757, + "step": 11144 + }, + { + "epoch": 0.91, + "grad_norm": 2.867167669543695, + "learning_rate": 2.0638410181252898e-07, + "loss": 0.5637, + "step": 11145 + }, + { + "epoch": 0.91, + "grad_norm": 4.697488403153114, + "learning_rate": 2.06007864997525e-07, + "loss": 1.2564, + "step": 11146 + }, + { + "epoch": 0.91, + "grad_norm": 2.991525616886259, + "learning_rate": 2.0563196422050568e-07, + "loss": 0.4381, + "step": 11147 + }, + { + "epoch": 0.91, + "grad_norm": 2.7813495047843086, + "learning_rate": 2.0525639950781996e-07, + "loss": 0.3386, + "step": 11148 + }, + { + "epoch": 0.91, + "grad_norm": 3.695648298957778, + "learning_rate": 2.0488117088579506e-07, + "loss": 0.5629, + "step": 11149 + }, + { + "epoch": 0.91, + "grad_norm": 4.03623000154458, + "learning_rate": 2.0450627838073056e-07, + "loss": 0.6678, + "step": 11150 + }, + { + "epoch": 0.91, + "grad_norm": 2.7482338355973073, + "learning_rate": 2.0413172201890653e-07, + "loss": 0.6516, + "step": 11151 + }, + { + "epoch": 0.91, + "grad_norm": 2.864430039136069, + "learning_rate": 2.0375750182657695e-07, + "loss": 0.5004, + "step": 11152 + }, + { + "epoch": 0.91, + "grad_norm": 4.4182944549418455, + "learning_rate": 2.0338361782997252e-07, + "loss": 0.7622, + "step": 11153 + }, + { + "epoch": 0.91, + "grad_norm": 2.5796549918494818, + "learning_rate": 2.0301007005530226e-07, + "loss": 0.4822, + "step": 11154 + }, + { + "epoch": 0.91, + "grad_norm": 5.101927255290817, + "learning_rate": 2.0263685852874915e-07, + "loss": 0.7614, + "step": 11155 + }, + { + "epoch": 0.91, + "grad_norm": 4.936883196766509, + "learning_rate": 2.022639832764739e-07, + "loss": 1.0994, + "step": 11156 + }, + { + "epoch": 0.91, + "grad_norm": 3.1251344484501056, + "learning_rate": 2.0189144432461449e-07, + "loss": 0.5361, + "step": 11157 + }, + { + "epoch": 0.91, + "grad_norm": 4.220043319689753, + "learning_rate": 2.0151924169928228e-07, + "loss": 0.8652, + "step": 11158 + }, + { + "epoch": 0.91, + "grad_norm": 5.371349797847683, + "learning_rate": 2.011473754265686e-07, + "loss": 0.8478, + "step": 11159 + }, + { + "epoch": 0.91, + "grad_norm": 3.1755451624852777, + "learning_rate": 2.0077584553253927e-07, + "loss": 0.5387, + "step": 11160 + }, + { + "epoch": 0.91, + "grad_norm": 5.700040156838395, + "learning_rate": 2.0040465204323678e-07, + "loss": 0.9394, + "step": 11161 + }, + { + "epoch": 0.91, + "grad_norm": 4.7351776612595184, + "learning_rate": 2.0003379498468035e-07, + "loss": 0.836, + "step": 11162 + }, + { + "epoch": 0.91, + "grad_norm": 4.607577896180465, + "learning_rate": 1.9966327438286582e-07, + "loss": 1.0263, + "step": 11163 + }, + { + "epoch": 0.91, + "grad_norm": 3.4638615943766453, + "learning_rate": 1.992930902637641e-07, + "loss": 0.5572, + "step": 11164 + }, + { + "epoch": 0.91, + "grad_norm": 3.442831959285367, + "learning_rate": 1.9892324265332442e-07, + "loss": 0.5543, + "step": 11165 + }, + { + "epoch": 0.91, + "grad_norm": 5.233236165568079, + "learning_rate": 1.9855373157747048e-07, + "loss": 1.2052, + "step": 11166 + }, + { + "epoch": 0.91, + "grad_norm": 2.152889625127049, + "learning_rate": 1.9818455706210438e-07, + "loss": 0.446, + "step": 11167 + }, + { + "epoch": 0.91, + "grad_norm": 2.1104276927708336, + "learning_rate": 1.9781571913310317e-07, + "loss": 0.1884, + "step": 11168 + }, + { + "epoch": 0.91, + "grad_norm": 5.0639604217808065, + "learning_rate": 1.9744721781632115e-07, + "loss": 1.0051, + "step": 11169 + }, + { + "epoch": 0.91, + "grad_norm": 4.061353204589289, + "learning_rate": 1.9707905313758769e-07, + "loss": 0.8107, + "step": 11170 + }, + { + "epoch": 0.91, + "grad_norm": 3.726630206028116, + "learning_rate": 1.96711225122711e-07, + "loss": 0.8195, + "step": 11171 + }, + { + "epoch": 0.91, + "grad_norm": 6.104973615834729, + "learning_rate": 1.9634373379747274e-07, + "loss": 1.4508, + "step": 11172 + }, + { + "epoch": 0.91, + "grad_norm": 3.120619482005494, + "learning_rate": 1.9597657918763335e-07, + "loss": 0.6486, + "step": 11173 + }, + { + "epoch": 0.91, + "grad_norm": 5.191333913205158, + "learning_rate": 1.9560976131892894e-07, + "loss": 1.0356, + "step": 11174 + }, + { + "epoch": 0.91, + "grad_norm": 3.905687977075724, + "learning_rate": 1.9524328021707118e-07, + "loss": 0.8676, + "step": 11175 + }, + { + "epoch": 0.91, + "grad_norm": 3.586290688649648, + "learning_rate": 1.9487713590774948e-07, + "loss": 0.6796, + "step": 11176 + }, + { + "epoch": 0.91, + "grad_norm": 2.000787043097494, + "learning_rate": 1.9451132841662778e-07, + "loss": 0.3549, + "step": 11177 + }, + { + "epoch": 0.91, + "grad_norm": 5.2012772722664256, + "learning_rate": 1.9414585776934892e-07, + "loss": 1.1755, + "step": 11178 + }, + { + "epoch": 0.91, + "grad_norm": 3.905691271358258, + "learning_rate": 1.937807239915307e-07, + "loss": 0.5006, + "step": 11179 + }, + { + "epoch": 0.91, + "grad_norm": 4.424924940593239, + "learning_rate": 1.9341592710876656e-07, + "loss": 0.5779, + "step": 11180 + }, + { + "epoch": 0.91, + "grad_norm": 4.047146689063044, + "learning_rate": 1.9305146714662826e-07, + "loss": 0.5132, + "step": 11181 + }, + { + "epoch": 0.91, + "grad_norm": 2.7016882955252894, + "learning_rate": 1.926873441306615e-07, + "loss": 0.4587, + "step": 11182 + }, + { + "epoch": 0.91, + "grad_norm": 3.9093433744808506, + "learning_rate": 1.9232355808639025e-07, + "loss": 0.6004, + "step": 11183 + }, + { + "epoch": 0.91, + "grad_norm": 6.0015902471677585, + "learning_rate": 1.9196010903931417e-07, + "loss": 1.1875, + "step": 11184 + }, + { + "epoch": 0.91, + "grad_norm": 3.9358852462762313, + "learning_rate": 1.9159699701491065e-07, + "loss": 0.5939, + "step": 11185 + }, + { + "epoch": 0.91, + "grad_norm": 3.697605370531831, + "learning_rate": 1.9123422203863152e-07, + "loss": 0.5391, + "step": 11186 + }, + { + "epoch": 0.91, + "grad_norm": 2.469171962086072, + "learning_rate": 1.908717841359048e-07, + "loss": 0.428, + "step": 11187 + }, + { + "epoch": 0.91, + "grad_norm": 2.3907613046127594, + "learning_rate": 1.9050968333213683e-07, + "loss": 0.4528, + "step": 11188 + }, + { + "epoch": 0.91, + "grad_norm": 2.5817391032192507, + "learning_rate": 1.9014791965270952e-07, + "loss": 0.4375, + "step": 11189 + }, + { + "epoch": 0.91, + "grad_norm": 4.627772604588454, + "learning_rate": 1.897864931229798e-07, + "loss": 0.8307, + "step": 11190 + }, + { + "epoch": 0.91, + "grad_norm": 2.789077798942602, + "learning_rate": 1.8942540376828355e-07, + "loss": 0.4359, + "step": 11191 + }, + { + "epoch": 0.91, + "grad_norm": 2.7768245176577677, + "learning_rate": 1.890646516139305e-07, + "loss": 0.4011, + "step": 11192 + }, + { + "epoch": 0.91, + "grad_norm": 5.210996217553082, + "learning_rate": 1.887042366852082e-07, + "loss": 0.9132, + "step": 11193 + }, + { + "epoch": 0.91, + "grad_norm": 2.906255086628593, + "learning_rate": 1.8834415900737978e-07, + "loss": 0.5761, + "step": 11194 + }, + { + "epoch": 0.92, + "grad_norm": 2.6180163634599065, + "learning_rate": 1.8798441860568616e-07, + "loss": 0.6036, + "step": 11195 + }, + { + "epoch": 0.92, + "grad_norm": 1.0980136267430478, + "learning_rate": 1.8762501550534217e-07, + "loss": 0.0995, + "step": 11196 + }, + { + "epoch": 0.92, + "grad_norm": 4.290132282228848, + "learning_rate": 1.87265949731541e-07, + "loss": 0.6914, + "step": 11197 + }, + { + "epoch": 0.92, + "grad_norm": 4.136875746607067, + "learning_rate": 1.869072213094525e-07, + "loss": 0.7159, + "step": 11198 + }, + { + "epoch": 0.92, + "grad_norm": 2.467381386165635, + "learning_rate": 1.865488302642199e-07, + "loss": 0.5064, + "step": 11199 + }, + { + "epoch": 0.92, + "grad_norm": 2.4473827868656355, + "learning_rate": 1.8619077662096696e-07, + "loss": 0.4848, + "step": 11200 + }, + { + "epoch": 0.92, + "grad_norm": 4.864204754691415, + "learning_rate": 1.8583306040479032e-07, + "loss": 0.8725, + "step": 11201 + }, + { + "epoch": 0.92, + "grad_norm": 4.180014132625099, + "learning_rate": 1.854756816407649e-07, + "loss": 0.8338, + "step": 11202 + }, + { + "epoch": 0.92, + "grad_norm": 3.8155883572348497, + "learning_rate": 1.851186403539418e-07, + "loss": 0.8488, + "step": 11203 + }, + { + "epoch": 0.92, + "grad_norm": 4.525992468347426, + "learning_rate": 1.8476193656934704e-07, + "loss": 1.1812, + "step": 11204 + }, + { + "epoch": 0.92, + "grad_norm": 4.7447516147126025, + "learning_rate": 1.844055703119846e-07, + "loss": 1.0543, + "step": 11205 + }, + { + "epoch": 0.92, + "grad_norm": 4.17017342661183, + "learning_rate": 1.8404954160683443e-07, + "loss": 0.4521, + "step": 11206 + }, + { + "epoch": 0.92, + "grad_norm": 4.946381787997414, + "learning_rate": 1.8369385047885156e-07, + "loss": 0.8788, + "step": 11207 + }, + { + "epoch": 0.92, + "grad_norm": 4.172479909491882, + "learning_rate": 1.8333849695297e-07, + "loss": 0.7996, + "step": 11208 + }, + { + "epoch": 0.92, + "grad_norm": 3.2283229855748674, + "learning_rate": 1.8298348105409757e-07, + "loss": 0.5261, + "step": 11209 + }, + { + "epoch": 0.92, + "grad_norm": 4.733079662348577, + "learning_rate": 1.826288028071188e-07, + "loss": 0.8896, + "step": 11210 + }, + { + "epoch": 0.92, + "grad_norm": 4.139927670971216, + "learning_rate": 1.8227446223689605e-07, + "loss": 0.9959, + "step": 11211 + }, + { + "epoch": 0.92, + "grad_norm": 5.325872302072155, + "learning_rate": 1.819204593682672e-07, + "loss": 0.775, + "step": 11212 + }, + { + "epoch": 0.92, + "grad_norm": 3.820136054276924, + "learning_rate": 1.8156679422604516e-07, + "loss": 0.816, + "step": 11213 + }, + { + "epoch": 0.92, + "grad_norm": 4.790950829563678, + "learning_rate": 1.8121346683502183e-07, + "loss": 0.6849, + "step": 11214 + }, + { + "epoch": 0.92, + "grad_norm": 3.5023638899812206, + "learning_rate": 1.808604772199618e-07, + "loss": 0.6212, + "step": 11215 + }, + { + "epoch": 0.92, + "grad_norm": 3.478480719963517, + "learning_rate": 1.8050782540560974e-07, + "loss": 0.6308, + "step": 11216 + }, + { + "epoch": 0.92, + "grad_norm": 4.570003184022221, + "learning_rate": 1.8015551141668474e-07, + "loss": 0.5625, + "step": 11217 + }, + { + "epoch": 0.92, + "grad_norm": 3.8625066042343503, + "learning_rate": 1.7980353527788207e-07, + "loss": 0.8208, + "step": 11218 + }, + { + "epoch": 0.92, + "grad_norm": 4.897763261574733, + "learning_rate": 1.7945189701387422e-07, + "loss": 0.8867, + "step": 11219 + }, + { + "epoch": 0.92, + "grad_norm": 3.56730900511684, + "learning_rate": 1.7910059664930978e-07, + "loss": 0.6531, + "step": 11220 + }, + { + "epoch": 0.92, + "grad_norm": 3.5322507412594497, + "learning_rate": 1.7874963420881242e-07, + "loss": 0.4657, + "step": 11221 + }, + { + "epoch": 0.92, + "grad_norm": 3.7200820452997982, + "learning_rate": 1.7839900971698355e-07, + "loss": 0.7998, + "step": 11222 + }, + { + "epoch": 0.92, + "grad_norm": 4.13435755876668, + "learning_rate": 1.7804872319840017e-07, + "loss": 0.7359, + "step": 11223 + }, + { + "epoch": 0.92, + "grad_norm": 2.2755152186076573, + "learning_rate": 1.7769877467761655e-07, + "loss": 0.5649, + "step": 11224 + }, + { + "epoch": 0.92, + "grad_norm": 2.6590296290393876, + "learning_rate": 1.7734916417916136e-07, + "loss": 0.2297, + "step": 11225 + }, + { + "epoch": 0.92, + "grad_norm": 3.842949647874636, + "learning_rate": 1.7699989172754284e-07, + "loss": 0.7091, + "step": 11226 + }, + { + "epoch": 0.92, + "grad_norm": 4.819659656122972, + "learning_rate": 1.7665095734724136e-07, + "loss": 0.8342, + "step": 11227 + }, + { + "epoch": 0.92, + "grad_norm": 3.748007448179637, + "learning_rate": 1.763023610627168e-07, + "loss": 0.9059, + "step": 11228 + }, + { + "epoch": 0.92, + "grad_norm": 3.9416185834126987, + "learning_rate": 1.7595410289840352e-07, + "loss": 0.5522, + "step": 11229 + }, + { + "epoch": 0.92, + "grad_norm": 3.515053494653996, + "learning_rate": 1.756061828787131e-07, + "loss": 0.7281, + "step": 11230 + }, + { + "epoch": 0.92, + "grad_norm": 2.4827523750750853, + "learning_rate": 1.7525860102803438e-07, + "loss": 0.321, + "step": 11231 + }, + { + "epoch": 0.92, + "grad_norm": 4.901220700742775, + "learning_rate": 1.749113573707295e-07, + "loss": 0.874, + "step": 11232 + }, + { + "epoch": 0.92, + "grad_norm": 4.183832641562579, + "learning_rate": 1.7456445193114014e-07, + "loss": 0.733, + "step": 11233 + }, + { + "epoch": 0.92, + "grad_norm": 5.000577475774218, + "learning_rate": 1.742178847335818e-07, + "loss": 1.3244, + "step": 11234 + }, + { + "epoch": 0.92, + "grad_norm": 4.83867868502028, + "learning_rate": 1.7387165580234789e-07, + "loss": 0.7298, + "step": 11235 + }, + { + "epoch": 0.92, + "grad_norm": 1.3992288505410486, + "learning_rate": 1.7352576516170784e-07, + "loss": 0.1665, + "step": 11236 + }, + { + "epoch": 0.92, + "grad_norm": 4.221374384301655, + "learning_rate": 1.7318021283590724e-07, + "loss": 0.9748, + "step": 11237 + }, + { + "epoch": 0.92, + "grad_norm": 1.5943361673092038, + "learning_rate": 1.7283499884916677e-07, + "loss": 0.3766, + "step": 11238 + }, + { + "epoch": 0.92, + "grad_norm": 4.624902684414091, + "learning_rate": 1.724901232256848e-07, + "loss": 1.0204, + "step": 11239 + }, + { + "epoch": 0.92, + "grad_norm": 4.970727945735099, + "learning_rate": 1.7214558598963594e-07, + "loss": 0.7984, + "step": 11240 + }, + { + "epoch": 0.92, + "grad_norm": 4.866006374575356, + "learning_rate": 1.7180138716517025e-07, + "loss": 0.6781, + "step": 11241 + }, + { + "epoch": 0.92, + "grad_norm": 3.6831592115954566, + "learning_rate": 1.714575267764157e-07, + "loss": 0.6031, + "step": 11242 + }, + { + "epoch": 0.92, + "grad_norm": 5.117946502570576, + "learning_rate": 1.7111400484747409e-07, + "loss": 0.756, + "step": 11243 + }, + { + "epoch": 0.92, + "grad_norm": 4.028915158091609, + "learning_rate": 1.7077082140242617e-07, + "loss": 1.0491, + "step": 11244 + }, + { + "epoch": 0.92, + "grad_norm": 3.528198610470189, + "learning_rate": 1.7042797646532606e-07, + "loss": 0.5895, + "step": 11245 + }, + { + "epoch": 0.92, + "grad_norm": 3.3915221070718706, + "learning_rate": 1.700854700602067e-07, + "loss": 0.6763, + "step": 11246 + }, + { + "epoch": 0.92, + "grad_norm": 3.9585794046649387, + "learning_rate": 1.697433022110756e-07, + "loss": 0.6661, + "step": 11247 + }, + { + "epoch": 0.92, + "grad_norm": 4.0598631303407915, + "learning_rate": 1.69401472941918e-07, + "loss": 0.6134, + "step": 11248 + }, + { + "epoch": 0.92, + "grad_norm": 4.833142545038157, + "learning_rate": 1.6905998227669474e-07, + "loss": 1.0471, + "step": 11249 + }, + { + "epoch": 0.92, + "grad_norm": 4.217539587480963, + "learning_rate": 1.6871883023934166e-07, + "loss": 0.6563, + "step": 11250 + }, + { + "epoch": 0.92, + "grad_norm": 2.960147177640882, + "learning_rate": 1.6837801685377298e-07, + "loss": 0.2802, + "step": 11251 + }, + { + "epoch": 0.92, + "grad_norm": 4.584210166954635, + "learning_rate": 1.680375421438779e-07, + "loss": 0.9426, + "step": 11252 + }, + { + "epoch": 0.92, + "grad_norm": 2.839656649825677, + "learning_rate": 1.6769740613352237e-07, + "loss": 0.4337, + "step": 11253 + }, + { + "epoch": 0.92, + "grad_norm": 3.337294598224811, + "learning_rate": 1.6735760884654894e-07, + "loss": 0.5698, + "step": 11254 + }, + { + "epoch": 0.92, + "grad_norm": 6.090740134791479, + "learning_rate": 1.6701815030677525e-07, + "loss": 0.9533, + "step": 11255 + }, + { + "epoch": 0.92, + "grad_norm": 4.086760664500222, + "learning_rate": 1.666790305379956e-07, + "loss": 0.6691, + "step": 11256 + }, + { + "epoch": 0.92, + "grad_norm": 4.063399156552799, + "learning_rate": 1.6634024956398098e-07, + "loss": 0.6545, + "step": 11257 + }, + { + "epoch": 0.92, + "grad_norm": 4.305498616104555, + "learning_rate": 1.660018074084796e-07, + "loss": 0.628, + "step": 11258 + }, + { + "epoch": 0.92, + "grad_norm": 3.9923655462342778, + "learning_rate": 1.656637040952136e-07, + "loss": 0.555, + "step": 11259 + }, + { + "epoch": 0.92, + "grad_norm": 2.48463791219742, + "learning_rate": 1.653259396478829e-07, + "loss": 0.4043, + "step": 11260 + }, + { + "epoch": 0.92, + "grad_norm": 2.1888494863078725, + "learning_rate": 1.6498851409016304e-07, + "loss": 0.3287, + "step": 11261 + }, + { + "epoch": 0.92, + "grad_norm": 4.391289356990704, + "learning_rate": 1.6465142744570617e-07, + "loss": 0.9179, + "step": 11262 + }, + { + "epoch": 0.92, + "grad_norm": 2.4228954424793883, + "learning_rate": 1.6431467973814064e-07, + "loss": 0.2831, + "step": 11263 + }, + { + "epoch": 0.92, + "grad_norm": 3.3720171548713602, + "learning_rate": 1.6397827099107144e-07, + "loss": 0.5803, + "step": 11264 + }, + { + "epoch": 0.92, + "grad_norm": 3.5179120097759555, + "learning_rate": 1.6364220122807862e-07, + "loss": 0.6053, + "step": 11265 + }, + { + "epoch": 0.92, + "grad_norm": 4.620685463962939, + "learning_rate": 1.6330647047272052e-07, + "loss": 0.8843, + "step": 11266 + }, + { + "epoch": 0.92, + "grad_norm": 4.078360315208572, + "learning_rate": 1.6297107874852836e-07, + "loss": 0.5724, + "step": 11267 + }, + { + "epoch": 0.92, + "grad_norm": 6.353974912838414, + "learning_rate": 1.626360260790133e-07, + "loss": 1.1554, + "step": 11268 + }, + { + "epoch": 0.92, + "grad_norm": 2.4036611618739023, + "learning_rate": 1.6230131248766046e-07, + "loss": 0.6715, + "step": 11269 + }, + { + "epoch": 0.92, + "grad_norm": 3.58811237856854, + "learning_rate": 1.6196693799793162e-07, + "loss": 0.7753, + "step": 11270 + }, + { + "epoch": 0.92, + "grad_norm": 5.064100711903729, + "learning_rate": 1.616329026332658e-07, + "loss": 0.7528, + "step": 11271 + }, + { + "epoch": 0.92, + "grad_norm": 5.656043693642232, + "learning_rate": 1.6129920641707654e-07, + "loss": 1.1097, + "step": 11272 + }, + { + "epoch": 0.92, + "grad_norm": 4.467223107675906, + "learning_rate": 1.6096584937275505e-07, + "loss": 0.9837, + "step": 11273 + }, + { + "epoch": 0.92, + "grad_norm": 3.701685744944751, + "learning_rate": 1.6063283152366772e-07, + "loss": 0.7138, + "step": 11274 + }, + { + "epoch": 0.92, + "grad_norm": 5.187258886409783, + "learning_rate": 1.6030015289315804e-07, + "loss": 1.4469, + "step": 11275 + }, + { + "epoch": 0.92, + "grad_norm": 2.559429823627061, + "learning_rate": 1.5996781350454516e-07, + "loss": 0.3951, + "step": 11276 + }, + { + "epoch": 0.92, + "grad_norm": 2.550707903735016, + "learning_rate": 1.5963581338112488e-07, + "loss": 0.3247, + "step": 11277 + }, + { + "epoch": 0.92, + "grad_norm": 4.41070075623855, + "learning_rate": 1.5930415254616804e-07, + "loss": 0.9562, + "step": 11278 + }, + { + "epoch": 0.92, + "grad_norm": 4.845711815923627, + "learning_rate": 1.5897283102292383e-07, + "loss": 0.6267, + "step": 11279 + }, + { + "epoch": 0.92, + "grad_norm": 3.828948205716296, + "learning_rate": 1.5864184883461587e-07, + "loss": 0.6446, + "step": 11280 + }, + { + "epoch": 0.92, + "grad_norm": 3.9044405188980735, + "learning_rate": 1.583112060044445e-07, + "loss": 0.6316, + "step": 11281 + }, + { + "epoch": 0.92, + "grad_norm": 4.860168447675666, + "learning_rate": 1.5798090255558617e-07, + "loss": 1.086, + "step": 11282 + }, + { + "epoch": 0.92, + "grad_norm": 3.8752415390305104, + "learning_rate": 1.5765093851119518e-07, + "loss": 0.7086, + "step": 11283 + }, + { + "epoch": 0.92, + "grad_norm": 5.086774830872845, + "learning_rate": 1.5732131389439853e-07, + "loss": 0.9672, + "step": 11284 + }, + { + "epoch": 0.92, + "grad_norm": 4.262079064086391, + "learning_rate": 1.5699202872830278e-07, + "loss": 0.6275, + "step": 11285 + }, + { + "epoch": 0.92, + "grad_norm": 4.3456980321069585, + "learning_rate": 1.5666308303598833e-07, + "loss": 1.023, + "step": 11286 + }, + { + "epoch": 0.92, + "grad_norm": 4.004805392257209, + "learning_rate": 1.5633447684051395e-07, + "loss": 0.6211, + "step": 11287 + }, + { + "epoch": 0.92, + "grad_norm": 4.4750971894269, + "learning_rate": 1.5600621016491347e-07, + "loss": 0.6443, + "step": 11288 + }, + { + "epoch": 0.92, + "grad_norm": 4.383512278224929, + "learning_rate": 1.5567828303219566e-07, + "loss": 0.8311, + "step": 11289 + }, + { + "epoch": 0.92, + "grad_norm": 4.659952133960186, + "learning_rate": 1.5535069546534887e-07, + "loss": 0.8986, + "step": 11290 + }, + { + "epoch": 0.92, + "grad_norm": 1.6494431478194589, + "learning_rate": 1.5502344748733356e-07, + "loss": 0.3387, + "step": 11291 + }, + { + "epoch": 0.92, + "grad_norm": 5.315047979396131, + "learning_rate": 1.5469653912108862e-07, + "loss": 1.0557, + "step": 11292 + }, + { + "epoch": 0.92, + "grad_norm": 5.456468791457157, + "learning_rate": 1.5436997038953017e-07, + "loss": 1.0145, + "step": 11293 + }, + { + "epoch": 0.92, + "grad_norm": 3.3868376907378184, + "learning_rate": 1.5404374131554877e-07, + "loss": 0.7768, + "step": 11294 + }, + { + "epoch": 0.92, + "grad_norm": 2.3987429541251983, + "learning_rate": 1.537178519220106e-07, + "loss": 0.361, + "step": 11295 + }, + { + "epoch": 0.92, + "grad_norm": 3.3747056365014325, + "learning_rate": 1.5339230223176016e-07, + "loss": 0.7798, + "step": 11296 + }, + { + "epoch": 0.92, + "grad_norm": 4.5850680765242355, + "learning_rate": 1.5306709226761696e-07, + "loss": 0.9988, + "step": 11297 + }, + { + "epoch": 0.92, + "grad_norm": 1.9340330748299945, + "learning_rate": 1.5274222205237664e-07, + "loss": 0.2935, + "step": 11298 + }, + { + "epoch": 0.92, + "grad_norm": 4.923023096241975, + "learning_rate": 1.5241769160881104e-07, + "loss": 0.8663, + "step": 11299 + }, + { + "epoch": 0.92, + "grad_norm": 5.431124122208152, + "learning_rate": 1.520935009596697e-07, + "loss": 0.6967, + "step": 11300 + }, + { + "epoch": 0.92, + "grad_norm": 3.5747143000254358, + "learning_rate": 1.5176965012767443e-07, + "loss": 0.7445, + "step": 11301 + }, + { + "epoch": 0.92, + "grad_norm": 4.122107757528846, + "learning_rate": 1.5144613913552765e-07, + "loss": 0.7541, + "step": 11302 + }, + { + "epoch": 0.92, + "grad_norm": 4.611120114866463, + "learning_rate": 1.511229680059051e-07, + "loss": 0.7325, + "step": 11303 + }, + { + "epoch": 0.92, + "grad_norm": 3.315470373503458, + "learning_rate": 1.5080013676146032e-07, + "loss": 0.5616, + "step": 11304 + }, + { + "epoch": 0.92, + "grad_norm": 5.124800589520938, + "learning_rate": 1.5047764542482245e-07, + "loss": 1.1372, + "step": 11305 + }, + { + "epoch": 0.92, + "grad_norm": 4.052256498311689, + "learning_rate": 1.501554940185962e-07, + "loss": 0.7783, + "step": 11306 + }, + { + "epoch": 0.92, + "grad_norm": 4.691173523402315, + "learning_rate": 1.49833682565364e-07, + "loss": 1.0757, + "step": 11307 + }, + { + "epoch": 0.92, + "grad_norm": 4.582743442181179, + "learning_rate": 1.4951221108768177e-07, + "loss": 0.8932, + "step": 11308 + }, + { + "epoch": 0.92, + "grad_norm": 4.08830725166257, + "learning_rate": 1.491910796080842e-07, + "loss": 1.0209, + "step": 11309 + }, + { + "epoch": 0.92, + "grad_norm": 4.038731637184449, + "learning_rate": 1.488702881490811e-07, + "loss": 1.1201, + "step": 11310 + }, + { + "epoch": 0.92, + "grad_norm": 5.890836273218682, + "learning_rate": 1.4854983673315948e-07, + "loss": 0.7534, + "step": 11311 + }, + { + "epoch": 0.92, + "grad_norm": 3.190074553783258, + "learning_rate": 1.482297253827797e-07, + "loss": 0.4521, + "step": 11312 + }, + { + "epoch": 0.92, + "grad_norm": 4.42871335374221, + "learning_rate": 1.4790995412038156e-07, + "loss": 0.5175, + "step": 11313 + }, + { + "epoch": 0.92, + "grad_norm": 4.36379925264401, + "learning_rate": 1.4759052296837884e-07, + "loss": 0.6415, + "step": 11314 + }, + { + "epoch": 0.92, + "grad_norm": 1.0429800495424937, + "learning_rate": 1.4727143194916304e-07, + "loss": 0.1184, + "step": 11315 + }, + { + "epoch": 0.92, + "grad_norm": 3.913291195983895, + "learning_rate": 1.4695268108510075e-07, + "loss": 1.0162, + "step": 11316 + }, + { + "epoch": 0.93, + "grad_norm": 1.2645169967609522, + "learning_rate": 1.4663427039853574e-07, + "loss": 0.1846, + "step": 11317 + }, + { + "epoch": 0.93, + "grad_norm": 3.820504603394702, + "learning_rate": 1.4631619991178568e-07, + "loss": 0.8034, + "step": 11318 + }, + { + "epoch": 0.93, + "grad_norm": 6.132497291593508, + "learning_rate": 1.459984696471467e-07, + "loss": 1.0389, + "step": 11319 + }, + { + "epoch": 0.93, + "grad_norm": 3.483890146892965, + "learning_rate": 1.4568107962688981e-07, + "loss": 0.6468, + "step": 11320 + }, + { + "epoch": 0.93, + "grad_norm": 1.8060879496132378, + "learning_rate": 1.4536402987326448e-07, + "loss": 0.3804, + "step": 11321 + }, + { + "epoch": 0.93, + "grad_norm": 3.6983043868534735, + "learning_rate": 1.4504732040849234e-07, + "loss": 0.6215, + "step": 11322 + }, + { + "epoch": 0.93, + "grad_norm": 3.8015248517561604, + "learning_rate": 1.4473095125477455e-07, + "loss": 0.5368, + "step": 11323 + }, + { + "epoch": 0.93, + "grad_norm": 2.678951740513247, + "learning_rate": 1.4441492243428668e-07, + "loss": 0.4824, + "step": 11324 + }, + { + "epoch": 0.93, + "grad_norm": 3.2553897273348658, + "learning_rate": 1.4409923396918102e-07, + "loss": 0.5046, + "step": 11325 + }, + { + "epoch": 0.93, + "grad_norm": 4.679285071330336, + "learning_rate": 1.4378388588158598e-07, + "loss": 1.0004, + "step": 11326 + }, + { + "epoch": 0.93, + "grad_norm": 5.874660054217869, + "learning_rate": 1.4346887819360667e-07, + "loss": 0.9498, + "step": 11327 + }, + { + "epoch": 0.93, + "grad_norm": 1.7774450413499203, + "learning_rate": 1.4315421092732262e-07, + "loss": 0.3228, + "step": 11328 + }, + { + "epoch": 0.93, + "grad_norm": 4.245778087293293, + "learning_rate": 1.4283988410479233e-07, + "loss": 0.5838, + "step": 11329 + }, + { + "epoch": 0.93, + "grad_norm": 2.677879261074963, + "learning_rate": 1.4252589774804705e-07, + "loss": 0.3299, + "step": 11330 + }, + { + "epoch": 0.93, + "grad_norm": 3.396066471615801, + "learning_rate": 1.4221225187909692e-07, + "loss": 0.5794, + "step": 11331 + }, + { + "epoch": 0.93, + "grad_norm": 2.993010315065761, + "learning_rate": 1.4189894651992665e-07, + "loss": 0.4717, + "step": 11332 + }, + { + "epoch": 0.93, + "grad_norm": 4.70289365334675, + "learning_rate": 1.415859816924975e-07, + "loss": 0.8767, + "step": 11333 + }, + { + "epoch": 0.93, + "grad_norm": 4.220641939449342, + "learning_rate": 1.412733574187475e-07, + "loss": 0.6153, + "step": 11334 + }, + { + "epoch": 0.93, + "grad_norm": 4.365275761926734, + "learning_rate": 1.4096107372058966e-07, + "loss": 0.5104, + "step": 11335 + }, + { + "epoch": 0.93, + "grad_norm": 3.9165392789832314, + "learning_rate": 1.406491306199137e-07, + "loss": 0.5385, + "step": 11336 + }, + { + "epoch": 0.93, + "grad_norm": 4.824793724718174, + "learning_rate": 1.4033752813858603e-07, + "loss": 1.0546, + "step": 11337 + }, + { + "epoch": 0.93, + "grad_norm": 3.780770875995635, + "learning_rate": 1.4002626629844862e-07, + "loss": 0.5101, + "step": 11338 + }, + { + "epoch": 0.93, + "grad_norm": 2.5442777056394754, + "learning_rate": 1.3971534512131845e-07, + "loss": 0.253, + "step": 11339 + }, + { + "epoch": 0.93, + "grad_norm": 5.6304779254960815, + "learning_rate": 1.3940476462899143e-07, + "loss": 0.815, + "step": 11340 + }, + { + "epoch": 0.93, + "grad_norm": 3.6909485546180965, + "learning_rate": 1.3909452484323682e-07, + "loss": 0.5588, + "step": 11341 + }, + { + "epoch": 0.93, + "grad_norm": 4.095782462527793, + "learning_rate": 1.3878462578580054e-07, + "loss": 0.6584, + "step": 11342 + }, + { + "epoch": 0.93, + "grad_norm": 4.056814596706321, + "learning_rate": 1.3847506747840633e-07, + "loss": 0.7376, + "step": 11343 + }, + { + "epoch": 0.93, + "grad_norm": 3.2531875872661526, + "learning_rate": 1.3816584994275238e-07, + "loss": 0.5637, + "step": 11344 + }, + { + "epoch": 0.93, + "grad_norm": 3.926482984992203, + "learning_rate": 1.378569732005136e-07, + "loss": 0.9198, + "step": 11345 + }, + { + "epoch": 0.93, + "grad_norm": 3.9457916584839743, + "learning_rate": 1.3754843727334156e-07, + "loss": 0.5255, + "step": 11346 + }, + { + "epoch": 0.93, + "grad_norm": 4.727290180209972, + "learning_rate": 1.3724024218286224e-07, + "loss": 0.9668, + "step": 11347 + }, + { + "epoch": 0.93, + "grad_norm": 5.224952767802356, + "learning_rate": 1.3693238795067897e-07, + "loss": 0.9269, + "step": 11348 + }, + { + "epoch": 0.93, + "grad_norm": 4.965815540685722, + "learning_rate": 1.3662487459837114e-07, + "loss": 1.1514, + "step": 11349 + }, + { + "epoch": 0.93, + "grad_norm": 5.049303234978047, + "learning_rate": 1.3631770214749374e-07, + "loss": 0.6255, + "step": 11350 + }, + { + "epoch": 0.93, + "grad_norm": 3.981080124905114, + "learning_rate": 1.3601087061957953e-07, + "loss": 0.515, + "step": 11351 + }, + { + "epoch": 0.93, + "grad_norm": 4.615161473763682, + "learning_rate": 1.3570438003613462e-07, + "loss": 0.8371, + "step": 11352 + }, + { + "epoch": 0.93, + "grad_norm": 4.9398318733080595, + "learning_rate": 1.3539823041864354e-07, + "loss": 1.1855, + "step": 11353 + }, + { + "epoch": 0.93, + "grad_norm": 2.9646347094591925, + "learning_rate": 1.350924217885652e-07, + "loss": 0.5909, + "step": 11354 + }, + { + "epoch": 0.93, + "grad_norm": 3.4578430140887053, + "learning_rate": 1.3478695416733577e-07, + "loss": 0.5344, + "step": 11355 + }, + { + "epoch": 0.93, + "grad_norm": 3.1623744287144895, + "learning_rate": 1.3448182757636763e-07, + "loss": 0.457, + "step": 11356 + }, + { + "epoch": 0.93, + "grad_norm": 4.4389502492090305, + "learning_rate": 1.3417704203704919e-07, + "loss": 0.5576, + "step": 11357 + }, + { + "epoch": 0.93, + "grad_norm": 3.7181312116356975, + "learning_rate": 1.3387259757074334e-07, + "loss": 0.8019, + "step": 11358 + }, + { + "epoch": 0.93, + "grad_norm": 4.336195288213495, + "learning_rate": 1.335684941987908e-07, + "loss": 1.0168, + "step": 11359 + }, + { + "epoch": 0.93, + "grad_norm": 4.387383483589837, + "learning_rate": 1.3326473194250844e-07, + "loss": 0.8317, + "step": 11360 + }, + { + "epoch": 0.93, + "grad_norm": 4.455915677307732, + "learning_rate": 1.3296131082318808e-07, + "loss": 0.7971, + "step": 11361 + }, + { + "epoch": 0.93, + "grad_norm": 1.0769604095285712, + "learning_rate": 1.3265823086209828e-07, + "loss": 0.1388, + "step": 11362 + }, + { + "epoch": 0.93, + "grad_norm": 4.403256666098837, + "learning_rate": 1.3235549208048426e-07, + "loss": 0.5191, + "step": 11363 + }, + { + "epoch": 0.93, + "grad_norm": 2.7045935963524754, + "learning_rate": 1.320530944995657e-07, + "loss": 0.5484, + "step": 11364 + }, + { + "epoch": 0.93, + "grad_norm": 4.352491888105958, + "learning_rate": 1.3175103814053958e-07, + "loss": 0.7698, + "step": 11365 + }, + { + "epoch": 0.93, + "grad_norm": 2.037086127689683, + "learning_rate": 1.3144932302457948e-07, + "loss": 0.3168, + "step": 11366 + }, + { + "epoch": 0.93, + "grad_norm": 4.628660771907785, + "learning_rate": 1.3114794917283403e-07, + "loss": 0.8921, + "step": 11367 + }, + { + "epoch": 0.93, + "grad_norm": 5.473819307131876, + "learning_rate": 1.3084691660642746e-07, + "loss": 0.7624, + "step": 11368 + }, + { + "epoch": 0.93, + "grad_norm": 4.545272780439525, + "learning_rate": 1.3054622534646234e-07, + "loss": 0.7565, + "step": 11369 + }, + { + "epoch": 0.93, + "grad_norm": 3.37639945632583, + "learning_rate": 1.3024587541401402e-07, + "loss": 0.8412, + "step": 11370 + }, + { + "epoch": 0.93, + "grad_norm": 3.231299791289676, + "learning_rate": 1.2994586683013677e-07, + "loss": 0.5228, + "step": 11371 + }, + { + "epoch": 0.93, + "grad_norm": 3.6247239231922217, + "learning_rate": 1.2964619961585934e-07, + "loss": 0.6691, + "step": 11372 + }, + { + "epoch": 0.93, + "grad_norm": 4.335661847549364, + "learning_rate": 1.2934687379218769e-07, + "loss": 0.7996, + "step": 11373 + }, + { + "epoch": 0.93, + "grad_norm": 4.194778115338029, + "learning_rate": 1.2904788938010392e-07, + "loss": 0.8221, + "step": 11374 + }, + { + "epoch": 0.93, + "grad_norm": 4.091606578487979, + "learning_rate": 1.2874924640056352e-07, + "loss": 1.1225, + "step": 11375 + }, + { + "epoch": 0.93, + "grad_norm": 4.524644004317661, + "learning_rate": 1.2845094487450193e-07, + "loss": 0.9753, + "step": 11376 + }, + { + "epoch": 0.93, + "grad_norm": 5.182308703363743, + "learning_rate": 1.2815298482282746e-07, + "loss": 0.9673, + "step": 11377 + }, + { + "epoch": 0.93, + "grad_norm": 5.513074224594544, + "learning_rate": 1.278553662664267e-07, + "loss": 1.0608, + "step": 11378 + }, + { + "epoch": 0.93, + "grad_norm": 3.050545949195318, + "learning_rate": 1.2755808922616075e-07, + "loss": 0.2652, + "step": 11379 + }, + { + "epoch": 0.93, + "grad_norm": 4.282441709700939, + "learning_rate": 1.2726115372286852e-07, + "loss": 0.6766, + "step": 11380 + }, + { + "epoch": 0.93, + "grad_norm": 5.193279806048703, + "learning_rate": 1.2696455977736278e-07, + "loss": 1.0562, + "step": 11381 + }, + { + "epoch": 0.93, + "grad_norm": 4.149582341907641, + "learning_rate": 1.266683074104341e-07, + "loss": 0.8034, + "step": 11382 + }, + { + "epoch": 0.93, + "grad_norm": 3.8986898507073433, + "learning_rate": 1.2637239664284816e-07, + "loss": 0.6925, + "step": 11383 + }, + { + "epoch": 0.93, + "grad_norm": 6.340439187009353, + "learning_rate": 1.2607682749534723e-07, + "loss": 0.9223, + "step": 11384 + }, + { + "epoch": 0.93, + "grad_norm": 5.563330446190727, + "learning_rate": 1.2578159998864858e-07, + "loss": 1.0805, + "step": 11385 + }, + { + "epoch": 0.93, + "grad_norm": 4.127752602449478, + "learning_rate": 1.2548671414344848e-07, + "loss": 0.997, + "step": 11386 + }, + { + "epoch": 0.93, + "grad_norm": 3.686063809926292, + "learning_rate": 1.2519216998041483e-07, + "loss": 0.5383, + "step": 11387 + }, + { + "epoch": 0.93, + "grad_norm": 2.025269407670612, + "learning_rate": 1.2489796752019446e-07, + "loss": 0.286, + "step": 11388 + }, + { + "epoch": 0.93, + "grad_norm": 3.5751929466185035, + "learning_rate": 1.2460410678341027e-07, + "loss": 0.5423, + "step": 11389 + }, + { + "epoch": 0.93, + "grad_norm": 3.870787922141563, + "learning_rate": 1.2431058779066086e-07, + "loss": 0.7163, + "step": 11390 + }, + { + "epoch": 0.93, + "grad_norm": 3.8724761807165287, + "learning_rate": 1.2401741056252027e-07, + "loss": 0.4614, + "step": 11391 + }, + { + "epoch": 0.93, + "grad_norm": 2.818761266802908, + "learning_rate": 1.2372457511953816e-07, + "loss": 0.5997, + "step": 11392 + }, + { + "epoch": 0.93, + "grad_norm": 3.9234466514415307, + "learning_rate": 1.2343208148224205e-07, + "loss": 0.3844, + "step": 11393 + }, + { + "epoch": 0.93, + "grad_norm": 2.6717868926200237, + "learning_rate": 1.2313992967113442e-07, + "loss": 0.4092, + "step": 11394 + }, + { + "epoch": 0.93, + "grad_norm": 4.543066468599289, + "learning_rate": 1.228481197066933e-07, + "loss": 0.8673, + "step": 11395 + }, + { + "epoch": 0.93, + "grad_norm": 4.16694214591345, + "learning_rate": 1.2255665160937346e-07, + "loss": 0.9808, + "step": 11396 + }, + { + "epoch": 0.93, + "grad_norm": 4.468145315616178, + "learning_rate": 1.2226552539960578e-07, + "loss": 0.8954, + "step": 11397 + }, + { + "epoch": 0.93, + "grad_norm": 5.62108876858957, + "learning_rate": 1.2197474109779672e-07, + "loss": 0.9778, + "step": 11398 + }, + { + "epoch": 0.93, + "grad_norm": 3.3769233539605152, + "learning_rate": 1.2168429872432941e-07, + "loss": 0.5391, + "step": 11399 + }, + { + "epoch": 0.93, + "grad_norm": 4.030085613305808, + "learning_rate": 1.21394198299562e-07, + "loss": 0.8454, + "step": 11400 + }, + { + "epoch": 0.93, + "grad_norm": 5.005327340476278, + "learning_rate": 1.2110443984382936e-07, + "loss": 0.9019, + "step": 11401 + }, + { + "epoch": 0.93, + "grad_norm": 4.066588979693674, + "learning_rate": 1.2081502337744245e-07, + "loss": 0.5903, + "step": 11402 + }, + { + "epoch": 0.93, + "grad_norm": 4.59288686119471, + "learning_rate": 1.2052594892068892e-07, + "loss": 1.2391, + "step": 11403 + }, + { + "epoch": 0.93, + "grad_norm": 3.5518887254683364, + "learning_rate": 1.202372164938298e-07, + "loss": 0.5666, + "step": 11404 + }, + { + "epoch": 0.93, + "grad_norm": 4.788599245153345, + "learning_rate": 1.1994882611710502e-07, + "loss": 0.6974, + "step": 11405 + }, + { + "epoch": 0.93, + "grad_norm": 3.8459791543792003, + "learning_rate": 1.1966077781073006e-07, + "loss": 0.8061, + "step": 11406 + }, + { + "epoch": 0.93, + "grad_norm": 5.2033452368178885, + "learning_rate": 1.1937307159489486e-07, + "loss": 1.2126, + "step": 11407 + }, + { + "epoch": 0.93, + "grad_norm": 5.139334628288193, + "learning_rate": 1.1908570748976666e-07, + "loss": 0.5134, + "step": 11408 + }, + { + "epoch": 0.93, + "grad_norm": 5.922911461641456, + "learning_rate": 1.1879868551548935e-07, + "loss": 0.9709, + "step": 11409 + }, + { + "epoch": 0.93, + "grad_norm": 4.413980700520785, + "learning_rate": 1.1851200569218069e-07, + "loss": 0.3545, + "step": 11410 + }, + { + "epoch": 0.93, + "grad_norm": 4.643047829997474, + "learning_rate": 1.1822566803993574e-07, + "loss": 0.6406, + "step": 11411 + }, + { + "epoch": 0.93, + "grad_norm": 2.2187121643032532, + "learning_rate": 1.1793967257882621e-07, + "loss": 0.3273, + "step": 11412 + }, + { + "epoch": 0.93, + "grad_norm": 2.026110346408181, + "learning_rate": 1.1765401932889886e-07, + "loss": 0.279, + "step": 11413 + }, + { + "epoch": 0.93, + "grad_norm": 3.0060657564552162, + "learning_rate": 1.1736870831017711e-07, + "loss": 0.6003, + "step": 11414 + }, + { + "epoch": 0.93, + "grad_norm": 5.191971013035739, + "learning_rate": 1.1708373954265884e-07, + "loss": 0.9175, + "step": 11415 + }, + { + "epoch": 0.93, + "grad_norm": 4.064330439717445, + "learning_rate": 1.1679911304632086e-07, + "loss": 0.9524, + "step": 11416 + }, + { + "epoch": 0.93, + "grad_norm": 4.169885247510807, + "learning_rate": 1.1651482884111276e-07, + "loss": 0.8036, + "step": 11417 + }, + { + "epoch": 0.93, + "grad_norm": 4.785332141244321, + "learning_rate": 1.1623088694696194e-07, + "loss": 1.19, + "step": 11418 + }, + { + "epoch": 0.93, + "grad_norm": 2.594104038984991, + "learning_rate": 1.1594728738377192e-07, + "loss": 0.3819, + "step": 11419 + }, + { + "epoch": 0.93, + "grad_norm": 2.872396175952463, + "learning_rate": 1.156640301714218e-07, + "loss": 0.5467, + "step": 11420 + }, + { + "epoch": 0.93, + "grad_norm": 4.905342440994222, + "learning_rate": 1.1538111532976626e-07, + "loss": 0.7751, + "step": 11421 + }, + { + "epoch": 0.93, + "grad_norm": 2.729105469841603, + "learning_rate": 1.1509854287863609e-07, + "loss": 0.5878, + "step": 11422 + }, + { + "epoch": 0.93, + "grad_norm": 3.2242795366711547, + "learning_rate": 1.1481631283783934e-07, + "loss": 0.6588, + "step": 11423 + }, + { + "epoch": 0.93, + "grad_norm": 0.9178608870536981, + "learning_rate": 1.1453442522715852e-07, + "loss": 0.0959, + "step": 11424 + }, + { + "epoch": 0.93, + "grad_norm": 3.8820096712829804, + "learning_rate": 1.1425288006635283e-07, + "loss": 0.4315, + "step": 11425 + }, + { + "epoch": 0.93, + "grad_norm": 4.015369236993359, + "learning_rate": 1.1397167737515813e-07, + "loss": 0.6537, + "step": 11426 + }, + { + "epoch": 0.93, + "grad_norm": 3.291026797185897, + "learning_rate": 1.1369081717328423e-07, + "loss": 0.458, + "step": 11427 + }, + { + "epoch": 0.93, + "grad_norm": 3.7474722112109133, + "learning_rate": 1.1341029948041871e-07, + "loss": 0.6803, + "step": 11428 + }, + { + "epoch": 0.93, + "grad_norm": 5.460239601393617, + "learning_rate": 1.1313012431622472e-07, + "loss": 0.7746, + "step": 11429 + }, + { + "epoch": 0.93, + "grad_norm": 3.5032495408235245, + "learning_rate": 1.1285029170034156e-07, + "loss": 0.6912, + "step": 11430 + }, + { + "epoch": 0.93, + "grad_norm": 2.1065549064346887, + "learning_rate": 1.125708016523841e-07, + "loss": 0.3045, + "step": 11431 + }, + { + "epoch": 0.93, + "grad_norm": 4.201927306332262, + "learning_rate": 1.1229165419194332e-07, + "loss": 0.663, + "step": 11432 + }, + { + "epoch": 0.93, + "grad_norm": 4.893359351425234, + "learning_rate": 1.1201284933858581e-07, + "loss": 0.6112, + "step": 11433 + }, + { + "epoch": 0.93, + "grad_norm": 2.5457924134993117, + "learning_rate": 1.117343871118548e-07, + "loss": 0.3572, + "step": 11434 + }, + { + "epoch": 0.93, + "grad_norm": 3.7618255828224187, + "learning_rate": 1.114562675312697e-07, + "loss": 0.7144, + "step": 11435 + }, + { + "epoch": 0.93, + "grad_norm": 3.830967488476228, + "learning_rate": 1.1117849061632491e-07, + "loss": 0.668, + "step": 11436 + }, + { + "epoch": 0.93, + "grad_norm": 3.0456683266792077, + "learning_rate": 1.1090105638649262e-07, + "loss": 0.6332, + "step": 11437 + }, + { + "epoch": 0.93, + "grad_norm": 4.645145777245306, + "learning_rate": 1.1062396486121785e-07, + "loss": 0.7712, + "step": 11438 + }, + { + "epoch": 0.93, + "grad_norm": 5.69933829079648, + "learning_rate": 1.1034721605992504e-07, + "loss": 1.1358, + "step": 11439 + }, + { + "epoch": 0.94, + "grad_norm": 3.3023589956484822, + "learning_rate": 1.1007081000201203e-07, + "loss": 0.8938, + "step": 11440 + }, + { + "epoch": 0.94, + "grad_norm": 4.714037306552824, + "learning_rate": 1.0979474670685441e-07, + "loss": 0.9522, + "step": 11441 + }, + { + "epoch": 0.94, + "grad_norm": 3.8215571782472995, + "learning_rate": 1.0951902619380284e-07, + "loss": 0.8513, + "step": 11442 + }, + { + "epoch": 0.94, + "grad_norm": 5.151668741371961, + "learning_rate": 1.0924364848218461e-07, + "loss": 1.1703, + "step": 11443 + }, + { + "epoch": 0.94, + "grad_norm": 4.258142399241855, + "learning_rate": 1.0896861359130151e-07, + "loss": 0.992, + "step": 11444 + }, + { + "epoch": 0.94, + "grad_norm": 3.842097233666027, + "learning_rate": 1.0869392154043256e-07, + "loss": 0.6473, + "step": 11445 + }, + { + "epoch": 0.94, + "grad_norm": 5.282634427069878, + "learning_rate": 1.084195723488335e-07, + "loss": 1.0108, + "step": 11446 + }, + { + "epoch": 0.94, + "grad_norm": 4.82352021553961, + "learning_rate": 1.0814556603573334e-07, + "loss": 0.611, + "step": 11447 + }, + { + "epoch": 0.94, + "grad_norm": 3.9540453231454236, + "learning_rate": 1.0787190262034008e-07, + "loss": 0.6282, + "step": 11448 + }, + { + "epoch": 0.94, + "grad_norm": 3.724155380279264, + "learning_rate": 1.075985821218356e-07, + "loss": 0.4692, + "step": 11449 + }, + { + "epoch": 0.94, + "grad_norm": 4.823909371886717, + "learning_rate": 1.0732560455937902e-07, + "loss": 0.6942, + "step": 11450 + }, + { + "epoch": 0.94, + "grad_norm": 4.864402366534795, + "learning_rate": 1.070529699521039e-07, + "loss": 1.1268, + "step": 11451 + }, + { + "epoch": 0.94, + "grad_norm": 4.665735809875102, + "learning_rate": 1.0678067831912164e-07, + "loss": 0.933, + "step": 11452 + }, + { + "epoch": 0.94, + "grad_norm": 4.071968052088595, + "learning_rate": 1.0650872967951864e-07, + "loss": 0.5692, + "step": 11453 + }, + { + "epoch": 0.94, + "grad_norm": 4.402671875049084, + "learning_rate": 1.0623712405235742e-07, + "loss": 0.799, + "step": 11454 + }, + { + "epoch": 0.94, + "grad_norm": 2.8339504762304664, + "learning_rate": 1.0596586145667553e-07, + "loss": 0.6069, + "step": 11455 + }, + { + "epoch": 0.94, + "grad_norm": 4.554616223835177, + "learning_rate": 1.0569494191148832e-07, + "loss": 1.0409, + "step": 11456 + }, + { + "epoch": 0.94, + "grad_norm": 4.355035502155545, + "learning_rate": 1.0542436543578505e-07, + "loss": 0.9743, + "step": 11457 + }, + { + "epoch": 0.94, + "grad_norm": 2.8526725805057938, + "learning_rate": 1.0515413204853276e-07, + "loss": 0.2967, + "step": 11458 + }, + { + "epoch": 0.94, + "grad_norm": 4.924321023359295, + "learning_rate": 1.0488424176867351e-07, + "loss": 0.5891, + "step": 11459 + }, + { + "epoch": 0.94, + "grad_norm": 3.8122948981271754, + "learning_rate": 1.0461469461512552e-07, + "loss": 0.7662, + "step": 11460 + }, + { + "epoch": 0.94, + "grad_norm": 3.9773162802992377, + "learning_rate": 1.0434549060678201e-07, + "loss": 0.8828, + "step": 11461 + }, + { + "epoch": 0.94, + "grad_norm": 4.494031706823871, + "learning_rate": 1.0407662976251453e-07, + "loss": 1.0307, + "step": 11462 + }, + { + "epoch": 0.94, + "grad_norm": 3.871334368520525, + "learning_rate": 1.0380811210116748e-07, + "loss": 0.2681, + "step": 11463 + }, + { + "epoch": 0.94, + "grad_norm": 3.420993607912421, + "learning_rate": 1.0353993764156356e-07, + "loss": 0.5039, + "step": 11464 + }, + { + "epoch": 0.94, + "grad_norm": 3.5214802423323555, + "learning_rate": 1.0327210640250051e-07, + "loss": 0.5811, + "step": 11465 + }, + { + "epoch": 0.94, + "grad_norm": 4.1476833037383765, + "learning_rate": 1.0300461840275278e-07, + "loss": 0.5421, + "step": 11466 + }, + { + "epoch": 0.94, + "grad_norm": 3.064162042829437, + "learning_rate": 1.0273747366106867e-07, + "loss": 0.5128, + "step": 11467 + }, + { + "epoch": 0.94, + "grad_norm": 4.408249378397272, + "learning_rate": 1.024706721961749e-07, + "loss": 0.5601, + "step": 11468 + }, + { + "epoch": 0.94, + "grad_norm": 2.885012696896012, + "learning_rate": 1.0220421402677261e-07, + "loss": 0.5723, + "step": 11469 + }, + { + "epoch": 0.94, + "grad_norm": 3.891852518029071, + "learning_rate": 1.0193809917154018e-07, + "loss": 0.7822, + "step": 11470 + }, + { + "epoch": 0.94, + "grad_norm": 5.022331987363963, + "learning_rate": 1.0167232764913104e-07, + "loss": 0.9398, + "step": 11471 + }, + { + "epoch": 0.94, + "grad_norm": 3.880819542465347, + "learning_rate": 1.0140689947817305e-07, + "loss": 0.6669, + "step": 11472 + }, + { + "epoch": 0.94, + "grad_norm": 5.514773015344065, + "learning_rate": 1.0114181467727302e-07, + "loss": 1.0464, + "step": 11473 + }, + { + "epoch": 0.94, + "grad_norm": 3.204128200139511, + "learning_rate": 1.0087707326501218e-07, + "loss": 0.5684, + "step": 11474 + }, + { + "epoch": 0.94, + "grad_norm": 3.7287083542166064, + "learning_rate": 1.0061267525994678e-07, + "loss": 0.5869, + "step": 11475 + }, + { + "epoch": 0.94, + "grad_norm": 4.8170199043790864, + "learning_rate": 1.0034862068061147e-07, + "loss": 0.9989, + "step": 11476 + }, + { + "epoch": 0.94, + "grad_norm": 5.513320291452303, + "learning_rate": 1.0008490954551419e-07, + "loss": 0.7419, + "step": 11477 + }, + { + "epoch": 0.94, + "grad_norm": 3.078026930248082, + "learning_rate": 9.982154187314075e-08, + "loss": 0.1895, + "step": 11478 + }, + { + "epoch": 0.94, + "grad_norm": 4.0083071016056, + "learning_rate": 9.95585176819508e-08, + "loss": 0.5666, + "step": 11479 + }, + { + "epoch": 0.94, + "grad_norm": 4.944779680595329, + "learning_rate": 9.929583699038182e-08, + "loss": 0.9947, + "step": 11480 + }, + { + "epoch": 0.94, + "grad_norm": 5.471368599698787, + "learning_rate": 9.90334998168474e-08, + "loss": 1.0571, + "step": 11481 + }, + { + "epoch": 0.94, + "grad_norm": 4.1216598426591595, + "learning_rate": 9.877150617973507e-08, + "loss": 0.6191, + "step": 11482 + }, + { + "epoch": 0.94, + "grad_norm": 4.424815174836256, + "learning_rate": 9.850985609741015e-08, + "loss": 1.1982, + "step": 11483 + }, + { + "epoch": 0.94, + "grad_norm": 3.558857226478264, + "learning_rate": 9.824854958821295e-08, + "loss": 0.857, + "step": 11484 + }, + { + "epoch": 0.94, + "grad_norm": 3.6881372715841327, + "learning_rate": 9.798758667045993e-08, + "loss": 0.5128, + "step": 11485 + }, + { + "epoch": 0.94, + "grad_norm": 2.6042232762236632, + "learning_rate": 9.772696736244369e-08, + "loss": 0.3684, + "step": 11486 + }, + { + "epoch": 0.94, + "grad_norm": 3.3849127166399193, + "learning_rate": 9.746669168243184e-08, + "loss": 0.4047, + "step": 11487 + }, + { + "epoch": 0.94, + "grad_norm": 3.3419144586146996, + "learning_rate": 9.720675964866866e-08, + "loss": 0.491, + "step": 11488 + }, + { + "epoch": 0.94, + "grad_norm": 3.7121433656782155, + "learning_rate": 9.69471712793757e-08, + "loss": 0.6647, + "step": 11489 + }, + { + "epoch": 0.94, + "grad_norm": 1.3240341254558812, + "learning_rate": 9.668792659274729e-08, + "loss": 0.1673, + "step": 11490 + }, + { + "epoch": 0.94, + "grad_norm": 4.5540746486450585, + "learning_rate": 9.64290256069561e-08, + "loss": 0.9346, + "step": 11491 + }, + { + "epoch": 0.94, + "grad_norm": 4.080260945098309, + "learning_rate": 9.617046834014987e-08, + "loss": 1.0139, + "step": 11492 + }, + { + "epoch": 0.94, + "grad_norm": 3.7921434537664847, + "learning_rate": 9.591225481045186e-08, + "loss": 0.8898, + "step": 11493 + }, + { + "epoch": 0.94, + "grad_norm": 1.4892994629718517, + "learning_rate": 9.565438503596258e-08, + "loss": 0.3442, + "step": 11494 + }, + { + "epoch": 0.94, + "grad_norm": 3.137716244372587, + "learning_rate": 9.539685903475704e-08, + "loss": 0.4682, + "step": 11495 + }, + { + "epoch": 0.94, + "grad_norm": 3.5793078099482996, + "learning_rate": 9.513967682488634e-08, + "loss": 0.7187, + "step": 11496 + }, + { + "epoch": 0.94, + "grad_norm": 2.9457444408370574, + "learning_rate": 9.488283842437829e-08, + "loss": 0.5016, + "step": 11497 + }, + { + "epoch": 0.94, + "grad_norm": 1.1224986446700729, + "learning_rate": 9.462634385123681e-08, + "loss": 0.1085, + "step": 11498 + }, + { + "epoch": 0.94, + "grad_norm": 5.600689552970591, + "learning_rate": 9.437019312343976e-08, + "loss": 0.7454, + "step": 11499 + }, + { + "epoch": 0.94, + "grad_norm": 3.659396098124453, + "learning_rate": 9.411438625894331e-08, + "loss": 0.5225, + "step": 11500 + }, + { + "epoch": 0.94, + "grad_norm": 2.199140005369275, + "learning_rate": 9.385892327567759e-08, + "loss": 0.3812, + "step": 11501 + }, + { + "epoch": 0.94, + "grad_norm": 5.064817368758036, + "learning_rate": 9.360380419154935e-08, + "loss": 0.8633, + "step": 11502 + }, + { + "epoch": 0.94, + "grad_norm": 2.621385853262332, + "learning_rate": 9.33490290244421e-08, + "loss": 0.5887, + "step": 11503 + }, + { + "epoch": 0.94, + "grad_norm": 4.058381904526087, + "learning_rate": 9.309459779221375e-08, + "loss": 0.6112, + "step": 11504 + }, + { + "epoch": 0.94, + "grad_norm": 4.484172345562006, + "learning_rate": 9.284051051269949e-08, + "loss": 0.7339, + "step": 11505 + }, + { + "epoch": 0.94, + "grad_norm": 4.215384163118494, + "learning_rate": 9.258676720371007e-08, + "loss": 1.1403, + "step": 11506 + }, + { + "epoch": 0.94, + "grad_norm": 3.3297724747788098, + "learning_rate": 9.233336788303016e-08, + "loss": 0.5412, + "step": 11507 + }, + { + "epoch": 0.94, + "grad_norm": 1.2661471977938814, + "learning_rate": 9.208031256842332e-08, + "loss": 0.1712, + "step": 11508 + }, + { + "epoch": 0.94, + "grad_norm": 4.154101220777355, + "learning_rate": 9.182760127762757e-08, + "loss": 0.7015, + "step": 11509 + }, + { + "epoch": 0.94, + "grad_norm": 2.023772803963613, + "learning_rate": 9.1575234028356e-08, + "loss": 0.3518, + "step": 11510 + }, + { + "epoch": 0.94, + "grad_norm": 2.673827508559293, + "learning_rate": 9.132321083829943e-08, + "loss": 0.2518, + "step": 11511 + }, + { + "epoch": 0.94, + "grad_norm": 2.8791526657741016, + "learning_rate": 9.107153172512318e-08, + "loss": 0.5467, + "step": 11512 + }, + { + "epoch": 0.94, + "grad_norm": 3.850501346120811, + "learning_rate": 9.082019670646813e-08, + "loss": 0.6633, + "step": 11513 + }, + { + "epoch": 0.94, + "grad_norm": 2.531166946857835, + "learning_rate": 9.056920579995299e-08, + "loss": 0.5177, + "step": 11514 + }, + { + "epoch": 0.94, + "grad_norm": 3.5638372089779664, + "learning_rate": 9.031855902317033e-08, + "loss": 0.4288, + "step": 11515 + }, + { + "epoch": 0.94, + "grad_norm": 4.875937634908744, + "learning_rate": 9.006825639368944e-08, + "loss": 0.6471, + "step": 11516 + }, + { + "epoch": 0.94, + "grad_norm": 4.9193632747619445, + "learning_rate": 8.981829792905628e-08, + "loss": 0.8261, + "step": 11517 + }, + { + "epoch": 0.94, + "grad_norm": 2.9089533805539123, + "learning_rate": 8.956868364679128e-08, + "loss": 0.4271, + "step": 11518 + }, + { + "epoch": 0.94, + "grad_norm": 2.383332574181771, + "learning_rate": 8.9319413564391e-08, + "loss": 0.324, + "step": 11519 + }, + { + "epoch": 0.94, + "grad_norm": 2.6635614921681072, + "learning_rate": 8.907048769932813e-08, + "loss": 0.4885, + "step": 11520 + }, + { + "epoch": 0.94, + "grad_norm": 4.417415217501595, + "learning_rate": 8.882190606905206e-08, + "loss": 0.5147, + "step": 11521 + }, + { + "epoch": 0.94, + "grad_norm": 2.5786572783250215, + "learning_rate": 8.85736686909866e-08, + "loss": 0.4211, + "step": 11522 + }, + { + "epoch": 0.94, + "grad_norm": 4.139069378559404, + "learning_rate": 8.832577558253285e-08, + "loss": 1.0832, + "step": 11523 + }, + { + "epoch": 0.94, + "grad_norm": 3.3481033051612243, + "learning_rate": 8.807822676106637e-08, + "loss": 0.4845, + "step": 11524 + }, + { + "epoch": 0.94, + "grad_norm": 3.4623507690569926, + "learning_rate": 8.783102224393992e-08, + "loss": 0.8474, + "step": 11525 + }, + { + "epoch": 0.94, + "grad_norm": 4.897890325096617, + "learning_rate": 8.758416204848019e-08, + "loss": 0.5634, + "step": 11526 + }, + { + "epoch": 0.94, + "grad_norm": 6.2330917403536095, + "learning_rate": 8.73376461919917e-08, + "loss": 0.9752, + "step": 11527 + }, + { + "epoch": 0.94, + "grad_norm": 4.622719270232019, + "learning_rate": 8.709147469175449e-08, + "loss": 0.8331, + "step": 11528 + }, + { + "epoch": 0.94, + "grad_norm": 3.632434371411331, + "learning_rate": 8.684564756502423e-08, + "loss": 0.8461, + "step": 11529 + }, + { + "epoch": 0.94, + "grad_norm": 4.221314964205183, + "learning_rate": 8.660016482903156e-08, + "loss": 0.8041, + "step": 11530 + }, + { + "epoch": 0.94, + "grad_norm": 3.096728943256637, + "learning_rate": 8.635502650098437e-08, + "loss": 0.458, + "step": 11531 + }, + { + "epoch": 0.94, + "grad_norm": 2.889713743014272, + "learning_rate": 8.611023259806561e-08, + "loss": 0.3912, + "step": 11532 + }, + { + "epoch": 0.94, + "grad_norm": 5.032860247914431, + "learning_rate": 8.586578313743377e-08, + "loss": 0.8403, + "step": 11533 + }, + { + "epoch": 0.94, + "grad_norm": 2.898698151282602, + "learning_rate": 8.562167813622457e-08, + "loss": 0.4575, + "step": 11534 + }, + { + "epoch": 0.94, + "grad_norm": 3.2422326007950137, + "learning_rate": 8.537791761154823e-08, + "loss": 0.4273, + "step": 11535 + }, + { + "epoch": 0.94, + "grad_norm": 4.608676149082793, + "learning_rate": 8.513450158049109e-08, + "loss": 1.062, + "step": 11536 + }, + { + "epoch": 0.94, + "grad_norm": 4.788955881010936, + "learning_rate": 8.489143006011613e-08, + "loss": 0.7896, + "step": 11537 + }, + { + "epoch": 0.94, + "grad_norm": 4.747015684652336, + "learning_rate": 8.464870306746087e-08, + "loss": 0.9884, + "step": 11538 + }, + { + "epoch": 0.94, + "grad_norm": 6.603052178222852, + "learning_rate": 8.440632061954057e-08, + "loss": 0.8629, + "step": 11539 + }, + { + "epoch": 0.94, + "grad_norm": 4.844263423828449, + "learning_rate": 8.416428273334387e-08, + "loss": 0.8126, + "step": 11540 + }, + { + "epoch": 0.94, + "grad_norm": 3.903695648370362, + "learning_rate": 8.392258942583775e-08, + "loss": 0.6307, + "step": 11541 + }, + { + "epoch": 0.94, + "grad_norm": 4.425499726387105, + "learning_rate": 8.368124071396255e-08, + "loss": 0.6124, + "step": 11542 + }, + { + "epoch": 0.94, + "grad_norm": 2.4550308257582176, + "learning_rate": 8.344023661463696e-08, + "loss": 0.3733, + "step": 11543 + }, + { + "epoch": 0.94, + "grad_norm": 4.116310760936996, + "learning_rate": 8.319957714475357e-08, + "loss": 0.6365, + "step": 11544 + }, + { + "epoch": 0.94, + "grad_norm": 4.5552693050682, + "learning_rate": 8.295926232118168e-08, + "loss": 0.6594, + "step": 11545 + }, + { + "epoch": 0.94, + "grad_norm": 3.678799517409882, + "learning_rate": 8.271929216076724e-08, + "loss": 0.9425, + "step": 11546 + }, + { + "epoch": 0.94, + "grad_norm": 4.454174229725478, + "learning_rate": 8.247966668032958e-08, + "loss": 0.7239, + "step": 11547 + }, + { + "epoch": 0.94, + "grad_norm": 3.417164602813985, + "learning_rate": 8.224038589666639e-08, + "loss": 0.7123, + "step": 11548 + }, + { + "epoch": 0.94, + "grad_norm": 4.485645869758046, + "learning_rate": 8.200144982654978e-08, + "loss": 1.0404, + "step": 11549 + }, + { + "epoch": 0.94, + "grad_norm": 4.749260992056846, + "learning_rate": 8.17628584867286e-08, + "loss": 0.7257, + "step": 11550 + }, + { + "epoch": 0.94, + "grad_norm": 4.735974713852315, + "learning_rate": 8.152461189392725e-08, + "loss": 0.947, + "step": 11551 + }, + { + "epoch": 0.94, + "grad_norm": 2.691532152322852, + "learning_rate": 8.128671006484457e-08, + "loss": 0.4618, + "step": 11552 + }, + { + "epoch": 0.94, + "grad_norm": 3.606720790843942, + "learning_rate": 8.104915301615723e-08, + "loss": 0.5511, + "step": 11553 + }, + { + "epoch": 0.94, + "grad_norm": 4.203064780496871, + "learning_rate": 8.081194076451749e-08, + "loss": 0.9137, + "step": 11554 + }, + { + "epoch": 0.94, + "grad_norm": 2.423709468178528, + "learning_rate": 8.057507332655201e-08, + "loss": 0.5172, + "step": 11555 + }, + { + "epoch": 0.94, + "grad_norm": 3.544884911549885, + "learning_rate": 8.033855071886476e-08, + "loss": 0.7933, + "step": 11556 + }, + { + "epoch": 0.94, + "grad_norm": 4.05296144706269, + "learning_rate": 8.01023729580347e-08, + "loss": 0.7496, + "step": 11557 + }, + { + "epoch": 0.94, + "grad_norm": 5.168752164032706, + "learning_rate": 7.986654006061633e-08, + "loss": 0.6369, + "step": 11558 + }, + { + "epoch": 0.94, + "grad_norm": 5.6099727114647076, + "learning_rate": 7.963105204314092e-08, + "loss": 0.9393, + "step": 11559 + }, + { + "epoch": 0.94, + "grad_norm": 4.972524823079395, + "learning_rate": 7.939590892211523e-08, + "loss": 0.9579, + "step": 11560 + }, + { + "epoch": 0.94, + "grad_norm": 2.4525684664562473, + "learning_rate": 7.916111071402222e-08, + "loss": 0.3673, + "step": 11561 + }, + { + "epoch": 0.95, + "grad_norm": 5.365456772535376, + "learning_rate": 7.89266574353198e-08, + "loss": 0.5321, + "step": 11562 + }, + { + "epoch": 0.95, + "grad_norm": 3.5464746563623795, + "learning_rate": 7.869254910244206e-08, + "loss": 0.5361, + "step": 11563 + }, + { + "epoch": 0.95, + "grad_norm": 3.8067799548706502, + "learning_rate": 7.845878573179866e-08, + "loss": 0.7528, + "step": 11564 + }, + { + "epoch": 0.95, + "grad_norm": 2.0718976468322223, + "learning_rate": 7.822536733977592e-08, + "loss": 0.4032, + "step": 11565 + }, + { + "epoch": 0.95, + "grad_norm": 5.789649920090571, + "learning_rate": 7.799229394273522e-08, + "loss": 0.8454, + "step": 11566 + }, + { + "epoch": 0.95, + "grad_norm": 4.544293359733326, + "learning_rate": 7.775956555701458e-08, + "loss": 0.8429, + "step": 11567 + }, + { + "epoch": 0.95, + "grad_norm": 4.0705618784929, + "learning_rate": 7.752718219892597e-08, + "loss": 0.5423, + "step": 11568 + }, + { + "epoch": 0.95, + "grad_norm": 4.604745100289837, + "learning_rate": 7.729514388476023e-08, + "loss": 0.7956, + "step": 11569 + }, + { + "epoch": 0.95, + "grad_norm": 3.7360391468404823, + "learning_rate": 7.706345063078047e-08, + "loss": 0.8195, + "step": 11570 + }, + { + "epoch": 0.95, + "grad_norm": 5.623807845515393, + "learning_rate": 7.683210245322869e-08, + "loss": 1.0799, + "step": 11571 + }, + { + "epoch": 0.95, + "grad_norm": 3.2628999944535972, + "learning_rate": 7.660109936832027e-08, + "loss": 0.6359, + "step": 11572 + }, + { + "epoch": 0.95, + "grad_norm": 4.573771335494906, + "learning_rate": 7.63704413922478e-08, + "loss": 0.7494, + "step": 11573 + }, + { + "epoch": 0.95, + "grad_norm": 4.991034643761258, + "learning_rate": 7.614012854118058e-08, + "loss": 1.2426, + "step": 11574 + }, + { + "epoch": 0.95, + "grad_norm": 5.16808363897129, + "learning_rate": 7.591016083126124e-08, + "loss": 1.2851, + "step": 11575 + }, + { + "epoch": 0.95, + "grad_norm": 3.2343520940224546, + "learning_rate": 7.568053827860966e-08, + "loss": 0.5812, + "step": 11576 + }, + { + "epoch": 0.95, + "grad_norm": 3.2577967730041224, + "learning_rate": 7.545126089932186e-08, + "loss": 0.7634, + "step": 11577 + }, + { + "epoch": 0.95, + "grad_norm": 3.050302492327812, + "learning_rate": 7.522232870946889e-08, + "loss": 0.7246, + "step": 11578 + }, + { + "epoch": 0.95, + "grad_norm": 5.43669760424669, + "learning_rate": 7.499374172509789e-08, + "loss": 0.8774, + "step": 11579 + }, + { + "epoch": 0.95, + "grad_norm": 4.640794761815384, + "learning_rate": 7.476549996223215e-08, + "loss": 1.0219, + "step": 11580 + }, + { + "epoch": 0.95, + "grad_norm": 4.831791310140802, + "learning_rate": 7.453760343686999e-08, + "loss": 0.774, + "step": 11581 + }, + { + "epoch": 0.95, + "grad_norm": 4.7100391569805735, + "learning_rate": 7.431005216498588e-08, + "loss": 0.8476, + "step": 11582 + }, + { + "epoch": 0.95, + "grad_norm": 3.514649034623249, + "learning_rate": 7.408284616253092e-08, + "loss": 0.6735, + "step": 11583 + }, + { + "epoch": 0.95, + "grad_norm": 3.860514730999645, + "learning_rate": 7.385598544543015e-08, + "loss": 0.7812, + "step": 11584 + }, + { + "epoch": 0.95, + "grad_norm": 3.5803739223711872, + "learning_rate": 7.362947002958698e-08, + "loss": 0.5444, + "step": 11585 + }, + { + "epoch": 0.95, + "grad_norm": 3.2568546454751908, + "learning_rate": 7.340329993087813e-08, + "loss": 0.5813, + "step": 11586 + }, + { + "epoch": 0.95, + "grad_norm": 4.295020809146325, + "learning_rate": 7.31774751651576e-08, + "loss": 0.9003, + "step": 11587 + }, + { + "epoch": 0.95, + "grad_norm": 3.081893972177772, + "learning_rate": 7.295199574825384e-08, + "loss": 0.6387, + "step": 11588 + }, + { + "epoch": 0.95, + "grad_norm": 5.705099452527816, + "learning_rate": 7.272686169597253e-08, + "loss": 0.8707, + "step": 11589 + }, + { + "epoch": 0.95, + "grad_norm": 4.952450169969662, + "learning_rate": 7.250207302409496e-08, + "loss": 0.6748, + "step": 11590 + }, + { + "epoch": 0.95, + "grad_norm": 5.098531567953999, + "learning_rate": 7.227762974837793e-08, + "loss": 0.6623, + "step": 11591 + }, + { + "epoch": 0.95, + "grad_norm": 2.735206583392966, + "learning_rate": 7.205353188455277e-08, + "loss": 0.4195, + "step": 11592 + }, + { + "epoch": 0.95, + "grad_norm": 2.2890640064475805, + "learning_rate": 7.182977944832859e-08, + "loss": 0.3627, + "step": 11593 + }, + { + "epoch": 0.95, + "grad_norm": 5.931514297049116, + "learning_rate": 7.160637245538949e-08, + "loss": 0.9897, + "step": 11594 + }, + { + "epoch": 0.95, + "grad_norm": 2.9622194419255887, + "learning_rate": 7.138331092139573e-08, + "loss": 0.506, + "step": 11595 + }, + { + "epoch": 0.95, + "grad_norm": 6.133161143859423, + "learning_rate": 7.116059486198201e-08, + "loss": 1.2324, + "step": 11596 + }, + { + "epoch": 0.95, + "grad_norm": 3.989076014377061, + "learning_rate": 7.093822429276032e-08, + "loss": 0.6505, + "step": 11597 + }, + { + "epoch": 0.95, + "grad_norm": 5.539630457357993, + "learning_rate": 7.07161992293176e-08, + "loss": 0.8703, + "step": 11598 + }, + { + "epoch": 0.95, + "grad_norm": 4.498339770297644, + "learning_rate": 7.049451968721699e-08, + "loss": 0.7966, + "step": 11599 + }, + { + "epoch": 0.95, + "grad_norm": 4.006096866758366, + "learning_rate": 7.02731856819977e-08, + "loss": 0.6534, + "step": 11600 + }, + { + "epoch": 0.95, + "grad_norm": 4.770977893654591, + "learning_rate": 7.0052197229174e-08, + "loss": 0.9353, + "step": 11601 + }, + { + "epoch": 0.95, + "grad_norm": 3.9628131174003864, + "learning_rate": 6.983155434423517e-08, + "loss": 1.1756, + "step": 11602 + }, + { + "epoch": 0.95, + "grad_norm": 3.7831215570400714, + "learning_rate": 6.961125704264937e-08, + "loss": 0.6618, + "step": 11603 + }, + { + "epoch": 0.95, + "grad_norm": 5.3595549402144105, + "learning_rate": 6.93913053398565e-08, + "loss": 1.1731, + "step": 11604 + }, + { + "epoch": 0.95, + "grad_norm": 2.6089979218198756, + "learning_rate": 6.917169925127476e-08, + "loss": 0.416, + "step": 11605 + }, + { + "epoch": 0.95, + "grad_norm": 3.289736387729071, + "learning_rate": 6.895243879229852e-08, + "loss": 0.6777, + "step": 11606 + }, + { + "epoch": 0.95, + "grad_norm": 3.1473913374249305, + "learning_rate": 6.873352397829603e-08, + "loss": 0.5736, + "step": 11607 + }, + { + "epoch": 0.95, + "grad_norm": 4.712640401611258, + "learning_rate": 6.851495482461279e-08, + "loss": 0.8186, + "step": 11608 + }, + { + "epoch": 0.95, + "grad_norm": 3.7269873024694307, + "learning_rate": 6.829673134656934e-08, + "loss": 0.8095, + "step": 11609 + }, + { + "epoch": 0.95, + "grad_norm": 4.398859974456807, + "learning_rate": 6.807885355946176e-08, + "loss": 0.7604, + "step": 11610 + }, + { + "epoch": 0.95, + "grad_norm": 5.468407590378123, + "learning_rate": 6.786132147856283e-08, + "loss": 0.7929, + "step": 11611 + }, + { + "epoch": 0.95, + "grad_norm": 1.0647493326111022, + "learning_rate": 6.764413511912094e-08, + "loss": 0.1609, + "step": 11612 + }, + { + "epoch": 0.95, + "grad_norm": 3.014610172645361, + "learning_rate": 6.742729449635888e-08, + "loss": 0.6214, + "step": 11613 + }, + { + "epoch": 0.95, + "grad_norm": 2.6337823901937814, + "learning_rate": 6.721079962547783e-08, + "loss": 0.5395, + "step": 11614 + }, + { + "epoch": 0.95, + "grad_norm": 3.595467025102142, + "learning_rate": 6.699465052165122e-08, + "loss": 0.6058, + "step": 11615 + }, + { + "epoch": 0.95, + "grad_norm": 2.3282336878955827, + "learning_rate": 6.677884720003136e-08, + "loss": 0.2651, + "step": 11616 + }, + { + "epoch": 0.95, + "grad_norm": 5.320522459061478, + "learning_rate": 6.656338967574505e-08, + "loss": 1.1039, + "step": 11617 + }, + { + "epoch": 0.95, + "grad_norm": 5.061575871391227, + "learning_rate": 6.634827796389465e-08, + "loss": 1.2823, + "step": 11618 + }, + { + "epoch": 0.95, + "grad_norm": 4.021015682318374, + "learning_rate": 6.613351207955865e-08, + "loss": 0.6708, + "step": 11619 + }, + { + "epoch": 0.95, + "grad_norm": 4.8257923699654635, + "learning_rate": 6.591909203779167e-08, + "loss": 0.7875, + "step": 11620 + }, + { + "epoch": 0.95, + "grad_norm": 4.35300776701248, + "learning_rate": 6.570501785362227e-08, + "loss": 0.7842, + "step": 11621 + }, + { + "epoch": 0.95, + "grad_norm": 4.960849040274898, + "learning_rate": 6.54912895420573e-08, + "loss": 0.9143, + "step": 11622 + }, + { + "epoch": 0.95, + "grad_norm": 4.417844368499578, + "learning_rate": 6.527790711807813e-08, + "loss": 0.9415, + "step": 11623 + }, + { + "epoch": 0.95, + "grad_norm": 4.634517255764881, + "learning_rate": 6.506487059664113e-08, + "loss": 0.6651, + "step": 11624 + }, + { + "epoch": 0.95, + "grad_norm": 3.3349906425666074, + "learning_rate": 6.485217999268045e-08, + "loss": 0.4669, + "step": 11625 + }, + { + "epoch": 0.95, + "grad_norm": 4.088055091691627, + "learning_rate": 6.463983532110418e-08, + "loss": 0.7205, + "step": 11626 + }, + { + "epoch": 0.95, + "grad_norm": 6.189227315555454, + "learning_rate": 6.442783659679596e-08, + "loss": 0.8922, + "step": 11627 + }, + { + "epoch": 0.95, + "grad_norm": 4.062688209602089, + "learning_rate": 6.421618383461726e-08, + "loss": 0.792, + "step": 11628 + }, + { + "epoch": 0.95, + "grad_norm": 5.0459100063850295, + "learning_rate": 6.400487704940284e-08, + "loss": 1.041, + "step": 11629 + }, + { + "epoch": 0.95, + "grad_norm": 3.179069687860232, + "learning_rate": 6.379391625596587e-08, + "loss": 0.698, + "step": 11630 + }, + { + "epoch": 0.95, + "grad_norm": 5.734301783196438, + "learning_rate": 6.358330146909231e-08, + "loss": 0.8751, + "step": 11631 + }, + { + "epoch": 0.95, + "grad_norm": 3.6330821281949284, + "learning_rate": 6.337303270354644e-08, + "loss": 0.7615, + "step": 11632 + }, + { + "epoch": 0.95, + "grad_norm": 3.1857711430187416, + "learning_rate": 6.31631099740665e-08, + "loss": 0.5557, + "step": 11633 + }, + { + "epoch": 0.95, + "grad_norm": 3.2242637373736787, + "learning_rate": 6.295353329536736e-08, + "loss": 0.5299, + "step": 11634 + }, + { + "epoch": 0.95, + "grad_norm": 2.627338941591819, + "learning_rate": 6.274430268213949e-08, + "loss": 0.5305, + "step": 11635 + }, + { + "epoch": 0.95, + "grad_norm": 4.179602518667075, + "learning_rate": 6.253541814904895e-08, + "loss": 0.5425, + "step": 11636 + }, + { + "epoch": 0.95, + "grad_norm": 5.042256472962217, + "learning_rate": 6.232687971073792e-08, + "loss": 0.5536, + "step": 11637 + }, + { + "epoch": 0.95, + "grad_norm": 2.5314106009173982, + "learning_rate": 6.211868738182303e-08, + "loss": 0.3046, + "step": 11638 + }, + { + "epoch": 0.95, + "grad_norm": 3.710831625897087, + "learning_rate": 6.191084117689871e-08, + "loss": 0.5028, + "step": 11639 + }, + { + "epoch": 0.95, + "grad_norm": 1.887473282809646, + "learning_rate": 6.170334111053444e-08, + "loss": 0.3401, + "step": 11640 + }, + { + "epoch": 0.95, + "grad_norm": 1.086127827375325, + "learning_rate": 6.149618719727358e-08, + "loss": 0.1172, + "step": 11641 + }, + { + "epoch": 0.95, + "grad_norm": 2.805108073029417, + "learning_rate": 6.128937945163782e-08, + "loss": 0.2569, + "step": 11642 + }, + { + "epoch": 0.95, + "grad_norm": 3.3531768646930997, + "learning_rate": 6.108291788812393e-08, + "loss": 0.3978, + "step": 11643 + }, + { + "epoch": 0.95, + "grad_norm": 4.222728997768416, + "learning_rate": 6.087680252120254e-08, + "loss": 0.7847, + "step": 11644 + }, + { + "epoch": 0.95, + "grad_norm": 4.961397698122342, + "learning_rate": 6.06710333653221e-08, + "loss": 1.2055, + "step": 11645 + }, + { + "epoch": 0.95, + "grad_norm": 3.9685647046638475, + "learning_rate": 6.046561043490606e-08, + "loss": 0.9056, + "step": 11646 + }, + { + "epoch": 0.95, + "grad_norm": 5.588238146585753, + "learning_rate": 6.026053374435404e-08, + "loss": 1.2207, + "step": 11647 + }, + { + "epoch": 0.95, + "grad_norm": 3.733864306330916, + "learning_rate": 6.005580330804117e-08, + "loss": 0.4693, + "step": 11648 + }, + { + "epoch": 0.95, + "grad_norm": 2.8971312445805486, + "learning_rate": 5.985141914031767e-08, + "loss": 0.2831, + "step": 11649 + }, + { + "epoch": 0.95, + "grad_norm": 4.5183158215651575, + "learning_rate": 5.964738125550984e-08, + "loss": 0.6881, + "step": 11650 + }, + { + "epoch": 0.95, + "grad_norm": 4.10165312450103, + "learning_rate": 5.944368966792014e-08, + "loss": 0.5499, + "step": 11651 + }, + { + "epoch": 0.95, + "grad_norm": 5.395994324312094, + "learning_rate": 5.924034439182658e-08, + "loss": 1.0545, + "step": 11652 + }, + { + "epoch": 0.95, + "grad_norm": 2.180941467475703, + "learning_rate": 5.903734544148221e-08, + "loss": 0.3215, + "step": 11653 + }, + { + "epoch": 0.95, + "grad_norm": 3.1291064968405005, + "learning_rate": 5.8834692831117315e-08, + "loss": 0.5487, + "step": 11654 + }, + { + "epoch": 0.95, + "grad_norm": 3.620358573775542, + "learning_rate": 5.863238657493608e-08, + "loss": 0.8608, + "step": 11655 + }, + { + "epoch": 0.95, + "grad_norm": 3.6106870421584873, + "learning_rate": 5.8430426687119954e-08, + "loss": 0.9586, + "step": 11656 + }, + { + "epoch": 0.95, + "grad_norm": 3.2684985178727715, + "learning_rate": 5.822881318182483e-08, + "loss": 0.6098, + "step": 11657 + }, + { + "epoch": 0.95, + "grad_norm": 2.9087540299140833, + "learning_rate": 5.802754607318273e-08, + "loss": 0.5448, + "step": 11658 + }, + { + "epoch": 0.95, + "grad_norm": 4.108416999885527, + "learning_rate": 5.7826625375302923e-08, + "loss": 0.7536, + "step": 11659 + }, + { + "epoch": 0.95, + "grad_norm": 3.4963580682447812, + "learning_rate": 5.762605110226804e-08, + "loss": 0.5322, + "step": 11660 + }, + { + "epoch": 0.95, + "grad_norm": 4.019122073312331, + "learning_rate": 5.742582326813795e-08, + "loss": 0.6686, + "step": 11661 + }, + { + "epoch": 0.95, + "grad_norm": 3.5195503585473022, + "learning_rate": 5.722594188694697e-08, + "loss": 0.7019, + "step": 11662 + }, + { + "epoch": 0.95, + "grad_norm": 3.1784774680724825, + "learning_rate": 5.702640697270667e-08, + "loss": 0.6628, + "step": 11663 + }, + { + "epoch": 0.95, + "grad_norm": 3.7942647579416007, + "learning_rate": 5.682721853940365e-08, + "loss": 0.6642, + "step": 11664 + }, + { + "epoch": 0.95, + "grad_norm": 5.492623859467021, + "learning_rate": 5.662837660099951e-08, + "loss": 0.5757, + "step": 11665 + }, + { + "epoch": 0.95, + "grad_norm": 4.765473552833394, + "learning_rate": 5.642988117143311e-08, + "loss": 1.0545, + "step": 11666 + }, + { + "epoch": 0.95, + "grad_norm": 2.114077177890035, + "learning_rate": 5.6231732264616644e-08, + "loss": 0.3435, + "step": 11667 + }, + { + "epoch": 0.95, + "grad_norm": 3.7433908228118784, + "learning_rate": 5.603392989444068e-08, + "loss": 0.6281, + "step": 11668 + }, + { + "epoch": 0.95, + "grad_norm": 3.0458702215608127, + "learning_rate": 5.583647407477022e-08, + "loss": 0.62, + "step": 11669 + }, + { + "epoch": 0.95, + "grad_norm": 4.270318292611534, + "learning_rate": 5.5639364819445875e-08, + "loss": 0.8412, + "step": 11670 + }, + { + "epoch": 0.95, + "grad_norm": 2.779845237480693, + "learning_rate": 5.5442602142284364e-08, + "loss": 0.4793, + "step": 11671 + }, + { + "epoch": 0.95, + "grad_norm": 3.0113458173780154, + "learning_rate": 5.5246186057076875e-08, + "loss": 0.488, + "step": 11672 + }, + { + "epoch": 0.95, + "grad_norm": 3.3714534494585875, + "learning_rate": 5.505011657759296e-08, + "loss": 0.5611, + "step": 11673 + }, + { + "epoch": 0.95, + "grad_norm": 4.923472859924644, + "learning_rate": 5.4854393717574396e-08, + "loss": 1.0725, + "step": 11674 + }, + { + "epoch": 0.95, + "grad_norm": 3.6161754209717114, + "learning_rate": 5.465901749074243e-08, + "loss": 0.5456, + "step": 11675 + }, + { + "epoch": 0.95, + "grad_norm": 4.247407489721118, + "learning_rate": 5.446398791079055e-08, + "loss": 0.7884, + "step": 11676 + }, + { + "epoch": 0.95, + "grad_norm": 3.7660720679954554, + "learning_rate": 5.42693049913906e-08, + "loss": 0.676, + "step": 11677 + }, + { + "epoch": 0.95, + "grad_norm": 5.2160897355578095, + "learning_rate": 5.407496874618778e-08, + "loss": 1.0326, + "step": 11678 + }, + { + "epoch": 0.95, + "grad_norm": 4.227074451795597, + "learning_rate": 5.388097918880564e-08, + "loss": 0.986, + "step": 11679 + }, + { + "epoch": 0.95, + "grad_norm": 2.7225632670772963, + "learning_rate": 5.3687336332841065e-08, + "loss": 0.458, + "step": 11680 + }, + { + "epoch": 0.95, + "grad_norm": 5.588303275939599, + "learning_rate": 5.3494040191867655e-08, + "loss": 1.1258, + "step": 11681 + }, + { + "epoch": 0.95, + "grad_norm": 3.487444079482097, + "learning_rate": 5.3301090779434574e-08, + "loss": 0.3763, + "step": 11682 + }, + { + "epoch": 0.95, + "grad_norm": 3.7603394226626325, + "learning_rate": 5.310848810906766e-08, + "loss": 0.626, + "step": 11683 + }, + { + "epoch": 0.96, + "grad_norm": 5.578678130957751, + "learning_rate": 5.2916232194266116e-08, + "loss": 1.232, + "step": 11684 + }, + { + "epoch": 0.96, + "grad_norm": 4.811411493552024, + "learning_rate": 5.272432304850694e-08, + "loss": 0.7073, + "step": 11685 + }, + { + "epoch": 0.96, + "grad_norm": 2.6471063109711923, + "learning_rate": 5.253276068524216e-08, + "loss": 0.5287, + "step": 11686 + }, + { + "epoch": 0.96, + "grad_norm": 3.6723646418446254, + "learning_rate": 5.2341545117899353e-08, + "loss": 0.9197, + "step": 11687 + }, + { + "epoch": 0.96, + "grad_norm": 4.1312490454129955, + "learning_rate": 5.215067635988169e-08, + "loss": 0.8088, + "step": 11688 + }, + { + "epoch": 0.96, + "grad_norm": 4.303647713081, + "learning_rate": 5.1960154424569587e-08, + "loss": 0.6594, + "step": 11689 + }, + { + "epoch": 0.96, + "grad_norm": 2.3245269674291773, + "learning_rate": 5.176997932531569e-08, + "loss": 0.2501, + "step": 11690 + }, + { + "epoch": 0.96, + "grad_norm": 4.024797051793267, + "learning_rate": 5.158015107545156e-08, + "loss": 0.8117, + "step": 11691 + }, + { + "epoch": 0.96, + "grad_norm": 3.6393877702713913, + "learning_rate": 5.139066968828377e-08, + "loss": 0.5532, + "step": 11692 + }, + { + "epoch": 0.96, + "grad_norm": 3.0030678265837416, + "learning_rate": 5.120153517709281e-08, + "loss": 0.5272, + "step": 11693 + }, + { + "epoch": 0.96, + "grad_norm": 1.3423220512615752, + "learning_rate": 5.101274755513808e-08, + "loss": 0.1617, + "step": 11694 + }, + { + "epoch": 0.96, + "grad_norm": 4.293777898690641, + "learning_rate": 5.082430683565065e-08, + "loss": 0.9318, + "step": 11695 + }, + { + "epoch": 0.96, + "grad_norm": 3.721454187574026, + "learning_rate": 5.063621303184163e-08, + "loss": 0.5775, + "step": 11696 + }, + { + "epoch": 0.96, + "grad_norm": 3.8328649913876216, + "learning_rate": 5.044846615689325e-08, + "loss": 0.5656, + "step": 11697 + }, + { + "epoch": 0.96, + "grad_norm": 4.067704682786649, + "learning_rate": 5.02610662239672e-08, + "loss": 0.6409, + "step": 11698 + }, + { + "epoch": 0.96, + "grad_norm": 4.045261495652824, + "learning_rate": 5.0074013246199096e-08, + "loss": 0.6285, + "step": 11699 + }, + { + "epoch": 0.96, + "grad_norm": 3.760271628485974, + "learning_rate": 4.9887307236700654e-08, + "loss": 0.3907, + "step": 11700 + }, + { + "epoch": 0.96, + "grad_norm": 3.3602929536408532, + "learning_rate": 4.97009482085592e-08, + "loss": 0.6208, + "step": 11701 + }, + { + "epoch": 0.96, + "grad_norm": 2.8847486528892277, + "learning_rate": 4.9514936174837047e-08, + "loss": 0.4227, + "step": 11702 + }, + { + "epoch": 0.96, + "grad_norm": 2.5872727368371575, + "learning_rate": 4.932927114857322e-08, + "loss": 0.5445, + "step": 11703 + }, + { + "epoch": 0.96, + "grad_norm": 5.348455025343582, + "learning_rate": 4.914395314278231e-08, + "loss": 0.5013, + "step": 11704 + }, + { + "epoch": 0.96, + "grad_norm": 4.005260929206768, + "learning_rate": 4.8958982170453915e-08, + "loss": 0.6913, + "step": 11705 + }, + { + "epoch": 0.96, + "grad_norm": 3.484792319652042, + "learning_rate": 4.8774358244554346e-08, + "loss": 0.4886, + "step": 11706 + }, + { + "epoch": 0.96, + "grad_norm": 4.283131853262212, + "learning_rate": 4.859008137802379e-08, + "loss": 0.6121, + "step": 11707 + }, + { + "epoch": 0.96, + "grad_norm": 6.101735286795017, + "learning_rate": 4.840615158378026e-08, + "loss": 1.1604, + "step": 11708 + }, + { + "epoch": 0.96, + "grad_norm": 4.927126922279178, + "learning_rate": 4.8222568874716216e-08, + "loss": 0.6162, + "step": 11709 + }, + { + "epoch": 0.96, + "grad_norm": 5.701134441874893, + "learning_rate": 4.803933326370025e-08, + "loss": 1.0281, + "step": 11710 + }, + { + "epoch": 0.96, + "grad_norm": 4.122986484143584, + "learning_rate": 4.7856444763575424e-08, + "loss": 1.07, + "step": 11711 + }, + { + "epoch": 0.96, + "grad_norm": 4.973057498389193, + "learning_rate": 4.7673903387162044e-08, + "loss": 1.0825, + "step": 11712 + }, + { + "epoch": 0.96, + "grad_norm": 6.210457935937212, + "learning_rate": 4.749170914725543e-08, + "loss": 1.2721, + "step": 11713 + }, + { + "epoch": 0.96, + "grad_norm": 3.6817758788117056, + "learning_rate": 4.730986205662702e-08, + "loss": 0.6935, + "step": 11714 + }, + { + "epoch": 0.96, + "grad_norm": 3.7642625493959705, + "learning_rate": 4.712836212802274e-08, + "loss": 0.601, + "step": 11715 + }, + { + "epoch": 0.96, + "grad_norm": 3.7641600812630798, + "learning_rate": 4.694720937416519e-08, + "loss": 0.6075, + "step": 11716 + }, + { + "epoch": 0.96, + "grad_norm": 5.638167474691066, + "learning_rate": 4.67664038077531e-08, + "loss": 1.1841, + "step": 11717 + }, + { + "epoch": 0.96, + "grad_norm": 3.255147060846687, + "learning_rate": 4.658594544145911e-08, + "loss": 0.641, + "step": 11718 + }, + { + "epoch": 0.96, + "grad_norm": 3.736717762131721, + "learning_rate": 4.640583428793255e-08, + "loss": 0.5668, + "step": 11719 + }, + { + "epoch": 0.96, + "grad_norm": 3.955164869337488, + "learning_rate": 4.622607035979942e-08, + "loss": 0.869, + "step": 11720 + }, + { + "epoch": 0.96, + "grad_norm": 3.213184542465023, + "learning_rate": 4.6046653669659656e-08, + "loss": 0.5982, + "step": 11721 + }, + { + "epoch": 0.96, + "grad_norm": 4.653497997895835, + "learning_rate": 4.5867584230089853e-08, + "loss": 0.6563, + "step": 11722 + }, + { + "epoch": 0.96, + "grad_norm": 3.528597158347067, + "learning_rate": 4.568886205364276e-08, + "loss": 0.7737, + "step": 11723 + }, + { + "epoch": 0.96, + "grad_norm": 3.3007991599918984, + "learning_rate": 4.551048715284445e-08, + "loss": 0.6716, + "step": 11724 + }, + { + "epoch": 0.96, + "grad_norm": 3.869732963197292, + "learning_rate": 4.5332459540198825e-08, + "loss": 0.5945, + "step": 11725 + }, + { + "epoch": 0.96, + "grad_norm": 3.985582699442896, + "learning_rate": 4.515477922818479e-08, + "loss": 0.6312, + "step": 11726 + }, + { + "epoch": 0.96, + "grad_norm": 4.484632735394568, + "learning_rate": 4.497744622925793e-08, + "loss": 0.3245, + "step": 11727 + }, + { + "epoch": 0.96, + "grad_norm": 4.567783803478978, + "learning_rate": 4.480046055584775e-08, + "loss": 0.9989, + "step": 11728 + }, + { + "epoch": 0.96, + "grad_norm": 4.243537942446385, + "learning_rate": 4.462382222035988e-08, + "loss": 0.6423, + "step": 11729 + }, + { + "epoch": 0.96, + "grad_norm": 1.0891058539920115, + "learning_rate": 4.444753123517609e-08, + "loss": 0.1768, + "step": 11730 + }, + { + "epoch": 0.96, + "grad_norm": 3.8314988858753942, + "learning_rate": 4.427158761265371e-08, + "loss": 0.5654, + "step": 11731 + }, + { + "epoch": 0.96, + "grad_norm": 3.365254800376362, + "learning_rate": 4.4095991365125656e-08, + "loss": 0.6513, + "step": 11732 + }, + { + "epoch": 0.96, + "grad_norm": 5.554928906459958, + "learning_rate": 4.3920742504900415e-08, + "loss": 1.4647, + "step": 11733 + }, + { + "epoch": 0.96, + "grad_norm": 3.6148808353058794, + "learning_rate": 4.3745841044262606e-08, + "loss": 0.7798, + "step": 11734 + }, + { + "epoch": 0.96, + "grad_norm": 5.058028006949942, + "learning_rate": 4.357128699547131e-08, + "loss": 0.825, + "step": 11735 + }, + { + "epoch": 0.96, + "grad_norm": 5.582851708848683, + "learning_rate": 4.339708037076229e-08, + "loss": 1.3073, + "step": 11736 + }, + { + "epoch": 0.96, + "grad_norm": 4.427789519056039, + "learning_rate": 4.3223221182346894e-08, + "loss": 0.7024, + "step": 11737 + }, + { + "epoch": 0.96, + "grad_norm": 3.9148871543389543, + "learning_rate": 4.304970944241149e-08, + "loss": 0.6003, + "step": 11738 + }, + { + "epoch": 0.96, + "grad_norm": 4.306396967498024, + "learning_rate": 4.2876545163118566e-08, + "loss": 0.8819, + "step": 11739 + }, + { + "epoch": 0.96, + "grad_norm": 3.8677418803361903, + "learning_rate": 4.27037283566073e-08, + "loss": 0.9604, + "step": 11740 + }, + { + "epoch": 0.96, + "grad_norm": 2.408385076006431, + "learning_rate": 4.253125903498967e-08, + "loss": 0.2947, + "step": 11741 + }, + { + "epoch": 0.96, + "grad_norm": 4.746703569901941, + "learning_rate": 4.2359137210356e-08, + "loss": 0.769, + "step": 11742 + }, + { + "epoch": 0.96, + "grad_norm": 2.6779079957653233, + "learning_rate": 4.21873628947711e-08, + "loss": 0.5306, + "step": 11743 + }, + { + "epoch": 0.96, + "grad_norm": 3.4562236694218886, + "learning_rate": 4.2015936100275324e-08, + "loss": 0.9029, + "step": 11744 + }, + { + "epoch": 0.96, + "grad_norm": 4.228704550219655, + "learning_rate": 4.184485683888573e-08, + "loss": 0.4066, + "step": 11745 + }, + { + "epoch": 0.96, + "grad_norm": 5.424287972794271, + "learning_rate": 4.167412512259328e-08, + "loss": 1.0257, + "step": 11746 + }, + { + "epoch": 0.96, + "grad_norm": 3.4697683143800604, + "learning_rate": 4.150374096336618e-08, + "loss": 0.435, + "step": 11747 + }, + { + "epoch": 0.96, + "grad_norm": 4.440624052189856, + "learning_rate": 4.13337043731471e-08, + "loss": 0.6654, + "step": 11748 + }, + { + "epoch": 0.96, + "grad_norm": 2.7522882705179637, + "learning_rate": 4.116401536385539e-08, + "loss": 0.3438, + "step": 11749 + }, + { + "epoch": 0.96, + "grad_norm": 3.4657812590362957, + "learning_rate": 4.099467394738543e-08, + "loss": 0.4506, + "step": 11750 + }, + { + "epoch": 0.96, + "grad_norm": 4.639754963246002, + "learning_rate": 4.0825680135606615e-08, + "loss": 0.6189, + "step": 11751 + }, + { + "epoch": 0.96, + "grad_norm": 2.5670399308099583, + "learning_rate": 4.065703394036613e-08, + "loss": 0.2676, + "step": 11752 + }, + { + "epoch": 0.96, + "grad_norm": 3.113158281399538, + "learning_rate": 4.048873537348341e-08, + "loss": 0.5267, + "step": 11753 + }, + { + "epoch": 0.96, + "grad_norm": 4.481364819272389, + "learning_rate": 4.03207844467568e-08, + "loss": 0.9964, + "step": 11754 + }, + { + "epoch": 0.96, + "grad_norm": 4.975723737572638, + "learning_rate": 4.01531811719591e-08, + "loss": 0.8212, + "step": 11755 + }, + { + "epoch": 0.96, + "grad_norm": 4.07136840966976, + "learning_rate": 3.998592556083758e-08, + "loss": 0.7558, + "step": 11756 + }, + { + "epoch": 0.96, + "grad_norm": 3.977434357886183, + "learning_rate": 3.981901762511675e-08, + "loss": 0.6283, + "step": 11757 + }, + { + "epoch": 0.96, + "grad_norm": 6.1372750896314745, + "learning_rate": 3.9652457376496146e-08, + "loss": 0.9247, + "step": 11758 + }, + { + "epoch": 0.96, + "grad_norm": 3.6768346894998594, + "learning_rate": 3.9486244826650865e-08, + "loss": 0.7823, + "step": 11759 + }, + { + "epoch": 0.96, + "grad_norm": 5.162561476866158, + "learning_rate": 3.932037998723104e-08, + "loss": 1.0312, + "step": 11760 + }, + { + "epoch": 0.96, + "grad_norm": 3.231638084068061, + "learning_rate": 3.915486286986403e-08, + "loss": 0.718, + "step": 11761 + }, + { + "epoch": 0.96, + "grad_norm": 2.7585378451458094, + "learning_rate": 3.898969348615167e-08, + "loss": 0.2716, + "step": 11762 + }, + { + "epoch": 0.96, + "grad_norm": 2.8033127495889065, + "learning_rate": 3.8824871847671366e-08, + "loss": 0.6429, + "step": 11763 + }, + { + "epoch": 0.96, + "grad_norm": 3.7423527907068834, + "learning_rate": 3.8660397965976094e-08, + "loss": 0.4373, + "step": 11764 + }, + { + "epoch": 0.96, + "grad_norm": 4.069044778469548, + "learning_rate": 3.849627185259497e-08, + "loss": 0.6415, + "step": 11765 + }, + { + "epoch": 0.96, + "grad_norm": 4.315625982864917, + "learning_rate": 3.833249351903268e-08, + "loss": 0.6324, + "step": 11766 + }, + { + "epoch": 0.96, + "grad_norm": 2.7279005421132565, + "learning_rate": 3.816906297676948e-08, + "loss": 0.3445, + "step": 11767 + }, + { + "epoch": 0.96, + "grad_norm": 3.50701363886008, + "learning_rate": 3.800598023726121e-08, + "loss": 0.6086, + "step": 11768 + }, + { + "epoch": 0.96, + "grad_norm": 4.296092547140078, + "learning_rate": 3.784324531193928e-08, + "loss": 0.7383, + "step": 11769 + }, + { + "epoch": 0.96, + "grad_norm": 5.23251361838104, + "learning_rate": 3.768085821221013e-08, + "loss": 0.9903, + "step": 11770 + }, + { + "epoch": 0.96, + "grad_norm": 4.213264983725306, + "learning_rate": 3.7518818949456305e-08, + "loss": 1.123, + "step": 11771 + }, + { + "epoch": 0.96, + "grad_norm": 4.368028614421407, + "learning_rate": 3.735712753503706e-08, + "loss": 0.9072, + "step": 11772 + }, + { + "epoch": 0.96, + "grad_norm": 1.8344672689359063, + "learning_rate": 3.719578398028556e-08, + "loss": 0.2159, + "step": 11773 + }, + { + "epoch": 0.96, + "grad_norm": 3.012385244807552, + "learning_rate": 3.703478829651164e-08, + "loss": 0.5381, + "step": 11774 + }, + { + "epoch": 0.96, + "grad_norm": 5.864676780705077, + "learning_rate": 3.687414049500015e-08, + "loss": 0.9533, + "step": 11775 + }, + { + "epoch": 0.96, + "grad_norm": 3.4633608720572053, + "learning_rate": 3.671384058701155e-08, + "loss": 0.6767, + "step": 11776 + }, + { + "epoch": 0.96, + "grad_norm": 4.1948794508399985, + "learning_rate": 3.6553888583782395e-08, + "loss": 0.7211, + "step": 11777 + }, + { + "epoch": 0.96, + "grad_norm": 2.889285302320259, + "learning_rate": 3.639428449652427e-08, + "loss": 0.5263, + "step": 11778 + }, + { + "epoch": 0.96, + "grad_norm": 3.5537407969712893, + "learning_rate": 3.6235028336426004e-08, + "loss": 0.2516, + "step": 11779 + }, + { + "epoch": 0.96, + "grad_norm": 4.2204192477410345, + "learning_rate": 3.607612011464923e-08, + "loss": 0.9395, + "step": 11780 + }, + { + "epoch": 0.96, + "grad_norm": 3.598581066594799, + "learning_rate": 3.591755984233391e-08, + "loss": 0.8617, + "step": 11781 + }, + { + "epoch": 0.96, + "grad_norm": 4.756522623237823, + "learning_rate": 3.57593475305934e-08, + "loss": 0.9931, + "step": 11782 + }, + { + "epoch": 0.96, + "grad_norm": 4.064404256654831, + "learning_rate": 3.560148319051826e-08, + "loss": 0.97, + "step": 11783 + }, + { + "epoch": 0.96, + "grad_norm": 2.465086785120551, + "learning_rate": 3.5443966833174084e-08, + "loss": 0.3152, + "step": 11784 + }, + { + "epoch": 0.96, + "grad_norm": 4.096692457761413, + "learning_rate": 3.528679846960148e-08, + "loss": 0.4706, + "step": 11785 + }, + { + "epoch": 0.96, + "grad_norm": 5.195789170395069, + "learning_rate": 3.5129978110818866e-08, + "loss": 0.6458, + "step": 11786 + }, + { + "epoch": 0.96, + "grad_norm": 5.942434317814537, + "learning_rate": 3.497350576781688e-08, + "loss": 1.2318, + "step": 11787 + }, + { + "epoch": 0.96, + "grad_norm": 3.358805952965998, + "learning_rate": 3.4817381451564546e-08, + "loss": 0.2734, + "step": 11788 + }, + { + "epoch": 0.96, + "grad_norm": 4.925661326114637, + "learning_rate": 3.466160517300532e-08, + "loss": 0.7324, + "step": 11789 + }, + { + "epoch": 0.96, + "grad_norm": 3.084076068790197, + "learning_rate": 3.450617694305825e-08, + "loss": 0.5408, + "step": 11790 + }, + { + "epoch": 0.96, + "grad_norm": 4.839331507480073, + "learning_rate": 3.4351096772617945e-08, + "loss": 0.9241, + "step": 11791 + }, + { + "epoch": 0.96, + "grad_norm": 3.8008598230611255, + "learning_rate": 3.4196364672555715e-08, + "loss": 0.6092, + "step": 11792 + }, + { + "epoch": 0.96, + "grad_norm": 4.8795245977568715, + "learning_rate": 3.4041980653716777e-08, + "loss": 0.4539, + "step": 11793 + }, + { + "epoch": 0.96, + "grad_norm": 4.536553255378163, + "learning_rate": 3.388794472692303e-08, + "loss": 0.4606, + "step": 11794 + }, + { + "epoch": 0.96, + "grad_norm": 4.871365365644232, + "learning_rate": 3.3734256902971385e-08, + "loss": 0.7989, + "step": 11795 + }, + { + "epoch": 0.96, + "grad_norm": 5.1551404757420975, + "learning_rate": 3.3580917192635454e-08, + "loss": 1.1861, + "step": 11796 + }, + { + "epoch": 0.96, + "grad_norm": 5.287953974934314, + "learning_rate": 3.3427925606663856e-08, + "loss": 0.8849, + "step": 11797 + }, + { + "epoch": 0.96, + "grad_norm": 3.5919220106556877, + "learning_rate": 3.327528215577913e-08, + "loss": 0.5035, + "step": 11798 + }, + { + "epoch": 0.96, + "grad_norm": 4.393547504328156, + "learning_rate": 3.3122986850682713e-08, + "loss": 0.8699, + "step": 11799 + }, + { + "epoch": 0.96, + "grad_norm": 5.847976191247303, + "learning_rate": 3.297103970204829e-08, + "loss": 1.0686, + "step": 11800 + }, + { + "epoch": 0.96, + "grad_norm": 5.3101053476366795, + "learning_rate": 3.2819440720527894e-08, + "loss": 1.2983, + "step": 11801 + }, + { + "epoch": 0.96, + "grad_norm": 4.348969443173985, + "learning_rate": 3.266818991674692e-08, + "loss": 1.0874, + "step": 11802 + }, + { + "epoch": 0.96, + "grad_norm": 5.313600503412035, + "learning_rate": 3.251728730130854e-08, + "loss": 1.148, + "step": 11803 + }, + { + "epoch": 0.96, + "grad_norm": 4.772193377266182, + "learning_rate": 3.236673288478931e-08, + "loss": 0.8395, + "step": 11804 + }, + { + "epoch": 0.96, + "grad_norm": 4.48591309072955, + "learning_rate": 3.221652667774355e-08, + "loss": 1.0058, + "step": 11805 + }, + { + "epoch": 0.96, + "grad_norm": 3.960003907617976, + "learning_rate": 3.2066668690698967e-08, + "loss": 0.608, + "step": 11806 + }, + { + "epoch": 0.97, + "grad_norm": 1.563836151855298, + "learning_rate": 3.191715893415992e-08, + "loss": 0.1871, + "step": 11807 + }, + { + "epoch": 0.97, + "grad_norm": 3.8170741667588723, + "learning_rate": 3.1767997418607474e-08, + "loss": 0.5567, + "step": 11808 + }, + { + "epoch": 0.97, + "grad_norm": 3.9128601595015318, + "learning_rate": 3.1619184154496605e-08, + "loss": 0.5273, + "step": 11809 + }, + { + "epoch": 0.97, + "grad_norm": 3.4878065354273864, + "learning_rate": 3.1470719152257856e-08, + "loss": 0.5814, + "step": 11810 + }, + { + "epoch": 0.97, + "grad_norm": 3.383885581930084, + "learning_rate": 3.132260242229901e-08, + "loss": 0.6354, + "step": 11811 + }, + { + "epoch": 0.97, + "grad_norm": 2.9485465857088755, + "learning_rate": 3.117483397500232e-08, + "loss": 0.2627, + "step": 11812 + }, + { + "epoch": 0.97, + "grad_norm": 3.9127081237186885, + "learning_rate": 3.1027413820724494e-08, + "loss": 0.8579, + "step": 11813 + }, + { + "epoch": 0.97, + "grad_norm": 1.7947974661861645, + "learning_rate": 3.0880341969801164e-08, + "loss": 0.2722, + "step": 11814 + }, + { + "epoch": 0.97, + "grad_norm": 4.421281975902339, + "learning_rate": 3.073361843253908e-08, + "loss": 0.7727, + "step": 11815 + }, + { + "epoch": 0.97, + "grad_norm": 3.7079889954384253, + "learning_rate": 3.058724321922446e-08, + "loss": 0.5543, + "step": 11816 + }, + { + "epoch": 0.97, + "grad_norm": 4.573949392109921, + "learning_rate": 3.044121634011687e-08, + "loss": 0.893, + "step": 11817 + }, + { + "epoch": 0.97, + "grad_norm": 4.674279380148187, + "learning_rate": 3.029553780545258e-08, + "loss": 0.9186, + "step": 11818 + }, + { + "epoch": 0.97, + "grad_norm": 3.3034868267727884, + "learning_rate": 3.015020762544341e-08, + "loss": 0.4583, + "step": 11819 + }, + { + "epoch": 0.97, + "grad_norm": 4.894623775380931, + "learning_rate": 3.00052258102751e-08, + "loss": 1.0015, + "step": 11820 + }, + { + "epoch": 0.97, + "grad_norm": 3.603660981984285, + "learning_rate": 2.9860592370111186e-08, + "loss": 0.361, + "step": 11821 + }, + { + "epoch": 0.97, + "grad_norm": 4.915259342684215, + "learning_rate": 2.9716307315089677e-08, + "loss": 0.7958, + "step": 11822 + }, + { + "epoch": 0.97, + "grad_norm": 4.8168026583412065, + "learning_rate": 2.9572370655324146e-08, + "loss": 1.1639, + "step": 11823 + }, + { + "epoch": 0.97, + "grad_norm": 4.374519272465158, + "learning_rate": 2.94287824009043e-08, + "loss": 0.89, + "step": 11824 + }, + { + "epoch": 0.97, + "grad_norm": 2.663285053546188, + "learning_rate": 2.928554256189431e-08, + "loss": 0.5214, + "step": 11825 + }, + { + "epoch": 0.97, + "grad_norm": 5.9762625834375305, + "learning_rate": 2.914265114833614e-08, + "loss": 1.0695, + "step": 11826 + }, + { + "epoch": 0.97, + "grad_norm": 5.332668720167278, + "learning_rate": 2.9000108170244013e-08, + "loss": 1.2487, + "step": 11827 + }, + { + "epoch": 0.97, + "grad_norm": 4.408214880538728, + "learning_rate": 2.8857913637610478e-08, + "loss": 0.7891, + "step": 11828 + }, + { + "epoch": 0.97, + "grad_norm": 5.41942697624612, + "learning_rate": 2.8716067560403128e-08, + "loss": 1.0684, + "step": 11829 + }, + { + "epoch": 0.97, + "grad_norm": 2.8164144094370407, + "learning_rate": 2.8574569948564002e-08, + "loss": 0.4722, + "step": 11830 + }, + { + "epoch": 0.97, + "grad_norm": 5.132366332899768, + "learning_rate": 2.8433420812011836e-08, + "loss": 1.1149, + "step": 11831 + }, + { + "epoch": 0.97, + "grad_norm": 4.1411737141967455, + "learning_rate": 2.829262016064094e-08, + "loss": 1.1777, + "step": 11832 + }, + { + "epoch": 0.97, + "grad_norm": 4.309970324128132, + "learning_rate": 2.815216800432008e-08, + "loss": 0.6603, + "step": 11833 + }, + { + "epoch": 0.97, + "grad_norm": 4.351849238006262, + "learning_rate": 2.8012064352894718e-08, + "loss": 0.8311, + "step": 11834 + }, + { + "epoch": 0.97, + "grad_norm": 3.1894191347071508, + "learning_rate": 2.7872309216185333e-08, + "loss": 0.6127, + "step": 11835 + }, + { + "epoch": 0.97, + "grad_norm": 2.6702761891664815, + "learning_rate": 2.7732902603988532e-08, + "loss": 0.5173, + "step": 11836 + }, + { + "epoch": 0.97, + "grad_norm": 2.9034952122764306, + "learning_rate": 2.7593844526075943e-08, + "loss": 0.349, + "step": 11837 + }, + { + "epoch": 0.97, + "grad_norm": 4.847761447024961, + "learning_rate": 2.7455134992194767e-08, + "loss": 0.7418, + "step": 11838 + }, + { + "epoch": 0.97, + "grad_norm": 4.856028845939515, + "learning_rate": 2.7316774012068337e-08, + "loss": 0.6664, + "step": 11839 + }, + { + "epoch": 0.97, + "grad_norm": 3.4415462519630657, + "learning_rate": 2.7178761595394455e-08, + "loss": 0.6076, + "step": 11840 + }, + { + "epoch": 0.97, + "grad_norm": 3.7737666124134774, + "learning_rate": 2.70410977518476e-08, + "loss": 0.4558, + "step": 11841 + }, + { + "epoch": 0.97, + "grad_norm": 3.920737183462944, + "learning_rate": 2.6903782491077278e-08, + "loss": 0.7898, + "step": 11842 + }, + { + "epoch": 0.97, + "grad_norm": 3.068358658790362, + "learning_rate": 2.6766815822709124e-08, + "loss": 0.7369, + "step": 11843 + }, + { + "epoch": 0.97, + "grad_norm": 4.147692732508822, + "learning_rate": 2.663019775634379e-08, + "loss": 0.7402, + "step": 11844 + }, + { + "epoch": 0.97, + "grad_norm": 2.802750877458447, + "learning_rate": 2.6493928301556947e-08, + "loss": 0.5201, + "step": 11845 + }, + { + "epoch": 0.97, + "grad_norm": 5.292352851159922, + "learning_rate": 2.635800746790096e-08, + "loss": 1.2759, + "step": 11846 + }, + { + "epoch": 0.97, + "grad_norm": 4.2793226440870615, + "learning_rate": 2.62224352649032e-08, + "loss": 0.5542, + "step": 11847 + }, + { + "epoch": 0.97, + "grad_norm": 2.7325544908920576, + "learning_rate": 2.6087211702067184e-08, + "loss": 0.465, + "step": 11848 + }, + { + "epoch": 0.97, + "grad_norm": 4.104911052916324, + "learning_rate": 2.5952336788871434e-08, + "loss": 0.758, + "step": 11849 + }, + { + "epoch": 0.97, + "grad_norm": 3.1921776183039756, + "learning_rate": 2.581781053476895e-08, + "loss": 0.3688, + "step": 11850 + }, + { + "epoch": 0.97, + "grad_norm": 5.523292465051399, + "learning_rate": 2.568363294919052e-08, + "loss": 1.1649, + "step": 11851 + }, + { + "epoch": 0.97, + "grad_norm": 2.707201016531321, + "learning_rate": 2.5549804041541392e-08, + "loss": 0.3458, + "step": 11852 + }, + { + "epoch": 0.97, + "grad_norm": 4.421203294947658, + "learning_rate": 2.5416323821201848e-08, + "loss": 0.9934, + "step": 11853 + }, + { + "epoch": 0.97, + "grad_norm": 3.8331440448136003, + "learning_rate": 2.5283192297528846e-08, + "loss": 0.5116, + "step": 11854 + }, + { + "epoch": 0.97, + "grad_norm": 2.4544071763317814, + "learning_rate": 2.5150409479853255e-08, + "loss": 0.277, + "step": 11855 + }, + { + "epoch": 0.97, + "grad_norm": 4.6384962008616775, + "learning_rate": 2.5017975377483738e-08, + "loss": 0.7894, + "step": 11856 + }, + { + "epoch": 0.97, + "grad_norm": 4.104411309353446, + "learning_rate": 2.4885889999703426e-08, + "loss": 0.5235, + "step": 11857 + }, + { + "epoch": 0.97, + "grad_norm": 3.8549440430266007, + "learning_rate": 2.4754153355769915e-08, + "loss": 0.5376, + "step": 11858 + }, + { + "epoch": 0.97, + "grad_norm": 4.0554960882424425, + "learning_rate": 2.462276545491804e-08, + "loss": 0.8901, + "step": 11859 + }, + { + "epoch": 0.97, + "grad_norm": 4.691121017551824, + "learning_rate": 2.4491726306357656e-08, + "loss": 0.9459, + "step": 11860 + }, + { + "epoch": 0.97, + "grad_norm": 5.946935112710814, + "learning_rate": 2.4361035919273635e-08, + "loss": 0.8441, + "step": 11861 + }, + { + "epoch": 0.97, + "grad_norm": 4.272086858888599, + "learning_rate": 2.423069430282643e-08, + "loss": 1.0533, + "step": 11862 + }, + { + "epoch": 0.97, + "grad_norm": 2.4291959785483113, + "learning_rate": 2.4100701466153177e-08, + "loss": 0.3649, + "step": 11863 + }, + { + "epoch": 0.97, + "grad_norm": 3.7287457594289273, + "learning_rate": 2.397105741836603e-08, + "loss": 0.6391, + "step": 11864 + }, + { + "epoch": 0.97, + "grad_norm": 3.531476649737334, + "learning_rate": 2.384176216855161e-08, + "loss": 0.5818, + "step": 11865 + }, + { + "epoch": 0.97, + "grad_norm": 3.020496239571027, + "learning_rate": 2.3712815725773774e-08, + "loss": 0.4824, + "step": 11866 + }, + { + "epoch": 0.97, + "grad_norm": 4.094119632365212, + "learning_rate": 2.3584218099070298e-08, + "loss": 0.8664, + "step": 11867 + }, + { + "epoch": 0.97, + "grad_norm": 3.9308390645019133, + "learning_rate": 2.345596929745564e-08, + "loss": 0.5896, + "step": 11868 + }, + { + "epoch": 0.97, + "grad_norm": 4.0067100581001664, + "learning_rate": 2.3328069329919824e-08, + "loss": 0.7413, + "step": 11869 + }, + { + "epoch": 0.97, + "grad_norm": 3.6962664252409225, + "learning_rate": 2.3200518205427346e-08, + "loss": 0.6535, + "step": 11870 + }, + { + "epoch": 0.97, + "grad_norm": 4.581880556701163, + "learning_rate": 2.307331593291995e-08, + "loss": 0.9012, + "step": 11871 + }, + { + "epoch": 0.97, + "grad_norm": 2.5539149399642733, + "learning_rate": 2.2946462521313274e-08, + "loss": 0.6333, + "step": 11872 + }, + { + "epoch": 0.97, + "grad_norm": 3.5346921718157773, + "learning_rate": 2.2819957979499098e-08, + "loss": 0.6966, + "step": 11873 + }, + { + "epoch": 0.97, + "grad_norm": 4.8985294569505085, + "learning_rate": 2.2693802316345327e-08, + "loss": 1.0231, + "step": 11874 + }, + { + "epoch": 0.97, + "grad_norm": 3.3983434931825447, + "learning_rate": 2.2567995540694888e-08, + "loss": 0.3441, + "step": 11875 + }, + { + "epoch": 0.97, + "grad_norm": 3.1909594209394427, + "learning_rate": 2.2442537661365727e-08, + "loss": 0.6087, + "step": 11876 + }, + { + "epoch": 0.97, + "grad_norm": 4.214137835453607, + "learning_rate": 2.231742868715303e-08, + "loss": 0.7074, + "step": 11877 + }, + { + "epoch": 0.97, + "grad_norm": 5.373568865436762, + "learning_rate": 2.2192668626824788e-08, + "loss": 0.9279, + "step": 11878 + }, + { + "epoch": 0.97, + "grad_norm": 3.858297833004091, + "learning_rate": 2.206825748912733e-08, + "loss": 0.5545, + "step": 11879 + }, + { + "epoch": 0.97, + "grad_norm": 3.673507367762111, + "learning_rate": 2.194419528278091e-08, + "loss": 0.8805, + "step": 11880 + }, + { + "epoch": 0.97, + "grad_norm": 6.24616288686617, + "learning_rate": 2.1820482016481902e-08, + "loss": 0.9465, + "step": 11881 + }, + { + "epoch": 0.97, + "grad_norm": 5.86202337498881, + "learning_rate": 2.1697117698901704e-08, + "loss": 1.1141, + "step": 11882 + }, + { + "epoch": 0.97, + "grad_norm": 3.3885582585985783, + "learning_rate": 2.1574102338688395e-08, + "loss": 0.4595, + "step": 11883 + }, + { + "epoch": 0.97, + "grad_norm": 5.144123928509865, + "learning_rate": 2.1451435944464528e-08, + "loss": 1.4252, + "step": 11884 + }, + { + "epoch": 0.97, + "grad_norm": 3.7105695583681575, + "learning_rate": 2.1329118524827662e-08, + "loss": 0.586, + "step": 11885 + }, + { + "epoch": 0.97, + "grad_norm": 3.9768130214957003, + "learning_rate": 2.120715008835261e-08, + "loss": 0.7267, + "step": 11886 + }, + { + "epoch": 0.97, + "grad_norm": 4.60573974763432, + "learning_rate": 2.1085530643588094e-08, + "loss": 0.8107, + "step": 11887 + }, + { + "epoch": 0.97, + "grad_norm": 5.176063171160444, + "learning_rate": 2.096426019906006e-08, + "loss": 0.7135, + "step": 11888 + }, + { + "epoch": 0.97, + "grad_norm": 3.689431942878935, + "learning_rate": 2.0843338763268382e-08, + "loss": 0.6004, + "step": 11889 + }, + { + "epoch": 0.97, + "grad_norm": 4.014500092531755, + "learning_rate": 2.0722766344689617e-08, + "loss": 0.7066, + "step": 11890 + }, + { + "epoch": 0.97, + "grad_norm": 3.82800488707636, + "learning_rate": 2.0602542951774774e-08, + "loss": 0.6034, + "step": 11891 + }, + { + "epoch": 0.97, + "grad_norm": 5.390492523335453, + "learning_rate": 2.0482668592951004e-08, + "loss": 0.8957, + "step": 11892 + }, + { + "epoch": 0.97, + "grad_norm": 4.226787649241726, + "learning_rate": 2.036314327662159e-08, + "loss": 0.8212, + "step": 11893 + }, + { + "epoch": 0.97, + "grad_norm": 2.9940600042133187, + "learning_rate": 2.0243967011164267e-08, + "loss": 0.37, + "step": 11894 + }, + { + "epoch": 0.97, + "grad_norm": 4.627394164356139, + "learning_rate": 2.0125139804932913e-08, + "loss": 0.602, + "step": 11895 + }, + { + "epoch": 0.97, + "grad_norm": 4.748931715639397, + "learning_rate": 2.0006661666256978e-08, + "loss": 0.8945, + "step": 11896 + }, + { + "epoch": 0.97, + "grad_norm": 3.1576488933848617, + "learning_rate": 1.988853260344037e-08, + "loss": 0.3595, + "step": 11897 + }, + { + "epoch": 0.97, + "grad_norm": 3.3844736354884932, + "learning_rate": 1.97707526247648e-08, + "loss": 0.4471, + "step": 11898 + }, + { + "epoch": 0.97, + "grad_norm": 5.928263498979704, + "learning_rate": 1.965332173848533e-08, + "loss": 1.2007, + "step": 11899 + }, + { + "epoch": 0.97, + "grad_norm": 1.678241608241835, + "learning_rate": 1.9536239952833712e-08, + "loss": 0.2836, + "step": 11900 + }, + { + "epoch": 0.97, + "grad_norm": 5.065948311688887, + "learning_rate": 1.9419507276016158e-08, + "loss": 1.2764, + "step": 11901 + }, + { + "epoch": 0.97, + "grad_norm": 5.294378649306096, + "learning_rate": 1.9303123716215565e-08, + "loss": 0.9947, + "step": 11902 + }, + { + "epoch": 0.97, + "grad_norm": 3.096427690802625, + "learning_rate": 1.9187089281589853e-08, + "loss": 0.6188, + "step": 11903 + }, + { + "epoch": 0.97, + "grad_norm": 5.080728240667454, + "learning_rate": 1.9071403980273075e-08, + "loss": 1.1303, + "step": 11904 + }, + { + "epoch": 0.97, + "grad_norm": 3.347288640554572, + "learning_rate": 1.895606782037318e-08, + "loss": 0.6736, + "step": 11905 + }, + { + "epoch": 0.97, + "grad_norm": 4.780075138837884, + "learning_rate": 1.8841080809975933e-08, + "loss": 1.0143, + "step": 11906 + }, + { + "epoch": 0.97, + "grad_norm": 5.762028599734933, + "learning_rate": 1.872644295714099e-08, + "loss": 0.9305, + "step": 11907 + }, + { + "epoch": 0.97, + "grad_norm": 5.049603974319265, + "learning_rate": 1.8612154269903036e-08, + "loss": 1.0608, + "step": 11908 + }, + { + "epoch": 0.97, + "grad_norm": 3.939565444899833, + "learning_rate": 1.8498214756274558e-08, + "loss": 0.5482, + "step": 11909 + }, + { + "epoch": 0.97, + "grad_norm": 4.353598562575978, + "learning_rate": 1.8384624424241383e-08, + "loss": 0.9264, + "step": 11910 + }, + { + "epoch": 0.97, + "grad_norm": 3.210083125387604, + "learning_rate": 1.827138328176603e-08, + "loss": 0.6805, + "step": 11911 + }, + { + "epoch": 0.97, + "grad_norm": 3.789070033516132, + "learning_rate": 1.81584913367866e-08, + "loss": 0.6795, + "step": 11912 + }, + { + "epoch": 0.97, + "grad_norm": 4.278633192332255, + "learning_rate": 1.8045948597215646e-08, + "loss": 1.0238, + "step": 11913 + }, + { + "epoch": 0.97, + "grad_norm": 1.605182109465508, + "learning_rate": 1.793375507094186e-08, + "loss": 0.3206, + "step": 11914 + }, + { + "epoch": 0.97, + "grad_norm": 3.003408022361503, + "learning_rate": 1.7821910765830063e-08, + "loss": 0.6041, + "step": 11915 + }, + { + "epoch": 0.97, + "grad_norm": 2.765548228462366, + "learning_rate": 1.771041568971954e-08, + "loss": 0.4168, + "step": 11916 + }, + { + "epoch": 0.97, + "grad_norm": 4.456842463420886, + "learning_rate": 1.7599269850426258e-08, + "loss": 0.8319, + "step": 11917 + }, + { + "epoch": 0.97, + "grad_norm": 3.731562967711699, + "learning_rate": 1.7488473255740657e-08, + "loss": 0.653, + "step": 11918 + }, + { + "epoch": 0.97, + "grad_norm": 3.1815817245847033, + "learning_rate": 1.7378025913428743e-08, + "loss": 0.6323, + "step": 11919 + }, + { + "epoch": 0.97, + "grad_norm": 3.659768289388934, + "learning_rate": 1.726792783123321e-08, + "loss": 0.5598, + "step": 11920 + }, + { + "epoch": 0.97, + "grad_norm": 3.5609989582963357, + "learning_rate": 1.7158179016870668e-08, + "loss": 0.5553, + "step": 11921 + }, + { + "epoch": 0.97, + "grad_norm": 3.873352263562927, + "learning_rate": 1.7048779478034404e-08, + "loss": 0.8343, + "step": 11922 + }, + { + "epoch": 0.97, + "grad_norm": 2.0772548699872506, + "learning_rate": 1.6939729222393286e-08, + "loss": 0.279, + "step": 11923 + }, + { + "epoch": 0.97, + "grad_norm": 3.0376698788409873, + "learning_rate": 1.6831028257590087e-08, + "loss": 0.6518, + "step": 11924 + }, + { + "epoch": 0.97, + "grad_norm": 3.657201488738241, + "learning_rate": 1.6722676591245378e-08, + "loss": 0.4616, + "step": 11925 + }, + { + "epoch": 0.97, + "grad_norm": 3.8920629960410293, + "learning_rate": 1.6614674230953643e-08, + "loss": 0.751, + "step": 11926 + }, + { + "epoch": 0.97, + "grad_norm": 3.7380583054259944, + "learning_rate": 1.6507021184285488e-08, + "loss": 0.5969, + "step": 11927 + }, + { + "epoch": 0.97, + "grad_norm": 4.484805408914432, + "learning_rate": 1.63997174587871e-08, + "loss": 0.6674, + "step": 11928 + }, + { + "epoch": 0.98, + "grad_norm": 2.708051571600095, + "learning_rate": 1.629276306197969e-08, + "loss": 0.4126, + "step": 11929 + }, + { + "epoch": 0.98, + "grad_norm": 4.009169489358039, + "learning_rate": 1.6186158001360587e-08, + "loss": 0.9243, + "step": 11930 + }, + { + "epoch": 0.98, + "grad_norm": 3.5079588041639775, + "learning_rate": 1.607990228440215e-08, + "loss": 0.6558, + "step": 11931 + }, + { + "epoch": 0.98, + "grad_norm": 6.309796315389396, + "learning_rate": 1.597399591855231e-08, + "loss": 1.3317, + "step": 11932 + }, + { + "epoch": 0.98, + "grad_norm": 4.191601799634067, + "learning_rate": 1.5868438911234575e-08, + "loss": 0.7678, + "step": 11933 + }, + { + "epoch": 0.98, + "grad_norm": 5.033040535587328, + "learning_rate": 1.5763231269848578e-08, + "loss": 1.1157, + "step": 11934 + }, + { + "epoch": 0.98, + "grad_norm": 5.441359331862082, + "learning_rate": 1.5658373001768423e-08, + "loss": 1.1209, + "step": 11935 + }, + { + "epoch": 0.98, + "grad_norm": 0.9344080602485448, + "learning_rate": 1.555386411434434e-08, + "loss": 0.1267, + "step": 11936 + }, + { + "epoch": 0.98, + "grad_norm": 5.224522930216481, + "learning_rate": 1.544970461490214e-08, + "loss": 0.7561, + "step": 11937 + }, + { + "epoch": 0.98, + "grad_norm": 4.551766070858134, + "learning_rate": 1.53458945107432e-08, + "loss": 1.2067, + "step": 11938 + }, + { + "epoch": 0.98, + "grad_norm": 3.9293824274572704, + "learning_rate": 1.5242433809143364e-08, + "loss": 0.2524, + "step": 11939 + }, + { + "epoch": 0.98, + "grad_norm": 4.711783064616934, + "learning_rate": 1.5139322517355172e-08, + "loss": 0.9472, + "step": 11940 + }, + { + "epoch": 0.98, + "grad_norm": 3.415126915218515, + "learning_rate": 1.5036560642606167e-08, + "loss": 0.8046, + "step": 11941 + }, + { + "epoch": 0.98, + "grad_norm": 3.426248150142898, + "learning_rate": 1.493414819210004e-08, + "loss": 0.5639, + "step": 11942 + }, + { + "epoch": 0.98, + "grad_norm": 3.3045330296380113, + "learning_rate": 1.4832085173014376e-08, + "loss": 0.6792, + "step": 11943 + }, + { + "epoch": 0.98, + "grad_norm": 1.062424557004166, + "learning_rate": 1.4730371592504567e-08, + "loss": 0.1663, + "step": 11944 + }, + { + "epoch": 0.98, + "grad_norm": 3.925225735951944, + "learning_rate": 1.4629007457699906e-08, + "loss": 0.6722, + "step": 11945 + }, + { + "epoch": 0.98, + "grad_norm": 3.3487778120695566, + "learning_rate": 1.4527992775704713e-08, + "loss": 0.8364, + "step": 11946 + }, + { + "epoch": 0.98, + "grad_norm": 4.472139605321025, + "learning_rate": 1.4427327553601101e-08, + "loss": 0.8356, + "step": 11947 + }, + { + "epoch": 0.98, + "grad_norm": 3.172637141606483, + "learning_rate": 1.432701179844398e-08, + "loss": 0.495, + "step": 11948 + }, + { + "epoch": 0.98, + "grad_norm": 4.643966694879904, + "learning_rate": 1.4227045517266059e-08, + "loss": 1.0396, + "step": 11949 + }, + { + "epoch": 0.98, + "grad_norm": 5.667096334034253, + "learning_rate": 1.4127428717073955e-08, + "loss": 1.1284, + "step": 11950 + }, + { + "epoch": 0.98, + "grad_norm": 3.9324324700908826, + "learning_rate": 1.4028161404850415e-08, + "loss": 0.5657, + "step": 11951 + }, + { + "epoch": 0.98, + "grad_norm": 4.679804891627546, + "learning_rate": 1.3929243587553764e-08, + "loss": 0.6958, + "step": 11952 + }, + { + "epoch": 0.98, + "grad_norm": 3.805587188154656, + "learning_rate": 1.3830675272117344e-08, + "loss": 0.8619, + "step": 11953 + }, + { + "epoch": 0.98, + "grad_norm": 2.6120518053429453, + "learning_rate": 1.3732456465451182e-08, + "loss": 0.3556, + "step": 11954 + }, + { + "epoch": 0.98, + "grad_norm": 5.866311449363066, + "learning_rate": 1.3634587174439218e-08, + "loss": 0.8505, + "step": 11955 + }, + { + "epoch": 0.98, + "grad_norm": 3.881058075921292, + "learning_rate": 1.3537067405942072e-08, + "loss": 1.0058, + "step": 11956 + }, + { + "epoch": 0.98, + "grad_norm": 1.8398806888597605, + "learning_rate": 1.3439897166795945e-08, + "loss": 0.318, + "step": 11957 + }, + { + "epoch": 0.98, + "grad_norm": 4.063571392378166, + "learning_rate": 1.3343076463810389e-08, + "loss": 0.5925, + "step": 11958 + }, + { + "epoch": 0.98, + "grad_norm": 5.298045597186789, + "learning_rate": 1.3246605303773864e-08, + "loss": 0.6629, + "step": 11959 + }, + { + "epoch": 0.98, + "grad_norm": 6.473056012845691, + "learning_rate": 1.3150483693447625e-08, + "loss": 1.2219, + "step": 11960 + }, + { + "epoch": 0.98, + "grad_norm": 5.532689657101384, + "learning_rate": 1.3054711639569616e-08, + "loss": 1.2582, + "step": 11961 + }, + { + "epoch": 0.98, + "grad_norm": 3.7416802434167455, + "learning_rate": 1.295928914885336e-08, + "loss": 0.8745, + "step": 11962 + }, + { + "epoch": 0.98, + "grad_norm": 3.9350085760977156, + "learning_rate": 1.2864216227986837e-08, + "loss": 0.6069, + "step": 11963 + }, + { + "epoch": 0.98, + "grad_norm": 4.849630921846114, + "learning_rate": 1.276949288363527e-08, + "loss": 1.3068, + "step": 11964 + }, + { + "epoch": 0.98, + "grad_norm": 4.206955644824944, + "learning_rate": 1.267511912243724e-08, + "loss": 0.8276, + "step": 11965 + }, + { + "epoch": 0.98, + "grad_norm": 3.423765874126548, + "learning_rate": 1.2581094951008566e-08, + "loss": 0.5732, + "step": 11966 + }, + { + "epoch": 0.98, + "grad_norm": 4.300025809881357, + "learning_rate": 1.2487420375939529e-08, + "loss": 0.8077, + "step": 11967 + }, + { + "epoch": 0.98, + "grad_norm": 6.206757278466938, + "learning_rate": 1.2394095403797102e-08, + "loss": 1.3847, + "step": 11968 + }, + { + "epoch": 0.98, + "grad_norm": 3.9055794737494822, + "learning_rate": 1.2301120041122161e-08, + "loss": 0.6262, + "step": 11969 + }, + { + "epoch": 0.98, + "grad_norm": 5.613323997082934, + "learning_rate": 1.2208494294432272e-08, + "loss": 0.8649, + "step": 11970 + }, + { + "epoch": 0.98, + "grad_norm": 2.230109954266526, + "learning_rate": 1.2116218170220018e-08, + "loss": 0.2765, + "step": 11971 + }, + { + "epoch": 0.98, + "grad_norm": 2.2792223501490074, + "learning_rate": 1.202429167495356e-08, + "loss": 0.3214, + "step": 11972 + }, + { + "epoch": 0.98, + "grad_norm": 4.2794069348955475, + "learning_rate": 1.1932714815076075e-08, + "loss": 0.7824, + "step": 11973 + }, + { + "epoch": 0.98, + "grad_norm": 4.515920252114767, + "learning_rate": 1.1841487597007983e-08, + "loss": 0.7136, + "step": 11974 + }, + { + "epoch": 0.98, + "grad_norm": 4.47317136412364, + "learning_rate": 1.1750610027142506e-08, + "loss": 1.1016, + "step": 11975 + }, + { + "epoch": 0.98, + "grad_norm": 1.1656497913356112, + "learning_rate": 1.1660082111850101e-08, + "loss": 0.1311, + "step": 11976 + }, + { + "epoch": 0.98, + "grad_norm": 3.428046457044096, + "learning_rate": 1.156990385747736e-08, + "loss": 0.5926, + "step": 11977 + }, + { + "epoch": 0.98, + "grad_norm": 4.010542686065303, + "learning_rate": 1.1480075270343671e-08, + "loss": 0.4662, + "step": 11978 + }, + { + "epoch": 0.98, + "grad_norm": 4.1502107158997195, + "learning_rate": 1.139059635674733e-08, + "loss": 0.6396, + "step": 11979 + }, + { + "epoch": 0.98, + "grad_norm": 3.514111467989537, + "learning_rate": 1.1301467122959432e-08, + "loss": 0.6339, + "step": 11980 + }, + { + "epoch": 0.98, + "grad_norm": 4.807595894519607, + "learning_rate": 1.1212687575227754e-08, + "loss": 0.7422, + "step": 11981 + }, + { + "epoch": 0.98, + "grad_norm": 1.8296572392135173, + "learning_rate": 1.1124257719775655e-08, + "loss": 0.2994, + "step": 11982 + }, + { + "epoch": 0.98, + "grad_norm": 4.495040332096613, + "learning_rate": 1.1036177562800954e-08, + "loss": 0.5596, + "step": 11983 + }, + { + "epoch": 0.98, + "grad_norm": 5.374313945016398, + "learning_rate": 1.0948447110478711e-08, + "loss": 0.8237, + "step": 11984 + }, + { + "epoch": 0.98, + "grad_norm": 3.3603395917008236, + "learning_rate": 1.0861066368957341e-08, + "loss": 0.5336, + "step": 11985 + }, + { + "epoch": 0.98, + "grad_norm": 5.797164024142589, + "learning_rate": 1.0774035344363054e-08, + "loss": 1.0428, + "step": 11986 + }, + { + "epoch": 0.98, + "grad_norm": 4.798474908659315, + "learning_rate": 1.0687354042795417e-08, + "loss": 0.5348, + "step": 11987 + }, + { + "epoch": 0.98, + "grad_norm": 3.653075763774985, + "learning_rate": 1.060102247033068e-08, + "loss": 0.8898, + "step": 11988 + }, + { + "epoch": 0.98, + "grad_norm": 3.7221055998469206, + "learning_rate": 1.0515040633020112e-08, + "loss": 0.8894, + "step": 11989 + }, + { + "epoch": 0.98, + "grad_norm": 5.239262202588881, + "learning_rate": 1.0429408536891117e-08, + "loss": 1.143, + "step": 11990 + }, + { + "epoch": 0.98, + "grad_norm": 1.4338166321686046, + "learning_rate": 1.0344126187946113e-08, + "loss": 0.1791, + "step": 11991 + }, + { + "epoch": 0.98, + "grad_norm": 2.6841008740597605, + "learning_rate": 1.0259193592162541e-08, + "loss": 0.5626, + "step": 11992 + }, + { + "epoch": 0.98, + "grad_norm": 2.378483590578005, + "learning_rate": 1.0174610755493974e-08, + "loss": 0.4065, + "step": 11993 + }, + { + "epoch": 0.98, + "grad_norm": 2.2204144413208717, + "learning_rate": 1.0090377683869557e-08, + "loss": 0.2904, + "step": 11994 + }, + { + "epoch": 0.98, + "grad_norm": 5.9902759711671125, + "learning_rate": 1.0006494383193454e-08, + "loss": 1.1846, + "step": 11995 + }, + { + "epoch": 0.98, + "grad_norm": 3.853469311831628, + "learning_rate": 9.92296085934541e-09, + "loss": 0.547, + "step": 11996 + }, + { + "epoch": 0.98, + "grad_norm": 5.9650594451607555, + "learning_rate": 9.839777118181293e-09, + "loss": 1.2478, + "step": 11997 + }, + { + "epoch": 0.98, + "grad_norm": 5.338080966871579, + "learning_rate": 9.756943165531441e-09, + "loss": 0.9709, + "step": 11998 + }, + { + "epoch": 0.98, + "grad_norm": 5.606574446922868, + "learning_rate": 9.67445900720232e-09, + "loss": 1.0099, + "step": 11999 + }, + { + "epoch": 0.98, + "grad_norm": 3.2828035934487065, + "learning_rate": 9.592324648975415e-09, + "loss": 0.714, + "step": 12000 + }, + { + "epoch": 0.98, + "grad_norm": 6.215669569185245, + "learning_rate": 9.510540096608345e-09, + "loss": 1.2572, + "step": 12001 + }, + { + "epoch": 0.98, + "grad_norm": 3.2673720077363075, + "learning_rate": 9.429105355833745e-09, + "loss": 0.5131, + "step": 12002 + }, + { + "epoch": 0.98, + "grad_norm": 2.8685929308413742, + "learning_rate": 9.348020432359829e-09, + "loss": 0.7717, + "step": 12003 + }, + { + "epoch": 0.98, + "grad_norm": 4.6612516918354405, + "learning_rate": 9.267285331870378e-09, + "loss": 0.8762, + "step": 12004 + }, + { + "epoch": 0.98, + "grad_norm": 4.835309486902613, + "learning_rate": 9.186900060024207e-09, + "loss": 1.3228, + "step": 12005 + }, + { + "epoch": 0.98, + "grad_norm": 5.1110886609000055, + "learning_rate": 9.106864622456246e-09, + "loss": 0.6116, + "step": 12006 + }, + { + "epoch": 0.98, + "grad_norm": 3.398732853475271, + "learning_rate": 9.02717902477701e-09, + "loss": 0.7167, + "step": 12007 + }, + { + "epoch": 0.98, + "grad_norm": 6.905729799829959, + "learning_rate": 8.947843272571477e-09, + "loss": 1.4806, + "step": 12008 + }, + { + "epoch": 0.98, + "grad_norm": 2.3584098306988754, + "learning_rate": 8.868857371401306e-09, + "loss": 0.3806, + "step": 12009 + }, + { + "epoch": 0.98, + "grad_norm": 3.295833479211921, + "learning_rate": 8.790221326802074e-09, + "loss": 0.7821, + "step": 12010 + }, + { + "epoch": 0.98, + "grad_norm": 6.056794579846256, + "learning_rate": 8.711935144287142e-09, + "loss": 1.2853, + "step": 12011 + }, + { + "epoch": 0.98, + "grad_norm": 4.741309227622882, + "learning_rate": 8.633998829343237e-09, + "loss": 1.0193, + "step": 12012 + }, + { + "epoch": 0.98, + "grad_norm": 4.416962423605944, + "learning_rate": 8.55641238743321e-09, + "loss": 1.1095, + "step": 12013 + }, + { + "epoch": 0.98, + "grad_norm": 4.034344568422561, + "learning_rate": 8.479175823996044e-09, + "loss": 0.5427, + "step": 12014 + }, + { + "epoch": 0.98, + "grad_norm": 3.6788069740696177, + "learning_rate": 8.40228914444574e-09, + "loss": 0.6993, + "step": 12015 + }, + { + "epoch": 0.98, + "grad_norm": 4.450727521986115, + "learning_rate": 8.325752354171324e-09, + "loss": 0.3584, + "step": 12016 + }, + { + "epoch": 0.98, + "grad_norm": 2.6095034716190466, + "learning_rate": 8.24956545853739e-09, + "loss": 0.5702, + "step": 12017 + }, + { + "epoch": 0.98, + "grad_norm": 4.655746956771417, + "learning_rate": 8.173728462885222e-09, + "loss": 0.8922, + "step": 12018 + }, + { + "epoch": 0.98, + "grad_norm": 4.668643716701874, + "learning_rate": 8.098241372530013e-09, + "loss": 0.5593, + "step": 12019 + }, + { + "epoch": 0.98, + "grad_norm": 3.4627075621636396, + "learning_rate": 8.023104192763642e-09, + "loss": 0.7913, + "step": 12020 + }, + { + "epoch": 0.98, + "grad_norm": 3.3071089435334793, + "learning_rate": 7.948316928851896e-09, + "loss": 0.5397, + "step": 12021 + }, + { + "epoch": 0.98, + "grad_norm": 3.7342020527729853, + "learning_rate": 7.873879586037803e-09, + "loss": 0.7649, + "step": 12022 + }, + { + "epoch": 0.98, + "grad_norm": 5.573511773337518, + "learning_rate": 7.79979216953941e-09, + "loss": 1.1509, + "step": 12023 + }, + { + "epoch": 0.98, + "grad_norm": 5.709589592626596, + "learning_rate": 7.726054684549234e-09, + "loss": 1.1941, + "step": 12024 + }, + { + "epoch": 0.98, + "grad_norm": 3.209516616837038, + "learning_rate": 7.652667136235914e-09, + "loss": 0.3777, + "step": 12025 + }, + { + "epoch": 0.98, + "grad_norm": 2.3845853169481668, + "learning_rate": 7.579629529744225e-09, + "loss": 0.2514, + "step": 12026 + }, + { + "epoch": 0.98, + "grad_norm": 3.636504614812306, + "learning_rate": 7.506941870192851e-09, + "loss": 0.6073, + "step": 12027 + }, + { + "epoch": 0.98, + "grad_norm": 2.594125764328588, + "learning_rate": 7.434604162678271e-09, + "loss": 0.5927, + "step": 12028 + }, + { + "epoch": 0.98, + "grad_norm": 5.09639929471377, + "learning_rate": 7.362616412269763e-09, + "loss": 0.8389, + "step": 12029 + }, + { + "epoch": 0.98, + "grad_norm": 4.47282169012807, + "learning_rate": 7.290978624013289e-09, + "loss": 0.6142, + "step": 12030 + }, + { + "epoch": 0.98, + "grad_norm": 2.471227365977711, + "learning_rate": 7.2196908029315e-09, + "loss": 0.3131, + "step": 12031 + }, + { + "epoch": 0.98, + "grad_norm": 3.8664298739220238, + "learning_rate": 7.148752954020955e-09, + "loss": 0.6902, + "step": 12032 + }, + { + "epoch": 0.98, + "grad_norm": 3.0530976836411043, + "learning_rate": 7.07816508225323e-09, + "loss": 0.4628, + "step": 12033 + }, + { + "epoch": 0.98, + "grad_norm": 5.484740333351431, + "learning_rate": 7.0079271925771465e-09, + "loss": 1.124, + "step": 12034 + }, + { + "epoch": 0.98, + "grad_norm": 4.338984250732091, + "learning_rate": 6.9380392899159875e-09, + "loss": 0.9777, + "step": 12035 + }, + { + "epoch": 0.98, + "grad_norm": 4.42961326301565, + "learning_rate": 6.868501379168058e-09, + "loss": 0.6223, + "step": 12036 + }, + { + "epoch": 0.98, + "grad_norm": 5.52526406643918, + "learning_rate": 6.799313465208346e-09, + "loss": 1.2402, + "step": 12037 + }, + { + "epoch": 0.98, + "grad_norm": 3.624715613096466, + "learning_rate": 6.730475552886306e-09, + "loss": 0.6295, + "step": 12038 + }, + { + "epoch": 0.98, + "grad_norm": 2.777159478225706, + "learning_rate": 6.661987647026969e-09, + "loss": 0.6173, + "step": 12039 + }, + { + "epoch": 0.98, + "grad_norm": 2.5679183930589797, + "learning_rate": 6.593849752430936e-09, + "loss": 0.4285, + "step": 12040 + }, + { + "epoch": 0.98, + "grad_norm": 4.2585865016766355, + "learning_rate": 6.5260618738749445e-09, + "loss": 0.7382, + "step": 12041 + }, + { + "epoch": 0.98, + "grad_norm": 4.215975875018411, + "learning_rate": 6.458624016110193e-09, + "loss": 0.9977, + "step": 12042 + }, + { + "epoch": 0.98, + "grad_norm": 2.232271902640726, + "learning_rate": 6.391536183864566e-09, + "loss": 0.3883, + "step": 12043 + }, + { + "epoch": 0.98, + "grad_norm": 4.153310257696214, + "learning_rate": 6.324798381839303e-09, + "loss": 0.7757, + "step": 12044 + }, + { + "epoch": 0.98, + "grad_norm": 2.174932807336339, + "learning_rate": 6.2584106147134395e-09, + "loss": 0.3164, + "step": 12045 + }, + { + "epoch": 0.98, + "grad_norm": 2.3192495238076245, + "learning_rate": 6.192372887139919e-09, + "loss": 0.3615, + "step": 12046 + }, + { + "epoch": 0.98, + "grad_norm": 4.130341919061836, + "learning_rate": 6.126685203747818e-09, + "loss": 0.802, + "step": 12047 + }, + { + "epoch": 0.98, + "grad_norm": 4.352963632177688, + "learning_rate": 6.0613475691417845e-09, + "loss": 0.5643, + "step": 12048 + }, + { + "epoch": 0.98, + "grad_norm": 5.739231463377654, + "learning_rate": 5.996359987902045e-09, + "loss": 1.1208, + "step": 12049 + }, + { + "epoch": 0.98, + "grad_norm": 4.948863433441496, + "learning_rate": 5.931722464583289e-09, + "loss": 1.0449, + "step": 12050 + }, + { + "epoch": 0.99, + "grad_norm": 1.2005746125622068, + "learning_rate": 5.8674350037163374e-09, + "loss": 0.1879, + "step": 12051 + }, + { + "epoch": 0.99, + "grad_norm": 2.313853268223101, + "learning_rate": 5.803497609807585e-09, + "loss": 0.3264, + "step": 12052 + }, + { + "epoch": 0.99, + "grad_norm": 5.948738610094623, + "learning_rate": 5.7399102873390015e-09, + "loss": 1.0435, + "step": 12053 + }, + { + "epoch": 0.99, + "grad_norm": 4.410587302649346, + "learning_rate": 5.676673040767578e-09, + "loss": 0.6349, + "step": 12054 + }, + { + "epoch": 0.99, + "grad_norm": 3.5198431546681044, + "learning_rate": 5.613785874525879e-09, + "loss": 0.8272, + "step": 12055 + }, + { + "epoch": 0.99, + "grad_norm": 3.5320677154198457, + "learning_rate": 5.551248793022601e-09, + "loss": 0.707, + "step": 12056 + }, + { + "epoch": 0.99, + "grad_norm": 2.365378882592431, + "learning_rate": 5.48906180064035e-09, + "loss": 0.5722, + "step": 12057 + }, + { + "epoch": 0.99, + "grad_norm": 4.508363246514844, + "learning_rate": 5.42722490173897e-09, + "loss": 0.6041, + "step": 12058 + }, + { + "epoch": 0.99, + "grad_norm": 3.7126384252880693, + "learning_rate": 5.365738100652773e-09, + "loss": 0.7763, + "step": 12059 + }, + { + "epoch": 0.99, + "grad_norm": 3.957741692123336, + "learning_rate": 5.304601401691089e-09, + "loss": 0.64, + "step": 12060 + }, + { + "epoch": 0.99, + "grad_norm": 5.367737865698479, + "learning_rate": 5.243814809140491e-09, + "loss": 0.8158, + "step": 12061 + }, + { + "epoch": 0.99, + "grad_norm": 5.098217074943541, + "learning_rate": 5.1833783272609016e-09, + "loss": 1.0765, + "step": 12062 + }, + { + "epoch": 0.99, + "grad_norm": 3.065663853354703, + "learning_rate": 5.123291960288934e-09, + "loss": 0.8675, + "step": 12063 + }, + { + "epoch": 0.99, + "grad_norm": 3.3357442866701796, + "learning_rate": 5.0635557124362185e-09, + "loss": 0.6963, + "step": 12064 + }, + { + "epoch": 0.99, + "grad_norm": 3.910208697172912, + "learning_rate": 5.0041695878905175e-09, + "loss": 0.9939, + "step": 12065 + }, + { + "epoch": 0.99, + "grad_norm": 4.52745161151713, + "learning_rate": 4.94513359081461e-09, + "loss": 0.7236, + "step": 12066 + }, + { + "epoch": 0.99, + "grad_norm": 2.697380862613846, + "learning_rate": 4.886447725345744e-09, + "loss": 0.3768, + "step": 12067 + }, + { + "epoch": 0.99, + "grad_norm": 4.151388769159717, + "learning_rate": 4.82811199559785e-09, + "loss": 0.8545, + "step": 12068 + }, + { + "epoch": 0.99, + "grad_norm": 7.4866699720702625, + "learning_rate": 4.7701264056609905e-09, + "loss": 1.1383, + "step": 12069 + }, + { + "epoch": 0.99, + "grad_norm": 2.836390930087791, + "learning_rate": 4.712490959598581e-09, + "loss": 0.4125, + "step": 12070 + }, + { + "epoch": 0.99, + "grad_norm": 3.1532673518670102, + "learning_rate": 4.655205661450724e-09, + "loss": 0.7576, + "step": 12071 + }, + { + "epoch": 0.99, + "grad_norm": 1.4577000518900594, + "learning_rate": 4.5982705152336496e-09, + "loss": 0.1866, + "step": 12072 + }, + { + "epoch": 0.99, + "grad_norm": 3.3940924734291844, + "learning_rate": 4.5416855249375e-09, + "loss": 0.4667, + "step": 12073 + }, + { + "epoch": 0.99, + "grad_norm": 4.407259559105291, + "learning_rate": 4.485450694528548e-09, + "loss": 0.8662, + "step": 12074 + }, + { + "epoch": 0.99, + "grad_norm": 3.0699053562622516, + "learning_rate": 4.42956602794975e-09, + "loss": 0.441, + "step": 12075 + }, + { + "epoch": 0.99, + "grad_norm": 4.38530006511475, + "learning_rate": 4.374031529116862e-09, + "loss": 0.7571, + "step": 12076 + }, + { + "epoch": 0.99, + "grad_norm": 4.387214073840642, + "learning_rate": 4.318847201923437e-09, + "loss": 0.9429, + "step": 12077 + }, + { + "epoch": 0.99, + "grad_norm": 4.66135024157567, + "learning_rate": 4.264013050238047e-09, + "loss": 0.801, + "step": 12078 + }, + { + "epoch": 0.99, + "grad_norm": 4.0667162608964755, + "learning_rate": 4.209529077903174e-09, + "loss": 0.5635, + "step": 12079 + }, + { + "epoch": 0.99, + "grad_norm": 4.742693783361723, + "learning_rate": 4.155395288739095e-09, + "loss": 0.6254, + "step": 12080 + }, + { + "epoch": 0.99, + "grad_norm": 3.044281661809891, + "learning_rate": 4.101611686539442e-09, + "loss": 0.6202, + "step": 12081 + }, + { + "epoch": 0.99, + "grad_norm": 3.0761978720826253, + "learning_rate": 4.0481782750745325e-09, + "loss": 0.4201, + "step": 12082 + }, + { + "epoch": 0.99, + "grad_norm": 6.880557286207846, + "learning_rate": 3.995095058090259e-09, + "loss": 0.9233, + "step": 12083 + }, + { + "epoch": 0.99, + "grad_norm": 3.5296076657355235, + "learning_rate": 3.942362039306979e-09, + "loss": 0.5142, + "step": 12084 + }, + { + "epoch": 0.99, + "grad_norm": 4.464720030419047, + "learning_rate": 3.889979222421181e-09, + "loss": 1.1719, + "step": 12085 + }, + { + "epoch": 0.99, + "grad_norm": 4.1649747010164475, + "learning_rate": 3.837946611104926e-09, + "loss": 0.7354, + "step": 12086 + }, + { + "epoch": 0.99, + "grad_norm": 2.6497758735676253, + "learning_rate": 3.786264209004742e-09, + "loss": 0.5491, + "step": 12087 + }, + { + "epoch": 0.99, + "grad_norm": 6.349024582490853, + "learning_rate": 3.7349320197443974e-09, + "loss": 1.4597, + "step": 12088 + }, + { + "epoch": 0.99, + "grad_norm": 1.7430878185833116, + "learning_rate": 3.6839500469210145e-09, + "loss": 0.3025, + "step": 12089 + }, + { + "epoch": 0.99, + "grad_norm": 5.420295160858151, + "learning_rate": 3.633318294108956e-09, + "loss": 0.7601, + "step": 12090 + }, + { + "epoch": 0.99, + "grad_norm": 3.9294396445199298, + "learning_rate": 3.583036764857051e-09, + "loss": 0.8451, + "step": 12091 + }, + { + "epoch": 0.99, + "grad_norm": 4.166301839906999, + "learning_rate": 3.533105462689701e-09, + "loss": 0.774, + "step": 12092 + }, + { + "epoch": 0.99, + "grad_norm": 4.400574956124744, + "learning_rate": 3.483524391106885e-09, + "loss": 0.9232, + "step": 12093 + }, + { + "epoch": 0.99, + "grad_norm": 2.33060608225683, + "learning_rate": 3.434293553584156e-09, + "loss": 0.3535, + "step": 12094 + }, + { + "epoch": 0.99, + "grad_norm": 1.7566506342748722, + "learning_rate": 3.385412953572087e-09, + "loss": 0.2392, + "step": 12095 + }, + { + "epoch": 0.99, + "grad_norm": 3.964425428089187, + "learning_rate": 3.3368825944973813e-09, + "loss": 0.6337, + "step": 12096 + }, + { + "epoch": 0.99, + "grad_norm": 3.5144354601565175, + "learning_rate": 3.2887024797617628e-09, + "loss": 0.4264, + "step": 12097 + }, + { + "epoch": 0.99, + "grad_norm": 4.3015414214756476, + "learning_rate": 3.2408726127425294e-09, + "loss": 0.7685, + "step": 12098 + }, + { + "epoch": 0.99, + "grad_norm": 3.7046508179692763, + "learning_rate": 3.1933929967919996e-09, + "loss": 0.6863, + "step": 12099 + }, + { + "epoch": 0.99, + "grad_norm": 3.050043842814273, + "learning_rate": 3.146263635238622e-09, + "loss": 0.498, + "step": 12100 + }, + { + "epoch": 0.99, + "grad_norm": 5.431672901040743, + "learning_rate": 3.0994845313853106e-09, + "loss": 0.87, + "step": 12101 + }, + { + "epoch": 0.99, + "grad_norm": 6.753347493145347, + "learning_rate": 3.0530556885116637e-09, + "loss": 1.0715, + "step": 12102 + }, + { + "epoch": 0.99, + "grad_norm": 2.7226135852669464, + "learning_rate": 3.0069771098723e-09, + "loss": 0.4195, + "step": 12103 + }, + { + "epoch": 0.99, + "grad_norm": 3.2460739822213736, + "learning_rate": 2.9612487986968587e-09, + "loss": 0.8109, + "step": 12104 + }, + { + "epoch": 0.99, + "grad_norm": 2.845247910493663, + "learning_rate": 2.915870758190553e-09, + "loss": 0.4189, + "step": 12105 + }, + { + "epoch": 0.99, + "grad_norm": 3.5210622289945293, + "learning_rate": 2.870842991534173e-09, + "loss": 0.3681, + "step": 12106 + }, + { + "epoch": 0.99, + "grad_norm": 3.3116643272956425, + "learning_rate": 2.826165501884082e-09, + "loss": 0.5471, + "step": 12107 + }, + { + "epoch": 0.99, + "grad_norm": 5.084274314757648, + "learning_rate": 2.7818382923722188e-09, + "loss": 0.7128, + "step": 12108 + }, + { + "epoch": 0.99, + "grad_norm": 5.071137499790908, + "learning_rate": 2.737861366105543e-09, + "loss": 0.831, + "step": 12109 + }, + { + "epoch": 0.99, + "grad_norm": 3.2311637576709096, + "learning_rate": 2.694234726166589e-09, + "loss": 0.8248, + "step": 12110 + }, + { + "epoch": 0.99, + "grad_norm": 3.6235213770575205, + "learning_rate": 2.650958375613466e-09, + "loss": 0.7375, + "step": 12111 + }, + { + "epoch": 0.99, + "grad_norm": 1.0996566294527779, + "learning_rate": 2.6080323174798583e-09, + "loss": 0.152, + "step": 12112 + }, + { + "epoch": 0.99, + "grad_norm": 4.692264412669275, + "learning_rate": 2.565456554773915e-09, + "loss": 0.6277, + "step": 12113 + }, + { + "epoch": 0.99, + "grad_norm": 1.7889331061473897, + "learning_rate": 2.5232310904810265e-09, + "loss": 0.2284, + "step": 12114 + }, + { + "epoch": 0.99, + "grad_norm": 4.218512258355676, + "learning_rate": 2.4813559275604914e-09, + "loss": 0.7349, + "step": 12115 + }, + { + "epoch": 0.99, + "grad_norm": 4.812025075684706, + "learning_rate": 2.439831068947185e-09, + "loss": 1.0505, + "step": 12116 + }, + { + "epoch": 0.99, + "grad_norm": 2.8313037967846735, + "learning_rate": 2.3986565175526665e-09, + "loss": 0.6363, + "step": 12117 + }, + { + "epoch": 0.99, + "grad_norm": 4.111197016156909, + "learning_rate": 2.357832276262961e-09, + "loss": 0.6512, + "step": 12118 + }, + { + "epoch": 0.99, + "grad_norm": 3.696129752437114, + "learning_rate": 2.3173583479391137e-09, + "loss": 0.5707, + "step": 12119 + }, + { + "epoch": 0.99, + "grad_norm": 3.2821224263699524, + "learning_rate": 2.2772347354182987e-09, + "loss": 0.4001, + "step": 12120 + }, + { + "epoch": 0.99, + "grad_norm": 3.429749999447198, + "learning_rate": 2.2374614415132666e-09, + "loss": 1.0118, + "step": 12121 + }, + { + "epoch": 0.99, + "grad_norm": 5.773818785295129, + "learning_rate": 2.198038469011787e-09, + "loss": 1.1201, + "step": 12122 + }, + { + "epoch": 0.99, + "grad_norm": 1.1309515054933859, + "learning_rate": 2.1589658206772058e-09, + "loss": 0.1553, + "step": 12123 + }, + { + "epoch": 0.99, + "grad_norm": 3.181621946353914, + "learning_rate": 2.1202434992484423e-09, + "loss": 0.6917, + "step": 12124 + }, + { + "epoch": 0.99, + "grad_norm": 5.798752576064015, + "learning_rate": 2.081871507439992e-09, + "loss": 1.1969, + "step": 12125 + }, + { + "epoch": 0.99, + "grad_norm": 3.898025030454768, + "learning_rate": 2.0438498479413705e-09, + "loss": 0.5628, + "step": 12126 + }, + { + "epoch": 0.99, + "grad_norm": 4.041043124480218, + "learning_rate": 2.0061785234176677e-09, + "loss": 0.6858, + "step": 12127 + }, + { + "epoch": 0.99, + "grad_norm": 4.066576743807657, + "learning_rate": 1.9688575365095497e-09, + "loss": 0.8998, + "step": 12128 + }, + { + "epoch": 0.99, + "grad_norm": 3.3224466226674085, + "learning_rate": 1.9318868898327015e-09, + "loss": 0.4893, + "step": 12129 + }, + { + "epoch": 0.99, + "grad_norm": 4.3158964199414225, + "learning_rate": 1.8952665859789387e-09, + "loss": 1.0153, + "step": 12130 + }, + { + "epoch": 0.99, + "grad_norm": 3.705210911064366, + "learning_rate": 1.8589966275156523e-09, + "loss": 0.7379, + "step": 12131 + }, + { + "epoch": 0.99, + "grad_norm": 3.579219121921822, + "learning_rate": 1.8230770169841427e-09, + "loss": 0.7464, + "step": 12132 + }, + { + "epoch": 0.99, + "grad_norm": 3.598790107251219, + "learning_rate": 1.787507756903506e-09, + "loss": 0.3411, + "step": 12133 + }, + { + "epoch": 0.99, + "grad_norm": 3.074528102971161, + "learning_rate": 1.7522888497656376e-09, + "loss": 0.6873, + "step": 12134 + }, + { + "epoch": 0.99, + "grad_norm": 2.343070224980005, + "learning_rate": 1.7174202980402287e-09, + "loss": 0.483, + "step": 12135 + }, + { + "epoch": 0.99, + "grad_norm": 4.61630229921253, + "learning_rate": 1.6829021041708805e-09, + "loss": 0.8581, + "step": 12136 + }, + { + "epoch": 0.99, + "grad_norm": 5.0913027516957134, + "learning_rate": 1.6487342705773234e-09, + "loss": 1.0862, + "step": 12137 + }, + { + "epoch": 0.99, + "grad_norm": 4.793490104705858, + "learning_rate": 1.6149167996548643e-09, + "loss": 0.9786, + "step": 12138 + }, + { + "epoch": 0.99, + "grad_norm": 4.979654398550657, + "learning_rate": 1.5814496937732737e-09, + "loss": 1.2805, + "step": 12139 + }, + { + "epoch": 0.99, + "grad_norm": 6.044335416386995, + "learning_rate": 1.5483329552790082e-09, + "loss": 1.0259, + "step": 12140 + }, + { + "epoch": 0.99, + "grad_norm": 4.232417148180656, + "learning_rate": 1.5155665864935442e-09, + "loss": 0.9337, + "step": 12141 + }, + { + "epoch": 0.99, + "grad_norm": 3.1515921609154787, + "learning_rate": 1.4831505897128229e-09, + "loss": 0.5646, + "step": 12142 + }, + { + "epoch": 0.99, + "grad_norm": 3.4521180420535695, + "learning_rate": 1.4510849672100258e-09, + "loss": 0.3731, + "step": 12143 + }, + { + "epoch": 0.99, + "grad_norm": 2.650689960042224, + "learning_rate": 1.4193697212322444e-09, + "loss": 0.5591, + "step": 12144 + }, + { + "epoch": 0.99, + "grad_norm": 3.795854212893908, + "learning_rate": 1.3880048540032554e-09, + "loss": 0.5542, + "step": 12145 + }, + { + "epoch": 0.99, + "grad_norm": 4.290467557286918, + "learning_rate": 1.3569903677207453e-09, + "loss": 1.0103, + "step": 12146 + }, + { + "epoch": 0.99, + "grad_norm": 2.233485159890757, + "learning_rate": 1.3263262645585307e-09, + "loss": 0.3186, + "step": 12147 + }, + { + "epoch": 0.99, + "grad_norm": 3.040748043743469, + "learning_rate": 1.2960125466671137e-09, + "loss": 0.4931, + "step": 12148 + }, + { + "epoch": 0.99, + "grad_norm": 4.1337209065940765, + "learning_rate": 1.266049216170906e-09, + "loss": 0.9532, + "step": 12149 + }, + { + "epoch": 0.99, + "grad_norm": 4.040915504923403, + "learning_rate": 1.2364362751698944e-09, + "loss": 0.9432, + "step": 12150 + }, + { + "epoch": 0.99, + "grad_norm": 3.61108572071553, + "learning_rate": 1.2071737257401962e-09, + "loss": 0.6336, + "step": 12151 + }, + { + "epoch": 0.99, + "grad_norm": 4.652113726957336, + "learning_rate": 1.1782615699323929e-09, + "loss": 0.5952, + "step": 12152 + }, + { + "epoch": 0.99, + "grad_norm": 3.98305392012479, + "learning_rate": 1.149699809773752e-09, + "loss": 0.5316, + "step": 12153 + }, + { + "epoch": 0.99, + "grad_norm": 4.605235076813467, + "learning_rate": 1.1214884472660059e-09, + "loss": 0.6964, + "step": 12154 + }, + { + "epoch": 0.99, + "grad_norm": 4.000185190848004, + "learning_rate": 1.0936274843864615e-09, + "loss": 0.992, + "step": 12155 + }, + { + "epoch": 0.99, + "grad_norm": 5.933712871831323, + "learning_rate": 1.0661169230891111e-09, + "loss": 0.8956, + "step": 12156 + }, + { + "epoch": 0.99, + "grad_norm": 3.976258849223066, + "learning_rate": 1.038956765300747e-09, + "loss": 1.0503, + "step": 12157 + }, + { + "epoch": 0.99, + "grad_norm": 2.8920392286889363, + "learning_rate": 1.012147012926512e-09, + "loss": 0.361, + "step": 12158 + }, + { + "epoch": 0.99, + "grad_norm": 0.9254614358134056, + "learning_rate": 9.856876678443484e-10, + "loss": 0.1275, + "step": 12159 + }, + { + "epoch": 0.99, + "grad_norm": 6.667265819278876, + "learning_rate": 9.595787319105488e-10, + "loss": 0.814, + "step": 12160 + }, + { + "epoch": 0.99, + "grad_norm": 6.063537108631886, + "learning_rate": 9.338202069536506e-10, + "loss": 1.0902, + "step": 12161 + }, + { + "epoch": 0.99, + "grad_norm": 5.345372693382865, + "learning_rate": 9.084120947805419e-10, + "loss": 0.9118, + "step": 12162 + }, + { + "epoch": 0.99, + "grad_norm": 5.53785749891009, + "learning_rate": 8.833543971714653e-10, + "loss": 0.6818, + "step": 12163 + }, + { + "epoch": 0.99, + "grad_norm": 5.580761358394674, + "learning_rate": 8.586471158827936e-10, + "loss": 1.1031, + "step": 12164 + }, + { + "epoch": 0.99, + "grad_norm": 2.9874127649923854, + "learning_rate": 8.342902526470298e-10, + "loss": 0.7016, + "step": 12165 + }, + { + "epoch": 0.99, + "grad_norm": 2.721465501571412, + "learning_rate": 8.102838091705867e-10, + "loss": 0.4219, + "step": 12166 + }, + { + "epoch": 0.99, + "grad_norm": 3.6672290208821887, + "learning_rate": 7.866277871371175e-10, + "loss": 1.019, + "step": 12167 + }, + { + "epoch": 0.99, + "grad_norm": 3.720348100268207, + "learning_rate": 7.633221882041852e-10, + "loss": 0.903, + "step": 12168 + }, + { + "epoch": 0.99, + "grad_norm": 4.33209776877341, + "learning_rate": 7.403670140054831e-10, + "loss": 0.9556, + "step": 12169 + }, + { + "epoch": 0.99, + "grad_norm": 1.9504423045579515, + "learning_rate": 7.177622661508343e-10, + "loss": 0.3021, + "step": 12170 + }, + { + "epoch": 0.99, + "grad_norm": 6.979290745231007, + "learning_rate": 6.955079462234171e-10, + "loss": 1.5872, + "step": 12171 + }, + { + "epoch": 0.99, + "grad_norm": 3.9990523954197768, + "learning_rate": 6.73604055784205e-10, + "loss": 0.449, + "step": 12172 + }, + { + "epoch": 0.99, + "grad_norm": 4.5592680367105105, + "learning_rate": 6.520505963680813e-10, + "loss": 0.7497, + "step": 12173 + }, + { + "epoch": 1.0, + "grad_norm": 3.9589191050959562, + "learning_rate": 6.308475694860594e-10, + "loss": 0.6861, + "step": 12174 + }, + { + "epoch": 1.0, + "grad_norm": 3.6985531257099646, + "learning_rate": 6.099949766241731e-10, + "loss": 0.5641, + "step": 12175 + }, + { + "epoch": 1.0, + "grad_norm": 5.523605378907456, + "learning_rate": 5.894928192440308e-10, + "loss": 0.9872, + "step": 12176 + }, + { + "epoch": 1.0, + "grad_norm": 3.1178306652394356, + "learning_rate": 5.693410987833714e-10, + "loss": 0.7559, + "step": 12177 + }, + { + "epoch": 1.0, + "grad_norm": 3.862073416113283, + "learning_rate": 5.495398166538435e-10, + "loss": 0.6149, + "step": 12178 + }, + { + "epoch": 1.0, + "grad_norm": 5.5432532289577665, + "learning_rate": 5.30088974244336e-10, + "loss": 0.9842, + "step": 12179 + }, + { + "epoch": 1.0, + "grad_norm": 2.801296327518294, + "learning_rate": 5.109885729176478e-10, + "loss": 0.5074, + "step": 12180 + }, + { + "epoch": 1.0, + "grad_norm": 5.111438497992996, + "learning_rate": 4.922386140127078e-10, + "loss": 1.0625, + "step": 12181 + }, + { + "epoch": 1.0, + "grad_norm": 3.462694911081913, + "learning_rate": 4.738390988440201e-10, + "loss": 0.8326, + "step": 12182 + }, + { + "epoch": 1.0, + "grad_norm": 3.6998639465625174, + "learning_rate": 4.5579002870110854e-10, + "loss": 0.6114, + "step": 12183 + }, + { + "epoch": 1.0, + "grad_norm": 6.221654366729723, + "learning_rate": 4.380914048490725e-10, + "loss": 0.9554, + "step": 12184 + }, + { + "epoch": 1.0, + "grad_norm": 1.3629454830687455, + "learning_rate": 4.207432285291413e-10, + "loss": 0.1495, + "step": 12185 + }, + { + "epoch": 1.0, + "grad_norm": 3.8014041633328066, + "learning_rate": 4.0374550095645394e-10, + "loss": 0.7206, + "step": 12186 + }, + { + "epoch": 1.0, + "grad_norm": 4.268004847733571, + "learning_rate": 3.8709822332339e-10, + "loss": 0.6349, + "step": 12187 + }, + { + "epoch": 1.0, + "grad_norm": 5.32831186500912, + "learning_rate": 3.708013967956836e-10, + "loss": 0.9325, + "step": 12188 + }, + { + "epoch": 1.0, + "grad_norm": 2.2594682309992455, + "learning_rate": 3.5485502251686457e-10, + "loss": 0.326, + "step": 12189 + }, + { + "epoch": 1.0, + "grad_norm": 1.1085646358781025, + "learning_rate": 3.3925910160381713e-10, + "loss": 0.1365, + "step": 12190 + }, + { + "epoch": 1.0, + "grad_norm": 4.098294810681433, + "learning_rate": 3.2401363515011106e-10, + "loss": 0.7592, + "step": 12191 + }, + { + "epoch": 1.0, + "grad_norm": 3.3957241265330365, + "learning_rate": 3.091186242248911e-10, + "loss": 0.6154, + "step": 12192 + }, + { + "epoch": 1.0, + "grad_norm": 4.451104193699946, + "learning_rate": 2.9457406987121184e-10, + "loss": 0.788, + "step": 12193 + }, + { + "epoch": 1.0, + "grad_norm": 3.835245723800428, + "learning_rate": 2.8037997310936814e-10, + "loss": 0.5518, + "step": 12194 + }, + { + "epoch": 1.0, + "grad_norm": 4.925595085825402, + "learning_rate": 2.6653633493411993e-10, + "loss": 0.9264, + "step": 12195 + }, + { + "epoch": 1.0, + "grad_norm": 5.9349646188925025, + "learning_rate": 2.530431563152469e-10, + "loss": 0.8856, + "step": 12196 + }, + { + "epoch": 1.0, + "grad_norm": 3.2103321274378227, + "learning_rate": 2.3990043819976937e-10, + "loss": 0.5727, + "step": 12197 + }, + { + "epoch": 1.0, + "grad_norm": 5.350349197288943, + "learning_rate": 2.2710818150750713e-10, + "loss": 0.981, + "step": 12198 + }, + { + "epoch": 1.0, + "grad_norm": 3.35746844741046, + "learning_rate": 2.1466638713663057e-10, + "loss": 0.764, + "step": 12199 + }, + { + "epoch": 1.0, + "grad_norm": 4.045052932146058, + "learning_rate": 2.0257505595810966e-10, + "loss": 0.7412, + "step": 12200 + }, + { + "epoch": 1.0, + "grad_norm": 2.933136007973078, + "learning_rate": 1.908341888195997e-10, + "loss": 0.3473, + "step": 12201 + }, + { + "epoch": 1.0, + "grad_norm": 4.204389975478312, + "learning_rate": 1.7944378654488616e-10, + "loss": 0.9525, + "step": 12202 + }, + { + "epoch": 1.0, + "grad_norm": 5.066516739160856, + "learning_rate": 1.6840384993166425e-10, + "loss": 0.7245, + "step": 12203 + }, + { + "epoch": 1.0, + "grad_norm": 4.003356870050669, + "learning_rate": 1.5771437975375948e-10, + "loss": 0.8505, + "step": 12204 + }, + { + "epoch": 1.0, + "grad_norm": 3.6243068643927847, + "learning_rate": 1.4737537676112745e-10, + "loss": 0.561, + "step": 12205 + }, + { + "epoch": 1.0, + "grad_norm": 4.980907710831678, + "learning_rate": 1.373868416776336e-10, + "loss": 0.7577, + "step": 12206 + }, + { + "epoch": 1.0, + "grad_norm": 3.7313240058721955, + "learning_rate": 1.277487752043838e-10, + "loss": 0.6469, + "step": 12207 + }, + { + "epoch": 1.0, + "grad_norm": 3.0864475947641568, + "learning_rate": 1.1846117801583846e-10, + "loss": 0.5824, + "step": 12208 + }, + { + "epoch": 1.0, + "grad_norm": 4.742322727664848, + "learning_rate": 1.0952405076425365e-10, + "loss": 0.9012, + "step": 12209 + }, + { + "epoch": 1.0, + "grad_norm": 4.228041571087677, + "learning_rate": 1.0093739407468495e-10, + "loss": 0.4204, + "step": 12210 + }, + { + "epoch": 1.0, + "grad_norm": 3.0323303040850136, + "learning_rate": 9.270120855053855e-11, + "loss": 0.6893, + "step": 12211 + }, + { + "epoch": 1.0, + "grad_norm": 2.983008160472688, + "learning_rate": 8.48154947680202e-11, + "loss": 0.6281, + "step": 12212 + }, + { + "epoch": 1.0, + "grad_norm": 4.856370516222304, + "learning_rate": 7.728025328002098e-11, + "loss": 0.6949, + "step": 12213 + }, + { + "epoch": 1.0, + "grad_norm": 3.861057508428135, + "learning_rate": 7.009548461500704e-11, + "loss": 0.5709, + "step": 12214 + }, + { + "epoch": 1.0, + "grad_norm": 2.8994326876419683, + "learning_rate": 6.326118927646452e-11, + "loss": 0.3508, + "step": 12215 + }, + { + "epoch": 1.0, + "grad_norm": 5.741387773543702, + "learning_rate": 5.677736774400977e-11, + "loss": 0.845, + "step": 12216 + }, + { + "epoch": 1.0, + "grad_norm": 3.373884783945141, + "learning_rate": 5.0644020471168894e-11, + "loss": 0.5915, + "step": 12217 + }, + { + "epoch": 1.0, + "grad_norm": 5.499579017923998, + "learning_rate": 4.4861147888708436e-11, + "loss": 1.2168, + "step": 12218 + }, + { + "epoch": 1.0, + "grad_norm": 3.0255508426308597, + "learning_rate": 3.942875040130467e-11, + "loss": 0.4464, + "step": 12219 + }, + { + "epoch": 1.0, + "grad_norm": 5.220051245827825, + "learning_rate": 3.4346828389764106e-11, + "loss": 0.8531, + "step": 12220 + }, + { + "epoch": 1.0, + "grad_norm": 4.085008211334056, + "learning_rate": 2.961538221102345e-11, + "loss": 0.5081, + "step": 12221 + }, + { + "epoch": 1.0, + "grad_norm": 3.9561086440767137, + "learning_rate": 2.523441219648426e-11, + "loss": 0.7413, + "step": 12222 + }, + { + "epoch": 1.0, + "grad_norm": 3.4472799777661978, + "learning_rate": 2.1203918652568102e-11, + "loss": 0.5137, + "step": 12223 + }, + { + "epoch": 1.0, + "grad_norm": 5.6581789617909575, + "learning_rate": 1.7523901862381844e-11, + "loss": 0.9722, + "step": 12224 + }, + { + "epoch": 1.0, + "grad_norm": 5.314810540569065, + "learning_rate": 1.4194362084052338e-11, + "loss": 0.9416, + "step": 12225 + }, + { + "epoch": 1.0, + "grad_norm": 3.561922239934296, + "learning_rate": 1.121529955017131e-11, + "loss": 0.4867, + "step": 12226 + }, + { + "epoch": 1.0, + "grad_norm": 4.230862525679776, + "learning_rate": 8.586714470570912e-12, + "loss": 0.717, + "step": 12227 + }, + { + "epoch": 1.0, + "grad_norm": 3.8264697118005886, + "learning_rate": 6.308607028993052e-12, + "loss": 0.8789, + "step": 12228 + }, + { + "epoch": 1.0, + "grad_norm": 3.792263429259282, + "learning_rate": 4.3809773847547365e-12, + "loss": 0.7892, + "step": 12229 + }, + { + "epoch": 1.0, + "grad_norm": 5.202397881453768, + "learning_rate": 2.803825673858285e-12, + "loss": 1.2797, + "step": 12230 + }, + { + "epoch": 1.0, + "grad_norm": 3.416111011046913, + "learning_rate": 1.5771520062157764e-12, + "loss": 0.3239, + "step": 12231 + }, + { + "epoch": 1.0, + "grad_norm": 4.859482561826479, + "learning_rate": 7.009564673143843e-13, + "loss": 1.079, + "step": 12232 + }, + { + "epoch": 1.0, + "grad_norm": 4.819431466634126, + "learning_rate": 1.7523911988170939e-13, + "loss": 0.7518, + "step": 12233 + }, + { + "epoch": 1.0, + "grad_norm": 6.337511901978712, + "learning_rate": 0.0, + "loss": 1.3796, + "step": 12234 + }, + { + "epoch": 1.0, + "step": 12234, + "total_flos": 1636528356546560.0, + "train_loss": 0.783344195976315, + "train_runtime": 55679.5489, + "train_samples_per_second": 1.758, + "train_steps_per_second": 0.22 + } + ], + "logging_steps": 1.0, + "max_steps": 12234, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 1000, + "total_flos": 1636528356546560.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}